# Standard Fernet encryption/decryption requires the whole file in memory,
# which imposes memory constraints. Instead, this program splits the file into
# blocks and encrypts/decrypts them separately.
# See https://stackoverflow.com/a/71068357.
#
# On-disk layout produced by encrypt_chunks_in_place and consumed by
# decrypt_chunks_in_place: a sequence of records, each a big-endian length
# prefix of _LENGTH_PREFIX_BYTES bytes followed by that many bytes of token.
_LENGTH_PREFIX_BYTES = 4


def _discard(path):
    """Delete `path`, treating an already-missing file as success."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def encrypt_chunks_in_place(encryption_key, path, block_size=1 << 16):
    """Encrypt the file at `path` block by block, replacing it on success.

    Args:
        encryption_key: Object exposing `encrypt(bytes) -> bytes`.
        path: Path (str) of the file to encrypt; overwritten on success.
        block_size: Number of plaintext bytes encrypted per record.

    Raises:
        ValueError: If `block_size` is smaller than 1.
        OverflowError: If a token is too long for the length prefix.
    """
    if block_size < 1:
        # read(0) yields nothing and read(-1) slurps the whole file; neither
        # is a usable chunk size, and the former would silently empty `path`.
        raise ValueError(f'block_size must be positive, got {block_size}.')

    # Sibling of `path` (same directory), so the final swap is a plain rename.
    tmp_path = path + str(uuid.uuid4())
    try:
        with open(path, 'rb') as input_file, \
                open(tmp_path, 'wb') as output_file:
            # Two-argument iter() stops at the first empty read, i.e. at EOF.
            for plain in iter(lambda: input_file.read(block_size), b''):
                token = encryption_key.encrypt(plain)
                output_file.write(
                    len(token).to_bytes(_LENGTH_PREFIX_BYTES, 'big'))
                output_file.write(token)
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises FileExistsError on Windows in that situation.
        os.replace(tmp_path, path)
    except BaseException:
        # Leave the original untouched and do not leak the partial output.
        _discard(tmp_path)
        raise


def decrypt_chunks_in_place(encryption_key, path):
    """Decrypt a file written by `encrypt_chunks_in_place`, replacing it.

    Args:
        encryption_key: Object exposing `decrypt(bytes) -> bytes`.
        path: Path (str) of the file to decrypt; overwritten on success.

    Raises:
        ValueError: If the file ends in the middle of a length prefix or in
            the middle of a record (e.g. an interrupted download).

    Any exception raised by `encryption_key.decrypt` propagates unchanged. On
    every failure the file at `path` is left as it was and the temporary
    output is removed.
    """
    tmp_path = path + str(uuid.uuid4())
    try:
        with open(path, 'rb') as input_file, \
                open(tmp_path, 'wb') as output_file:
            while True:
                header = input_file.read(_LENGTH_PREFIX_BYTES)
                if not header:
                    break  # Clean EOF on a record boundary.
                if len(header) < _LENGTH_PREFIX_BYTES:
                    raise ValueError(
                        f'{path} is truncated: incomplete chunk length.')
                token_size = int.from_bytes(header, 'big')
                token = input_file.read(token_size)
                if len(token) < token_size:
                    raise ValueError(
                        f'{path} is truncated: expected {token_size} bytes, '
                        f'got {len(token)}.')
                output_file.write(encryption_key.decrypt(token))
        os.replace(tmp_path, path)
    except BaseException:
        _discard(tmp_path)
        raise
drive.files().get_media(fileId=maybe_id, acknowledgeAbuse=True) - file = io.BytesIO() - downloader = MediaIoBaseDownload(file, request) - done = False - while done is False: - status, done = downloader.next_chunk() - print(F'Download {int(status.progress() * 100)}.') - - encrypted_file = file.getvalue() - token = args.key.decrypt(encrypted_file) - print(f'{args.path} decrypted.') path_in_tmp = tempfile.gettempdir() + '/' + str(uuid.uuid4()) - with open(path_in_tmp, 'wb+') as outfile: - outfile.write(token) - print(f'{args.path} written to {path_in_tmp}.') + with io.FileIO(path_in_tmp, mode='wb') as stream_input: + downloader = MediaIoBaseDownload(stream_input, request) + done = False + while done is False: + status, done = downloader.next_chunk() + print(f'Download {int(status.progress() * 100)}.') + + decrypt_chunks_in_place(args.key, path_in_tmp) + print(f'{args.path} decrypted.') if zipfile.is_zipfile(path_in_tmp): os.makedirs(args.output, exist_ok=True) @@ -134,11 +162,7 @@ def upload(args): shutil.copy(args.file, path_in_tmp) print(f'Copied {args.file} to {path_in_tmp}.') - with open(path_in_tmp, 'r+b') as f: - token = args.key.encrypt(f.read()) - f.seek(0) - f.write(token) - f.truncate() + encrypt_chunks_in_place(args.key, path_in_tmp) print(f'Encrypted {args.file} in {path_in_tmp}.') body = { 'name': path_in_tmp, 'originalFilename': path } -- 2.39.5