]> git.ekhem.eu.org Git - gdrive_knife.git/commitdiff
Reduce memory usage.
author Jakub Czajka <jakub@ekhem.eu.org>
Fri, 3 Nov 2023 00:14:13 +0000 (01:14 +0100)
committer Jakub Czajka <jakub@ekhem.eu.org>
Sun, 19 Nov 2023 13:58:41 +0000 (14:58 +0100)
- Download to file instead of memory.
- Encrypt and decrypt in place using chunks.

gdrive_knife.py

index bdadb848573ec8ba65868f1a66ea6ef1381ea713..c64fdfbb272088a05795dc988f2777ca35b642d7 100644 (file)
@@ -56,6 +56,38 @@ def get_file_id(drive, file_path):
         return None
     return maybe_id[0]['id']
 
+# Standard Fernet encryption/decryption requires the whole file in memory. This
+# has memory constraints. Instead, this program splits the file into blocks and
+# encrypts/decrypts them separately. See https://stackoverflow.com/a/71068357.
+def encrypt_chunks_in_place(encryption_key, path):
+    block = 1 << 16
+    tmp_path = path + str(uuid.uuid4())
+    with open(path, 'rb') as input_file, open(tmp_path, 'wb') as output_file:
+        while True:
+            unencrypted_bytes = input_file.read(block)
+            if len(unencrypted_bytes) == 0:
+                break
+            encrypted_bytes = encryption_key.encrypt(unencrypted_bytes)
+            bytes_as_int = len(encrypted_bytes).to_bytes(4, 'big')
+            output_file.write(bytes_as_int)
+            output_file.write(encrypted_bytes)
+            if len(unencrypted_bytes) < block:
+                break
+    os.rename(tmp_path, path)
+
+def decrypt_chunks_in_place(encryption_key, path):
+    tmp_path = path + str(uuid.uuid4())
+    with open(path, 'rb') as input_file, open(tmp_path, 'wb') as output_file:
+        while True:
+            encrypted_bytes = input_file.read(4)
+            if len(encrypted_bytes) == 0:
+                break
+            bytes_as_int = int.from_bytes(encrypted_bytes, 'big')
+            chunk = input_file.read(bytes_as_int)
+            decrypted_bytes = encryption_key.decrypt(chunk)
+            output_file.write(decrypted_bytes)
+    os.rename(tmp_path, path)
+
 def auth(args):
     creds = None
     if os.path.exists(args.token):
@@ -93,21 +125,17 @@ def download(args):
         sys.exit(1)
 
     request = drive.files().get_media(fileId=maybe_id, acknowledgeAbuse=True)
-    file = io.BytesIO()
-    downloader = MediaIoBaseDownload(file, request)
-    done = False
-    while done is False:
-        status, done = downloader.next_chunk()
-        print(F'Download {int(status.progress() * 100)}.')
-
-    encrypted_file = file.getvalue()
-    token = args.key.decrypt(encrypted_file)
-    print(f'{args.path} decrypted.')
 
     path_in_tmp = tempfile.gettempdir() + '/' + str(uuid.uuid4())
-    with open(path_in_tmp, 'wb+') as outfile:
-        outfile.write(token)
-    print(f'{args.path} written to {path_in_tmp}.')
+    with io.FileIO(path_in_tmp, mode='wb') as stream_input:
+        downloader = MediaIoBaseDownload(stream_input, request)
+        done = False
+        while done is False:
+            status, done = downloader.next_chunk()
+            print(f'Download {int(status.progress() * 100)}.')
+
+    decrypt_chunks_in_place(args.key, path_in_tmp)
+    print(f'{args.path} decrypted.')
 
     if zipfile.is_zipfile(path_in_tmp):
         os.makedirs(args.output, exist_ok=True)
@@ -134,11 +162,7 @@ def upload(args):
         shutil.copy(args.file, path_in_tmp)
         print(f'Copied {args.file} to {path_in_tmp}.')
 
-    with open(path_in_tmp, 'r+b') as f:
-         token = args.key.encrypt(f.read())
-         f.seek(0)
-         f.write(token)
-         f.truncate()
+    encrypt_chunks_in_place(args.key, path_in_tmp)
     print(f'Encrypted {args.file} in {path_in_tmp}.')
 
     body = { 'name': path_in_tmp, 'originalFilename': path }