@@ -271,7 +271,6 @@ def get_pull_changes(self, server_files):
271271 if not self .geodiff :
272272 return changes
273273
274- size_limit = int (os .environ .get ('DIFFS_LIMIT_SIZE' , 1024 * 1024 )) # with smaller values than limit download full file instead of diffs
275274 not_updated = []
276275 for file in changes ['updated' ]:
277276 # for small geodiff files it does not make sense to download diff and then apply it (slow)
@@ -294,8 +293,7 @@ def get_pull_changes(self, server_files):
294293 break # we found force update in history, does not make sense to download diffs
295294
296295 if is_updated :
297- if diffs and file ['size' ] > size_limit and diffs_size < file ['size' ]/ 2 :
298- file ['diffs' ] = diffs
296+ file ['diffs' ] = diffs
299297 else :
300298 not_updated .append (file )
301299
@@ -455,9 +453,23 @@ def apply_pull_changes(self, changes, temp_dir):
455453 if os .path .exists (f'{ dest } -shm' ):
456454 os .remove (f'{ dest } -shm' )
457455 else :
458- # just use server version of file to update both project file and its basefile
459- shutil .copy (src , dest )
460- shutil .copy (src , basefile )
456+ # The local file is not modified -> no rebase needed.
457+ # We just apply the diff between our copy and server to both the local copy and its basefile
458+ try :
459+ server_diff = self .fpath (f'{ path } -server_diff' , temp_dir ) # diff between server file and local basefile
460+ # TODO: it could happen that basefile does not exist.
461+ # It was either never created (e.g. when pushing without geodiff)
462+ # or it was deleted by mistake(?) by the user. We should detect that
463+ # when starting pull and download it as well
464+ self .geodiff .create_changeset (basefile , src , server_diff )
465+ self .geodiff .apply_changeset (dest , server_diff )
466+ self .geodiff .apply_changeset (basefile , server_diff )
467+ except (pygeodiff .GeoDiffLibError , pygeodiff .GeoDiffLibConflictError ):
468+ # something bad happened and we have failed to patch our local files - this should not happen if there
469+ # wasn't a schema change or something similar that geodiff can't handle.
470+ # FIXME: this is a last resort and may corrupt data! (we should warn user)
471+ shutil .copy (src , dest )
472+ shutil .copy (src , basefile )
461473 else :
462474 # backup if needed
463475 if path in modified and item ['checksum' ] != local_files_map [path ]['checksum' ]:
@@ -1011,6 +1023,11 @@ def pull_project(self, directory, parallel=True):
10111023 if local_version == server_info ["version" ]:
10121024 return # Project is up to date
10131025
1026+ # we either download a versioned file using diffs (strongly preferred),
1027+ # but if we don't have history with diffs (e.g. uploaded without diffs)
1028+ # then we just download the whole file
1029+ _pulling_file_with_diffs = lambda f : 'diffs' in f and len (f ['diffs' ]) != 0
1030+
10141031 temp_dir = mp .fpath_meta (f'fetch_{ local_version } -{ server_info ["version" ]} ' )
10151032 os .makedirs (temp_dir , exist_ok = True )
10161033 pull_changes = mp .get_pull_changes (server_info ["files" ])
@@ -1020,7 +1037,7 @@ def pull_project(self, directory, parallel=True):
10201037 fetch_files .append (f )
10211038 # extend fetch files download list with various version of diff files (if needed)
10221039 for f in pull_changes ["updated" ]:
1023- if 'diffs' in f :
1040+ if _pulling_file_with_diffs ( f ) :
10241041 for diff in f ['diffs' ]:
10251042 diff_file = copy .deepcopy (f )
10261043 for k , v in f ['history' ].items ():
@@ -1039,7 +1056,7 @@ def pull_project(self, directory, parallel=True):
10391056 with concurrent .futures .ThreadPoolExecutor () as executor :
10401057 futures_map = {}
10411058 for file in fetch_files :
1042- diff_only = 'diffs' in file
1059+ diff_only = _pulling_file_with_diffs ( f )
10431060 future = executor .submit (self ._download_file , project_path , file , temp_dir , parallel , diff_only )
10441061 futures_map [future ] = file
10451062
@@ -1052,13 +1069,13 @@ def pull_project(self, directory, parallel=True):
10521069 else :
10531070 for file in fetch_files :
10541071 # TODO check it does not fail, do some retry on ClientError
1055- diff_only = 'diffs' in file
1072+ diff_only = _pulling_file_with_diffs ( f )
10561073 self ._download_file (project_path , file , temp_dir , parallel , diff_only )
10571074
10581075 # make sure we can update geodiff reference files (aka. basefiles) with diffs or
10591076 # download their full versions so we have them up-to-date for applying changes
10601077 for file in pull_changes ['updated' ]:
1061- if 'diffs' not in file :
1078+ if not _pulling_file_with_diffs ( f ) :
10621079 continue
10631080 file ['version' ] = server_info ['version' ]
10641081 basefile = mp .fpath_meta (file ['path' ])
0 commit comments