geminispace.info

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/geminispace.info.git
Log | Files | Refs | README | LICENSE

commit ff6a81da4088958affce70a91e60f5e248242156
parent cc7082f08d546dc4542263ce0068dec7b041f5a6
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Sun, 10 May 2020 10:39:05 -0400

[crawl] Backup old index before running crawl

Diffstat:
M gus/crawl.py | 9 +++++++++
1 file changed, 9 insertions(+), 0 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -47,7 +47,16 @@ SEED_URLS = [
 ]
 
 
+def backup_old_index(index_dir, backup_dir):
+    last_index_modification_time = datetime.fromtimestamp(os.path.getmtime(index_dir))
+    print("Backing up last index from {:%Y-%m-%d}...".format(last_index_modification_time))
+    backup_index_dir = backup_dir + "/{:%Y-%m-%d}".format(last_index_modification_time)
+    shutil.rmtree(backup_index_dir, ignore_errors=True)
+    shutil.copytree(index_dir, backup_index_dir)
+
+
 def create_index(index_dir):
+    backup_old_index(index_dir, index_dir + ".bak")
     shutil.rmtree(index_dir)
     pathlib.Path(index_dir).mkdir(parents=True, exist_ok=True)
     schema = Schema(