diff options
author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-04-20 18:50:52 +0800 |
---|---|---|
committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-04-20 18:50:52 +0800 |
commit | 15354cbf37cee332efe819aec3fe988b5133d769 (patch) | |
tree | 6a4b8a8b9e3c37a2191d494e2f83755574d175c9 | |
parent | ceaaaa4c1ced17be4aefef1f6ee4a77f39b95726 (diff) |
Backup old RNA_SEQ_INFO_DATABASE_JSON and replace it with an updated one
-rw-r--r-- | Code/backup_files.py | 11 | ||||
-rw-r--r-- | Code/parse_ena_xml.py | 17 | ||||
-rw-r--r-- | Code/test_backup_file.py | 4 |
3 files changed, 29 insertions, 3 deletions
diff --git a/Code/backup_files.py b/Code/backup_files.py
index 1ab3fab..4984a66 100644
--- a/Code/backup_files.py
+++ b/Code/backup_files.py
@@ -7,6 +7,7 @@
 # Created on 7 December 2019 by Hui Lan (lanhui@zjnu.edu.cn)
 
 import os, sys
+import gzip
 from configure import UPDATE_NETWORK_LOG_FILE
 from datetime import datetime
 from log import write_log_file
@@ -34,6 +35,16 @@ def copy_and_backup_file(src_file, dest_dir):
     write_log_file('[backup_files.py] File %s has been backed up to %s and zipped (.gz)' % (src_file, dest_file), UPDATE_NETWORK_LOG_FILE)
 
 
+def backup_file(src_file):
+    if not os.path.exists(src_file):
+        raise Exception(f'{src_file} does not exists.')
+    curr_date = datetime.now().strftime('%Y%m%d')
+    zip_fname = src_file + '.' + curr_date + '.gz'
+    with gzip.open(zip_fname, 'wb') as gz:
+        with open(src_file) as f:
+            gz.write(f.read().encode('utf-8'))
+    return zip_fname
+
 ## main
 if __name__ == '__main__':
     copy_and_backup_file('../Data/temp/edges.txt', '../Analysis')
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py
index 75777a1..4af3a77 100644
--- a/Code/parse_ena_xml.py
+++ b/Code/parse_ena_xml.py
@@ -30,7 +30,9 @@ import os, json, re, operator
 import xml.etree.ElementTree
 import sys
 import string
-from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY
+import shutil
+from backup_files import backup_file
+from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE_JSON
 
 MAX_DESCRIPTION_LENGTH = 6000 # max number to characters to keep in json file
 
@@ -326,6 +328,15 @@ if __name__ == '__main__':
     percent = 100*count_tissue/count_transcriptomic
     print(f'%% RNA-seq: {count_transcriptomic}, of which {count_tissue} having tissue info ({percent} percent)')
 
-    fname = '../Data/information/rnaseq_info_database.json.temp'
-    with open(fname, 'w') as f:
+    temp_fname = RNA_SEQ_INFO_DATABASE_JSON + '.temp'
+    with open(temp_fname, 'w') as f:
         json.dump(json_dict, f, indent=4)
+
+    # Use rnaseq_info_database.json.temp to replace the exisiting rnaseq_info_database.json
+    # But make a backup for rnaseq_info_database.json first
+    try:
+        bak_fname = backup_file(RNA_SEQ_INFO_DATABASE_JSON)
+        shutil.move(temp_fname, RNA_SEQ_INFO_DATABASE_JSON)
+        print(f'Made {bak_fname}')
+    except Exception as e:
+        print(f'Backup {RNA_SEQ_INFO_DATABASE_JSON} encountered problem')
diff --git a/Code/test_backup_file.py b/Code/test_backup_file.py
new file mode 100644
index 0000000..d17f60a
--- /dev/null
+++ b/Code/test_backup_file.py
@@ -0,0 +1,4 @@
+from backup_files import backup_file
+from configure import RNA_SEQ_INFO_DATABASE_JSON
+
+backup_file(RNA_SEQ_INFO_DATABASE_JSON)