summary | refs | log | tree | commit | diff
path: root/Code
diff options
context:
space:
mode:
author: Lan Hui <lanhui@zjnu.edu.cn>, 2025-04-20 18:50:52 +0800
committer: Lan Hui <lanhui@zjnu.edu.cn>, 2025-04-20 18:50:52 +0800
commit: 15354cbf37cee332efe819aec3fe988b5133d769 (patch)
tree: 6a4b8a8b9e3c37a2191d494e2f83755574d175c9 /Code
parent: ceaaaa4c1ced17be4aefef1f6ee4a77f39b95726 (diff)
Backup old RNA_SEQ_INFO_DATABASE_JSON and replace it with an updated one
Diffstat (limited to 'Code')
-rw-r--r-- Code/backup_files.py | 11
-rw-r--r-- Code/parse_ena_xml.py | 17
-rw-r--r-- Code/test_backup_file.py | 4
3 files changed, 29 insertions, 3 deletions
diff --git a/Code/backup_files.py b/Code/backup_files.py
index 1ab3fab..4984a66 100644
--- a/Code/backup_files.py
+++ b/Code/backup_files.py
@@ -7,6 +7,7 @@
# Created on 7 December 2019 by Hui Lan (lanhui@zjnu.edu.cn)
import os, sys
+import gzip
from configure import UPDATE_NETWORK_LOG_FILE
from datetime import datetime
from log import write_log_file
@@ -34,6 +35,16 @@ def copy_and_backup_file(src_file, dest_dir):
write_log_file('[backup_files.py] File %s has been backed up to %s and zipped (.gz)' % (src_file, dest_file), UPDATE_NETWORK_LOG_FILE)
+def backup_file(src_file):
+ if not os.path.exists(src_file):
+ raise Exception(f'{src_file} does not exists.')
+ curr_date = datetime.now().strftime('%Y%m%d')
+ zip_fname = src_file + '.' + curr_date + '.gz'
+ with gzip.open(zip_fname, 'wb') as gz:
+ with open(src_file) as f:
+ gz.write(f.read().encode('utf-8'))
+ return zip_fname
+
## main
if __name__ == '__main__':
copy_and_backup_file('../Data/temp/edges.txt', '../Analysis')
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py
index 75777a1..4af3a77 100644
--- a/Code/parse_ena_xml.py
+++ b/Code/parse_ena_xml.py
@@ -30,7 +30,9 @@ import os, json, re, operator
import xml.etree.ElementTree
import sys
import string
-from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY
+import shutil
+from backup_files import backup_file
+from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE_JSON
MAX_DESCRIPTION_LENGTH = 6000 # max number of characters to keep in json file
@@ -326,6 +328,15 @@ if __name__ == '__main__':
percent = 100*count_tissue/count_transcriptomic
print(f'%% RNA-seq: {count_transcriptomic}, of which {count_tissue} having tissue info ({percent} percent)')
- fname = '../Data/information/rnaseq_info_database.json.temp'
- with open(fname, 'w') as f:
+ temp_fname = RNA_SEQ_INFO_DATABASE_JSON + '.temp'
+ with open(temp_fname, 'w') as f:
json.dump(json_dict, f, indent=4)
+
+ # Use rnaseq_info_database.json.temp to replace the existing rnaseq_info_database.json
+ # But make a backup for rnaseq_info_database.json first
+ try:
+ bak_fname = backup_file(RNA_SEQ_INFO_DATABASE_JSON)
+ shutil.move(temp_fname, RNA_SEQ_INFO_DATABASE_JSON)
+ print(f'Made {bak_fname}')
+ except Exception as e:
+ print(f'Backup {RNA_SEQ_INFO_DATABASE_JSON} encountered problem')
diff --git a/Code/test_backup_file.py b/Code/test_backup_file.py
new file mode 100644
index 0000000..d17f60a
--- /dev/null
+++ b/Code/test_backup_file.py
@@ -0,0 +1,4 @@
+from backup_files import backup_file
+from configure import RNA_SEQ_INFO_DATABASE_JSON
+
+backup_file(RNA_SEQ_INFO_DATABASE_JSON)