Also look for a run's library strategy/source in EXPERIMENT_LIBRARY_INFO_FILE

author: Lan Hui <lanhui@zjnu.edu.cn> 2025-10-29 18:00:27 +0800
committer: Lan Hui <lanhui@zjnu.edu.cn> 2025-10-29 18:00:27 +0800
commit: 9a85ad752194846f26f2555d1f059f76ed31c43d (patch)
tree: c1b4bb5408e782c48f9528230f79f0365be02426 /Code/download_ena_experiment_records.py
parent: fafb342fe7e756c7a23b9d371565f089afdf18bd (diff)
1 file changed, 8 insertions, 9 deletions
diff --git a/Code/download_ena_experiment_records.py b/Code/download_ena_experiment_records.py
index 14a4a96..c897e9c 100644
--- a/Code/download_ena_experiment_records.py
+++ b/Code/download_ena_experiment_records.py
@@ -18,6 +18,7 @@
 import urllib.request
 import os, time, glob
 from parse_ena_xml import parse_experiment
+from configure import EXPERIMENT_INFO_DIR
 
 def get_xml_for_experiment_id(eid):
     url = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s' % (eid)
@@ -26,23 +27,21 @@ def get_xml_for_experiment_id(eid):
         content = response.read()
     return content.decode('utf-8')
 
-
 if __name__ == '__main__':
-    experiment_info_dir = '../Data/information/EXPERIMENT_SET'
-    if not os.path.exists(experiment_info_dir):
-        os.mkdir(experiment_info_dir)
-    already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(experiment_info_dir, '*')))]
+    if not os.path.exists(EXPERIMENT_INFO_DIR):
+        os.mkdir(EXPERIMENT_INFO_DIR)
+    already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(EXPERIMENT_INFO_DIR, '*')))]
     print('%d experiment info has already been downloaded' % (len(already_downloaded_lst)))
-    with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_records.py
+    with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_xml.py
         for line in f:
             experiment_id = line.strip()
             if experiment_id not in already_downloaded_lst:
                 print(f'Downloading EXPERIMENT info for {experiment_id}')
                 xml_content = get_xml_for_experiment_id(experiment_id)
-                fname = os.path.join(experiment_info_dir, experiment_id)
+                fname = os.path.join(EXPERIMENT_INFO_DIR, experiment_id)
                 with open(fname, 'w', encoding='utf-8') as f2:
                     f2.write(xml_content)
                 d = parse_experiment(fname)
-                print(d[experiment_id]['library_strategy'])
-                print(d[experiment_id]['library_source'])
+                print('  ' +  d[experiment_id]['library_strategy'])
+                print('  ' + d[experiment_id]['library_source'])
                 time.sleep(6)
author	Lan Hui <lanhui@zjnu.edu.cn>	2025-10-29 18:00:27 +0800
committer	Lan Hui <lanhui@zjnu.edu.cn>	2025-10-29 18:00:27 +0800
commit	9a85ad752194846f26f2555d1f059f76ed31c43d (patch)
tree	c1b4bb5408e782c48f9528230f79f0365be02426 /Code/download_ena_experiment_records.py
parent	fafb342fe7e756c7a23b9d371565f089afdf18bd (diff)