summary refs log tree commit diff
path: root/Code/download_ena_experiment_records.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/download_ena_experiment_records.py')
-rw-r--r-- Code/download_ena_experiment_records.py 32
1 files changed, 27 insertions, 5 deletions
diff --git a/Code/download_ena_experiment_records.py b/Code/download_ena_experiment_records.py
index f3c9126..11bfc70 100644
--- a/Code/download_ena_experiment_records.py
+++ b/Code/download_ena_experiment_records.py
@@ -1,7 +1,17 @@
-# Given an experiment's accession SRX6711770, get its full information from the following link
-# https://www.ebi.ac.uk/ena/browser/api/xml/SRX6711770
+# Purpose: Download missing experiment info one by one into ../Data/information/EXPERIMENT_SET
+#
+# Usage: python3 download_ena_experiment_records.py
+#
+#
+# Note:
+# (1) The experiment accession IDs are stored in ../Data/information/experiment_ids_lacking_strategy_or_source.txt
+# (2) Given an experiment's accession SRX6711770, we can get its full information from the following link
+# https://www.ebi.ac.uk/ena/browser/api/xml/SRX6711770
+#
+# 2025-10-23
import urllib.request
+import os, time, glob
def get_xml_for_experiment_id(eid):
url = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s' % (eid)
@@ -11,7 +21,19 @@ def get_xml_for_experiment_id(eid):
return content.decode('utf-8')
-
if __name__ == '__main__':
- print(get_xml_for_experiment_id('SRX6711770'))
-
+ experiment_info_dir = '../Data/information/EXPERIMENT_SET'
+ if not os.path.exists(experiment_info_dir):
+ os.mkdir(experiment_info_dir)
+ already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(experiment_info_dir, '*')))]
+ print('%d experiment info has already been downloaded' % (len(already_downloaded_lst)))
+ with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_records.py
+ for line in f:
+ experiment_id = line.strip()
+ if experiment_id not in already_downloaded_lst:
+ print(f'Downloading EXPERIMENT info for {experiment_id}')
+ xml_content = get_xml_for_experiment_id(experiment_id)
+ fname = os.path.join(experiment_info_dir, experiment_id)
+ with open(fname, 'w', encoding='utf-8') as f2:
+ f2.write(xml_content)
+ time.sleep(6)