diff options
| author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-10-29 18:00:27 +0800 |
|---|---|---|
| committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-10-29 18:00:27 +0800 |
| commit | 9a85ad752194846f26f2555d1f059f76ed31c43d (patch) | |
| tree | c1b4bb5408e782c48f9528230f79f0365be02426 /Code/download_ena_experiment_records.py | |
| parent | fafb342fe7e756c7a23b9d371565f089afdf18bd (diff) | |
Also look for a run's library strategy/source in EXPERIMENT_LIBRARY_INFO_FILE
Diffstat (limited to 'Code/download_ena_experiment_records.py')
| -rw-r--r-- | Code/download_ena_experiment_records.py | 17 |
1 file changed, 8 insertions, 9 deletions
diff --git a/Code/download_ena_experiment_records.py b/Code/download_ena_experiment_records.py index 14a4a96..c897e9c 100644 --- a/Code/download_ena_experiment_records.py +++ b/Code/download_ena_experiment_records.py @@ -18,6 +18,7 @@ import urllib.request import os, time, glob from parse_ena_xml import parse_experiment +from configure import EXPERIMENT_INFO_DIR def get_xml_for_experiment_id(eid): url = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s' % (eid) @@ -26,23 +27,21 @@ def get_xml_for_experiment_id(eid): content = response.read() return content.decode('utf-8') - if __name__ == '__main__': - experiment_info_dir = '../Data/information/EXPERIMENT_SET' - if not os.path.exists(experiment_info_dir): - os.mkdir(experiment_info_dir) - already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(experiment_info_dir, '*')))] + if not os.path.exists(EXPERIMENT_INFO_DIR): + os.mkdir(EXPERIMENT_INFO_DIR) + already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(EXPERIMENT_INFO_DIR, '*')))] print('%d experiment info has already been downloaded' % (len(already_downloaded_lst))) - with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_records.py + with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_xml.py for line in f: experiment_id = line.strip() if experiment_id not in already_downloaded_lst: print(f'Downloading EXPERIMENT info for {experiment_id}') xml_content = get_xml_for_experiment_id(experiment_id) - fname = os.path.join(experiment_info_dir, experiment_id) + fname = os.path.join(EXPERIMENT_INFO_DIR, experiment_id) with open(fname, 'w', encoding='utf-8') as f2: f2.write(xml_content) d = parse_experiment(fname) - print(d[experiment_id]['library_strategy']) - 
print(d[experiment_id]['library_source']) + print(' ' + d[experiment_id]['library_strategy']) + print(' ' + d[experiment_id]['library_source']) time.sleep(6) |
