diff options
| -rw-r--r-- | Code/download_ena_experiment_records.py | 32 | 
1 files changed, 27 insertions, 5 deletions
| diff --git a/Code/download_ena_experiment_records.py b/Code/download_ena_experiment_records.py index f3c9126..11bfc70 100644 --- a/Code/download_ena_experiment_records.py +++ b/Code/download_ena_experiment_records.py @@ -1,7 +1,17 @@ -# Given an experiment's accession SRX6711770, get its full information from the following link -# https://www.ebi.ac.uk/ena/browser/api/xml/SRX6711770 +# Purpose: Download missing experiment info one by one into ../Data/information/EXPERIMENT_SET +# +# Usage: python3 download_ena_experiment_records.py +# +# +# Note: +# (1) The experiment accession IDs are stored in ../Data/information/experiment_ids_lacking_strategy_or_source.txt +# (2) Given an experiment's accession SRX6711770, we can get its full information from the following link +#     https://www.ebi.ac.uk/ena/browser/api/xml/SRX6711770 +# +# 2025-10-23  import urllib.request +import os, time, glob  def get_xml_for_experiment_id(eid):      url = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s' % (eid) @@ -11,7 +21,19 @@ def get_xml_for_experiment_id(eid):      return content.decode('utf-8') -  if __name__ == '__main__': -    print(get_xml_for_experiment_id('SRX6711770')) - +    experiment_info_dir = '../Data/information/EXPERIMENT_SET' +    if not os.path.exists(experiment_info_dir): +        os.mkdir(experiment_info_dir) +    already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(experiment_info_dir, '*')))] +    print('%d experiment info has already been downloaded' % (len(already_downloaded_lst))) +    with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_records.py +        for line in f: +            experiment_id = line.strip() +            if experiment_id not in already_downloaded_lst: +                print(f'Downloading EXPERIMENT info for {experiment_id}') +                xml_content = get_xml_for_experiment_id(experiment_id) +                fname = os.path.join(experiment_info_dir, experiment_id) +                with open(fname, 'w', encoding='utf-8') as f2: +                    f2.write(xml_content) +                time.sleep(6) | 
