diff options
Diffstat (limited to 'Code/download_ena_experiment_records.py')
| -rw-r--r-- | Code/download_ena_experiment_records.py | 17 | 
1 files changed, 8 insertions, 9 deletions
| diff --git a/Code/download_ena_experiment_records.py b/Code/download_ena_experiment_records.py index 14a4a96..c897e9c 100644 --- a/Code/download_ena_experiment_records.py +++ b/Code/download_ena_experiment_records.py @@ -18,6 +18,7 @@  import urllib.request  import os, time, glob  from parse_ena_xml import parse_experiment +from configure import EXPERIMENT_INFO_DIR  def get_xml_for_experiment_id(eid):      url = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s' % (eid) @@ -26,23 +27,21 @@ def get_xml_for_experiment_id(eid):          content = response.read()      return content.decode('utf-8') -  if __name__ == '__main__': -    experiment_info_dir = '../Data/information/EXPERIMENT_SET' -    if not os.path.exists(experiment_info_dir): -        os.mkdir(experiment_info_dir) -    already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(experiment_info_dir, '*')))] +    if not os.path.exists(EXPERIMENT_INFO_DIR): +        os.mkdir(EXPERIMENT_INFO_DIR) +    already_downloaded_lst = [os.path.split(path)[1] for path in sorted(glob.glob(os.path.join(EXPERIMENT_INFO_DIR, '*')))]      print('%d experiment info has already been downloaded' % (len(already_downloaded_lst))) -    with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_records.py +    with open('../Data/information/experiment_ids_lacking_strategy_or_source.txt') as f: # the file experiment_ids_lacking_strategy_or_source.txt is produced by parse_ena_xml.py          for line in f:              experiment_id = line.strip()              if experiment_id not in already_downloaded_lst:                  print(f'Downloading EXPERIMENT info for {experiment_id}')                  xml_content = get_xml_for_experiment_id(experiment_id) -                fname = os.path.join(experiment_info_dir, experiment_id) +                fname = os.path.join(EXPERIMENT_INFO_DIR, experiment_id)                  with open(fname, 'w', encoding='utf-8') as f2:                      f2.write(xml_content)                  d = parse_experiment(fname) -                print(d[experiment_id]['library_strategy']) -                print(d[experiment_id]['library_source']) +                print('  ' +  d[experiment_id]['library_strategy']) +                print('  ' + d[experiment_id]['library_source'])                  time.sleep(6) | 
