# Purpose: Download missing experiment info one by one into ../Data/information/EXPERIMENT_SET
#
# Usage: python3 download_ena_experiment_records.py
#
#
# Note:
# (1) The experiment accession IDs are stored in ../Data/information/experiment_ids_lacking_strategy_or_source.txt
# (2) Given an experiment's accession SRX6711770, we can get its full information from the following link
#     https://www.ebi.ac.uk/ena/browser/api/xml/SRX6711770
#
# 2025-10-23

import glob
import os
import sys
import time
import urllib.request
from typing import Iterable, Iterator

# Directory that receives one file per experiment, named after its accession ID.
EXPERIMENT_INFO_DIR = '../Data/information/EXPERIMENT_SET'
# One accession ID per line; this file is produced by parse_ena_records.py.
EXPERIMENT_IDS_FILE = '../Data/information/experiment_ids_lacking_strategy_or_source.txt'
# URL template for a single experiment record; %s is the accession ID.
ENA_XML_URL = 'https://www.ebi.ac.uk/ena/browser/api/xml/%s'
# Give up on a request that has stalled for this long instead of hanging forever.
REQUEST_TIMEOUT_SECONDS = 60
# Pause between consecutive requests.
REQUEST_INTERVAL_SECONDS = 6


def get_xml_for_experiment_id(eid: str, timeout: float = REQUEST_TIMEOUT_SECONDS) -> str:
    """Fetch the XML record of one experiment and return it as text.

    Args:
        eid: Experiment accession ID, e.g. ``SRX6711770``.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails (includes HTTP errors).
        OSError: If the connection times out or is reset.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = ENA_XML_URL % (eid)
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode('utf-8')


def pending_experiment_ids(lines: Iterable[str], already_downloaded: Iterable[str]) -> Iterator[str]:
    """Yield the experiment IDs from *lines* that still have to be downloaded.

    Surrounding whitespace is stripped from every line.  Blank lines, IDs
    present in *already_downloaded*, and IDs repeated within *lines* are
    skipped, so every yielded ID is non-empty and unique.

    Args:
        lines: Raw lines, one accession ID per line.
        already_downloaded: IDs that must not be yielded.

    Yields:
        Accession IDs in their order of first appearance in *lines*.
    """
    # Copy into a set: O(1) membership tests, and the caller's collection is
    # left untouched while we record the IDs handed out so far.
    seen = set(already_downloaded)
    for line in lines:
        experiment_id = line.strip()
        if not experiment_id or experiment_id in seen:
            continue
        seen.add(experiment_id)
        yield experiment_id


def main() -> None:
    """Download every listed experiment record that is not yet on disk.

    A failed download is reported on stderr and skipped so that the rest of
    the batch still runs; the script then exits with a non-zero status.
    Nothing is written for a failed ID, so re-running the script retries it.
    """
    os.makedirs(EXPERIMENT_INFO_DIR, exist_ok=True)

    # Each downloaded record is stored under its accession ID, so the file
    # names in the output directory are the IDs that are already done.
    already_downloaded = {
        os.path.basename(path)
        for path in glob.glob(os.path.join(EXPERIMENT_INFO_DIR, '*'))
    }
    print('%d experiment info has already been downloaded' % (len(already_downloaded)))

    failed_ids = []
    with open(EXPERIMENT_IDS_FILE, encoding='utf-8') as f:
        for experiment_id in pending_experiment_ids(f, already_downloaded):
            print(f'Downloading EXPERIMENT info for {experiment_id}')
            try:
                xml_content = get_xml_for_experiment_id(experiment_id)
            except OSError as err:
                # URLError/HTTPError and socket timeouts all derive from OSError.
                print(f'Failed to download {experiment_id}: {err}', file=sys.stderr)
                failed_ids.append(experiment_id)
            else:
                fname = os.path.join(EXPERIMENT_INFO_DIR, experiment_id)
                with open(fname, 'w', encoding='utf-8') as f2:
                    f2.write(xml_content)
            # Throttle after every request, successful or not.
            time.sleep(REQUEST_INTERVAL_SECONDS)

    if failed_ids:
        raise SystemExit(f'{len(failed_ids)} download(s) failed; re-run the script to retry them')


if __name__ == '__main__':
    main()