diff options
Diffstat (limited to 'Code/download_ena_records.py')
| -rw-r--r-- | Code/download_ena_records.py | 14 |
1 file changed, 11 insertions, 3 deletions
diff --git a/Code/download_ena_records.py b/Code/download_ena_records.py index 842fc52..79f0cb7 100644 --- a/Code/download_ena_records.py +++ b/Code/download_ena_records.py @@ -1,5 +1,13 @@ -# Download ENA records so that we can discover newly uploaded data +# Usage: python3 download_ena_records.py 3702 +# +# Note: 3702 is arabidopsis's organism taxonomy code +# +# Download ENA records so that we can discover newly uploaded data. +# The downloaded data will be put in directory ../Data/info/ and named as ena_{type}.xml. +# # 2025-04-12 +# Last modified on 2025-10-20 +# # Hui import os @@ -10,10 +18,10 @@ from configure import TEMP_DIR, UPDATE_NETWORK_LOG_FILE from log import write_log_file from configure import INFO_DIR -accession = 3702 # arabidopsis +accession = sys.argv[1] # arabidopsis types = ['read_run', 'read_experiment', 'sample', 'study'] for t in types: - url = f'https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession={accession}&result={t}' + url = f'https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession={accession}&result={t}' # on how to build the URL, check How to Access ENA Programmatically (https://ena-docs.readthedocs.io/en/latest/retrieval/programmatic-access.html) print(url) if not os.path.exists(TEMP_DIR): sys.exit() |
