summaryrefslogtreecommitdiff
path: root/Code/download_ena_records.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/download_ena_records.py')
-rw-r--r--Code/download_ena_records.py14
1 files changed, 11 insertions, 3 deletions
diff --git a/Code/download_ena_records.py b/Code/download_ena_records.py
index 842fc52..79f0cb7 100644
--- a/Code/download_ena_records.py
+++ b/Code/download_ena_records.py
@@ -1,5 +1,13 @@
-# Download ENA records so that we can discover newly uploaded data
+# Usage: python3 download_ena_records.py 3702
+#
+# Note: 3702 is the NCBI Taxonomy ID for Arabidopsis thaliana
+#
+# Download ENA records so that we can discover newly uploaded data.
+# The downloaded data are saved in the directory ../Data/info/ as ena_{type}.xml.
+#
# 2025-04-12
+# Last modified on 2025-10-20
+#
# Hui
import os
@@ -10,10 +18,10 @@ from configure import TEMP_DIR, UPDATE_NETWORK_LOG_FILE
from log import write_log_file
from configure import INFO_DIR
-accession = 3702 # arabidopsis
+accession = sys.argv[1] # arabidopsis
types = ['read_run', 'read_experiment', 'sample', 'study']
for t in types:
- url = f'https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession={accession}&result={t}'
+ url = f'https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession={accession}&result={t}' # on how to build the URL, check How to Access ENA Programmatically (https://ena-docs.readthedocs.io/en/latest/retrieval/programmatic-access.html)
print(url)
if not os.path.exists(TEMP_DIR):
sys.exit()