# blob: 79f0cb701f5733ed199d2d4541e3531911c833ff (plain)
# Usage: python3 download_ena_records.py 3702
#
# Note: 3702 is the NCBI taxonomy ID of Arabidopsis thaliana.
#
# Download ENA records so that we can discover newly uploaded data.
# The downloaded files are placed in the directory ../Data/info/ and named ena_{type}.xml.
#
# 2025-04-12
# Last modified on 2025-10-20
#
# Hui
import os
import shutil
import subprocess
import sys
import time

from configure import TEMP_DIR, UPDATE_NETWORK_LOG_FILE
from configure import INFO_DIR
from log import write_log_file
# ENA result types to fetch; each one is saved as ena_<type>.xml.
RESULT_TYPES = ['read_run', 'read_experiment', 'sample', 'study']

# Pause (seconds) after each download and after each move, as in the original script.
PAUSE_SECONDS = 5


def build_ena_url(accession, result_type):
    """Return the ENA browser API URL for records of `result_type` linked to a taxon.

    On how to build the URL, check "How to Access ENA Programmatically"
    (https://ena-docs.readthedocs.io/en/latest/retrieval/programmatic-access.html).

    accession   -- organism taxonomy code, e.g. '3702' for arabidopsis
    result_type -- one of RESULT_TYPES
    """
    return f'https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession={accession}&result={result_type}'


def record_filename(result_type):
    """Return the file name used to store records of `result_type`."""
    return 'ena_' + result_type + '.xml'


def download_record(url, dest):
    """Download `url` to `dest` with wget and return True on success.

    wget is started from an argument list (no shell), so characters in the
    command-line argument cannot be interpreted as shell syntax.  A download
    counts as failed when wget cannot be started, exits with a non-zero
    status, or produces no data; in that case any leftover output file is
    removed so that it is never installed over a previously good copy.
    """
    try:
        status = subprocess.run(['wget', url, '-O', dest]).returncode
    except OSError as err:  # e.g. wget is not installed
        print(f'Cannot run wget: {err}', file=sys.stderr)
        status = -1

    succeeded = status == 0 and os.path.exists(dest) and os.path.getsize(dest) > 0
    if not succeeded and os.path.exists(dest):
        os.remove(dest)  # wget -O may leave an empty or truncated file behind
    return succeeded


def install_record(src, dest_dir):
    """Move file `src` into `dest_dir`, replacing a same-named file, and return the new path.

    The full destination file path is passed to shutil.move().  Passing only
    the directory makes shutil.move() raise shutil.Error when the file already
    exists there, which happened on every run after the first one.
    """
    os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, os.path.basename(src))
    shutil.move(src, dest)
    return dest


def main(argv=None):
    """Download the ENA record lists for one taxon and install them in INFO_DIR.

    argv -- command-line arguments without the program name; defaults to
            sys.argv[1:].  The first argument is the taxonomy code.

    Exits with an error message if the argument is missing or TEMP_DIR does
    not exist.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('Usage: python3 download_ena_records.py <taxonomy_code>  (e.g. 3702 for arabidopsis)')
    accession = args[0]

    if not os.path.exists(TEMP_DIR):
        sys.exit(f'[download_ena_records.py] Temporary directory {TEMP_DIR} does not exist.')

    failed = []
    for t in RESULT_TYPES:
        url = build_ena_url(accession, t)
        print(url)
        if not download_record(url, os.path.join(TEMP_DIR, record_filename(t))):  # save XML data to file
            failed.append(t)
        time.sleep(PAUSE_SECONDS)  # be gentle with the ENA server

    for t in RESULT_TYPES:
        fname = os.path.join(TEMP_DIR, record_filename(t))
        if os.path.exists(fname):
            print(f'Move {fname} to {INFO_DIR}')
            install_record(fname, INFO_DIR)
            time.sleep(PAUSE_SECONDS)

    if failed:
        write_log_file('[download_ena_records.py] Failed to download ENA records for: %s' % (', '.join(failed)), UPDATE_NETWORK_LOG_FILE)
    if len(failed) < len(RESULT_TYPES):
        write_log_file('[download_ena_records.py] ENA records updated. Check folder %s' % (INFO_DIR), UPDATE_NETWORK_LOG_FILE)


if __name__ == '__main__':
    main()
#https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession=3702&result=read_run
#https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession=3702&result=read_experiment
#https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession=3702&result=sample
#https://www.ebi.ac.uk/ena/browser/api/xml/links/taxon?accession=3702&result=study
|