From c1dcc3ecf65c081468794eeb134c5fdad6fb4081 Mon Sep 17 00:00:00 2001 From: Lan Hui Date: Wed, 21 May 2025 16:01:52 +0800 Subject: TAG 'developmental stage' may also contain tissue info --- Code/parse_ena_xml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Code/parse_ena_xml.py') diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py index eea7029..f0ddfb3 100644 --- a/Code/parse_ena_xml.py +++ b/Code/parse_ena_xml.py @@ -208,7 +208,7 @@ def parse_sample(fname): #print(i) tag = i.find('./TAG') value = i.find('./VALUE') - if 'tissue' in tag.text or 'organism part' in tag.text: + if 'tissue' in tag.text or 'organism part' in tag.text or 'developmental stage' in tag.text: #print(value.text) tissue_type += value.text + ' ' d2['tissue'] = clean_tissue_info(tissue_type) # remove space, lower letters, and remove punctuations @@ -324,6 +324,8 @@ def clean_tissue_info(tissue_type): return '' if 'seedings' in tissue_type: # a typo I guess return 'seedlings' + if 'rootstock' in tissue_type: + return 'root' return tissue_type.strip().lower().translate(str.maketrans('', '', string.punctuation)) -- cgit v1.2.1