summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLan Hui <lanhui@zjnu.edu.cn>2025-05-21 16:01:52 +0800
committerLan Hui <lanhui@zjnu.edu.cn>2025-05-21 16:01:52 +0800
commitc1dcc3ecf65c081468794eeb134c5fdad6fb4081 (patch)
tree4f094563f89f593b15cbdb4c71ada9d34304e0d9
parentd72bea8b80cdb8a05b0fc28cb0d1f6421a9974ed (diff)
TAG 'developmental stage' may also contain tissue info
-rw-r--r--Code/parse_ena_xml.py4
1 files changed, 3 insertions, 1 deletions
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py
index eea7029..f0ddfb3 100644
--- a/Code/parse_ena_xml.py
+++ b/Code/parse_ena_xml.py
@@ -208,7 +208,7 @@ def parse_sample(fname):
#print(i)
tag = i.find('./TAG')
value = i.find('./VALUE')
- if 'tissue' in tag.text or 'organism part' in tag.text:
+ if 'tissue' in tag.text or 'organism part' in tag.text or 'developmental stage' in tag.text:
#print(value.text)
tissue_type += value.text + ' '
d2['tissue'] = clean_tissue_info(tissue_type) # remove space, lower letters, and remove punctuations
@@ -324,6 +324,8 @@ def clean_tissue_info(tissue_type):
return ''
if 'seedings' in tissue_type: # a typo I guess
return 'seedlings'
+ if 'rootstock' in tissue_type:
+ return 'root'
return tissue_type.strip().lower().translate(str.maketrans('', '', string.punctuation))