diff options
author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-05-21 16:01:52 +0800 |
---|---|---|
committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-05-21 16:01:52 +0800 |
commit | c1dcc3ecf65c081468794eeb134c5fdad6fb4081 (patch) | |
tree | 4f094563f89f593b15cbdb4c71ada9d34304e0d9 | |
parent | d72bea8b80cdb8a05b0fc28cb0d1f6421a9974ed (diff) |
TAG 'developmental stage' may also contain tissue info
-rw-r--r-- | Code/parse_ena_xml.py | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py index eea7029..f0ddfb3 100644 --- a/Code/parse_ena_xml.py +++ b/Code/parse_ena_xml.py @@ -208,7 +208,7 @@ def parse_sample(fname): #print(i) tag = i.find('./TAG') value = i.find('./VALUE') - if 'tissue' in tag.text or 'organism part' in tag.text: + if 'tissue' in tag.text or 'organism part' in tag.text or 'developmental stage' in tag.text: #print(value.text) tissue_type += value.text + ' ' d2['tissue'] = clean_tissue_info(tissue_type) # remove space, lower letters, and remove punctuations @@ -324,6 +324,8 @@ def clean_tissue_info(tissue_type): return '' if 'seedings' in tissue_type: # a typo I guess return 'seedlings' + if 'rootstock' in tissue_type: + return 'root' return tissue_type.strip().lower().translate(str.maketrans('', '', string.punctuation)) |