diff options
Diffstat (limited to 'Code')
-rw-r--r-- | Code/parse_ena_xml.py | 4 |
1 file changed, 3 insertions, 1 deletion
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py
index eea7029..f0ddfb3 100644
--- a/Code/parse_ena_xml.py
+++ b/Code/parse_ena_xml.py
@@ -208,7 +208,7 @@ def parse_sample(fname):
         #print(i)
         tag = i.find('./TAG')
         value = i.find('./VALUE')
-        if 'tissue' in tag.text or 'organism part' in tag.text:
+        if 'tissue' in tag.text or 'organism part' in tag.text or 'developmental stage' in tag.text:
             #print(value.text)
             tissue_type += value.text + ' '
     d2['tissue'] = clean_tissue_info(tissue_type) # remove space, lower letters, and remove punctuations
@@ -324,6 +324,8 @@ def clean_tissue_info(tissue_type):
         return ''
     if 'seedings' in tissue_type: # a typo I guess
         return 'seedlings'
+    if 'rootstock' in tissue_type:
+        return 'root'
     return tissue_type.strip().lower().translate(str.maketrans('', '', string.punctuation))