summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Code/parse_ena_xml.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py
index 4af3a77..8cb7946 100644
--- a/Code/parse_ena_xml.py
+++ b/Code/parse_ena_xml.py
@@ -1,4 +1,4 @@
-# Usage: python parse_ena_xml.py > rnaseq_info_database.txt
+# Usage: python parse_ena_xml.py
#
# Search in this script for 'd_run', 'd_sample', 'd_experiment' and
# 'd_study', and set their input files. The input files are generated
@@ -32,7 +32,7 @@ import sys
import string
import shutil
from backup_files import backup_file
-from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE_JSON
+from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE, RNA_SEQ_INFO_DATABASE_JSON
MAX_DESCRIPTION_LENGTH = 6000 # max number of characters to keep in json file
@@ -269,8 +269,11 @@ if __name__ == '__main__':
cmd = 'export PYTHONIOENCODING=UTF-8' # since the xml files contain non-ascii characters, use this command to avoid encoding errors during redirection
os.system(cmd)
-
- print('%s' % ('\t'.join(['run_id', 'sample_id', 'experiment_id', 'study_id', 'study_id_PRJ', 'title', 'alias', 'description', 'library_strategy', 'library_source']))) # description comes from three sources, STUDY, SAMPLE and EXPERIMENT
+
+ backup_file(RNA_SEQ_INFO_DATABASE)
+ f = open(RNA_SEQ_INFO_DATABASE, 'w', encoding='utf-8')
+
+ f.write('%s\n' % ('\t'.join(['run_id', 'sample_id', 'experiment_id', 'study_id', 'study_id_PRJ', 'title', 'alias', 'description', 'library_strategy', 'library_source']))) # description comes from three sources, STUDY, SAMPLE and EXPERIMENT
d_run_keys = d_run.keys()
d_run_keys = list(set(d_run_keys))
@@ -299,7 +302,9 @@ if __name__ == '__main__':
lst.append(description)
lst.append(library_strategy)
lst.append(library_source)
- print('%s' % ('\t'.join(lst)))
+ f.write('%s\n' % ('\t'.join(lst)))
+
+ f.close()
# Make a json file as well. This file is used to display rna-seq information in scatterplots.
json_dict = {}