diff options
author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-04-20 19:02:15 +0800 |
---|---|---|
committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-04-20 19:02:15 +0800 |
commit | e81676c04c6505a46070c9ab700f908b89565e18 (patch) | |
tree | b11d7c4db0f93a8a540caeccd4516c9442e4f13b | |
parent | d09f2fb57a1ae584a9d06dae75348b6b2ca6805f (diff) |
Save RNA_SEQ_INFO_DATABASE; command-line redirection is no longer needed.
-rw-r--r-- | Code/parse_ena_xml.py | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/Code/parse_ena_xml.py b/Code/parse_ena_xml.py index 4af3a77..8cb7946 100644 --- a/Code/parse_ena_xml.py +++ b/Code/parse_ena_xml.py @@ -1,4 +1,4 @@ -# Usage: python parse_ena_xml.py > rnaseq_info_database.txt +# Usage: python parse_ena_xml.py # # Search in this script for 'd_run', 'd_sample', 'd_experiment' and # 'd_study', and set their input files. The input files are generated @@ -32,7 +32,7 @@ import sys import string import shutil from backup_files import backup_file -from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE_JSON +from configure import ENA_RECORDS_READ_RUN, ENA_RECORDS_READ_EXPERIMENT, ENA_RECORDS_SAMPLE, ENA_RECORDS_STUDY, RNA_SEQ_INFO_DATABASE, RNA_SEQ_INFO_DATABASE_JSON MAX_DESCRIPTION_LENGTH = 6000 # max number to characters to keep in json file @@ -269,8 +269,11 @@ if __name__ == '__main__': cmd = 'export PYTHONIOENCODING=UTF-8' # since xml files contains non-ascii characters, use this command to avoid encoding error during redirection os.system(cmd) - - print('%s' % ('\t'.join(['run_id', 'sample_id', 'experiment_id', 'study_id', 'study_id_PRJ', 'title', 'alias', 'description', 'library_strategy', 'library_source']))) # description comes from three sources, STUDY, SAMPLE and EXPERIMENT + + backup_file(RNA_SEQ_INFO_DATABASE) + f = open(RNA_SEQ_INFO_DATABASE, 'w', encoding='utf-8') + + f.write('%s\n' % ('\t'.join(['run_id', 'sample_id', 'experiment_id', 'study_id', 'study_id_PRJ', 'title', 'alias', 'description', 'library_strategy', 'library_source']))) # description comes from three sources, STUDY, SAMPLE and EXPERIMENT d_run_keys = d_run.keys() d_run_keys = list(set(d_run_keys)) @@ -299,7 +302,9 @@ if __name__ == '__main__': lst.append(description) lst.append(library_strategy) lst.append(library_source) - print('%s' % ('\t'.join(lst))) + f.write('%s\n' % ('\t'.join(lst))) + + f.close() # Make a json file as well. 
this file is used to display rna-seq information in scatterplots. json_dict = {} |