# Input file: GPL1440_family.tar.gz (the script reads every gzip-compressed SOFT file matching ./GPLRaw/*.soft.gz)
# Output files: parsing_GSM_relation.txt, parsing_GSM_err.txt, parsing_GSM_results.txt
import glob
import gzip
## Input: every gzip-compressed SOFT file found under ./GPLRaw/
files_gpl = glob.glob('./GPLRaw/*.soft.gz')

## Output handles, opened for writing in this order:
##   files_result   - sample name, attribute name, attribute value (tab-separated)
##   files_relation - platform name, series id, sample name (tab-separated)
##   files_err      - offending line and the exception text (tab-separated)
files_result, files_relation, files_err = (
    open('parsing_GSM_results.txt', 'w'),
    open('parsing_GSM_relation.txt', 'w'),
    open('parsing_GSM_err.txt', 'w'),
)

## Scratch string for an error message; not read anywhere in the visible script.
str_err = ''
def splicing_line(line):
    """Split a ``key = value`` line into ``[key, value]``.

    The line is cut at the FIRST ``=`` only, so a value that itself
    contains ``=`` (for example ``age=30``) is returned whole instead of
    being truncated at its own ``=``.  Leading ``!`` characters are removed
    from the key, and both parts are stripped of surrounding whitespace.

    Args:
        line: One text line, e.g. ``'!Sample_title = liver rep1\\n'``.

    Returns:
        A two-element list ``[key, value]``.

    Raises:
        IndexError: If the line contains no ``=`` at all.
    """
    parts = line.split('=', 1)
    # lstrip('!') is a no-op when the key does not begin with '!', so a
    # single expression covers both the marked and the unmarked case.
    key = parts[0].lstrip('!').strip()
    # parts[1] is deliberately left unguarded: a line without '=' raises
    # IndexError, which callers may catch and record.
    return [key, parts[1].strip()]
def _platform_name(path):
    """Return the platform name encoded in a SOFT archive path.

    ``'./GPLRaw/GPL1440_family.soft.gz'`` -> ``'GPL1440'``.
    """
    name = path.replace('./GPLRaw/', '')
    # str.rstrip() removes a *set of characters*, not a suffix, so it would
    # also eat trailing letters that belong to the name itself.  Cut the
    # known suffixes explicitly instead (longest first).
    for suffix in ('_family.soft.gz', '.soft.gz'):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name


# Walk every archive and, for each ^SAMPLE section, write:
#   files_result   : sample <TAB> attribute <TAB> value   (every '!' line)
#   files_relation : platform <TAB> series <TAB> sample   ('!Sample_series_id' lines)
#   files_err      : line <TAB> exception text            (lines that failed to parse)
# The output handles are closed even when reading an archive fails.
try:
    for afile in files_gpl:
        gpl_name = _platform_name(afile)
        print(afile)

        # Opening a gzip file
        file_gz = gzip.open(afile)
        try:
            sample_name = ''
            # True while inside the attribute header of a ^SAMPLE section;
            # switched off once the sample's data table starts.
            flag_sample_start = False

            for line in file_gz:
                # gzip.open() yields byte strings.  Where those are not
                # already `str`, decode them so the string comparisons below
                # work.  Assumes UTF-8 content - TODO confirm; undecodable
                # bytes are replaced rather than aborting the run.
                if not isinstance(line, str):
                    line = line.decode('utf-8', 'replace')

                try:
                    if line.startswith('^SAMPLE'):
                        flag_sample_start = True
                        # Split on the first '=' only, as splicing_line does.
                        sample_name = line.split('=', 1)[1].strip()

                    if line.startswith(('!sample_table_begin',
                                        '!sample_table_end')):
                        flag_sample_start = False

                    if flag_sample_start and line.startswith('!'):
                        arr = splicing_line(line)
                        files_result.write(
                            sample_name + '\t' + arr[0] + '\t' + arr[1] + '\n')
                        if line.startswith('!Sample_series_id'):
                            files_relation.write(
                                gpl_name + '\t' + arr[1] + '\t' + sample_name + '\n')
                except Exception as e:
                    # Best effort: a malformed line is reported and logged,
                    # then parsing continues with the next line.
                    print('Error %s %s' % (line.strip(), e))
                    files_err.write(line.strip() + '\t' + '%s' % e + '\n')
        finally:
            file_gz.close()
        # Break for Test
        #break
finally:
    files_err.close()
    files_relation.close()
    files_result.close()
'Informatics > Genome Informatics' 카테고리의 다른 글
| Genome / dbsnp에서 rsID 추적하기(통합된 것 찾기) (4) | 2011.09.23 |
|---|---|
| Genome / Genetic Risk (0) | 2011.08.30 |
| Genome / 뉴클레오티드, 뉴클레오티드키나아제 (0) | 2011.06.22 |
| Genome / SNP Genotype 표 이해하기 (0) | 2011.06.16 |
| FASTQ Format (0) | 2011.05.30 |