Author: Yonglae Cho (yonglae@snu.ac.kr)

First, count the GSMs in each GDS of the GDSfull dataset.

#gds_expr_desc.txt
#GDS3684    108    86


Second, build the GSM data set from GDSfull.soft.
#ID_REF    IDENTIFIER    GSM455124    GSM455125    GSM455126    Gene title    Gene symbol    Gene ID


Third, run the Python code below. ^^


def _read_rows(path):
    """Return the non-blank lines of *path* as lists of tab-separated fields.

    Line endings are removed before splitting so the last field of a row
    never carries a trailing newline.
    """
    rows = []
    with open(path, 'r') as handle:
        for line in handle:
            text = line.rstrip('\r\n')
            if text:
                rows.append(text.split('\t'))
    return rows


def split_gds_by_sample(desc_path='gds_expr_desc1.txt',
                        value_dir='./value',
                        out_dir='./value_div'):
    """Split each GDS value table into one file per sample column.

    Every non-blank line of *desc_path* is tab-separated; field 0 is the GDS
    id and field 2 is the number of sample columns in that GDS's value table
    (e.g. ``GDS3684<TAB>108<TAB>86``).

    For each GDS the table ``<value_dir>/value_<GDS id>.txt`` is read once.
    Its first row is the header, and sample columns start at index 2
    (e.g. ``ID_REF  IDENTIFIER  GSM...  Gene title  Gene symbol  Gene ID``).
    For each sample column a file ``<out_dir>/<GDS id>_<sample name>.txt`` is
    written with five tab-separated fields per row: columns 0 and 1, the
    sample column, and the two columns at ``gene_pos - 1`` and ``gene_pos``,
    where ``gene_pos = 2 + sample_count + 2``.  The header row is written in
    the same shape as the data rows.

    The name of every sample column is printed as it is processed.  A value
    table with no rows is skipped.  Rows that are shorter than ``gene_pos``
    raise ``IndexError``; missing input files or a missing *out_dir* raise
    the usual I/O error from ``open``.
    """
    for fields in _read_rows(desc_path):
        gds_id = fields[0]
        sample_count = int(fields[2])

        # Read the table once per GDS instead of once per sample column.
        rows = _read_rows(value_dir + '/value_' + gds_id + '.txt')
        if not rows:
            # No header row, so there is no sample name to build a file from.
            continue

        header = rows[0]
        gene_pos = 2 + sample_count + 2

        for offset in range(sample_count):
            column = offset + 2
            sample_name = header[column]
            # Single-argument call form prints the same on Python 2 and 3.
            print(sample_name)

            out_path = out_dir + '/' + gds_id + '_' + sample_name + '.txt'
            with open(out_path, 'w') as out:
                out.writelines(
                    '\t'.join((row[0], row[1], row[column],
                               row[gene_pos - 1], row[gene_pos])) + '\n'
                    for row in rows
                )


if __name__ == '__main__':
    split_gds_by_sample()

+ Recent posts