1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import sys
24 import string
25 import types
26 import re
27 import copy
28
29
# NOTE(review): the `def` line was absent from the reviewed listing; the
# signature below is reconstructed from the body -- confirm against the
# original file.
def cmp_by_score(x1, x2):
    """Three-way comparison of two sequences by their element at index 1.

    Returns -1 if x1[1] is smaller than x2[1], 1 if it is larger, and 0
    otherwise (including when the two values are unordered).
    """
    if x1[1] < x2[1]:
        return -1
    elif x1[1] > x2[1]:
        return 1
    return 0
36
37
39
# NOTE(review): the `class` line and the `def` lines of num_columns,
# gene_plot_data and write_gnuplot_file were absent from the reviewed listing.
# They are reconstructed from the 'ExperimentSpec::__init__' error message and
# from the names and arguments used inside the method bodies -- confirm
# against the original file.
class ExperimentSpec:
    """Pairs a selection of data columns with the time points they represent.

    Attributes:
      column_index -- for each time point, the index of the data column
                      holding the measurement (private deep copy)
      time_list    -- the time value of each selected column (private deep copy)
      title        -- free-text label of the experiment
    """

    def __init__(self, col_index, time_list, title='(untitled)'):
        """Store deep copies of col_index and time_list, and the title.

        Raises ValueError if col_index and time_list differ in length.
        """
        if len(col_index) != len(time_list):
            # ValueError is a StandardError subclass in Python 2, so handlers
            # written for the old `raise StandardError, ...` still catch it,
            # and the name also exists in Python 3.
            raise ValueError('ExperimentSpec::__init__: lengths of column_index (%d) and time_list (%d) differ' % (len(col_index), len(time_list)))
        self.column_index = copy.deepcopy(col_index)
        self.time_list = copy.deepcopy(time_list)
        self.title = title

    def num_columns(self):
        """Return the number of time points (equal to the number of columns)."""
        return len(self.time_list)

    def gene_plot_data(self, arrdata, row_index):
        """Return [time, value] pairs for one row of arrdata.data.

        Only columns whose value in that row is a float contribute a pair;
        any other value (e.g. an unparsed string) is skipped.
        """
        row = arrdata.data[row_index]
        return [[t, row[ci]]
                for ci, t in zip(self.column_index, self.time_list)
                if isinstance(row[ci], float)]

    def write_gnuplot_file(self, f, arrdata):
        """Write the time series of every row of arrdata to f.

        Each row produces a '# <first field of the row>' comment line. Rows
        with at least one float value are followed by their 'time value'
        lines and a terminating blank line.
        """
        for row_index in range(arrdata.num_rows()):
            points = self.gene_plot_data(arrdata, row_index)
            f.write('# %s\n' % arrdata.data[row_index][0])
            if points:
                for p in points:
                    f.write('%f %f\n' % (p[0], p[1]))
                # blank line is only emitted after a non-empty data set
                f.write('\n')
70
71
73
# NOTE(review): the `class` line and most `def` lines were absent from the
# reviewed listing. The class name comes from the 'MicroarrayData::...' error
# messages; setup_gene_dict, read_file, column_type, regulation_index,
# regulation_index_list and num_rows are named after the messages and calls
# that mention them. `__init__` and `num_columns` are inferred from their
# bodies, the body of `num_rows` was missing entirely, and the original name
# of `write_float_columns` could not be recovered -- confirm all of these
# against the original file.
class MicroarrayData:
    """Table of tab-separated expression data with the gene ID in column 0.

    Attributes:
      column_label -- list of header strings, one per column
      data         -- list of rows; each row is a list whose numeric fields
                      are floats and whose other fields are strings
      gene_dict    -- maps gene ID (first field of a row) to its row index
    """

    # Complete decimal number: optional sign, digits with optional fraction
    # (or a bare fraction), optional exponent. Anchored with \Z so that
    # tokens which merely start with digits are not taken for numbers.
    _FLOAT_RE = re.compile(r'[+-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[Ee][+-]?[0-9]+)?\Z')

    def __init__(self):
        """Create an empty table."""
        self.column_label = []
        self.data = []
        self.gene_dict = {}

    def setup_gene_dict(self):
        """Rebuild gene_dict from the first field of every row in data.

        Raises TypeError for a non-string gene ID and ValueError for an empty
        or duplicate one. Both are StandardError subclasses in Python 2, so
        handlers written for the old `raise StandardError, ...` still apply.
        """
        self.gene_dict = {}
        for i, row in enumerate(self.data):
            gene_id = row[0]
            if not isinstance(gene_id, str):
                raise TypeError('MicroarrayData::setup_gene_dict: non-string gene ID')
            if gene_id == '':
                raise ValueError('MicroarrayData::setup_gene_dict: empty gene ID')
            # membership test on the dict itself is O(1); .keys() built a
            # list per row under Python 2
            if gene_id in self.gene_dict:
                raise ValueError('MicroarrayData::setup_gene_dict: duplicate gene ID %s' % gene_id)
            self.gene_dict[gene_id] = i

    def num_rows(self):
        """Return the number of data rows."""
        # NOTE(review): body reconstructed; callers index `data` with
        # range(num_rows()), so the row count is taken to be len(self.data).
        return len(self.data)

    def num_columns(self):
        """Return the number of column labels."""
        return len(self.column_label)

    def read_file(self, f):
        """Read a tab-separated table from the open file f.

        Lines whose first non-blank character is '#' are skipped, as are
        whitespace-only lines (which previously raised IndexError). The first
        remaining line supplies column_label; every further line is appended
        to data, with fields that form a complete decimal number converted to
        float and all other fields kept as the raw string. Finally gene_dict
        is rebuilt.

        Raises ValueError if no header line is found, plus whatever
        setup_gene_dict raises.
        """

        def get_line(f):
            """Return the next content line of f, or '' at end of file."""
            line = f.readline()
            while line != '':
                stripped = line.strip()
                if stripped != '' and stripped[0] != '#':
                    break
                line = f.readline()
            return line

        line = get_line(f)
        if line == '':
            raise ValueError('MicroarrayData::read_file: no header line')
        if line[-1] == '\n':
            line = line[:-1]
        self.column_label = line.split('\t')
        line = get_line(f)
        while line != '':
            if line[-1] == '\n':
                line = line[:-1]
            vlist = []
            for raw_v in line.split('\t'):
                text = raw_v.strip()
                if self._FLOAT_RE.match(text):
                    vlist.append(float(text))
                else:
                    vlist.append(raw_v)
            self.data.append(vlist)
            line = get_line(f)
        self.setup_gene_dict()

    def column_type(self, col):
        """Return float if column col holds a float in every row, else str.

        Returns float for an empty table. Raises IndexError if some row has
        no field at index col.
        """
        for row in self.data:
            # `<=`: a row of length col has no element at index col
            if len(row) <= col:
                raise IndexError('MicroarrayData::column_type: column index %d out of range' % col)
            if not isinstance(row[col], float):
                return str
        return float

    # NOTE(review): placeholder name -- the original method name was not
    # present in the reviewed listing; restore it before merging.
    def write_float_columns(self, f):
        """Write all purely numeric columns to f.

        Emits one '# <index>: <label>' comment line per float column, then
        one line per row with that row's float-column values formatted as
        '%1.12g ' each.
        """
        float_columns = []
        for i in range(len(self.column_label)):
            # call column_type(i); comparing the bound method itself to a
            # type was always false, so no column was ever selected
            if self.column_type(i) is float:
                f.write('# %d: %s\n' % (i, self.column_label[i]))
                float_columns.append(i)
        for row in self.data:
            for i in float_columns:
                f.write('%1.12g ' % row[i])
            f.write('\n')

    def regulation_index(self, row_index):
        """Return the mean absolute value of the float fields of a row.

        Returns None if the row contains no float field.
        """
        magnitudes = [abs(x) for x in self.data[row_index] if isinstance(x, float)]
        if not magnitudes:
            return None
        return sum(magnitudes, 0.0) / len(magnitudes)

    def regulation_index_list(self):
        """Return (gene ID, regulation index) tuples sorted by ascending index.

        Rows for which regulation_index returns None are left out.
        """
        l = []
        for row_index in range(self.num_rows()):
            r = self.regulation_index(row_index)
            if r is not None:
                l.append((self.data[row_index][0], r))
        # key-based sort orders by the score like the former cmp function
        # did, and also works where list.sort() no longer accepts one
        l.sort(key=lambda entry: entry[1])
        return l

    def experiment_plots(self, gpcfile, espec_list, histo_max, histo_nbins, basename, cmp_basename=None):
        """Write gnuplot data files and the matching plot commands.

        For every experiment spec e in espec_list the file
        '<basename>_<e.title>.plt' is written through e.write_gnuplot_file
        and a plot command for it is written to gpcfile; if cmp_basename is
        given, '<cmp_basename>_<e.title>.plt' is added to the same plot.
        Afterwards '<basename>_strength.plt' (one regulation index per gene)
        and '<basename>_shist.plt' (histogram with histo_nbins bins covering
        0 to histo_max) are written and plotted. Regulation indices that fall
        outside the histogram range are reported on stderr and not counted.
        """
        for e in espec_list:
            fname = '%s_%s.plt' % (basename, e.title)
            # `with` closes the file even if writing raises
            with open(fname, 'w') as f:
                e.write_gnuplot_file(f, self)
            gpcfile.write('plot \'%s\' with linespoints' % fname)
            if cmp_basename:
                cmp_fname = '%s_%s.plt' % (cmp_basename, e.title)
                gpcfile.write(', \'%s\' with linespoints\n' % cmp_fname)
            gpcfile.write('\n')
            gpcfile.write('pause -1 \'Hit return\'\n')
        fname = '%s_strength.plt' % basename
        histogram = histo_nbins * [0]
        with open(fname, 'w') as f:
            f.write('# regulation index values\n')
            for gene_id, strength in self.regulation_index_list():
                hi = int((strength * histo_nbins) / histo_max)
                if 0 <= hi < histo_nbins:
                    histogram[hi] = histogram[hi] + 1
                else:
                    sys.stderr.write('arraydata_plots: regulation strength %f out of histogram range %f: %d >= %d\n' % (strength, histo_max, hi, histo_nbins))
                f.write('# %s\n' % gene_id)
                f.write('%f\n' % strength)
        gpcfile.write('plot \'%s\' with boxes' % fname)
        gpcfile.write('\n')
        gpcfile.write('pause -1 \'Hit return\'\n')
        fname = '%s_shist.plt' % basename
        with open(fname, 'w') as f:
            f.write('# regulation strength histogram\n')
            for i in range(histo_nbins):
                # float() keeps the bin's lower bound exact when histo_max is
                # an int under Python 2 integer division
                f.write('%d %f %d\n' % (i, (i * float(histo_max)) / histo_nbins, histogram[i]))
        gpcfile.write('plot \'%s\' using 2:3 with boxes\n' % fname)
        gpcfile.write('pause -1 \'Hit return\'\n')
221