1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import sys
24 import math
25 import getopt
26 import copy
27 import string
28 import StringIO
29 import re
30 import os
31
32 import transsys
33 import transsys.utils
34
35
37 line = f.readline()
38 while line != '' :
39 line = line.strip()
40 if line != '' :
41 if line[0] != '#' :
42 break
43 line = f.readline()
44 if line == '' :
45 return None, None
46 nv = line.split(':', 1)
47 if len(nv) == 1 :
48 return nv[0].strip(), None
49 else :
50 return nv[0].strip(), nv[1].strip()
51
52
54 l = []
55 for v in s.split(',') :
56 l.append(int(v.strip()))
57 return l
58
59
61 l = []
62 for v in s.split(',') :
63 l.append(v.strip())
64 return l
65
66
68
def __init__(self, name='???', ancestor=None, distance=0.0, descendant_list=None):
    """Create a tree node and link it to its ancestor and descendants.

    name -- label stored in self.name.
    ancestor -- optional parent node; when given, this node is appended
        to the parent's descendant_list.
    distance -- value stored in self.distance.
    descendant_list -- optional list of child nodes; a shallow copy is
        stored, so the caller's list object is never shared.

    Every adopted descendant has its ancestor attribute pointed at this
    node. A descendant that was previously attached to another node is
    also removed from that node's descendant_list, so a node is never
    listed under two parents at once.
    """
    self.name = name
    self.ancestor = ancestor
    if ancestor is not None:
        ancestor.descendant_list.append(self)
    self.distance = distance
    if descendant_list is None:
        descendant_list = []
    self.descendant_list = copy.copy(descendant_list)
    for d in self.descendant_list:
        former = getattr(d, 'ancestor', None)
        # detach from the previous parent; without this the old parent
        # would keep a stale reference to a child it no longer owns
        if former is not None and former is not self:
            if d in former.descendant_list:
                former.descendant_list.remove(d)
        d.ancestor = self
80
81
83 if self.ancestor is not None :
84 i = self.ancestor.descendant_list.index(self)
85 del self.ancestor.descendant_list[i]
86 self.ancestor = ancestor
87 if distance is not None :
88 self.distance = distance
89 ancestor.descendant_list.append(self)
90
91
93 if len(self.descendant_list) == 0 :
94 return '%s:%f' % (self.name, self.distance)
95 else :
96 s = '('
97 glue = ''
98 for d in self.descendant_list :
99 s = s + glue + str(d)
100 glue = ', '
101 s = s + ')'
102 return s + '%s:%f' % (self.name, self.distance)
103
104
106 s = 1
107 for d in self.descendant_list :
108 s = s + d.cluster_size()
109 return s
110
111
113 self.distance = d
114 dl = self.descendant_list
115 while len(dl) > 0 :
116 self.distance = self.distance - dl[0].distance
117 dl = dl[0].descendant_list
118
119
121
def get_cluster(s, cluster_dict):
    """Return the cluster node that token s refers to.

    A token made up entirely of decimal digits is converted to int and
    looked up in cluster_dict (KeyError if that index is not present).
    Any other token is treated as a leaf name: a new ClusterNode with
    that name is created, stored in cluster_dict under the name, and
    returned.
    """
    # \Z anchors the match at the end, so a name that merely starts with
    # a digit (e.g. "3abc") is handled as a leaf rather than fed to int()
    if re.match(r'[0-9]+\Z', s):
        return cluster_dict[int(s)]
    leaf = ClusterNode(s)
    cluster_dict[s] = leaf
    return leaf
129
130 int_re = re.compile('[0-9]+')
131 cluster_dict = {}
132 cluster_index = 1
133 line = f.readline()
134 a = None
135 while line :
136 if line.strip() == '' :
137 break
138 l = line.split()
139 d0 = get_cluster(l[0], cluster_dict)
140 d1 = get_cluster(l[1], cluster_dict)
141 distance = float(l[2])
142 a = ClusterNode('', descendant_list = [d0, d1])
143 d0.set_ultrametric_distance(distance)
144 d1.set_ultrametric_distance(distance)
145 cluster_dict[cluster_index] = a
146 cluster_index = cluster_index + 1
147 line = f.readline()
148 if a.cluster_size() != len(cluster_dict) :
149 raise StandardError, 'parse_cluster: unconnected components'
150 return a
151
152
154 return 'regulon_%02d' % r
155
156
158 return 'strp%04d_%02d' % (r, i)
159
160
162 return 'sgene%04d_%02d' % (r, i)
163
164
166 return 'gctrl%04d' % r
167
168
170 return 'fctrl%04d' % r
171
172
174
def __init__(self, name, controlling_factors=None, coregulated_factors=None):
    """Store the regulon name and private copies of its factor name lists.

    name -- regulon name.
    controlling_factors -- optional sequence of factor names; None is
        treated as an empty list.
    coregulated_factors -- optional sequence of factor names; None is
        treated as an empty list.
    """
    self.name = name
    # slice copies keep the instance independent of the caller's sequences
    self.controlling_factors = []
    if controlling_factors is not None:
        self.controlling_factors = controlling_factors[:]
    self.coregulated_factors = []
    if coregulated_factors is not None:
        self.coregulated_factors = coregulated_factors[:]
183
184
186 r = ClusterNode(string.join(self.controlling_factors, '/'))
187 for f in self.controlling_factors :
188 ClusterNode(f, r, 2.0)
189 c = ClusterNode('', r, 1.0)
190 for f in self.coregulated_factors :
191 ClusterNode(f, c, 1.0)
192 return r
193
194
196 s = 'RegulonDescriptor %s\n' % self.name
197 s = s + 'controlling_factors: %s\n' % string.join(self.controlling_factors, ', ')
198 s = s + 'coregulated_factors: %s\n' % string.join(self.coregulated_factors, ', ')
199 return s
200
201
203
def parse_factor_list(f, label):
    """Read one line from f and return the factor names listed after label.

    The line must start with "<label>:"; the remainder of the line is
    handed to parse_string_csv and its result is returned.

    Raises StandardError if the line does not start with the expected
    label (this includes reaching end of file, where readline() returns
    an empty string).
    """
    prefix = '%s:' % label
    line = f.readline()
    if not line.startswith(prefix):
        raise StandardError('RegulonDescriptor::parse: expected label "%s" but found "%s"' % (label, line.strip()))
    return parse_string_csv(line[len(prefix):])
210
211 line = f.readline()
212 m = re.match('RegulonDescriptor (.*)', line)
213 if m is None :
214 raise StandardError, 'RegulonDescriptor::parse: bad header line "%s"' % line.strip()
215 self.name = m.group(1).strip()
216 self.controlling_factors = parse_factor_list(f, 'controlling_factors')
217 self.coregulated_factors = parse_factor_list(f, 'coregulated_factors')
218
219
221 s = self.name
222 for f in self.controlling_factors :
223 s = s + '\t%s' % f
224 for f in self.coregulated_factors :
225 s = s + '\t%s' % f
226 return s
227
228
230
def __init__(self, tp_name, reg_rtp, struct_rtp, regulon_size_list, radius):
    """Generate a transsys program and attach one regulon per list entry.

    tp_name -- name passed to reg_rtp.generate_transsys().
    reg_rtp -- object whose generate_transsys() yields the program that
        is stored in self.tp.
    struct_rtp -- parameter source handed to add_regulon(); its rng
        attribute is also used to pick the controlling genes.
    regulon_size_list -- one entry per regulon, giving the number of
        structural genes in that regulon.
    radius -- used as centre coordinate and radius for layout positions.

    For each entry a gene index is drawn without replacement from the
    genes of the generated program, and add_regulon() is called for it.
    self.tp.resolve() is called once all regulons have been added.
    """
    x0 = radius
    y0 = radius
    r_radius = 0.6 * radius
    self.tp = reg_rtp.generate_transsys(tp_name)
    self.noncontrolling_factors = self.tp.factor_names()
    self.controlling_factors = []
    self.perturbed_factors = []
    self.structural_factors = []
    self.regulon_list = []
    self.regulons = {}
    self.tp.dot_positions_circle(x0, y0, r_radius)
    a_step = 2.0 * math.pi / len(self.tp.gene_list)
    a_range = 0.6 * a_step
    # an explicit list is needed because entries are removed as they are
    # drawn; a bare range() result is not guaranteed to support deletion
    rlist = list(range(len(self.tp.gene_list)))
    for num_structural_genes in regulon_size_list:
        r = struct_rtp.rng.random_range(len(rlist))
        g = rlist.pop(r)
        angle = g * a_step
        self.add_regulon(g, num_structural_genes, struct_rtp, angle, a_range, radius)
    self.tp.resolve()
253
254
def add_regulon(self, g, num_structural_genes, struct_rtp, angle, a_range, radius):
    """Add a regulon, consisting of num_structural_genes controlled by
    gene number g, to transsys program self.tp. Parameters for the promoter and
    the factor are generated using RandomTranssysParameters instance struct_rtp,
    but not all facilities are used.

    Gene g and its product are renamed to the regulon control names and
    the product's old name is removed from self.noncontrolling_factors.
    Each structural gene gets a constitutive and an activation promoter
    element (activated by the controlling factor) and a 'pos' dot
    attribute; positions are spread over a_range around angle on a
    circle of the given radius centred at (radius, radius).

    Updates self.controlling_factors, self.regulons, self.regulon_list
    and self.structural_factors, and appends to self.tp.factor_list and
    self.tp.gene_list.
    """
    x0 = radius
    y0 = radius
    a_start = angle - 0.5 * a_range
    control_gene = self.tp.gene_list[g]
    control_gene.name = regcontrol_gene_name(g)
    # raises ValueError if the product is not (or no longer) listed
    self.noncontrolling_factors.remove(control_gene.product.name)
    reg_name = regulon_name(g)
    ctrl_name = regcontrol_factor_name(g)
    self.controlling_factors.append(ctrl_name)
    control_gene.product.name = ctrl_name
    self.regulons[reg_name] = [ctrl_name]

    cfactor = control_gene.product_name()
    angle = a_start
    # the step is only defined for two or more genes; a single gene is
    # placed at a_start
    if num_structural_genes > 1:
        angle_step = a_range / (num_structural_genes - 1)
    else:
        angle_step = None
    coregulated_factors = []
    for i in range(num_structural_genes):
        struct_name = struct_protein_name(g, i)
        self.regulons[reg_name].append(struct_name)
        # the order of the nextval() calls is kept as in the original
        # statement sequence so generated parameter streams are unchanged
        decay = transsys.ExpressionNodeValue(struct_rtp.decay.nextval())
        diffusibility = transsys.ExpressionNodeValue(struct_rtp.diffusibility.nextval())
        self.tp.factor_list.append(transsys.Factor(struct_name, decay, diffusibility))
        coregulated_factors.append(struct_name)
        constitutive = transsys.ExpressionNodeValue(struct_rtp.constitutive.nextval())
        km = transsys.ExpressionNodeValue(struct_rtp.km_activation.nextval())
        vmax = transsys.ExpressionNodeValue(struct_rtp.vmax_activation.nextval())
        p = [
            transsys.PromoterElementConstitutive(constitutive),
            transsys.PromoterElementActivate(km, vmax, [cfactor]),
        ]
        sgene = transsys.Gene(struct_gene_name(g, i), struct_name, p)
        sgene.dot_attributes['pos'] = '%f,%f!' % (x0 + radius * math.cos(angle), y0 + radius * math.sin(angle))
        if angle_step is not None:
            angle = angle + angle_step
        self.tp.gene_list.append(sgene)
    self.regulon_list.append(RegulonDescriptor(reg_name, [cfactor], coregulated_factors))
    self.structural_factors.extend(coregulated_factors)
290
291
293 return self.noncontrolling_factors + self.controlling_factors
294
296 if num_perturbed_factors is None :
297 self.perturbed_factors = self.noncontrolling_factors[:]
298 return
299 if num_perturbed_factors > len(self.noncontrolling_factors) :
300 raise StandardError, 'RegStructTranssys::choose_perturbed_factors: cannot choose %d factors out of %d' % (num_perturbed_factors, len(self.noncontrolling_factors))
301 l = range(len(self.noncontrolling_factors))
302 self.perturbed_factors = []
303 for i in xrange(num_perturbed_factors) :
304 r = rng.random_range(len(l))
305 j = l[r]
306 del l[r]
307 self.perturbed_factors.append(self.noncontrolling_factors[j])
308
309
311 f.write('perturbed')
312 for factor in self.perturbed_factors :
313 f.write('\t%s' % factor)
314 f.write('\n')
315 f.write('nonperturbed')
316 for factor in self.nonperturbed_factor_list() :
317 f.write('\t%s' % factor)
318 f.write('\n')
319 for r in self.regulons.keys() :
320 f.write(r)
321 for factor in self.regulons[r] :
322 f.write('\t%s' % factor)
323 f.write('\n')
324
325
327
def __init__(self, skew, dispersion, rndseed, perturbed_factor_list=None):
    """Store the perturbation parameters and create a seeded generator.

    skew, dispersion -- stored unchanged on the instance.
    rndseed -- seed passed to transsys.utils.transrnd(); the resulting
        generator is kept in self.rng.
    perturbed_factor_list -- optional list of factor names; a deep copy
        is stored (None is kept as None).
    """
    factors = copy.deepcopy(perturbed_factor_list)
    self.perturbed_factor_list = factors
    self.skew, self.dispersion = skew, dispersion
    self.rng = transsys.utils.transrnd(rndseed)
333
334
336
def perturb(c, factor_name, skew, dispersion, rng):
    """Return (c + skew) scaled by 2 raised to a random exponent.

    The exponent is one draw of rng.gauss() multiplied by dispersion.
    factor_name is accepted but not used in the computation.
    """
    exponent = rng.gauss() * dispersion
    return (c + skew) * 2.0 ** exponent
342
343 if self.perturbed_factor_list is None :
344 return perturb(c, factor_name, self.skew, self.dispersion, self.rng)
345 if factor_name in self.perturbed_factor_list :
346 return perturb(c, factor_name, self.skew, self.dispersion, self.rng)
347 else :
348 return c
349
350
352
def __init__(self, halfmax, rndseed, perturbed_factor_list=None):
    """Store the half-maximum parameter and create a seeded generator.

    halfmax -- stored unchanged on the instance.
    rndseed -- seed passed to transsys.utils.transrnd(); the resulting
        generator is kept in self.rng.
    perturbed_factor_list -- optional list of factor names; a deep copy
        is stored (None is kept as None).
    """
    factors = copy.deepcopy(perturbed_factor_list)
    self.perturbed_factor_list = factors
    self.halfmax = halfmax
    self.rng = transsys.utils.transrnd(rndseed)
357
358
360
def perturb(c, factor_name, halfmax, rng):
    """Return a replacement value halfmax * log(x) / log(0.5).

    x is drawn from rng.rnd(); a draw of 0.5 therefore yields halfmax.
    The previous value c and factor_name are accepted but not used.

    math.log() is undefined for non-positive arguments, so such draws
    are discarded and a new value is drawn.
    NOTE(review): assumes rng.rnd() normally returns values in (0, 1] --
    confirm against the generator's documentation.
    """
    x = rng.rnd()
    while x <= 0.0:
        x = rng.rnd()
    return halfmax * math.log(x) / math.log(0.5)
366
367 if self.perturbed_factor_list is None :
368 return perturb(c, factor_name, self.halfmax, self.rng)
369 if factor_name in self.perturbed_factor_list :
370 return perturb(c, factor_name, self.halfmax, self.rng)
371 else :
372 return c
373
374
392
393
395
397 self.transsys_program = None
398 self.regnet_factors = None
399 self.structural_factors = None
400 self.perturbed_factors = None
401 self.regulon_list = []
402 self.min_initial_concentration = None
403 self.max_initial_concentration = None
404 self.num_environments = None
405 self.perturbation_type = None
406 self.delta_t = 1
407 self.samples_per_environment = None
408 self.array_offset = 0.0
409 self.array_dispersion = 1.0
410 self.num_timesteps_init = 0
411 self.rndseed = 1
412 self.histo_max = 1.0
413 self.histo_nbins = 0
414 self.basename = None
415 self.exp_threshold = 1e-8
416
417
419 if self.perturbed_factors is None :
420 raise StandardError, 'RTAControlParameters::environment_perturber: perturbed_factors list missing'
421 if self.perturbation_type == 'ExpGauss' :
422 return ExpGaussPerturber(self.perturbation_skew, self.perturbation_dispersion, self.rndseed, self.perturbed_factors)
423 elif self.perturbation_type == 'ExponentialReplacement' :
424 return ExponentialReplacementPerturber(self.perturbation_halfmax, self.rndseed, self.perturbed_factors)
425 elif self.perturbation_type == 'UniformReplacement' :
426 return UniformReplacementPerturber(self.perturbation_min, self.perturbation_max, self.rndseed, self.perturbed_factors)
427 else :
428 raise StandardError, 'RTAControlParameters::environment_perturber: unknown type "%s"' % self.perturbation_type
429
430
432 l = []
433 for f in self.transsys_program.factor_names() :
434 if f not in self.perturbed_factors :
435 l.append(f)
436 return l
437
438
440 l = []
441 for f in self.regnet_factors :
442 if f not in self.perturbed_factors :
443 inlist = 1
444 for r in self.regulon_list :
445 inlist = inlist and (f not in r.controlling_factors)
446 inlist = inlist and (f not in r.coregulated_factors)
447 if not inlist :
448 break
449 if inlist :
450 l.append(f)
451 return l
452
453
462
463
466
467
470
471
473 return '%s.tra' % self.basename
474
475
477 return '%s.dot' % self.basename
478
479
481 return '%s.gpc' % self.basename
482
483
489
490
496
497
499 return '%s_nonperturbed.tre' % self.basename
500
501
503 return '%s_nonperturbed' % self.basename
504
505
507 return '%s_clusterlist.txt' % self.basename
508
509
511 return '%s_nonperturbed_euclidean_avg.tre' % self.basename
512
513
515 return '%s_nonperturbed_euclidean_single.tre' % self.basename
516
517
519 return '%s_nonperturbed_euclidean_complete.tre' % self.basename
520
521
523 return '%s_expspec.dat' % self.basename
524
525
527 return '%s_ggroups.dat' % self.basename
528
529
531 return '%s.r' % self.basename
532
533
537
538
545
546
548 f = open(self.r_fname(), 'w')
549 f.write('library(transarr)\n')
550 f.write('distfunc <- list();\n')
551 f.write('distfunc[["euclidean"]] <- function(m) { dist(m, method = "euclidean", upper = TRUE); };\n')
552 f.write('distfunc[["eisen"]] <- function(m) { dist.eisen(m); };\n');
553 f.write('clustfunc <- list();\n');
554 f.write('clustfunc[["average"]] <- function(dm) { hclust(dm, method = "average"); };\n')
555 f.write('clustfunc[["single"]] <- function(dm) { hclust(dm, method = "single"); };\n')
556 f.write('clustfunc[["complete"]] <- function(dm) { hclust(dm, method = "complete"); };\n')
557 f.write('%sarr <- read.transarr("%s", espfile = "%s", ggfile = "%s");\n' % (self.basename, self.complete_array_fname(), self.experimentspecs_fname(), self.genegroups_fname()))
558 f.write('expnames <- setdiff(names(attr(%sarr, "expspec")), "ref_init");\n'% self.basename)
559 f.write('allenv <- c();\n')
560 f.write('for (env in expnames)\n')
561 f.write('{\n')
562 f.write(' allenv <- c(allenv, attr(%sarr, "expspec")[[env]]);\n' % self.basename)
563 f.write('}\n')
564 f.write('attr(%sarr, "expspec")[["allenv"]] <- allenv;\n' % self.basename)
565 f.write('ggnames <- "nonperturbed";\n')
566 f.write('for (df in names(distfunc))\n')
567 f.write('{\n')
568 f.write(' for (cl in names(clustfunc))\n')
569 f.write(' {\n')
570 f.write(' fname <- sprintf("%s_%%s_%%s.tre", df, cl);\n' % self.nonperturbed_clustertree_basename())
571 f.write(' write(fname, file = "");\n')
572 f.write(' clustset <- cluster.transarr(%sarr, distfunc[[df]], clustfunc[[cl]], expnames = expnames, ggnames = ggnames);\n' % self.basename)
573 f.write(' write(clusterstring(clustset), fname);\n')
574 f.write(' fname <- sprintf("%s_%%s_%%s_allenv.tre", df, cl);\n' % self.nonperturbed_clustertree_basename())
575 f.write(' allclust <- cluster.transarr(%sarr, distfunc[[df]], clustfunc[[cl]], expnames = "allenv", ggnames = ggnames);\n' % self.basename)
576 f.write(' write(clusterstring(allclust), fname);\n')
577 f.write(' }\n')
578 f.write('}\n')
579
580
581
585
586
588 f = open(self.treelist_fname(), 'r')
589 line = f.readline()
590 while line :
591 fname = line.strip()
592 if fname != '' :
593 if fname[-4:] != '.tre' :
594 raise StandardError, 'RTAControlParameters::run_plottree: malformed file name "%s"' % fname
595 psname = '%s.ps' % fname[:-4]
596 cmd = 'plottree -f 10 -u -l r -B -i %s -b %s -o %s' % (self.nonperturbed_clustertree_fname(), fname, psname)
597 print cmd
598 os.system(cmd)
599 line = f.readline()
600
601
603 if self.histo_nbins == 0 :
604 self.histo_nbins = num_factors / 20
605 if self.histo_nbins < 5 :
606 self.histo_nbins = 5
607
608
610 if self.transsys_program is None :
611 return 0
612 if self.regnet_factors is None :
613 return 0
614 if self.structural_factors is None :
615 return 0
616 if self.perturbed_factors is None :
617 return 0
618 if self.perturbation_type is None :
619 return 0
620 if self.regulon_list is None :
621 return 0
622 if self.min_initial_concentration is None :
623 return 0
624 if self.max_initial_concentration is None :
625 return 0
626 if self.num_environments is None :
627 return 0
628 if self.samples_per_environment is None :
629 return 0
630 if self.basename is None :
631 return 0
632 return 1
633
634
636
def write_perturbation_parameters(f, rta):
    """Write the perturbation section for rta to file object f.

    Writes a "perturbation" line, the perturbation_type line, and then
    the parameters belonging to that type:
      ExpGauss               -- perturbation_skew, perturbation_dispersion
      ExponentialReplacement -- perturbation_halfmax
      UniformReplacement     -- perturbation_min, perturbation_max

    Raises StandardError for any other perturbation type; the two
    leading lines have already been written at that point.
    """
    ptype = rta.perturbation_type
    f.write('perturbation\n')
    f.write('perturbation_type: %s\n' % ptype)
    if ptype == 'ExpGauss':
        f.write('perturbation_skew: %g\n' % rta.perturbation_skew)
        f.write('perturbation_dispersion: %g\n' % rta.perturbation_dispersion)
    elif ptype == 'ExponentialReplacement':
        f.write('perturbation_halfmax: %g\n' % rta.perturbation_halfmax)
    elif ptype == 'UniformReplacement':
        f.write('perturbation_min: %g\n' % rta.perturbation_min)
        f.write('perturbation_max: %g\n' % rta.perturbation_max)
    else:
        raise StandardError('RTAControlParameters::write: unknown perturbation type "%s"' % ptype)
650
651 if not self.complete() :
652 sys.stderr.write('RTAControlParameters::write: writing incomplete instance\n')
653 f.write('min_initial_concentration: %g\n' % self.min_initial_concentration)
654 f.write('max_initial_concentration: %g\n' % self.max_initial_concentration)
655 f.write('num_environments: %d\n' % self.num_environments)
656 write_perturbation_parameters(f, self)
657 f.write('delta_t: %d\n' % self.delta_t)
658 f.write('samples_per_environment: %d\n' % self.samples_per_environment)
659 f.write('array_offset: %g\n' % self.array_offset)
660 f.write('array_dispersion: %g\n' % self.array_dispersion)
661 f.write('exp_threshold: %g\n' % self.exp_threshold)
662 f.write('num_timesteps_init: %d\n' % self.num_timesteps_init)
663 f.write('rndseed: %d\n' % self.rndseed)
664 f.write('histo_max: %g\n' % self.histo_max)
665 f.write('histo_nbins: %d\n' % self.histo_nbins)
666 f.write('basename: %s\n' % self.basename)
667 f.write('regnet_factors: %s\n' % string.join(self.regnet_factors, ', '))
668 f.write('structural_factors: %s\n' % string.join(self.structural_factors, ', '))
669 f.write('perturbed_factors: %s\n' % string.join(self.perturbed_factors, ', '))
670 for r in self.regulon_list :
671 f.write('regulon\n')
672 f.write(str(r))
673 f.write('transsys_program\n')
674 f.write(str(self.transsys_program))
675 f.write('transsys_program: end\n')
676
677
679
def parse_transsys_program(f):
    """Read an embedded transsys program from f and return the parse result.

    Lines are collected up to (not including) a line whose stripped
    content is "transsys_program: end". The collected text is wrapped in
    a StringIO object and parsed with transsys.TranssysProgramParser;
    the value of its parse_transsys() call is returned.

    Raises StandardError if end of file is reached before the end tag.
    """
    collected = []
    line = f.readline()
    while line:
        if line.strip() == 'transsys_program: end':
            break
        # gather the pieces in a list and join once, instead of growing
        # a string line by line
        collected.append(line)
        line = f.readline()
    if line == '':
        raise StandardError('no end tag for transsys program -- premature end of file')
    sf = StringIO.StringIO(''.join(collected))
    p = transsys.TranssysProgramParser(sf)
    return p.parse_transsys()
693
def parse_perturbation_spec(f, rta):
    """Read a perturbation specification from f into attributes of rta.

    The next name/value pair (as returned by get_namevalue) must be
    perturbation_type; it is stored in rta.perturbation_type. Depending
    on the type, the following pairs are then required, in this order,
    and stored as floats on rta:
      ExpGauss               -- perturbation_skew, perturbation_dispersion
      ExponentialReplacement -- perturbation_halfmax
      UniformReplacement     -- perturbation_min, perturbation_max

    Raises StandardError if an expected name is not found, if a numeric
    parameter has no value, or if the perturbation type is unknown.
    float() may raise ValueError for a malformed number.
    """

    def expect_value(expected_name):
        # read the next name/value pair and insist on the given name
        n, v = get_namevalue(f)
        if n != expected_name:
            raise StandardError('RTAControlParameters::parse: expected %s but got "%s"' % (expected_name, n))
        return v

    def expect_float(expected_name):
        # like expect_value, but the value must be present and numeric
        v = expect_value(expected_name)
        if v is None:
            raise StandardError('RTAControlParameters::parse: no value given for %s' % expected_name)
        return float(v)

    rta.perturbation_type = expect_value('perturbation_type')
    if rta.perturbation_type == 'ExpGauss':
        rta.perturbation_skew = expect_float('perturbation_skew')
        rta.perturbation_dispersion = expect_float('perturbation_dispersion')
    elif rta.perturbation_type == 'ExponentialReplacement':
        rta.perturbation_halfmax = expect_float('perturbation_halfmax')
    elif rta.perturbation_type == 'UniformReplacement':
        rta.perturbation_min = expect_float('perturbation_min')
        rta.perturbation_max = expect_float('perturbation_max')
    else:
        # report the offending type itself, not the attribute name
        raise StandardError('RTAControlParameters::parse: unknown perturbation type "%s"' % rta.perturbation_type)
724
725 float_re = re.compile('[+-]?([0-9]+(\\.[0-9]+)?)|(\\.[0-9]+)([Ee][+-]?[0-9]+)?')
726 int_re = re.compile('[0-9]+')
727 n, v = get_namevalue(f)
728 while n is not None :
729 if n == 'transsys_program' :
730 self.transsys_program = parse_transsys_program(f)
731 elif n == 'regnet_factors' :
732 self.regnet_factors = parse_string_csv(v)
733 elif n == 'structural_factors' :
734 self.structural_factors = parse_string_csv(v)
735 elif n == 'perturbed_factors' :
736 self.perturbed_factors = parse_string_csv(v)
737 elif n == 'regulon' :
738 r = RegulonDescriptor('')
739 r.parse(f)
740 self.regulon_list.append(r)
741 elif n == 'min_initial_concentration' :
742 self.min_initial_concentration = float(v)
743 elif n == 'max_initial_concentration' :
744 self.max_initial_concentration = float(v)
745 elif n == 'num_environments' :
746 self.num_environments = int(v)
747 elif n == 'perturbation' :
748 parse_perturbation_spec(f, self)
749 elif n == 'delta_t' :
750 self.delta_t = int(v)
751 elif n == 'samples_per_environment' :
752 self.samples_per_environment = int(v)
753 elif n == 'array_offset' :
754 self.array_offset = float(v)
755 elif n == 'array_dispersion' :
756 self.array_dispersion = float(v)
757 elif n == 'exp_threshold' :
758 self.exp_threshold = float(v)
759 elif n == 'num_timesteps_init' :
760 self.num_timesteps_init = int(v)
761 elif n == 'rndseed' :
762 self.rndseed = int(v)
763 elif n == 'histo_max' :
764 self.histo_max = float(v)
765 elif n == 'histo_nbins' :
766 self.histo_nbins = int(v)
767 elif n == 'basename' :
768 self.basename = v
769 else :
770 raise StandardError, 'RTAControlParameters::parse: unknown attribute "%s"' % n
771 n, v = get_namevalue(f)
772