%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  This is the master file for the model_train graph
%
%  NOTE: "%" starts a comment that runs to the end of the line, so every
%  record field below must stay on its own line (or ahead of the comment
%  on its line); joining lines would hide data inside a comment.
%

#include "commonParams.txt"

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DENSE PROBABILITY MASS FUNCTIONS
%
% none needed at this time ...

DPMF_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DIRICHLET TABLES
%
% none needed at this time ...

DIRICHLET_TAB_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DECISION TREEs
%
% a decision tree is needed whenever we use a DeterministicCPT or a mapping;
% here we list first those DTs that are paired with a DeterministicCPT of the
% same name (w/o the _DT ending), then those that are used as part of a
% mapping of some sort ...
%
% layout of each record, as used throughout this section:
%     index, name, number of parents, then the tree itself
%     "-1 v"                 is a leaf that returns v
%     "P N v1 v2 .. default" splits on parent P into N branches; the N
%                            subtrees follow in the same order as the values

DT_IN_FILE inline

20 % total number of decision trees to follow

0 % index
setZero_DT % name
0 % no parents
  -1 0 % always return 0

1 % index
setOne_DT % name
0 % no parents
  -1 1 % always return 1

2 % index
badValue_DT % name
0 % no parents
  -1 999999 % always return a "bad" value

%
% here are the indexes and names of the states, with the emitClass and
% lengthClass that they each map to -- the states are listed in an order
% that loosely follows a path of a protein that starts with a short
% cytoplasmic (inside) loop, then goes thru the membrane to the non-
% cytoplasmic side (outside), then back thru the membrane ...
%
% the value "0" in all cases is being held back in case it comes in
% handy as some sort of "special" value ...
%
%   STATE                        NEXT        EMIT    LENGTH
%     #    name                  STATE(s)    CLASS   CLASS
%
%     0    unknown state         0            0       0
%     1    loop_i                2            1       1      [1,20]
%     2    cap_im                3            2       2      [4]
%     3    helix_io              4            3       3      [5,25]
%     4    cap_mo                5,8,9        4       2      [4]
%     5    short_loop_obg        6            5       4      [10]
%     6    short-outside-glob    7            6       5      [soExp]
%     7    short_loop_oag        12           5       4      [10]
%     8    loop_o                12           5       6      [1,20]
%     9    long-loop_obg         10           7       4      [10]
%    10    long-outside-glob     27           6       7      [loExp]
%    27    long-outside-glob2    28           6      14      [loExp2]
%    28    long-outside-glob3    11           6      15      [loExp3]
%    11    long-loop_oag         12           7       4      [10]
%    12    cap_om                13           4       2      [4]
%    13    helix_oi              14           3       3      [5,25]
%    14    cap_mi                1,15         2       2      [4]
%    15    loop_ibg              16           1       4      [10]
%    16    inside-glob           17           6       8      [iExp]
%    17    loop_iag              2            1       4      [10]
%
% %%%%%%%% added states for SignalP model (dsv) %%%%%%%%%%%%%%%%%
%
%    29    methionine            26          15      12
%    26    n-region-exp          18           8      13      [nExp]
%    18    n-region              19           8       9      [1,10]
%    19    h-region              20           9      10      [6,20]
%    20    c-region              21          10      11      [1,15]
%    21    c3                    22          11      12      [1]
%    22    c2                    23          12      12      [1]
%    23    c1                    24          13      12      [1]
%    24    cut                   6,10,25     14      12      [1]
%
% %%%%%%%% added one new state to model globular-only proteins %%
%
%    25    globular              25           6      12
%
% in the three state lookup trees below, each leaf is annotated with the
% state it belongs to; the "default" branch is reached by any state value
% that is not listed explicitly (in the table above that is state 29)

3 % index
state2emitClass_DT % name
1 % one parent (the state)
0 30 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default % split on p0(state); 30 splits
  -1 0 %  0 unknown state
  -1 1 %  1 loop_i
  -1 2 %  2 cap_im
  -1 3 %  3 helix_io
  -1 4 %  4 cap_mo
  -1 5 %  5 short_loop_obg
  -1 6 %  6 short-outside-glob
  -1 5 %  7 short_loop_oag
  -1 5 %  8 loop_o
  -1 7 %  9 long-loop_obg
  -1 6 % 10 long-outside-glob
  -1 7 % 11 long-loop_oag
  -1 4 % 12 cap_om
  -1 3 % 13 helix_oi
  -1 2 % 14 cap_mi
  -1 1 % 15 loop_ibg
  -1 6 % 16 inside-glob
  -1 1 % 17 loop_iag
  -1 8 % 18 n-region
  -1 9 % 19 h-region
  -1 10 % 20 c-region
  -1 11 % 21 c3
  -1 12 % 22 c2
  -1 13 % 23 c1
  -1 14 % 24 cut
  -1 6 % 25 globular
  -1 8 % 26 n-region-exp
  -1 6 % 27 long-outside-glob2
  -1 6 % 28 long-outside-glob3
  -1 15 % default (29 methionine)

4 % index
state2lengthClass_DT % name
1 % one parent (the state)
0 30 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default % split on p0(state); 30 splits
  -1 0 %  0 unknown state
  -1 1 %  1 loop_i
  -1 2 %  2 cap_im
  -1 3 %  3 helix_io
  -1 2 %  4 cap_mo
  -1 4 %  5 short_loop_obg
  -1 5 %  6 short-outside-glob
  -1 4 %  7 short_loop_oag
  -1 6 %  8 loop_o
  -1 4 %  9 long-loop_obg
  -1 7 % 10 long-outside-glob
  -1 4 % 11 long-loop_oag
  -1 2 % 12 cap_om
  -1 3 % 13 helix_oi
  -1 2 % 14 cap_mi
  -1 4 % 15 loop_ibg
  -1 8 % 16 inside-glob
  -1 4 % 17 loop_iag
  -1 9 % 18 n-region
  -1 10 % 19 h-region
  -1 11 % 20 c-region
  -1 12 % 21 c3
  -1 12 % 22 c2
  -1 12 % 23 c1
  -1 12 % 24 cut
  -1 12 % 25 globular
  -1 13 % 26 n-region-exp
  -1 14 % 27 long-outside-glob2
  -1 15 % 28 long-outside-glob3
  -1 12 % default (29 methionine)

% TOPOLOGY CLASSES
%
% there are currently 5 different topology classes:
%     0 : unused
%     1 : inside
%     2 : membrane
%     3 : outside
%     4 : signal-peptide

5 % index
state2topoClass_DT % name
1 % one parent (the state)
0 30 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default % split on p0(state); 30 splits
  -1 0 %  0 unknown state
  -1 1 %  1 loop_i
  -1 2 %  2 cap_im
  -1 2 %  3 helix_io
  -1 2 %  4 cap_mo
  -1 3 %  5 short_loop_obg
  -1 3 %  6 short-outside-glob
  -1 3 %  7 short_loop_oag
  -1 3 %  8 loop_o
  -1 3 %  9 long-loop_obg
  -1 3 % 10 long-outside-glob
  -1 3 % 11 long-loop_oag
  -1 2 % 12 cap_om
  -1 2 % 13 helix_oi
  -1 2 % 14 cap_mi
  -1 1 % 15 loop_ibg
  -1 1 % 16 inside-glob
  -1 1 % 17 loop_iag
  -1 4 % 18 n-region
  -1 4 % 19 h-region
  -1 4 % 20 c-region
  -1 4 % 21 c3
  -1 4 % 22 c2
  -1 4 % 23 c1
  -1 4 % 24 cut
  -1 3 % 25 globular
  -1 4 % 26 n-region-exp
  -1 3 % 27 long-outside-glob2
  -1 3 % 28 long-outside-glob3
  -1 4 % default (29 methionine)

% LABELS
%
% there are 10 different labels:
%     . : 0     wildcard
%
%     i : 1     inside / cytoplasmic                     1
%     M : 2     membrane                                 2
%     o : 3     outside / non-cytoplasmic *short*        3
%     O : 4     outside / non-cytoplasmic *long*         4
%
%     n : 5     -- different regions of a signal peptide
%     h : 6
%     c : 7
%     C : 8
%     s : 9     -- generic signal peptide label (n,h,c)
%
% a relationship needs to be defined between which labels correspond
% to which states -- for example the label "i" corresponds to 4 different
% allowable states: loop_i, loop_ibg, inside-glob, and loop_iag
%
% in addition, the integer label *zero* will be interpreted as
% placing no constraint whatsoever on the current state
%
% so the label to state relationships are such that we want this decision tree
% to return TRUE (1) under the following conditions:
%     p1=0    p0=anything                      (*)
%     p1=1    p0=1,15,16,17                    (i)
%     p1=2    p0=2,3,4,12,13,14                (m)
%     p1=3    p0=5,6,7,8                       (o)
%     p1=4    p0=9,10,11,25,27,28              (O)
%
%     p1=5    p0=18,26,29                      (n-region)
%     p1=6    p0=19                            (h-region)
%     p1=7    p0=20,21,22,23                   (c-region,c3,c2,c1)
%     p1=8    p0=24                            (cut)
%     p1=9    p0=18,19,20,21,22,23,26,29       (s)

6 % index
stateANDlabel_DT % name
2 % two parents (state, label)
1 10 0 1 2 3 4 5 6 7 8 default % split on p1(label); 10 splits
  -1 1 % if p1=0, always return TRUE
  0 3 1 15:17 default % if p1=1, split on p0(state); 3 splits
    -1 1 % if p0=1, return TRUE
    -1 1 % if p0 in [15,17], return TRUE
    -1 0 % otherwise, return FALSE
  0 3 2:4 12:14 default % if p1=2, split on p0(state); 3 splits
    -1 1 % if p0 in [2,4], return TRUE
    -1 1 % if p0 in [12,14], return TRUE
    -1 0 % otherwise, return FALSE
  0 2 5:8 default % if p1=3, split on p0(state); 2 splits
    -1 1 % if p0 in [5,8], return TRUE
    -1 0 % otherwise, return FALSE
  0 4 9:11 25 27:28 default % if p1=4, split on p0(state); 4 splits
    -1 1 % if p0 in [9,11], return TRUE
    -1 1 % if p0=25, return TRUE
    -1 1 % if p0 in [27,28], return TRUE
    -1 0 % otherwise, return FALSE
  0 4 18 26 29 default % if p1=5, split on p0(state); 4 splits
    -1 1 % if p0=18, return TRUE
    -1 1 % if p0=26, return TRUE
    -1 1 % if p0=29, return TRUE
    -1 0 % otherwise, return FALSE
  0 2 19 default % if p1=6, split on p0(state); 2 splits
    -1 1 % if p0=19, return TRUE
    -1 0 % otherwise, return FALSE
  0 2 20:23 default % if p1=7, split on p0(state); 2 splits
    -1 1 % if p0 in [20,23], return TRUE
    -1 0 % otherwise, return FALSE
  0 2 24 default % if p1=8, split on p0(state); 2 splits
    -1 1 % if p0=24, return TRUE
    -1 0 % otherwise, return FALSE
  0 4 18:23 26 29 default % if p1=9, split on p0(state); 4 splits
    -1 1 % if p0 in [18,23], return TRUE
    -1 1 % if p0=26, return TRUE
    -1 1 % if p0=29, return TRUE
    -1 0 % otherwise, return FALSE

7 % index
copyParent_DT % name
1 % one parent
  -1 {p0} % just return parent value

8 % index
incParent_DT % name
1 % one parent
  -1 {p0+1} % return parent+1

9 % index
decParent_DT % name
1 % one parent
0 2 0 default % split on p0; 2 splits
  -1 0 % if p0=0, return 0
  -1 {p0-1} % if p0>0, return p0-1

%
% this decision tree is used to constrain the final state (which is the parent
% in this case), and the value returned is 1 if the final state is a valid
% final state, and is 0 if the final state is NOT valid ...
%
% at this time, any state that is NOT in the membrane is considered a valid
% final state, therefore the INVALID states are [2,3,4,12,13,14]
%
% --> changing to make the following states INVALID end states :
%     2,3,4,7,11,12,13,14,17,18,19,20,21,22,23,24,26,29
%
% --> VALID end states : 1, 5, 6, 8, 9, 10, 15, 16, 25, 27, 28
%

10 % index
finalState_DT % name
1 % one parent (state)
0 7 2:4 7 11:14 17:24 26 29 default % split on p0; 7 splits
  -1 0 % if p0 in [2,4], return FALSE
  -1 0 % if p0=7, return FALSE
  -1 0 % if p0 in [11,14], return FALSE
  -1 0 % if p0 in [17,24], return FALSE
  -1 0 % if p0=26, return FALSE
  -1 0 % if p0=29, return FALSE
  -1 1 % otherwise, return TRUE

%
% this mapping is used for switching parents when we just want the mapping
% to imitate the parent value (typically binary) ...
%

11 % index
oneD_map % name
1 % one parent
  -1 { p0 } % return copy of parent

%
% this mapping is used to map the stateCountDown variable from [0,N]
% down to just 3 classes: 0 --> 0, 1 --> 1, and >1 --> 2
%

12 % index
counter_map % name
1 % one parent
0 3 0 1 default % split on p0; 3 splits
  -1 0 % if counter=0, return 0
  -1 1 % if counter=1, return 1
  -1 2 % if counter>1, return 2

13 % index
forceEqual_DT % name
2 % two parents
  -1 { (p0==p1) } % return TRUE if both parents are equal

14 % index
isSP_DT % name
1 % one parent (topoClass)
0 2 4 default % split on p0; 2 splits
  -1 1 % if topoClass==4, return 1
  -1 0 % else, return 0

15 % index
isTM_DT % name
1 % one parent (topoClass)
0 2 2 default % split on p0; 2 splits
  -1 1 % if topoClass==2, return 1
  -1 0 % else, return 0

% this used to be set based on two binary variables which only required a
% simple leaf computation to determine the protein type:
%     -1 { p0 + 2*p1 }
% but now p0=spBit and p1=iNumTM and the output pType is defined as:
%     G:0    SP+G:1    TM:2    SP+TM:3

16 % index
setPtype_DT % name
2 % two parents (spBit, iNumTM)
1 2 0 default % split on p1=iNumTM: 2 splits: 0 vs >0
  -1 { p0 } % if iNumTM=0, return spBit
  -1 { p0+2 } % if iNumTM>0, return spBit+2

17 % index
areDiff_map % name
2 % two parents
  -1 { p0 != p1 } % return TRUE if different

% we only want to increment the number of TMs if the previous
% state is either 4 or 14 ...
%     iNumTM : 0 --> 1 --> 2 --> 3 --> 2 --> 3 --> etc

18 % index
incNumTM_DT % name
2 % two parents: p0=prevState, p1=prevNumTM
0 3 4 14 default % split on p0; 3 splits
  1 2 3 default % if p0= 4, split on p1
    -1 { 2 } % if p1=3, return 2
    -1 { p1+1 } % otherwise, return p1+1
  1 2 3 default % if p0=14, split on p1
    -1 { 2 } % if p1=3, return 2
    -1 { p1+1 } % otherwise, return p1+1
  -1 { p1 } % otherwise, return p1

% NOTE(review): cp1 presumably denotes the cardinality of parent 1, which
% would make this leaf pack (p0,p1) into one integer -- confirm against the
% GMTK decision tree formula documentation

19 % index
two2one_DT % name
2 % two parents
  -1 { p0*cp1 + p1 }

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DETERMINISTIC CPTs
%
% layout of each record, as used throughout this section:
%     index, name, number of parents,
%     parent cardinalities followed by the self cardinality,
%     name of the decision tree (defined above) that computes the value

DETERMINISTIC_CPT_IN_FILE inline

18 % total number of Det CPTs to follow

0 % index
setBinaryFALSE % name
0 % no parents
BINARY_CARD % self-cardinality
setZero_DT % DT name

1 % index
setBinaryTRUE % name
0 % no parents
BINARY_CARD % self-cardinality
setOne_DT % DT name

2 % index
badLength % name
0 % no parents
MAX_STATE_LENGTH_CARD % self-cardinality
badValue_DT % DT name

3 % index
state2emitClass % name
1 % one parent (state)
STATE_CARD EMIT_CLASS_CARD % parent, self cardinalities
state2emitClass_DT % DT name

4 % index
state2lengthClass % name
1 % one parent (state)
STATE_CARD LENGTH_CLASS_CARD % parent, self cardinalities
state2lengthClass_DT % DT name

5 % index
state2topoClass % name
1 % one parent (state)
STATE_CARD TOPO_CLASS_CARD % parent, self cardinalities
state2topoClass_DT % DT name

6 % index
stateANDlabel % name
2 % two parents
STATE_CARD LABEL_CARD BINARY_CARD % parents, then self cardinalities
stateANDlabel_DT % DT name

7 % index
copyState % name
1 % one parent (state)
STATE_CARD STATE_CARD % parent, then self cardinalities
copyParent_DT % DT name

8 % index
decLength % name
1 % one parent (length)
MAX_STATE_LENGTH_CARD MAX_STATE_LENGTH_CARD % parent, then self cardinalities
decParent_DT % DT name

9 % index
finalState % name
1 % one parent(state)
STATE_CARD BINARY_CARD % parent, then self cardinalities
finalState_DT % DT name

10 % index
forceEqual % name
2 % two parents
TOPO_CLASS_CARD TOPO_CLASS_CARD BINARY_CARD % parents, then self cardinalities
forceEqual_DT % DT name

11 % index
isSP % name
1 % one parent
TOPO_CLASS_CARD BINARY_CARD % cardinalities
isSP_DT % DT name

12 % index
isTM % name
1 % one parent
TOPO_CLASS_CARD BINARY_CARD % cardinalities
isTM_DT % DT name

13 % index
setPtype % name
2 % two parents (spBit, iNumTM)
BINARY_CARD NUM_TM_CARD PROTEIN_TYPE_CARD % parents, then self cardinalities
setPtype_DT % DT name

14 % index
incNumTM % name
2 % two parents
STATE_CARD NUM_TM_CARD NUM_TM_CARD % cardinalities
incNumTM_DT % DT name

15 % index
copyNumTM % name
1 % one parent
NUM_TM_CARD NUM_TM_CARD % cardinalities
copyParent_DT % DT name

16 % index
setNumTMZero % name
0 % no parents
NUM_TM_CARD % self cardinality
setZero_DT % DT name

17 % index
two2oneMap % name
2 % two parents
BINARY_CARD TOPO_CLASS_CARD TOPO_10_CARD % parents, then self cardinalities
two2one_DT % DT name

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MEANs and COVARs for GAUSSIAN COMPONENTS
%
% none needed at this time ...

MEAN_IN_FILE inline 0

COVAR_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% GAUSSIAN COMPONENTS
%
% none needed at this time ...

MC_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MIXTURES OF GAUSSIAN COMPONENTS
%
% none needed at this time ...

MX_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NAME COLLECTIONs
%
% none needed at this time ...

NAME_COLLECTION_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DENSE CPTs
%

DENSE_CPT_IN_FILE inline 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% END
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%