%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This is the master file for the model_train graph
%

#include "commonParams.txt"

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                    DENSE PROBABILITY MASS FUNCTIONS                         %
% none needed at this time ...

DPMF_IN_FILE	inline
0				% number of dense PMFs to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                              DIRICHLET TABLES                               %
% none needed at this time ...

DIRICHLET_TAB_IN_FILE	inline
0				% number of Dirichlet tables to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                               DECISION TREEs                                %
%									      %
% a decision tree is needed whenever we use a DeterministicCPT or a mapping;  %
% here we list first those DTs that are paired with a DeterministicCPT of the %
% same name (w/o the _DT ending), then those that are used as part of a       %
% mapping of some sort ...						      %

DT_IN_FILE	inline

20				% total number of decision trees to follow (indices 0 thru 19)

% constant tree: no parents, a single leaf that always yields 0
0				% index
setZero_DT			% name
0				% no parents
    -1    0			% leaf: always return 0


% constant tree: no parents, a single leaf that always yields 1
1				% index
setOne_DT			% name
0				% no parents
    -1    1			% leaf: always return 1


% constant tree: no parents, a single leaf that yields the sentinel 999999
2				% index
badValue_DT			% name
0				% no parents
    -1    999999		% leaf: always return a "bad" value


%
% here are the indexes and names of the states, with the emitClass and
% lengthClass that they each map to -- the states are listed in an order
% that loosely follows a path of a protein that starts with a short
% cytoplasmic (inside) loop, then goes thru the membrane to the non-
% cytoplasmic side (outside), then back thru the membrane ...
%
% the value "0" in all cases is being held back in case it comes in 
% handy as some sort of "special" value ...
%
%    	STATE			NEXT		EMIT		LENGTH	
%  #  name			STATE(s)	CLASS		CLASS
%
%  0	unknown state		0		0		0
%  1	loop_i			2		1		1		[1,20]
%  2    cap_im			3		2		2		[4]
%  3	helix_io 		4		3		3		[5,25]
%  4	cap_mo			5,8,9		4		2		[4]
%  5	short_loop_obg		6		5		4		[10]
%  6	short-outside-glob 	7		6		5		[soExp]
%  7	short_loop_oag		12		5		4		[10]
%  8	loop_o			12		5		6		[1,20]
%  9	long-loop_obg		10		7		4		[10]
% 10	long-outside-glob	27		6		7		[loExp]
% 27    long-outside-glob2      28              6               14		[loExp2]
% 28    long-outside-glob3      11              6               15		[loExp3]
% 11	long-loop_oag		12		7		4		[10]
% 12	cap_om			13		4		2		[4]
% 13	helix_oi		14		3		3		[5,25]
% 14	cap_mi			1,15		2		2		[4]
% 15	loop_ibg		16		1		4		[10]
% 16	inside-glob		17		6		8		[iExp]
% 17	loop_iag		2		1		4		[10]
% 
% %%%%%%%% added states for SignalP model (dsv) %%%%%%%%%%%%%%%%%
%
% 29    methionine              26              15              12
% 26    n-region-exp            18              8               13              [nExp]
% 18	n-region		19		8		9		[1,10]
% 19	h-region		20		9		10		[6,20]
% 20	c-region		21		10		11		[1,15]
% 21	c3			22		11		12		[1]
% 22	c2			23		12		12		[1]
% 23	c1			24		13		12		[1]
% 24	cut			6,10,25		14		12		[1]
%
% %%%%%%%% added one new state to model globular-only proteins %%
%
% 25    globular                25              6               12


% maps each state to its EMIT CLASS (see the state table above); the leaves
% below are listed in the same order as the split values on the split line
3				% index
state2emitClass_DT		% name
1				% one parent (the state)
    0  30  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default	% split on p0(state); 30 splits
	-1  0			% state  0 (unknown state)
	-1  1			% state  1 (loop_i)
	-1  2			% state  2 (cap_im)
	-1  3			% state  3 (helix_io)
	-1  4			% state  4 (cap_mo)
	-1  5			% state  5 (short_loop_obg)
	-1  6			% state  6 (short-outside-glob)
	-1  5			% state  7 (short_loop_oag)
	-1  5			% state  8 (loop_o)
	-1  7			% state  9 (long-loop_obg)
	-1  6			% state 10 (long-outside-glob)
	-1  7			% state 11 (long-loop_oag)
	-1  4			% state 12 (cap_om)
	-1  3			% state 13 (helix_oi)
	-1  2			% state 14 (cap_mi)
	-1  1			% state 15 (loop_ibg)
	-1  6			% state 16 (inside-glob)
	-1  1			% state 17 (loop_iag)
	-1  8			% state 18 (n-region)
	-1  9			% state 19 (h-region)
	-1  10			% state 20 (c-region)
	-1  11			% state 21 (c3)
	-1  12			% state 22 (c2)
	-1  13			% state 23 (c1)
	-1  14			% state 24 (cut)
        -1  6			% state 25 (globular)
	-1  8			% state 26 (n-region-exp)
	-1  6			% state 27 (long-outside-glob2)
	-1  6			% state 28 (long-outside-glob3)
	-1  15			% default: any other state (29, methionine, in the table)
	

% maps each state to its LENGTH CLASS (see the state table above); the leaves
% below are listed in the same order as the split values on the split line
4				% index
state2lengthClass_DT		% name
1				% one parent (the state)
    0  30  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default	% split on p0(state); 30 splits
	-1  0			% state  0 (unknown state)
	-1  1			% state  1 (loop_i)
	-1  2			% state  2 (cap_im)
	-1  3			% state  3 (helix_io)
	-1  2			% state  4 (cap_mo)
	-1  4			% state  5 (short_loop_obg)
	-1  5			% state  6 (short-outside-glob)
	-1  4			% state  7 (short_loop_oag)
	-1  6			% state  8 (loop_o)
	-1  4			% state  9 (long-loop_obg)
	-1  7			% state 10 (long-outside-glob)
	-1  4			% state 11 (long-loop_oag)
	-1  2			% state 12 (cap_om)
	-1  3			% state 13 (helix_oi)
	-1  2			% state 14 (cap_mi)
	-1  4			% state 15 (loop_ibg)
	-1  8			% state 16 (inside-glob)
	-1  4			% state 17 (loop_iag)
	-1  9			% state 18 (n-region)
	-1  10			% state 19 (h-region)
	-1  11			% state 20 (c-region)
	-1  12			% state 21 (c3)
	-1  12			% state 22 (c2)
	-1  12			% state 23 (c1)
	-1  12			% state 24 (cut)
	-1  12			% state 25 (globular)
	-1  13			% state 26 (n-region-exp)
	-1  14			% state 27 (long-outside-glob2)
	-1  15			% state 28 (long-outside-glob3)
	-1  12			% default: any other state (29, methionine, in the table)

% TOPOLOGY CLASSES %
% there are currently 5 different topology classes:
%	0 : unused
%	1 : inside
%	2 : membrane
%	3 : outside
%	4 : signal-peptide

% maps each state to its topology class (0 unused, 1 inside, 2 membrane,
% 3 outside, 4 signal-peptide); the leaves below are listed in the same
% order as the split values on the split line
5				% index
state2topoClass_DT		% name
1				% one parent (the state)
    0  30  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 default	% split on p0(state); 30 splits
	-1  0			% state  0 (unknown state)	: unused
	-1  1			% state  1 (loop_i)		: inside
	-1  2			% state  2 (cap_im)		: membrane
	-1  2			% state  3 (helix_io)		: membrane
	-1  2			% state  4 (cap_mo)		: membrane
	-1  3			% state  5 (short_loop_obg)	: outside
	-1  3			% state  6 (short-outside-glob)	: outside
	-1  3			% state  7 (short_loop_oag)	: outside
	-1  3			% state  8 (loop_o)		: outside
	-1  3			% state  9 (long-loop_obg)	: outside
	-1  3			% state 10 (long-outside-glob)	: outside
	-1  3			% state 11 (long-loop_oag)	: outside
	-1  2			% state 12 (cap_om)		: membrane
	-1  2			% state 13 (helix_oi)		: membrane
	-1  2			% state 14 (cap_mi)		: membrane
	-1  1			% state 15 (loop_ibg)		: inside
	-1  1			% state 16 (inside-glob)	: inside
	-1  1			% state 17 (loop_iag)		: inside
	-1  4			% state 18 (n-region)		: signal-peptide
	-1  4			% state 19 (h-region)		: signal-peptide
	-1  4			% state 20 (c-region)		: signal-peptide
	-1  4			% state 21 (c3)			: signal-peptide
	-1  4			% state 22 (c2)			: signal-peptide
	-1  4			% state 23 (c1)			: signal-peptide
	-1  4			% state 24 (cut)		: signal-peptide
	-1  3			% state 25 (globular)		: outside
	-1  4			% state 26 (n-region-exp)	: signal-peptide
	-1  3			% state 27 (long-outside-glob2)	: outside
	-1  3			% state 28 (long-outside-glob3)	: outside
	-1  4			% default: any other state (29, methionine, in the table) : signal-peptide

% LABELS %	
% there are 10 different labels:
%	. : 0	wildcard
%
%	i : 1	inside / cytoplasmic			1
%	M : 2	membrane				2
%	o : 3	outside / non-cytoplasmic  *short*	3
%	O : 4	outside / non-cytoplasmic  *long*	4
%
%	n : 5	-- different regions of a signal peptide
%	h : 6
%	c : 7
%	C : 8
%	s : 9	-- generic signal peptide label (n,h,c)
%
% a relationship needs to be defined between which labels correspond
% to which states -- for example the label "i" corresponds to 4 different
% allowable states: loop_i, loop_ibg, inside-glob, and loop_iag
%
% in addition, the integer label *zero* will be interpreted as
% placing no constraint whatsoever on the current state
%
% so the label to state relationships are such that we want this decision tree
% to return TRUE (1) under the following conditions:
%	p1=0	p0=anything		(*)
%	p1=1	p0=1,15,16,17		(i)
%	p1=2	p0=2,3,4,12,13,14	(m)
%	p1=3	p0=5,6,7,8		(o)
%	p1=4	p0=9,10,11,25,27,28	(O)
% 
%       p1=5    p0=18,26,29	(n-region)
%	p1=6	p0=19		(h-region)
%	p1=7	p0=20,21,22,23	(c-region,c3,c2,c1)
%	p1=8	p0=24		(cut)
%	p1=9	p0=18,19,20,21,22,23,26,29

% returns TRUE (1) when the state (p0) is allowed under the label (p1),
% and FALSE (0) otherwise; label 0 is the wildcard and allows every state
6					% index
stateANDlabel_DT			% name
2					% two parents (state, label)
   1  10  0 1 2 3 4 5 6 7 8 default	% split on p1(label); 10 splits (default covers p1=9)
	-1  1				%     if p1=0, always return TRUE
	 0  3  1  15:17  default	%     if p1=1, split on p0(state); 3 splits
	   -1  1			%         if p0=1, return TRUE
	   -1  1			%         if p0 in [15,17], return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  3  2:4  12:14  default	%     if p1=2, split on p0(state); 3 splits
	   -1  1			%         if p0 in [2,4], return TRUE
	   -1  1			%         if p0 in [12,14], return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  2  5:8  default		%     if p1=3, split on p0(state); 2 splits
	   -1  1			%         if p0 in [5,8], return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  4  9:11  25  27:28  default	%     if p1=4, split on p0(state); 4 splits
	   -1  1			%         if p0 in [9,11], return TRUE
	   -1  1			%         if p0=25, return TRUE
	   -1  1			%         if p0 in [27,28], return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  4  18  26  29  default	%     if p1=5, split on p0(state); 4 splits
	   -1  1			%         if p0=18, return TRUE
	   -1  1			%         if p0=26, return TRUE
	   -1  1			%         if p0=29, return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  2  19  default		%     if p1=6, split on p0(state); 2 splits
	   -1  1			%         if p0=19, return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  2  20:23  default		%     if p1=7, split on p0(state); 2 splits
	   -1  1			%         if p0 in [20,23], return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  2  24  default		%     if p1=8, split on p0(state); 2 splits
	   -1  1			%         if p0=24, return TRUE
	   -1  0			%         otherwise, return FALSE
	 0  4  18:23  26  29  default	%     if p1=9, split on p0(state); 4 splits
	   -1  1			%         if p0 in [18,23], return TRUE
	   -1  1			%         if p0=26, return TRUE
	   -1  1			%         if p0=29, return TRUE
	   -1  0			%         otherwise, return FALSE

% identity tree: the single leaf evaluates to the value of its one parent
7				% index
copyParent_DT			% name
1				% one parent
    -1   {p0}			% just return parent value


% increment tree: the single leaf evaluates to the parent value plus one
8				% index
incParent_DT			% name
1				% one parent
    -1   {p0+1}			% return parent+1


% decrement tree that stops at zero: 0 stays 0, anything else becomes p0-1
9				% index
decParent_DT			% name
1				% one parent
    0  2  0  default		% split on p0; 2 splits
      -1  0			%     if p0=0, return 0 (never go below zero)
      -1   {p0-1}		%     if p0>0, return p0-1


%
% this decision tree is used to constrain the final state (which is the parent
% in this case), and the value returned is 1 if the final state is a valid
% final state, and is 0 if the final state is NOT valid ...
%
% originally, any state that was NOT in the membrane was considered a valid
% final state, so the INVALID states were just [2,3,4,12,13,14]
%
% --> changing to make the following states INVALID end states :
%	2,3,4,7,11,12,13,14,17,18,19,20,21,22,23,24,26,29
%
% --> VALID end states : 1, 5, 6, 8, 9, 10, 15, 16, 25, 27, 28
%

% returns 1 if the parent state is an allowed final state, 0 if it is not
10							% index
finalState_DT						% name
1							% one parent (state)
    0  7  2:4  7  11:14  17:24  26  29  default		% split on p0; 7 splits
      -1  0				%     if p0 in [2,4], return FALSE
      -1  0				%     if p0=7, return FALSE
      -1  0				%     if p0 in [11,14], return FALSE
      -1  0				%     if p0 in [17,24], return FALSE
      -1  0				%     if p0=26, return FALSE
      -1  0				%     if p0=29, return FALSE
      -1  1				%     otherwise, return TRUE


% 
% this mapping is used for switching parents when we just want the mapping
% to imitate the parent value (typically binary) ...
%

% identity mapping: the single leaf evaluates to the value of its one parent
11				% index
oneD_map			% name
1				% one parent
    -1  { p0 }			% return copy of parent


%
% this mapping is used to map the stateCountDown variable from [0,N]
% down to just 3 classes: 0 --> 0, 1 --> 1, and >1 --> 2
%

% collapses a counter value into three classes: 0, 1, or 2 (anything above 1)
12				% index
counter_map			% name
1				% one parent (the counter)
    0    3   0  1  default	% split on p0; 3 splits
	-1  0			%     if counter=0, return 0
	-1  1			%     if counter=1, return 1
	-1  2			%     if counter>1, return 2


% equality test on the two parents
13				% index
forceEqual_DT			% name
2				% two parents
    -1 { (p0==p1) }		% return TRUE if the parents are equal


% flags topology class 4 (signal-peptide in the topology class list)
14                              % index
isSP_DT                         % name
1                               % one parent (topoClass)
    0    2   4  default         % split on p0; 2 splits
        -1  1                   % if topoClass==4, return 1
        -1  0                   % else, return 0

% flags topology class 2 (membrane in the topology class list)
15                              % index
isTM_DT                         % name
1                               % one parent (topoClass)
    0    2   2  default         % split on p0; 2 splits
        -1  1                   % if topoClass==2, return 1
        -1  0                   % else, return 0

% this used to be set based on two binary variables which only required a
% simple leaf computation to determine the protein type:
%               -1 { p0 + 2*p1 }
% but now p0=spBit and p1=iNumTM and the output pType is defined as:
% G:0  SP+G:1  TM:2  SP+TM:3

% computes pType from spBit and iNumTM: spBit when iNumTM is 0, spBit+2
% when iNumTM is anything else (G:0  SP+G:1  TM:2  SP+TM:3)
16                              % index
setPtype_DT                     % name
2                               % two parents (spBit, iNumTM)
    1    2    0    default      % split on p1=iNumTM: 2 splits: 0 vs >0
        -1 { p0 }               %     if iNumTM=0, return spBit
        -1 { p0+2 }             %     if iNumTM>0, return spBit+2

% inequality test on the two parents
17				% index
areDiff_map			% name
2				% two parents
    -1  { p0 != p1 }		% return TRUE if different


% we only want to increment the number of TMs if the previous
% state is either 4 or 14 ...
%	iNumTM : 0 --> 1 --> 2 --> 3 --> 2 --> 3 --> etc

% updates the TM count: it changes only when the previous state is 4 or 14;
% a count of 3 wraps back to 2, any other count is incremented by one
18				% index
incNumTM_DT			% name
2				% two parents: p0=prevState, p1=prevNumTM
    0   3   4  14  default	% split on p0; 3 splits
	1   2   3  default	%   if p0= 4, split on p1; 2 splits
	   -1  { 2 }		%     if p1=3, return 2
	   -1  { p1+1 }		%     otherwise, return p1+1
	1   2   3  default	%   if p0=14, split on p1; 2 splits
	   -1  { 2 }		%     if p1=3, return 2
	   -1  { p1+1 }		%     otherwise, return p1+1
	-1 { p1 }		%   otherwise, return p1 unchanged

% folds the two parent values into a single value with one formula leaf
19				% index
two2one_DT			% name
2				% two parents
	-1 { p0*cp1 + p1 }	% NOTE(review): cp1 is presumably the cardinality of parent 1 -- confirm


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                             DETERMINISTIC CPTs                              %
%									      %

DETERMINISTIC_CPT_IN_FILE	inline

18				% total number of Det CPTs to follow (indices 0 thru 17)


% parentless binary CPT driven by setZero_DT (which always returns 0)
0				% index
setBinaryFALSE			% name
0				% no parents
BINARY_CARD			% self-cardinality
setZero_DT			% DT name


% parentless binary CPT driven by setOne_DT (which always returns 1)
1				% index
setBinaryTRUE			% name
0				% no parents
BINARY_CARD			% self-cardinality
setOne_DT			% DT name


% parentless CPT driven by badValue_DT (which always returns 999999)
2				% index
badLength			% name
0				% no parents
MAX_STATE_LENGTH_CARD		% self-cardinality
badValue_DT			% DT name


% state --> emit class, computed by state2emitClass_DT
3				% index
state2emitClass			% name
1				% one parent (state)
STATE_CARD  EMIT_CLASS_CARD	% parent, self cardinalities
state2emitClass_DT		% DT name


% state --> length class, computed by state2lengthClass_DT
4				% index
state2lengthClass		% name
1				% one parent (state)
STATE_CARD  LENGTH_CLASS_CARD	% parent, self cardinalities
state2lengthClass_DT		% DT name

% state --> topology class, computed by state2topoClass_DT
5				% index
state2topoClass			% name
1				% one parent (state)
STATE_CARD  TOPO_CLASS_CARD	% parent, self cardinalities
state2topoClass_DT		% DT name

% binary check of a state against a label, computed by stateANDlabel_DT
6				% index
stateANDlabel			% name
2				% two parents (state, label)
STATE_CARD  LABEL_CARD  BINARY_CARD	% parents, then self cardinalities
stateANDlabel_DT		% DT name

% copies the parent state value, via copyParent_DT
7				% index
copyState			% name
1				% one parent (state)
STATE_CARD  STATE_CARD		% parent, then self cardinalities
copyParent_DT			% DT name

% decrements the parent length (0 stays 0), via decParent_DT
8				% index
decLength			% name
1				% one parent (length)
MAX_STATE_LENGTH_CARD  MAX_STATE_LENGTH_CARD	% parent, then self cardinalities
decParent_DT			% DT name

% binary check that the parent state is an allowed final state, via finalState_DT
9				% index
finalState			% name
1				% one parent (state)
STATE_CARD  BINARY_CARD		% parent, then self cardinalities
finalState_DT			% DT name

% binary equality test of two parents with topology-class cardinality, via forceEqual_DT
10				% index
forceEqual			% name
2				% two parents
TOPO_CLASS_CARD  TOPO_CLASS_CARD  BINARY_CARD	% parents, then self cardinalities
forceEqual_DT			% DT name

% binary flag: 1 when the parent topology class is 4, via isSP_DT
11                              % index
isSP                            % name
1                               % one parent (topoClass)
TOPO_CLASS_CARD  BINARY_CARD    % parent, then self cardinalities
isSP_DT                         % DT name

% binary flag: 1 when the parent topology class is 2, via isTM_DT
12                              % index
isTM                            % name
1                               % one parent (topoClass)
TOPO_CLASS_CARD  BINARY_CARD    % parent, then self cardinalities
isTM_DT                         % DT name

% protein type from spBit and iNumTM, via setPtype_DT
13                              % index
setPtype                        % name
2                               % two parents (spBit, iNumTM)
BINARY_CARD  NUM_TM_CARD  PROTEIN_TYPE_CARD	% parents, then self cardinalities
setPtype_DT                     % DT name

% next TM count from the previous state and previous TM count, via incNumTM_DT
14                              % index
incNumTM                        % name
2                               % two parents (prevState, prevNumTM)
STATE_CARD  NUM_TM_CARD  NUM_TM_CARD    % parents, then self cardinalities
incNumTM_DT                     % DT name

% copies the parent TM count, via copyParent_DT
15                              % index
copyNumTM                       % name
1                               % one parent
NUM_TM_CARD  NUM_TM_CARD        % parent, then self cardinalities
copyParent_DT                   % DT name

% parentless CPT that fixes the TM count at 0, via setZero_DT
16                              % index
setNumTMZero                    % name
0                               % no parents
NUM_TM_CARD                     % self cardinality
setZero_DT                      % DT name

% folds a binary parent and a topology-class parent into one value, via two2one_DT
17				% index
two2oneMap			% name
2				% two parents
BINARY_CARD  TOPO_CLASS_CARD  TOPO_10_CARD	% parents, then self cardinalities
two2one_DT			% DT name


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                  MEANs and COVARs for GAUSSIAN COMPONENTS		      %
% none needed at this time ...

MEAN_IN_FILE	inline
0				% number of means to follow (none)

COVAR_IN_FILE	inline
0				% number of covariances to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                               GAUSSIAN COMPONENTS		              %
% none needed at this time ...

MC_IN_FILE	inline
0				% number of Gaussian components to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                          MIXTURES OF GAUSSIAN COMPONENTS		      %
% none needed at this time ...

MX_IN_FILE	inline
0				% number of mixtures to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                              NAME COLLECTIONs                               %
% none needed at this time ...

NAME_COLLECTION_IN_FILE		inline 
0				% number of name collections to follow (none)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                 DENSE CPTs                                  %
%									      %

DENSE_CPT_IN_FILE	inline
0				% number of dense CPTs to follow (none)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                    END				      %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%