#include #include #include #include int dim=108931; // dimension of database int dim_query=7392;// dimension of query set float **matrix; // e-values of matrix edges for protein i int **matrixind; // indices of matrix edges for protein i int *size; // number of edges for protein i char *line[100000]; float sigma=100; // sigma, controls size of local weights float alpha=0.95; // alpha parameter, controls amount of "clustering" int max_loops=20; // number of iterations of algorithm void load_matrix(char *fname) // load the whole matrix { FILE *f; int index,amount,i,j,k,ind; char c; char buffer[20]; float eval;int flag=0; f=fopen(fname,"r"); if (f==NULL) {printf("ERROR: File %s not found!\n",fname); exit(0);} j=fscanf(f,"%d",&dim); printf("Proteins in database: %d\n",dim); matrix=new float*[dim+1]; matrixind=new int*[dim+1]; size=new int[dim+1]; size[0]=0; for(k=1;k%d %d\n",query_index,query_size); if (!first_time) { delete query_row; delete query_rowind; } else first_time=0; query_row= new float[query_size]; query_rowind= new int[query_size]; for(i=0;i1e-20) // otherwise we get nans for(i=0;i1e-20) // otherwise we get nans for(i=0;idim_query) end=dim_query; sprintf(fname,"rm %s_%d-%d",fnam,start,end); system(fname); // delete pre-existing file, we will concatenate each query on to this for(k=start;k<=end;k++) // all examples - make matrix!!!! { next_query_matrix(); // load next query in query file // printf("%d %d %d \n",query_index,query_size,k); for(i=0;i<=dim+2;i++) // setup labels {y[i]=0.0; yorig[i]=0;} y[k]=1.0; // query point row_ind=query_rowind; row_eval=query_row; // take query from query file for(i=0;istart) sprintf(fname,"cat tmp.rankprop%d-%d >> %s_%d-%d",start,end,fnam,start,end); else sprintf(fname,"cat tmp.rankprop%d-%d > %s_%d-%d",start,end,fnam,start,end); //printf("[%s]\n",fname); printf("query %d ranked - %2.1f%% complete\n",k,100.0*(((float)k+1-start)/((float)end-start+1))); system(fname); } sprintf(fname,"rm tmp.rankprop%d-%d",start,end); // clean up temp file system(fname); } int main(int argc, char *argv[]) { if (argc<6) { printf("Usage: rankprop [-s ] [-i ] [-a ]\n\n"); printf("Performs the propagation ranking algorithm on the given queries in .\n"); printf(" and are indices into the query_matrix in order to run a subset of the queries.\n"); printf("For each query, propagation is performed on the network given in .\n"); printf("The ranking of the examples in the network is given as output by \n(the ranking scores of each item in the matrix file).\n"); printf("-a,-s and -i options set the parameters slpha, sigma and the iterations of the algorithm."); printf("\n FILE FORMATS:\n"); printf(" Both and are of the following repeated format:\n"); printf(" \n"); printf(" for each entry:\n"); printf(" .\n"); printf(" (Indices should start from 1.)\n"); exit(0); } /* Example matrix file: 2 <--- number of proteins 1 2 <--- protein 1 has 2 edges 1 2 <--- edge indices are to proteins 1 & 2 0.1 100 <--- with the following weights 2 2 <--- protein 2 also has 2 edges 1 2 <--- edge indices are also to proteins 1 & 2 (fully connected) 100 0.1 <--- with the following weights (small means highly similar) */ for(int i=6;i