# Model selection for a Gaussian-kernel SVM on the yeast interaction dataset.
#
# The script loads a sparse dataset, rescales it, registers the functions
# that compute the mcc feature at train/test time, selects the kernel width
# by stratified cross-validation scored with roc50, and saves the results.

import os
import sys

# if PyML is installed in your site-packages directory you won't
# need an equivalent of the following lines
# (these must run before the PyML imports below so the modules can be found)
sys.path.append("/home/asa/PyML")
sys.path.append("/home/asa/PyML/ext")
sys.path.append("/home/asa/")

# definitions that reflect the file structure on my machine;
# you will need to change those according to your setup
homeDir = os.path.join(os.environ['HOME'], 'sppi')
dataDir = os.path.join(homeDir, 'data')
datasetDir = os.path.join(dataDir, 'interactionDatasets')

# read the data:
import datafunc
data = datafunc.SparseCDataSet(
    os.path.join(datasetDir, 'yeastBindHomologyGOmcc.data'),
    labelsColumn=1)

# center each variable and divide by its standard deviation:
import preproc
rescaler = preproc.Rescale()
rescaler.train(data)

# the mcc feature needs to be computed on the fly based on the training data;
# one function is for training, and one for testing.
# we use the PyML mechanism of naming a "train" and a "test" function
# for the dataset that is called prior to training or testing a classifier
from sppi.src import mcc
data.setTrainingFunc(mcc.addDataTrain)
data.setTestingFunc(mcc.addDataTest)

import ker
k = ker.Gaussian()

import svm
import modelSelection
# define a model selection object that chooses the width parameter of the
# kernel using CV according to its roc50 score:
param = modelSelection.Param(svm.SVM(k), 'kernel.gamma',
                             [0.001, 0.01, 0.1, 1])
m = modelSelection.ModelSelector(param, measure='roc50')

# perform CV with a given random number generator seed:
r = m.stratifiedCV(data, seed=1)

# save the results:
r.save('goMCChomology.gaussian.select.pyd')