|
SHOGUN
v1.1.0
|
本页面包含了 Python 静态接口的所有示例。
要运行某个示例，只需执行：
python name_of_example.py
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]
def classifier_gmnpsvm(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=train_label,
        size_cache=10, width=2.1, C=1.2,
        epsilon=1e-5, use_bias=False):
    """Train a GMNPSVM through the static ``sg`` interface and classify.

    Sets the training features, a GAUSSIAN/REAL kernel (``size_cache``,
    ``width``) and the training labels, creates the classifier, applies
    ``svm_epsilon``, ``c`` and ``svm_use_bias``, trains, then sets the
    test features.

    Returns a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_multiclass)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'GMNPSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'GMNPSVM'
classifier_gmnpsvm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]
def classifier_gpbtsvm(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=train_label,
        size_cache=10, width=2.1, C=1.2,
        epsilon=1e-5, use_bias=False):
    """Train a GPBTSVM through the static ``sg`` interface and classify.

    Sets the training features, a GAUSSIAN/REAL kernel (``size_cache``,
    ``width``) and the training labels, creates the classifier, applies
    ``svm_epsilon``, ``c`` and ``svm_use_bias``, trains, then sets the
    test features.

    Returns the output of ``sg('classify')``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'GPBTSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    return sg('classify')
if __name__=='__main__':
print 'GPBTSVM'
classifier_gpbtsvm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,3],
[traindat,testdat,train_label,4]]
def classifier_knn(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=train_label, k=3):
    """Train a KNN classifier through the static ``sg`` interface.

    Sets training features and labels, selects the EUCLIDIAN/REAL
    distance, creates the KNN classifier and trains it with ``k`` passed
    to ``train_classifier``; the test features are then set.

    Returns the output of ``sg('classify')``.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_classifier', 'KNN')
    sg('train_classifier', k)

    sg('set_features', 'TEST', fm_test_real)
    return sg('classify')
if __name__=='__main__':
print 'KNN'
classifier_knn(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label],
[traindat,testdat,train_label]]
def classifier_lda(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=train_label):
    """Train an LDA classifier through the static ``sg`` interface.

    Sets training features and labels, creates and trains the LDA
    classifier, then sets the test features.

    Returns the output of ``sg('classify')``.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    sg('new_classifier', 'LDA')
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    return sg('classify')
if __name__=='__main__':
print 'LDA'
classifier_lda(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]
def classifier_libsvm(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=train_label,
        size_cache=10, width=2.1, C=1.2,
        epsilon=1e-5, use_bias=False):
    """Train a LIBSVM classifier through the static ``sg`` interface.

    Sets the training features, a GAUSSIAN/REAL kernel (``size_cache``,
    ``width``) and the training labels, creates the classifier, applies
    ``svm_epsilon``, ``c`` and ``svm_use_bias``, trains, then sets the
    test features.

    Returns a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'LIBSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'LibSVM'
classifier_libsvm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,10.,1e-5,False],
[traindat,testdat,train_label,10,2.1,11.,1e-4,False]]
def classifier_libsvm_multiclass(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=train_label,
        size_cache=10, width=2.1, C=10.,
        epsilon=1e-5, use_bias=False):
    """Train a LIBSVM_MULTICLASS classifier through the ``sg`` interface.

    Sets the training features, a GAUSSIAN/REAL kernel (``size_cache``,
    ``width``) and the training labels, creates the classifier, applies
    ``svm_epsilon``, ``c`` and ``svm_use_bias``, trains, then sets the
    test features.

    Returns a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_multiclass)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'LIBSVM_MULTICLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'LibSVMMultiClass'
classifier_libsvm_multiclass(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,10,2.1,10.,1e-5,False],
[traindat,testdat,10,2.1,11.,1e-4,False]]
def classifier_libsvm_oneclass(fm_train_real=traindat, fm_test_real=testdat,
        size_cache=10, width=2.1, C=10.,
        epsilon=1e-5, use_bias=False):
    """Train a LIBSVM_ONECLASS classifier through the ``sg`` interface.

    Sets the training features and a GAUSSIAN/REAL kernel
    (``size_cache``, ``width``); no labels are set. The classifier is
    created, ``svm_epsilon``, ``c`` and ``svm_use_bias`` are applied and
    training is run before the test features are set.

    Returns a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data and kernel (no labels for this classifier).
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'LIBSVM_ONECLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'LibSVMOneClass'
classifier_libsvm_oneclass(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]
def classifier_mpdsvm(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=train_label,
        size_cache=10, width=2.1, C=1.2,
        epsilon=1e-5, use_bias=False):
    """Train an MPDSVM classifier through the static ``sg`` interface.

    Sets the training features, a GAUSSIAN/REAL kernel (``size_cache``,
    ``width``) and the training labels, creates the classifier, applies
    ``svm_epsilon``, ``c`` and ``svm_use_bias``, trains, then sets the
    test features.

    Returns a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier creation, solver settings and training.
    sg('new_classifier', 'MPDSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'MPDSVM'
classifier_mpdsvm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label],
[traindat,testdat,train_label]]
def classifier_perceptron(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=train_label):
    """Train a PERCEPTRON classifier through the static ``sg`` interface.

    Sets training features and labels, creates and trains the
    classifier, then sets the test features.

    Returns the output of ``sg('classify')``.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    sg('new_classifier', 'PERCEPTRON')
    # Training often does not converge -- mind your data!
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    return sg('classify')
if __name__=='__main__':
print 'Perceptron'
classifier_perceptron(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
train_label=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna, train_label,10,20,1.2,1e-5,False],
[traindna,testdna,train_label,10,21,1.3,1e-4,False]]
def classifier_svmlight(fm_train_dna=traindna, fm_test_dna=testdna,
        label_train_dna=train_label,
        size_cache=10, degree=20, C=1.2,
        epsilon=1e-5, use_bias=False):
    """Train an SVMLIGHT classifier on DNA strings via the ``sg`` interface.

    Sets the DNA training features, a WEIGHTEDDEGREE/CHAR kernel
    (``size_cache``, ``degree``) and the training labels, then tries to
    create the classifier.

    Returns ``None`` if creating the classifier raises ``RuntimeError``;
    otherwise a tuple ``(classify output, TEST kernel matrix)``.
    """
    # Training data, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)
    sg('set_labels', 'TRAIN', label_train_dna)

    try:
        sg('new_classifier', 'SVMLIGHT')
    except RuntimeError:
        # Classifier could not be created (presumably SVMLight is not
        # available in this build -- TODO confirm); skip the example.
        return None

    # Solver settings and training.
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    predictions = sg('classify')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return predictions, test_kernel
if __name__=='__main__':
print 'SVMLight'
classifier_svmlight(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
parameter_list=[[traindat,10,3],[traindat,11,4]]
def clustering_hierarchical(fm_train=traindat, size_cache=10, merges=3):
    """Run HIERARCHICAL clustering through the static ``sg`` interface.

    Sets the training features and the EUCLIDIAN/REAL distance, creates
    the clustering and trains it with ``merges``. ``size_cache`` is
    accepted but not used.

    Returns ``[merge_distance, pairs]``, the two values unpacked from
    ``sg('get_clustering')``.
    """
    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_clustering', 'HIERARCHICAL')
    sg('train_clustering', merges)

    # Unpacking enforces that exactly two values come back.
    merge_distance, pairs = sg('get_clustering')
    return [merge_distance, pairs]
if __name__=='__main__':
print 'Hierarchical'
clustering_hierarchical(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
parameter_list=[[traindat,10,3,1000],[traindat,11,4,1500]]
def clustering_kmeans(fm_train=traindat, size_cache=10, k=3, iter=1000):
    """Run KMEANS clustering through the static ``sg`` interface.

    Sets the training features and the EUCLIDIAN/REAL distance, creates
    the clustering and trains it with ``k`` and ``iter``. ``size_cache``
    is accepted but not used.

    Returns ``[radi, centers]``, the two values unpacked from
    ``sg('get_clustering')``.
    """
    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_clustering', 'KMEANS')
    sg('train_clustering', k, iter)

    # Unpacking enforces that exactly two values come back.
    radi, centers = sg('get_clustering')
    return [radi, centers]
if __name__=='__main__':
print 'KMeans'
clustering_kmeans(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_braycurtis(fm_train_real=traindat, fm_test_real=testdat):
    """Compute BRAYCURTIS/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'BrayCurtisDistance'
distance_braycurtis(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_canberra(fm_train_real=traindat, fm_test_real=testdat):
    """Compute CANBERRA/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'CANBERRA', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'CanberraMetric'
distance_canberra(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]
def distance_canberraword(fm_train_dna=traindna, fm_test_dna=testdna, order=3,
        gap=0, reverse='n'):
    """Compute CANBERRA/WORD distance matrices on DNA strings via ``sg``.

    A SORTWORDSTRING preprocessor is added once. For TRAIN and then TEST,
    the DNA features are set, converted from CHAR strings to WORD strings
    (``order``, ``order-1``, ``gap``, ``reverse``), the preprocessor is
    attached and the distance matrix is fetched.

    Returns the TEST distance matrix (the TRAIN one is overwritten).
    """
    sg('set_distance', 'CANBERRA', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    # Identical pipeline for both sides; TRAIN must run before TEST.
    for side, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', side, strings, 'DNA')
        sg('convert', side, 'STRING', 'CHAR', 'STRING', 'WORD',
           order, order-1, gap, reverse)
        sg('attach_preproc', side)
        dm = sg('get_distance_matrix', side)
    return dm
if __name__=='__main__':
print 'CanberraWordDistance'
distance_canberraword(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_chebyshew(fm_train_real=traindat, fm_test_real=testdat):
    """Compute CHEBYSHEW/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'CHEBYSHEW', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'ChebyshewMetric'
distance_chebyshew(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_chisquare(fm_train_real=traindat, fm_test_real=testdat):
    """Compute CHISQUARE/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'CHISQUARE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'ChiSquareDistance'
distance_chisquare(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_cosine(fm_train_real=traindat, fm_test_real=testdat):
    """Compute COSINE/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'COSINE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'CosineDistance'
distance_cosine(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_euclidian(fm_train_real=traindat, fm_test_real=testdat):
    """Compute EUCLIDIAN/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'EuclidianDistance'
distance_euclidian(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_geodesic(fm_train_real=traindat, fm_test_real=testdat):
    """Compute GEODESIC/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'GEODESIC', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'GeodesicMetric'
distance_geodesic(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]
def distance_hammingword(fm_train_dna=traindna, fm_test_dna=testdna, order=3,
        gap=0, reverse='n'):
    """Compute HAMMING/WORD distance matrices on DNA strings via ``sg``.

    A SORTWORDSTRING preprocessor is added once. For TRAIN and then TEST,
    the DNA features are set, converted from CHAR strings to WORD strings
    (``order``, ``order-1``, ``gap``, ``reverse``), the preprocessor is
    attached and the distance matrix is fetched.

    Returns the TEST distance matrix (the TRAIN one is overwritten).
    """
    sg('set_distance', 'HAMMING', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    # Identical pipeline for both sides; TRAIN must run before TEST.
    for side, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', side, strings, 'DNA')
        sg('convert', side, 'STRING', 'CHAR', 'STRING', 'WORD',
           order, order-1, gap, reverse)
        sg('attach_preproc', side)
        dm = sg('get_distance_matrix', side)
    return dm
if __name__=='__main__':
print 'HammingWordDistance'
distance_hammingword(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_jensen(fm_train_real=traindat, fm_test_real=testdat):
    """Compute JENSEN/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'JENSEN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'JensenMetric'
distance_jensen(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_manhatten(fm_train_real=traindat, fm_test_real=testdat):
    """Compute MANHATTAN/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'MANHATTAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'ManhattanMetric'
distance_manhatten(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]
def distance_manhattenword(fm_train_dna=traindna, fm_test_dna=testdna, order=3,
        gap=0, reverse='n'):
    """Compute MANHATTAN/WORD distance matrices on DNA strings via ``sg``.

    A SORTWORDSTRING preprocessor is added once. For TRAIN and then TEST,
    the DNA features are set, converted from CHAR strings to WORD strings
    (``order``, ``order-1``, ``gap``, ``reverse``), the preprocessor is
    attached and the distance matrix is fetched.

    Returns the TEST distance matrix (the TRAIN one is overwritten).
    """
    sg('set_distance', 'MANHATTAN', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    # Identical pipeline for both sides; TRAIN must run before TEST.
    for side, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', side, strings, 'DNA')
        sg('convert', side, 'STRING', 'CHAR', 'STRING', 'WORD',
           order, order-1, gap, reverse)
        sg('attach_preproc', side)
        dm = sg('get_distance_matrix', side)
    return dm
if __name__=='__main__':
print 'ManhattanWordDistance'
distance_manhattenword(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,3.],[traindat,testdat,4.]]
def distance_minkowski(fm_train_real=traindat, fm_test_real=testdat, k=3.):
    """Compute MINKOWSKI/REAL distance matrices via the ``sg`` interface.

    ``k`` is passed as the extra argument of ``set_distance``. The TRAIN
    distance matrix is requested after setting the training features, but
    only the TEST matrix is returned.
    """
    sg('set_distance', 'MINKOWSKI', 'REAL', k)

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'MinkowskiMetric'
distance_minkowski(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]
def distance_tanimoto(fm_train_real=traindat, fm_test_real=testdat):
    """Compute TANIMOTO/REAL distance matrices via the ``sg`` interface.

    The TRAIN distance matrix is requested after setting the training
    features, but only the TEST matrix is returned.
    """
    sg('set_distance', 'TANIMOTO', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result intentionally discarded

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
if __name__=='__main__':
print 'TanimotoDistance'
distance_tanimoto(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,0,'n'],[traindna,cubedna,4,0,'n']]
def distribution_histogram(fm_train=traindna, fm_cube=cubedna, order=3,
        gap=0, reverse='n'):
    """Prepare WORD string features for a histogram example via ``sg``.

    Only the feature preparation is active: a SORTWORDSTRING preprocessor
    is added, ``fm_train`` is set as DNA training features, converted
    from CHAR strings to WORD strings (``order``, ``order-1``, ``gap``,
    ``reverse``) and the preprocessor is attached.

    The distribution training/evaluation calls are disabled (kept below
    as comments), so the function returns ``None``. ``fm_cube`` is not
    used.
    """
    # Disabled: sg('new_distribution', 'HISTOGRAM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order-1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    # Disabled remainder of the example:
    #     sg('train_distribution')
    #     histo=sg('get_histogram')
    #     num_examples=11
    #     num_param=sg('get_histogram_num_model_parameters')
    #     for i in xrange(num_examples):
    #         for j in xrange(num_param):
    #             sg('get_log_derivative %d %d' % (j, i))
    #     sg('get_log_likelihood')
    #     return sg('get_log_likelihood_sample')
if __name__=='__main__':
print 'Histogram'
distribution_histogram(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,6,1,list(),list()],
[traindna,cubedna,3,6,1,list(),list()]]
def distribution_hmm(fm_train=traindna, fm_cube=cubedna, N=3, M=6,
        order=1, hmms=list(), links=list()):
    """Train an HMM on CUBE features via ``sg`` and return its likelihood.

    Creates an HMM with ``N`` and ``M``, sets ``fm_cube`` as CUBE
    training features, converts them from CHAR strings to WORD strings
    with ``order`` and runs the ``bw`` command (presumably Baum-Welch
    training -- TODO confirm). The model is then read back, a fresh HMM
    is created and the first four returned components are loaded into it.

    ``fm_train``, ``hmms`` and ``links`` are accepted but not used.

    Returns the output of ``sg('hmm_likelihood')``.
    """
    sg('new_hmm', N, M)
    sg('set_features', 'TRAIN', fm_cube, 'CUBE')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order)
    sg('bw')

    # Round-trip the trained model through a newly created HMM.
    model = sg('get_hmm')
    sg('new_hmm', N, M)
    sg('set_hmm', model[0], model[1], model[2], model[3])

    return sg('hmm_likelihood')
if __name__=='__main__':
print 'HMM'
distribution_hmm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,0,'n'],
[traindna,cubedna,3,0,'n']]
def distribution_linearhmm(fm_train=traindna, fm_cube=cubedna,
        order=3, gap=0, reverse='n'):
    """Prepare WORD string features for a LinearHMM example via ``sg``.

    Only the feature preparation is active: a SORTWORDSTRING preprocessor
    is added, ``fm_train`` is set as DNA training features, converted
    from CHAR strings to WORD strings (``order``, ``order-1``, ``gap``,
    ``reverse``) and the preprocessor is attached.

    The distribution training/evaluation calls are disabled (kept below
    as comments), so the function returns ``None``. ``fm_cube`` is not
    used.
    """
    # Disabled: sg('new_distribution', 'LinearHMM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order-1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    # Disabled remainder of the example:
    #     sg('train_distribution')
    #     histo=sg('get_histogram')
    #     num_examples=11
    #     num_param=sg('get_histogram_num_model_parameters')
    #     for i in xrange(num_examples):
    #         for j in xrange(num_param):
    #             sg('get_log_derivative %d %d' % (j, i))
    #     sg('get_log_likelihood_sample')
if __name__=='__main__':
print 'LinearHMM'
distribution_linearhmm(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.5,11]]
def kernel_chi2(fm_train_real=traindat, fm_test_real=testdat,
        width=1.4, size_cache=10):
    """Compute CHI2/REAL kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``width``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Chi2'
kernel_chi2(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.,10],[traindat,testdat,1.5,11]]
def kernel_combined(fm_train_real=traindat, fm_test_real=testdat,
        weight=1., size_cache=10):
    """Compute COMBINED kernel matrices via the static ``sg`` interface.

    Clears the current kernel and features, sets a COMBINED kernel and
    adds three sub-kernels (LINEAR, GAUSSIAN with 1., POLY with 3 and
    False), each with ``weight`` and ``size_cache``. After each
    sub-kernel the same training and test features are added.

    The TRAIN kernel matrix is requested but only the TEST matrix is
    returned.
    """
    # Start from a clean state.
    sg('clean_kernel')
    sg('clean_features', 'TRAIN')
    sg('clean_features', 'TEST')

    sg('set_kernel', 'COMBINED', size_cache)

    # Arguments following 'add_kernel' for each sub-kernel, in order.
    sub_kernels = (
        (weight, 'LINEAR', 'REAL', size_cache),
        (weight, 'GAUSSIAN', 'REAL', size_cache, 1.),
        (weight, 'POLY', 'REAL', size_cache, 3, False),
    )
    for kernel_args in sub_kernels:
        sg('add_kernel', *kernel_args)
        sg('add_features', 'TRAIN', fm_train_real)
        sg('add_features', 'TEST', fm_test_real)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Combined'
kernel_combined(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
[traindna,testdna,11,4,0,'n',False,'FULL']]
def kernel_commulongstring(fm_train_dna=traindna, fm_test_dna=testdna,
        size_cache=10,
        order=3, gap=0, reverse='n',
        use_sign=False, normalization='FULL'):
    """Compute COMMSTRING/ULONG kernel matrices on DNA strings via ``sg``.

    A SORTULONGSTRING preprocessor is added once. For TRAIN and then
    TEST, the DNA features are set, converted from CHAR strings to ULONG
    strings (``order``, ``order-1``, ``gap``, ``reverse``) and the
    preprocessor is attached. The kernel is then set with ``size_cache``,
    ``use_sign`` and ``normalization``.

    The TRAIN kernel matrix is requested but only the TEST matrix is
    returned.
    """
    sg('add_preproc', 'SORTULONGSTRING')

    # Identical preparation for both sides; TRAIN must run before TEST.
    for side, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', side, strings, 'DNA')
        sg('convert', side, 'STRING', 'CHAR', 'STRING', 'ULONG',
           order, order-1, gap, reverse)
        sg('attach_preproc', side)

    sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'CommUlongString'
kernel_commulongstring(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
[traindna,testdna,11,4,0,'n',False,'FULL']]
def kernel_commwordstring(fm_train_dna=traindna, fm_test_dna=testdna,
        size_cache=10,
        order=3, gap=0, reverse='n',
        use_sign=False, normalization='FULL'):
    """Compute COMMSTRING/WORD kernel matrices on DNA strings via ``sg``.

    A SORTWORDSTRING preprocessor is added once. For TRAIN and then TEST,
    the DNA features are set, converted from CHAR strings to WORD strings
    (``order``, ``order-1``, ``gap``, ``reverse``) and the preprocessor
    is attached. The kernel is then set with ``size_cache``,
    ``use_sign`` and ``normalization``.

    The TRAIN kernel matrix is requested but only the TEST matrix is
    returned.
    """
    sg('add_preproc', 'SORTWORDSTRING')

    # Identical preparation for both sides; TRAIN must run before TEST.
    for side, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', side, strings, 'DNA')
        sg('convert', side, 'STRING', 'CHAR', 'STRING', 'WORD',
           order, order-1, gap, reverse)
        sg('attach_preproc', side)

    sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'CommWordString'
kernel_commwordstring(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,23.,10],[traindat,testdat,24.,11]]
def kernel_const(fm_train_real=traindat, fm_test_real=testdat, c=23.,
        size_cache=10):
    """Compute CONST/REAL kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``c``). The TRAIN kernel matrix is requested but only the TEST matrix
    is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CONST', 'REAL', size_cache, c)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Const'
kernel_const(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,23.,10],[traindat,testdat,24.,11]]
def kernel_diag(fm_train_real=traindat, fm_test_real=testdat, diag=23.,
        size_cache=10):
    """Compute DIAG/REAL kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``diag``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'DIAG', 'REAL', size_cache, diag)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Diag'
kernel_diag(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,10],[traindna,testdna,4,11]]
def kernel_fixeddegreestring(fm_train_dna=traindna, fm_test_dna=testdna,
        degree=3, size_cache=10):
    """Compute FIXEDDEGREE/CHAR kernel matrices on DNA strings via ``sg``.

    Sets DNA training and test features and the kernel (``size_cache``,
    ``degree``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'FIXEDDEGREE', 'CHAR', size_cache, degree)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'FixedDegreeString'
kernel_fixeddegreestring(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.9,11]]
def kernel_gaussian(fm_train_real=traindat, fm_test_real=testdat,
        width=1.4, size_cache=10):
    """Compute GAUSSIAN/REAL kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``width``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Gaussian'
kernel_gaussian(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.9,2,1,10],[traindat,testdat,1.5,2,1,11]]
def kernel_gaussianshift(fm_train_real=traindat, fm_test_real=testdat,
        width=1.4, max_shift=2, shift_step=1, size_cache=10):
    """Compute GAUSSIANSHIFT/REAL kernel matrices via the ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``width``, ``max_shift``, ``shift_step``). The TRAIN kernel matrix is
    requested but only the TEST matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', size_cache, width,
       max_shift, shift_step)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'GaussianShift'
kernel_gaussianshift(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.2,10],[traindat,testdat,1.5,11]]
def kernel_linear(fm_train_real=traindat, fm_test_real=testdat,
        scale=1.2, size_cache=10):
    """Compute LINEAR/REAL kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``scale``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    # Function-local import, as in the original example.
    from sg import sg

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'LINEAR', 'REAL', size_cache, scale)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'Linear'
kernel_linear(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10],
[traindna,testdna,11]]
def kernel_linearstring(fm_train_dna=traindna, fm_test_dna=testdna,
        size_cache=10):
    """Compute LINEAR/CHAR kernel matrices on DNA strings via ``sg``.

    Sets DNA training and test features and the kernel (``size_cache``).
    The TRAIN kernel matrix is requested but only the TEST matrix is
    returned.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LINEAR', 'CHAR', size_cache)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'LinearString'
kernel_linearstring(*parameter_list[0])
from tools.load import LoadMatrix
from numpy import ushort
from sg import sg
lm=LoadMatrix()
# Word-valued feature matrices, cast to unsigned short. The training
# matrix is read from the training file so that train and test data
# differ (assumes ../data/fm_train_word.dat ships alongside the test
# file, like the other fm_train_*/fm_test_* pairs -- TODO confirm).
trainword=ushort(lm.load_numbers('../data/fm_train_word.dat'))
testword=ushort(lm.load_numbers('../data/fm_test_word.dat'))
parameter_list=[[trainword,testword,10,1.4],
[trainword,testword,11,1.5]]
def kernel_linearword(fm_train_word=trainword, fm_test_word=testword,
        size_cache=10, scale=1.4):
    """Compute LINEAR/WORD kernel matrices via the static ``sg`` interface.

    Sets training and test features and the kernel (``size_cache``,
    ``scale``). The TRAIN kernel matrix is requested but only the TEST
    matrix is returned.
    """
    sg('set_features', 'TRAIN', fm_train_word)
    sg('set_features', 'TEST', fm_test_word)
    sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'LinearWord'
kernel_linearword(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10],
[traindna,testdna,11]]
def kernel_localalignmentstring(fm_train_dna=traindna, fm_test_dna=testdna,
        size_cache=10):
    """Compute LOCALALIGNMENT/CHAR kernel matrices on DNA strings via ``sg``.

    Sets DNA training and test features and the kernel (``size_cache``).
    The TRAIN kernel matrix is requested but only the TEST matrix is
    returned.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally discarded
    return sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
print 'LocalAlignmentString'
kernel_localalignmentstring(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,5,5,7],
[traindna,testdna,trainlabel,11,6,6,8]]
def kernel_localityimprovedstring(fm_train_dna=traindna, fm_test_dna=testdna,
                                  label_train_dna=trainlabel, size_cache=10,
                                  length=5, inner_degree=5, outer_degree=7):
    """Compute the LIK kernel on DNA string (CHAR) features.

    `label_train_dna` is accepted but not used by this function.
    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')

    kernel_args = (size_cache, length, inner_degree, outer_degree)
    sg('set_kernel', 'LIK', 'CHAR', *kernel_args)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('LocalityImprovedString')
    kernel_localityimprovedstring(*parameter_list[0])

# Setup for the OligoString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, k, width]
parameter_list = [[traindna, testdna, 10, 3, 1.2],
                  [traindna, testdna, 11, 4, 1.3]]
def kernel_oligostring(fm_train_dna=traindna, fm_test_dna=testdna,
                       size_cache=10, k=3, width=1.2):
    """Compute the OLIGO kernel (parameters `k`, `width`) on DNA strings.

    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
    sg('set_kernel', 'OLIGO', 'CHAR', size_cache, k, width)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    test_matrix = sg('get_kernel_matrix', 'TEST')
    return test_matrix
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('OligoString')
    kernel_oligostring(*parameter_list[0])

# Setup for the PluginEstimate / HistogramWord example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
trainlabel = lm.load_labels('../data/label_train_dna.dat')
# Each entry: [train DNA, test DNA, train labels, size_cache,
#              order, gap, reverse]
parameter_list = [[traindna, testdna, trainlabel, 10, 3, 0, 'n'],
                  [traindna, testdna, trainlabel, 11, 4, 0, 'n']]
def kernel_pluginestimatehistogram(fm_train_dna=traindna, fm_test_dna=testdna,
                                   label_train_dna=trainlabel, size_cache=10,
                                   order=3, gap=0, reverse='n',):
    """Train a plugin estimator and compute the HISTOGRAM kernel on WORD strings.

    The DNA strings are converted from CHAR to WORD strings using
    `order`, `order-1`, `gap` and `reverse`; a plugin estimator is
    trained on the train labels; then the TRAIN kernel matrix is
    requested (discarded) and the TEST kernel matrix is returned.
    """
    convert_args = ('STRING', 'CHAR', 'STRING', 'WORD',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)

    # Pseudo counts handed to the plugin estimator.
    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache)
    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    # 'plugin_estimate_classify' is marked as not supported yet, so no
    # classification is performed here.
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('PluginEstimate w/ HistogramWord')
    kernel_pluginestimatehistogram(*parameter_list[0])

# Setup for the Poly example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# Each entry: [train features, test features, degree, inhomogene,
#              use_normalization, size_cache]
parameter_list = [[traindat, testdat, 4, False, True, 10],
                  [traindat, testdat, 5, False, True, 11]]
def kernel_poly(fm_train_real=traindat, fm_test_real=testdat,
                degree=4, inhomogene=False, use_normalization=True,
                size_cache=10):
    """Compute the POLY kernel on REAL features.

    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    for target, feats in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, feats)

    kernel_args = (size_cache, degree, inhomogene, use_normalization)
    sg('set_kernel', 'POLY', 'REAL', *kernel_args)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('Poly')
    kernel_poly(*parameter_list[0])

# Setup for the PolyMatchString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, degree, inhomogene]
parameter_list = [[traindna, testdna, 10, 3, False],
                  [traindna, testdna, 11, 4, False]]
def kernel_polymatchstring(fm_train_dna=traindna, fm_test_dna=testdna,
                           size_cache=10, degree=3, inhomogene=False):
    """Compute the POLYMATCH kernel on DNA string (CHAR) features.

    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'POLYMATCH', 'CHAR', size_cache, degree, inhomogene)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    test_matrix = sg('get_kernel_matrix', 'TEST')
    return test_matrix
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('PolyMatchString')
    kernel_polymatchstring(*parameter_list[0])

# Setup for the PolyMatchWord example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
trainlabel = lm.load_labels('../data/label_train_dna.dat')
# Each entry: [train DNA, test DNA, train labels, size_cache, degree,
#              inhomogene, normalize, order, gap, reverse]
parameter_list = [
    [traindna, testdna, trainlabel, 10, 2, True, True, 3, 0, 'n'],
    [traindna, testdna, trainlabel, 11, 3, True, True, 4, 0, 'n'],
]
def kernel_polymatchword(fm_train_dna=traindna, fm_test_dna=testdna,
                         label_train_dna=trainlabel, size_cache=10,
                         degree=2, inhomogene=True, normalize=True,
                         order=3, gap=0, reverse='n'):
    """Compute the POLYMATCH kernel on WORD strings derived from DNA.

    A SORTWORDSTRING preprocessor is added, then each feature set is
    loaded, converted from CHAR to WORD strings and has the preprocessor
    attached. `label_train_dna` is accepted but not used. Requests the
    TRAIN kernel matrix (discarded) and returns the TEST kernel matrix.
    """
    sg('add_preproc', 'SORTWORDSTRING')

    convert_args = ('STRING', 'CHAR', 'STRING', 'WORD',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)
        sg('attach_preproc', target)

    sg('set_kernel', 'POLYMATCH', 'WORD',
       size_cache, degree, inhomogene, normalize)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('PolyMatchWord')
    kernel_polymatchword(*parameter_list[0])

# Setup for the PluginEstimate / SalzbergWord example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
trainlabel = lm.load_labels('../data/label_train_dna.dat')
# Each entry: [train DNA, test DNA, train labels, size_cache, order, gap,
#              reverse, use_sign, normalization]
parameter_list = [
    [traindna, testdna, trainlabel, 10, 3, 0, 'n', False, 'FULL'],
    [traindna, testdna, trainlabel, 11, 4, 0, 'n', False, 'FULL'],
]
def kernel_salzbergstring(fm_train_dna=traindna, fm_test_dna=testdna,
                          label_train_dna=trainlabel, size_cache=10,
                          order=3, gap=0, reverse='n', use_sign=False,
                          normalization='FULL'):
    """Train a plugin estimator and compute the SALZBERG kernel on WORD strings.

    The DNA strings are converted from CHAR to WORD strings, a plugin
    estimator is trained on the train labels, and prior probabilities
    are set from those labels. `use_sign` and `normalization` are
    accepted but not used. Requests the TRAIN kernel matrix (discarded)
    and returns the TEST kernel matrix.
    """
    convert_args = ('STRING', 'CHAR', 'STRING', 'WORD',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)

    # Pseudo counts handed to the plugin estimator.
    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'SALZBERG', 'WORD', size_cache)
    # Alternative with explicit priors: sg('set_prior_probs', 0.4, 0.6)
    sg('set_prior_probs_from_labels', label_train_dna)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    # 'plugin_estimate_classify' is marked as not supported yet, so no
    # classification is performed here.
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('PluginEstimate w/ SalzbergWord')
    kernel_salzbergstring(*parameter_list[0])

# Setup for the Sigmoid example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# Each entry: [train features, test features, num_feats, gamma, coef0,
#              size_cache]
parameter_list = [[traindat, testdat, 11, 1.2, 1.3, 10],
                  [traindat, testdat, 12, 1.3, 1.4, 11]]
def kernel_sigmoid(fm_train_real=traindat, fm_test_real=testdat,
                   num_feats=11, gamma=1.2, coef0=1.3, size_cache=10):
    """Compute the SIGMOID kernel (parameters `gamma`, `coef0`) on REAL features.

    `num_feats` is accepted but not used by this function. Requests the
    TRAIN kernel matrix (discarded) and returns the TEST kernel matrix.
    """
    for target, feats in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, feats)
    sg('set_kernel', 'SIGMOID', 'REAL', size_cache, gamma, coef0)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('Sigmoid')
    kernel_sigmoid(*parameter_list[0])

# Setup for the SimpleLocalityImprovedString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
trainlabel = lm.load_labels('../data/label_train_dna.dat')
# Each entry: [train DNA, test DNA, train labels, size_cache,
#              length, inner_degree, outer_degree]
parameter_list = [[traindna, testdna, trainlabel, 10, 5, 5, 7],
                  [traindna, testdna, trainlabel, 11, 6, 6, 8]]
def kernel_simplelocalityimprovedstring(fm_train_dna=traindna,
                                        fm_test_dna=testdna,
                                        label_train_dna=trainlabel,
                                        size_cache=10, length=5,
                                        inner_degree=5, outer_degree=7):
    """Compute the SLIK kernel on DNA string (CHAR) features.

    `label_train_dna` is accepted but not used by this function.
    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')

    kernel_args = (size_cache, length, inner_degree, outer_degree)
    sg('set_kernel', 'SLIK', 'CHAR', *kernel_args)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('SimpleLocalityImprovedString')
    kernel_simplelocalityimprovedstring(*parameter_list[0])

# Setup for the WeightedCommWordString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
trainlabel = lm.load_labels('../data/label_train_dna.dat')
# Each entry: [train DNA, test DNA, train labels, size_cache, order, gap,
#              reverse, use_sign, normalization]
parameter_list = [
    [traindna, testdna, trainlabel, 10, 3, 0, 'n', False, 'FULL'],
    [traindna, testdna, trainlabel, 11, 4, 0, 'n', False, 'FULL'],
]
def kernel_weightedcommwordstring(fm_train_dna=traindna, fm_test_dna=testdna,
                                  label_train_dna=trainlabel, size_cache=10,
                                  order=3, gap=0, reverse='n', use_sign=False,
                                  normalization='FULL'):
    """Compute the WEIGHTEDCOMMSTRING kernel on WORD strings derived from DNA.

    A SORTWORDSTRING preprocessor is added, then each feature set is
    loaded, converted from CHAR to WORD strings and has the preprocessor
    attached. `label_train_dna` is accepted but not used. Requests the
    TRAIN kernel matrix (discarded) and returns the TEST kernel matrix.
    """
    sg('add_preproc', 'SORTWORDSTRING')

    convert_args = ('STRING', 'CHAR', 'STRING', 'WORD',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)
        sg('attach_preproc', target)

    sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD',
       size_cache, use_sign, normalization)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('WeightedCommWordString')
    kernel_weightedcommwordstring(*parameter_list[0])

# Setup for the WeightedDegreePositionString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, degree]
parameter_list = [[traindna, testdna, 10, 20],
                  [traindna, testdna, 11, 21]]
def kernel_weighteddegreepositonstring(fm_train_dna=traindna,
                                       fm_test_dna=testdna,
                                       size_cache=10, degree=20):
    """Compute the WEIGHTEDDEGREEPOS kernel on DNA string (CHAR) features.

    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', size_cache, degree)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('WeightedDegreePositionString')
    kernel_weighteddegreepositonstring(*parameter_list[0])

# Setup for the WeightedDegreeString example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, degree]
parameter_list = [[traindna, testdna, 10, 20],
                  [traindna, testdna, 11, 21]]
def kernel_weighteddegreestring(fm_train_dna=traindna, fm_test_dna=testdna,
                                size_cache=10, degree=20):
    """Compute the WEIGHTEDDEGREE kernel on DNA string (CHAR) features.

    Requests the TRAIN kernel matrix (discarded) and returns the TEST
    kernel matrix.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    test_matrix = sg('get_kernel_matrix', 'TEST')
    return test_matrix
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('WeightedDegreeString')
    kernel_weighteddegreestring(*parameter_list[0])

# Setup for the multiclass MKL example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
trainlabel = lm.load_labels('../data/label_train_multiclass.dat')
# Each entry: [train features, test features, train labels, size_cache,
#              width, C, epsilon, mkl_eps, mkl_norm, weight]
parameter_list = [
    [traindat, testdat, trainlabel, 10, 1.2, 1.2, 1e-5, 0.001, 1.5, 1.0],
    [traindat, testdat, trainlabel, 11, 1.3, 1.3, 1e-5, 0.002, 1.6, 1.1],
]
def mkl_multiclass(fm_train_real=traindat, fm_test_real=testdat,
                   label_train_multiclass=trainlabel,
                   size_cache=10, width=1.2, C=1.2, epsilon=1e-5,
                   mkl_eps=0.001, mkl_norm=1.5, weight=1.0):
    """Train an MKL_MULTICLASS classifier on a COMBINED kernel and classify.

    The combined kernel has three sub-kernels (LINEAR, GAUSSIAN with
    `width`, POLY with degree 2), each added with the same `weight` and
    each given the same train and test features. Returns the output of
    the 'classify' command.
    """
    # Start from a clean kernel / feature state.
    sg('clean_kernel')
    sg('clean_features', 'TRAIN')
    sg('clean_features', 'TEST')

    sg('set_kernel', 'COMBINED', size_cache)
    sub_kernels = (
        ('LINEAR', 'REAL', size_cache),
        ('GAUSSIAN', 'REAL', size_cache, width),
        ('POLY', 'REAL', size_cache, 2),
    )
    for kernel_args in sub_kernels:
        sg('add_kernel', weight, *kernel_args)
        sg('add_features', 'TRAIN', fm_train_real)
        sg('add_features', 'TEST', fm_test_real)

    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('new_classifier', 'MKL_MULTICLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('mkl_parameters', mkl_eps, 0.0, mkl_norm)
    sg('train_classifier')

    # TEST features were already added per sub-kernel above, so no
    # separate 'set_features' call for TEST is made before classifying.
    return sg('classify')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('mkl_multiclass')
    mkl_multiclass(*parameter_list[0])

# Setup for the MKL regression example: synthetic two-cluster data.
from tools.load import LoadMatrix
from sg import sg
from numpy import *

num = 100
# `num` labels of -1 followed by `num` labels of +1.
labelstrain = concatenate((-ones([1, num]), ones([1, num])), 1)[0]
# Two blocks of normal samples, shifted by -1 and +1, joined along axis 1.
featuretrain = concatenate((random.normal(size=(2, num)) - 1,
                            random.normal(size=(2, num)) + 1), 1)
# Each entry: [weight, labels, features, tube_epsilon]
parameter_list = [[1., labelstrain, featuretrain, 1e-2],
                  [1., labelstrain, featuretrain, 1e-2]]
def mkl_regression(weight=1.,
                   labels=labelstrain, features=featuretrain,
                   tube_epsilon=1e-2):
    """Train an MKL_REGRESSION machine on three GAUSSIAN sub-kernels.

    The same train features are added three times, once per sub-kernel
    (widths 100., 10. and 1.), each sub-kernel added with `weight`.
    After training, the SVM parameters are fetched and the TEST kernel
    matrix is returned.
    """
    sg('new_classifier', 'MKL_REGRESSION')
    sg('c', 1.)
    sg('svr_tube_epsilon', tube_epsilon)
    sg('set_labels', 'TRAIN', labels)

    # One feature object per sub-kernel of the combined kernel.
    for _ in range(3):
        sg('add_features', 'TRAIN', features)

    sg('set_kernel', 'COMBINED', 100)
    for kernel_width in (100., 10., 1.):
        sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, kernel_width)

    sg('train_classifier')
    # Fetched to exercise 'get_svm'; the values are not used further.
    bias, alphas = sg('get_svm')
    # NOTE(review): no TEST features are added in this function, yet the
    # TEST kernel matrix is requested -- confirm this is intended.
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('MKL_REGRESSION')
    mkl_regression(*parameter_list[0])

# Setup for the two-class MKL example: synthetic two-cluster data.
from tools.load import LoadMatrix
from sg import sg
from numpy import *

num = 100
# `num` labels of -1 followed by `num` labels of +1.
labelstrain = concatenate((-ones([1, num]), ones([1, num])), 1)[0]
# Two blocks of normal samples, shifted by -1 and +1, joined along axis 1.
featuretrain = concatenate((random.normal(size=(2, num)) - 1,
                            random.normal(size=(2, num)) + 1), 1)
# Each entry: [weight, labels, features]
parameter_list = [[1., labelstrain, featuretrain],
                  [1., labelstrain, featuretrain]]
def mkl_twoclass(weight=1.,
                 labels=labelstrain, features=featuretrain):
    """Train an MKL_CLASSIFICATION machine on three GAUSSIAN sub-kernels.

    The same train features are added three times, once per sub-kernel
    (widths 100., 10. and 1.), each sub-kernel added with `weight`.
    After training, the SVM parameters are fetched and the TEST kernel
    matrix is returned.
    """
    sg('c', 10.)
    sg('new_classifier', 'MKL_CLASSIFICATION')
    sg('set_labels', 'TRAIN', labels)

    # One feature object per sub-kernel of the combined kernel.
    for _ in range(3):
        sg('add_features', 'TRAIN', features)

    sg('set_kernel', 'COMBINED', 100)
    for kernel_width in (100., 10., 1.):
        sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, kernel_width)

    sg('train_classifier')
    # Fetched to exercise 'get_svm'; the values are not used further.
    bias, alphas = sg('get_svm')
    # NOTE(review): no TEST features are added in this function, yet the
    # TEST kernel matrix is requested -- confirm this is intended.
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('MKL_TWOCLASS')
    mkl_twoclass(*parameter_list[0])

# Setup for the LogPlusOne preprocessor example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# Each entry: [train features, test features, width, size_cache]
parameter_list = [[traindat, testdat, 1.4, 10],
                  [traindat, testdat, 1.5, 11]]
def preproc_logplusone(fm_train_real=traindat, fm_test_real=testdat,
                       width=1.4, size_cache=10):
    """Apply the LOGPLUSONE preprocessor and compute a CHI2 kernel.

    For TRAIN and then TEST: the features are set, the preprocessor is
    attached and the kernel matrix is requested. Only the TEST kernel
    matrix is returned.
    """
    sg('add_preproc', 'LOGPLUSONE')
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    km = None
    for target, feats in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, feats)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)
    # After the loop `km` holds the TEST matrix.
    return km
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('LogPlusOne')
    preproc_logplusone(*parameter_list[0])

# Setup for the NormOne preprocessor example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# Each entry: [train features, test features, width, size_cache]
parameter_list = [[traindat, testdat, 1.4, 10],
                  [traindat, testdat, 1.5, 11]]
def preproc_normone(fm_train_real=traindat, fm_test_real=testdat,
                    width=1.4, size_cache=10):
    """Apply the NORMONE preprocessor and compute a CHI2 kernel.

    For TRAIN and then TEST: the features are set, the preprocessor is
    attached and the kernel matrix is requested. Only the TEST kernel
    matrix is returned.
    """
    sg('add_preproc', 'NORMONE')
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    km = None
    for target, feats in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, feats)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)
    # After the loop `km` holds the TEST matrix.
    return km
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('NormOne')
    preproc_normone(*parameter_list[0])

# Setup for the PruneVarSubMean preprocessor example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
# Each entry: [train features, test features, width, size_cache,
#              divide_by_std]
parameter_list = [[traindat, testdat, 1.4, 10, True],
                  [traindat, testdat, 1.5, 11, True]]
def preproc_prunevarsubmean(fm_train_real=traindat, fm_test_real=testdat,
                            width=1.4, size_cache=10, divide_by_std=True):
    """Apply the PRUNEVARSUBMEAN preprocessor and compute a CHI2 kernel.

    `divide_by_std` is forwarded to the preprocessor. For TRAIN and then
    TEST: the features are set, the preprocessor is attached and the
    kernel matrix is requested. Only the TEST kernel matrix is returned.
    """
    sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std)
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    km = None
    for target, feats in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, feats)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)
    # After the loop `km` holds the TEST matrix.
    return km
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('PruneVarSubMean')
    preproc_prunevarsubmean(*parameter_list[0])

# Setup for the SortUlongString preprocessor example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, order, gap, reverse,
#              use_sign, normalization]
parameter_list = [[traindna, testdna, 10, 3, 0, 'n', False, 'FULL'],
                  [traindna, testdna, 11, 4, 0, 'n', False, 'FULL']]
def preproc_sortulongstring(fm_train_dna=traindna, fm_test_dna=testdna,
                            size_cache=10, order=3, gap=0, reverse='n',
                            use_sign=False, normalization='FULL'):
    """Apply SORTULONGSTRING and compute the COMMSTRING kernel on ULONG strings.

    Each DNA feature set is loaded, converted from CHAR to ULONG strings
    and has the preprocessor attached. Requests the TRAIN kernel matrix
    (discarded) and returns the TEST kernel matrix.
    """
    sg('add_preproc', 'SORTULONGSTRING')

    convert_args = ('STRING', 'CHAR', 'STRING', 'ULONG',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)
        sg('attach_preproc', target)

    sg('set_kernel', 'COMMSTRING', 'ULONG',
       size_cache, use_sign, normalization)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('CommUlongString')
    preproc_sortulongstring(*parameter_list[0])

# Setup for the SortWordString preprocessor example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
# Each entry: [train DNA, test DNA, size_cache, order, gap, reverse,
#              use_sign, normalization]
parameter_list = [[traindna, testdna, 10, 3, 0, 'n', False, 'FULL'],
                  [traindna, testdna, 11, 4, 0, 'n', False, 'FULL']]
def preproc_sortwordstring(fm_train_dna=traindna, fm_test_dna=testdna,
                           size_cache=10, order=3, gap=0, reverse='n',
                           use_sign=False, normalization='FULL'):
    """Apply SORTWORDSTRING and compute the COMMSTRING kernel on WORD strings.

    Each DNA feature set is loaded, converted from CHAR to WORD strings
    and has the preprocessor attached. Requests the TRAIN kernel matrix
    (discarded) and returns the TEST kernel matrix.
    """
    sg('add_preproc', 'SORTWORDSTRING')

    convert_args = ('STRING', 'CHAR', 'STRING', 'WORD',
                    order, order-1, gap, reverse)
    for target, strings in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, strings, 'DNA')
        sg('convert', target, *convert_args)
        sg('attach_preproc', target)

    sg('set_kernel', 'COMMSTRING', 'WORD',
       size_cache, use_sign, normalization)

    sg('get_kernel_matrix', 'TRAIN')  # result intentionally unused
    return sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('CommWordString')
    preproc_sortwordstring(*parameter_list[0])

# Setup for the KRR regression example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
trainlabel = lm.load_labels('../data/label_train_twoclass.dat')
# Each entry: [train features, test features, train labels, size_cache,
#              width, C, tau]
parameter_list = [[traindat, testdat, trainlabel, 10, 2.1, 1.2, 1e-6],
                  [traindat, testdat, trainlabel, 11, 2.3, 1.3, 1e-6]]
def regression_krr(fm_train=traindat, fm_test=testdat,
                   label_train=trainlabel, size_cache=10, width=2.1,
                   C=1.2, tau=1e-6):
    """Train a KRR regression with a GAUSSIAN kernel and apply it to test data.

    Sets the train features, kernel and labels, configures `krr_tau`
    and `c`, trains, then sets the test features and returns the output
    of the 'classify' command.
    """
    # Training setup.
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train)

    sg('new_regression', 'KRR')
    sg('krr_tau', tau)
    sg('c', C)
    sg('train_regression')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test)
    return sg('classify')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('KRR')
    regression_krr(*parameter_list[0])

# Setup for the LibSVR regression example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
trainlabel = lm.load_labels('../data/label_train_twoclass.dat')
# Each entry: [train features, test features, train labels, size_cache,
#              width, C, epsilon, tube_epsilon]
parameter_list = [
    [traindat, testdat, trainlabel, 10, 2.1, 1.2, 1e-5, 1e-2],
    [traindat, testdat, trainlabel, 11, 2.3, 1.3, 1e-6, 1e-3],
]
def regression_libsvr(fm_train=traindat, fm_test=testdat,
                      label_train=trainlabel, size_cache=10, width=2.1,
                      C=1.2, epsilon=1e-5, tube_epsilon=1e-2):
    """Train a LIBSVR regression with a GAUSSIAN kernel and apply it to test data.

    Sets the train features, kernel and labels, configures the solver
    (`svm_epsilon`, `svr_tube_epsilon`, `c`), trains, then sets the test
    features and returns the output of the 'classify' command.
    """
    # Training setup.
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train)

    sg('new_regression', 'LIBSVR')
    # `epsilon` used to be accepted but silently ignored; forward it the
    # same way the classifier examples do so that the parameter sets
    # actually differ in solver precision.
    sg('svm_epsilon', epsilon)
    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test)
    return sg('classify')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('LibSVR')
    regression_libsvr(*parameter_list[0])

# Setup for the SVRLight regression example.
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
trainlabel = lm.load_labels('../data/label_train_twoclass.dat')
# Each entry: [train features, test features, train labels, size_cache,
#              width, C, epsilon, tube_epsilon]
parameter_list = [
    [traindat, testdat, trainlabel, 10, 2.1, 1.2, 1e-5, 1e-2],
    [traindat, testdat, trainlabel, 11, 2.3, 1.3, 1e-6, 1e-3],
]
def regression_svrlight(fm_train=traindat, fm_test=testdat,
                        label_train=trainlabel, size_cache=10, width=2.1,
                        C=1.2, epsilon=1e-5, tube_epsilon=1e-2):
    """Train an SVRLIGHT regression with a GAUSSIAN kernel and apply it to test data.

    Returns the output of the 'classify' command, or None when creating
    the SVRLIGHT regression raises RuntimeError (presumably because the
    toolbox was built without SVRLight support -- confirm).
    """
    # Training setup.
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train)

    try:
        sg('new_regression', 'SVRLIGHT')
    except RuntimeError:
        # Deliberate best-effort: skip the example instead of failing.
        return

    # `epsilon` used to be accepted but silently ignored; forward it the
    # same way the classifier examples do so that the parameter sets
    # actually differ in solver precision.
    sg('svm_epsilon', epsilon)
    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    # Prediction on the test features.
    sg('set_features', 'TEST', fm_test)
    return sg('classify')
if __name__ == '__main__':
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print('SVRLight')
    regression_svrlight(*parameter_list[0])