# -*- coding: utf-8 -*-
"""
A linear classifier to be used in conjunction with the Scikit Learn python package.
"""
# Copyright (C) 2016 by
# Luis Rocha <rocha@indiana.edu>
# Artemy Kolchinsky <artemyk@gmail.com >
# Rion Brattig Correia <rionbr@gmail.com>
# Ian B Wood <ibwood@indiana.edu >
# All rights reserved.
# MIT license.
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model.base import LinearClassifierMixin
from sklearn.utils.multiclass import unique_labels
from scipy.sparse import csr_matrix
import numpy as np
# Package metadata exposed as module-level dunder names.
# NOTE(review): this rebinds the module's own `__name__` global to 'vtt', so
# any `__name__ == '__main__'` check placed later in this file would never be
# true -- confirm this override is intentional.
__name__ = 'vtt'
__version__ = '0.3'
__release__ = '0.3.0b1'
# Author names joined into one display string separated by ' and '.
__authors__ = ' and '.join(['Luis M. Rocha', 'Artemy Kolchinsky', 'Rion Brattig Correia', 'Ian B. Wood'])
# Names exported by `from <module> import *`: only the classifier class.
__all__ = ['VTT']
class VTT(BaseEstimator, LinearClassifierMixin, TransformerMixin):
    """The Variable Trigonometric Threshold (VTT) linear classifier class.

    Attributes:
        coef_ (array-like) : Feature weights. Also known as the coefficients.
        intercept_ (scalar or array-like) : The classifier bias. For a linear classifier also known as the intercept.
        B (dict) : Maps a column index of X (treated as an NER count) to the value given through a
            `b_{index}` parameter. `fit` assigns that column the weight `1 / value`.
        classes_ (array) : Class labels found in `y` by `fit`.
        y_predict : Initialised to None; no method of this class reads it.
    """

    def __init__(self, weights=None, bias=None, *args, **kwargs):
        """ Create the classifier, optionally with preset weights and bias.

        Args:
            weights (array-like, optional) : Initial feature weights, stored as `coef_`.
            bias (scalar or array-like, optional) : Initial bias, stored as `intercept_`. When left as None
                it is computed by `fit`.
            *args : Ignored.
            **kwargs : Keys of the form `b_{index}` are stored as NER parameters, exactly as `set_params`
                does, so that `VTT(**estimator.get_params())` keeps them. Any other key is ignored.
        """
        self.coef_ = weights  # Weights. Has to be named coef_ so scikit-learn will understand
        self.intercept_ = bias
        # Pass keys of the form 'b_{index}' to treat index as an NER count and
        # B[index] as the weight for the NER.
        self.B = {}
        self._store_ner_params(kwargs)
        self.y_predict = None

    def _store_ner_params(self, params):
        """ Copy every `b_{index}` entry of `params` into `self.B`, keyed by `int(index)`. """
        for key, value in params.items():
            if key.startswith('b_'):
                self.B[int(key[2:])] = value

    def __get_vtt_angles(self, pvals, nvals):
        """ Fit the angles to the model

        Args:
            pvals (array-like) : positive values
            nvals (array-like) : negative values

        Returns:
            csr_matrix of shape [1, n_features] holding the angle of every feature, with the columns
            listed in `self.B` set to zero.
        """
        # https://www.khanacademy.org/math/trigonometry/unit-circle-trig-func/inverse_trig_functions/v/inverse-trig-functions--arctan
        # arctan2(p, n) is the angle of the point (n, p); subtracting pi/4
        # measures it from the diagonal on which p == n.
        angles = np.arctan2(pvals, nvals) - np.pi / 4
        folded = np.maximum(np.minimum(angles, np.pi - angles), -np.pi - angles)
        # Work on a plain 2-D ndarray copy: zeroing the NER columns here avoids
        # item-by-item assignment into a CSR matrix.
        folded = np.array(folded, ndmin=2)
        for index in self.B:
            folded[0, index] = 0.
        return csr_matrix(folded)

    def fit(self, X, y):
        """ Fit the VTT classifier model

        Args:
            X (sparse matrix, shape = [n_samples, n_features]) : Training data
            y (array-like, shape = [n_samples]) : Target values. Samples with label 1 are the positive
                class; every other label counts as negative.

        Returns:
            self : the fitted estimator, so calls can be chained.
        """
        # Convert first so that `y == 1` is an element-wise mask even when `y`
        # is a plain Python list (for which `y == 1` is just the scalar False).
        y = np.asarray(y)
        self.classes_ = unique_labels(y)

        # Features are reduced to presence/absence, so the column means below
        # are the fraction of samples in which each feature is non-zero.
        X = csr_matrix(X, dtype=bool)
        pvals = X[y == 1, :].mean(axis=0)
        nvals = X[y != 1, :].mean(axis=0)

        self.coef_ = self.__get_vtt_angles(pvals, nvals).toarray()

        # A bias that is already set (passed in by the caller or left by an
        # earlier fit) is kept; otherwise it is derived from the new weights.
        # The NER columns are still zero here, so they do not contribute.
        if self.intercept_ is None:
            pnvals = (nvals + pvals).T
            self.intercept_ = -(self.coef_.dot(pnvals) / 2.0)[0, 0]

        # NER columns get the fixed weight 1 / value instead of an angle.
        for index, value in self.B.items():
            self.coef_[0, index] = 1. / value
        return self

    def set_params(self, **params):
        """ Set the parameters of the estimator.

        Args:
            bias (array-like) : bias of the estimator. Also known as the intercept
            weights (array-like) : weights of the features. Also known as coefficients.
            NER bias (array-like) : NER entities inferring column position on X and bias value. Ex: `b_4=10, b_5=6`.

        Returns:
            self : the estimator, so calls can be chained.

        Example:
            >>> cls = VTT().set_params(b_4=10, b_5=6, b_6=8)
        """
        if 'bias' in params:
            self.intercept_ = params['bias']
        if 'weights' in params:
            self.coef_ = params['weights']
        self._store_ner_params(params)
        return self

    def get_params(self, deep=True):
        """ Get parameters for the estimator.

        Args:
            deep (boolean, optional) : Accepted for compatibility with the scikit-learn signature; it has no
                effect here.

        Returns:
            params : dict with the keys 'weights' and 'bias' plus one 'b_{index}' key per NER parameter.
        """
        params = {'weights': self.coef_, 'bias': self.intercept_}
        params.update(('b_' + str(index), value) for index, value in self.B.items())
        return params
"""
### This is now handled by `LinearClassifierMixin`
def predict(self, X):
""
""
values = X.dot(self.coef_.T)
values.X[:] = values.X + self.intercept_
result = values.sign().astype(int)
result[result==-1] = 0 # Change -1 values to 0
return result.toarray().ravel()
"""
"""
### This is now handled by `ClassifierMixin` (inherited through `LinearClassifierMixin`)
def score(self, X, y):
print '--- Scoring ---'
print 'X',X
print 'y',y
y_predict = self.y_predict
mean_accuracy = (y_predict.toarray().T == y)
#print mean_accuracy
mean_accuracy = np.mean(mean_accuracy)
y_predict = y_predict.toarray(order=1).T
return mean_accuracy
"""