# Source code for vtt

# -*- coding: utf-8 -*-
"""
A linear classifier to be used in conjunction with the Scikit Learn python package.

"""
#    Copyright (C) 2016 by
#    Luis Rocha <rocha@indiana.edu>
#    Artemy Kolchinsky <artemyk@gmail.com >
#    Rion Brattig Correia <rionbr@gmail.com>
#    Ian B Wood <ibwood@indiana.edu >
#    All rights reserved.
#    MIT license.
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model.base import LinearClassifierMixin
from sklearn.utils.multiclass import unique_labels
from scipy.sparse import csr_matrix
import numpy as np

# Module metadata.
# NOTE(review): assigning `__name__` replaces the name the import system gave
# this module -- confirm that overriding it here is intentional.
__name__ = 'vtt'
__version__ = '0.3'
__release__ = '0.3.0b1'
# Author credits, joined into a single "A and B and C and D" string.
__authors__ = ' and '.join(['Luis M. Rocha', 'Artemy Kolchinsky', 'Rion Brattig Correia', 'Ian B. Wood'])
# Names exported by a star-import of this module.
__all__ = ['VTT']

class VTT(BaseEstimator, LinearClassifierMixin, TransformerMixin):
    """The Variable Trigonometric Threshold (VTT) linear classifier class

    Attributes:
        coef_ (array-like) : Feature weights. Also known as the coefficients.
        intercept_ (array-like) : This is the classifier bias. For a linear classifier also known as the intercept.
        B (dict) : Maps a column index of X (an NER count feature) to the value used for
            that column; after `fit` the column's coefficient is ``1. / value``.
        classes_ (array-like) : Labels seen in `y`, set by `fit`.
        y_predict : Initialised to None; no method of this class assigns it.
    """

    def __init__(self, weights=None, bias=None, *args, **kwargs):
        """Create the classifier, optionally with preset weights and bias.

        Args:
            weights (array-like, optional) : initial value of `coef_`.
            bias (array-like, optional) : initial value of `intercept_`. When left as None
                it is derived from the training data in `fit`.
            *args : accepted for backward compatibility and ignored.
            **kwargs : keywords of the form ``b_<index>`` are registered as NER entries,
                exactly as `set_params` does, so that ``VTT(**est.get_params())`` rebuilds
                an equivalent estimator. Any other keyword is ignored.
        """
        self.coef_ = weights  # Weights. Has to be named coef_ so scikit-learn will understand
        self.intercept_ = bias
        # Keys passed as 'b_{index}' treat index as an NER count column and B[index] as its value.
        self.B = {}
        self.y_predict = None
        self._set_ner_weights(kwargs)

    def _set_ner_weights(self, params):
        """Copy every ``b_<index>`` entry of `params` into ``self.B`` keyed by ``int(index)``."""
        for key, value in params.items():
            if key.startswith('b_'):
                self.B[int(key[2:])] = value

    def __get_vtt_angles(self, pvals, nvals):
        """ Fit the angles to the model

        Args:
            pvals (array-like) : positive values
            nvals (array-like) : negative values

        Returns: normalized coef_ values, as a one-row ``csr_matrix``. Columns listed in
            ``self.B`` are set to zero.
        """
        # Angle of each (nvals, pvals) point measured from the diagonal pvals == nvals:
        # positive when the feature is more frequent among positives.
        # https://www.khanacademy.org/math/trigonometry/unit-circle-trig-func/inverse_trig_functions/v/inverse-trig-functions--arctan
        angles = np.arctan2(pvals, nvals) - np.pi / 4
        # Reflect angles that fall outside [-pi/2, pi/2] back into that interval.
        folded = np.maximum(np.minimum(angles, np.pi - angles), -1 * np.pi - angles)
        # Zero the NER columns on the dense row first; assigning single elements into a
        # csr_matrix is slow and only needed the final sparse container anyway.
        folded = np.atleast_2d(np.asarray(folded))
        for index in self.B:
            folded[0, index] = 0.
        return csr_matrix(folded)

    def fit(self, X, y):
        """ Fit the VTT classifier model

        Samples whose target equals 1 are the positive class; every other sample is
        treated as negative. `X` is binarised (cast to bool) before the per-class feature
        means are computed.

        The intercept is only derived from the data while `intercept_` is still None; a
        bias supplied by the caller (or left from an earlier `fit`) is kept as is.

        Args:
            X (sparse matrix, shape = [n_samples, n_features]) : Training data
            y (array-like, shape = [n_samples]) : Target values

        Returns:
            self : the fitted estimator, so calls can be chained.
        """
        self.classes_ = unique_labels(y)
        X = csr_matrix(X, dtype=bool)

        # Convert to an array before comparing: `y == 1` on a plain list is a single
        # boolean, not an element-wise mask.
        is_positive = np.asarray(y).ravel() == 1
        pvals = X[is_positive, :].mean(axis=0)
        nvals = X[~is_positive, :].mean(axis=0)

        self.coef_ = self.__get_vtt_angles(pvals, nvals).toarray()

        if self.intercept_ is None:
            # Computed while the NER columns of coef_ are still zero, so those columns
            # do not contribute to the bias.
            feature_totals = np.asarray(nvals + pvals).ravel()
            self.intercept_ = -(self.coef_.dot(feature_totals) / 2.0)[0]

        for index, value in self.B.items():
            self.coef_[0, index] = 1. / value

        return self

    def set_params(self, **params):
        """ Set the parameters of the estimator.

        Keys other than the ones listed below are ignored.

        Args:
            bias (array-like) : bias of the estimator. Also known as the intercept
            weights (array-like) : weights of the features. Also known as coefficients.
            NER bias (array-like) : NER entities inferring column position on X and bias value. Ex: `b_4=10, b_5=6`.

        Returns:
            self : the estimator, so calls can be chained.

        Example:
            >>> cls = VTT()
            >>> cls.set_params(b_4=10, b_5=6, b_6=8)
        """
        if 'bias' in params:
            self.intercept_ = params['bias']
        if 'weights' in params:
            self.coef_ = params['weights']
        self._set_ner_weights(params)
        return self

    def get_params(self, deep=True):
        """ Get parameters for the estimator.

        Args:
            deep (boolean, optional) : accepted for scikit-learn compatibility; this
                estimator holds no nested estimators, so the value is not used.

        Returns:
            params : dict with 'weights' (`coef_`), 'bias' (`intercept_`) and one
                ``'b_<index>'`` entry per item of ``self.B``.
        """
        params = {'weights': self.coef_, 'bias': self.intercept_}
        for index, value in self.B.items():
            params['b_' + str(index)] = value
        return params

    # `predict` and `score` are not defined here: they are inherited from the
    # scikit-learn mixins this class derives from.