from __future__ import division
import numpy as np
from .classifier import normaliselabels
__all__ = [
'sda',
'linearly_independent_subset',
'linear_independent_features',
'filterfeatures',
'featureselector',
'sda_filter',
'rank_corr',
'select_n_best',
]
def _sweep(A, k, flag):
    # Sweep operator on the symmetric matrix A, pivoting on element (k,k).
    Akk = A[k,k]
    if Akk == 0:
        # guard against a zero pivot (degenerate feature)
        Akk = 1.e-5
# cross[i,j] = A[i,k] * A[k,j]
cross = (A[:,k][:, np.newaxis] * A[k])
B = A - cross/Akk
# currently: B[i,j] = A[i,j] - A[i,k]*A[k,j]/Akk
# Now fix row k and col k, followed by Bkk
    B[k] = flag * A[k]/Akk
    B[:,k] = flag * A[:,k]/Akk
B[k,k] = -1./Akk
return B
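# Annotation (not in the original module): `sda` below calls `_sweep(W, k, -1)`
# to enter variable k into the model and `_sweep(W, k, 1)` to remove it again.
# After a sweep, the pivoted diagonal entry becomes negative (it is -1/Akk),
# which is how the main loop recognises variables already in the model.
# A minimal sketch on a small symmetric matrix:
#
#   import numpy as np
#   A = np.array([[4., 2.],
#                 [2., 3.]])
#   B = _sweep(A, 0, -1)   # pivot on (0,0); B[0,0] == -1/A[0,0] == -0.25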
def sda(features, labels, tolerance=.01, significance_in=.05, significance_out=.05, loose=False):
'''
    features_idx = sda(features, labels, tolerance=.01, significance_in=.05, significance_out=.05, loose=False)
Stepwise Discriminant Analysis for feature selection
Pre-filter the feature matrix to remove linearly dependent features
before calling this function. Behaviour is undefined otherwise.
    This implements the algorithm described in Jennrich, R.I. (1977), "Stepwise
    Regression" & "Stepwise Discriminant Analysis," both in Statistical Methods
    for Digital Computers, eds. K. Enslein, A. Ralston, and H. Wilf. New York:
    John Wiley & Sons, Inc.
Parameters
----------
features : ndarray
feature matrix. There should not be any perfectly correlated features.
    labels : 1-d array
        labels
    tolerance : float, optional
        minimum ratio between the swept and the original within-groups
        diagonal for a feature to be eligible to enter (guards against
        near-singularity)
    significance_in : float, optional
        p-value threshold below which a feature is entered into the model
    significance_out : float, optional
        p-value threshold above which a previously entered feature is removed
    loose : boolean, optional
        if True, report every feature that enters the model; otherwise only
        features entered at p < 0.0001 are returned (default: False)
Returns
-------
features_idx : sequence
sequence of integer indices
'''
from scipy import stats
assert len(features) == len(labels), 'milk.supervised.featureselection.sda: length of features not the same as length of labels'
N, m = features.shape
labels,labelsu = normaliselabels(labels)
q = len(labelsu)
df = features - features.mean(0)
T = np.dot(df.T, df)
dfs = [(features[labels == i] - features[labels == i].mean(0)) for i in range(q)]
    # use the builtin sum: calling np.sum on a generator is deprecated in numpy
    W = sum(np.dot(d.T, d) for d in dfs)
ignoreidx = ( W.diagonal() == 0 )
if ignoreidx.any():
idxs, = np.where(~ignoreidx)
if not len(idxs):
return np.arange(m)
        # forward the caller's parameters when recursing on the reduced matrix
        selected = sda(features[:,~ignoreidx], labels, tolerance, significance_in, significance_out, loose)
return idxs[selected]
output = []
D = W.diagonal()
df1 = q-1
last_enter_k = -1
while True:
V = W.diagonal()/T.diagonal()
W_d = W.diagonal()
V_neg = (W_d < 0)
p = V_neg.sum()
if V_neg.any():
V_m = V[V_neg].min()
k, = np.where(V == V_m)
k = k[0]
Fremove = (N-p-q+1)/(q-1)*(V_m-1)
df2 = N-p-q+1
PrF = 1 - stats.f.cdf(Fremove,df1,df2)
if PrF > significance_out:
#print 'removing ',k, 'V(k)', 1./V_m, 'Fremove', Fremove, 'df1', df1, 'df2', df2, 'PrF', PrF
if k == last_enter_k:
# We are going into an infinite loop.
import warnings
warnings.warn('milk.featureselection.sda: infinite loop detected (maybe bug?).')
break
W = _sweep(W,k,1)
T = _sweep(T,k,1)
continue
ks = ( (W_d / D) > tolerance)
if ks.any():
V_m = V[ks].min()
k, = np.where(V==V_m)
k = k[0]
Fenter = (N-p-q)/(q-1) * (1-V_m)/V_m
df2 = N-p-q
PrF = 1 - stats.f.cdf(Fenter,df1,df2)
if PrF < significance_in:
#print 'adding ',k, 'V(k)', 1./V_m, 'Fenter', Fenter, 'df1', df1, 'df2', df2, 'PrF', PrF
W = _sweep(W,k,-1)
T = _sweep(T,k,-1)
if loose or (PrF < 0.0001):
output.append((Fenter,k))
last_enter_k = k
continue
break
output.sort(reverse=True)
return np.array([idx for _,idx in output])
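# Usage sketch for `sda` (illustrative; the synthetic data below is an
# assumption for demonstration, not part of the original module):
#
#   import numpy as np
#   np.random.seed(0)
#   features = np.random.randn(120, 6)
#   labels = np.repeat([0, 1, 2], 40)
#   features[:, 0] += labels           # make feature 0 informative
#   idxs = sda(features, labels)       # selected indices, most significant first
#   reduced = features[:, idxs]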
def linearly_independent_subset(V, threshold=1.e-5, return_orthogonal_basis=False):
'''
subset = linearly_independent_subset(V, threshold=1.e-5)
subset,U = linearly_independent_subset(V, threshold=1.e-5, return_orthogonal_basis=True)
Discover a linearly independent subset of `V`
Parameters
----------
V : sequence of input vectors
    threshold : float, optional
        vectors whose squared 2-norm is smaller than or equal to this are
        considered zero (default: 1.e-5)
return_orthogonal_basis : Boolean, optional
whether to return orthogonal basis set
Returns
-------
subset : ndarray of integers
indices used for basis
U : 2-array
orthogonal basis into span{V}
Implementation Reference
------------------------
    Uses the Gram-Schmidt process, skipping any vector whose residual is close
    enough to zero to be ignored.
See http://en.wikipedia.org/wiki/Gram-Schmidt_process
'''
    # copy as floats so the in-place subtraction below is not truncated
    V = np.array(V, dtype=float, copy=True)
orthogonal = []
used = []
for i,u in enumerate(V):
for v in orthogonal:
u -= np.dot(u,v)/np.dot(v,v) * v
if np.dot(u,u) > threshold:
orthogonal.append(u)
used.append(i)
if return_orthogonal_basis:
return np.array(used),np.array(orthogonal)
return np.array(used)
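# Usage sketch (illustrative): the third vector below is the sum of the first
# two, so it is dropped:
#
#   import numpy as np
#   V = np.array([[1., 0., 0.],
#                 [0., 1., 0.],
#                 [1., 1., 0.]])
#   subset = linearly_independent_subset(V)   # -> array([0, 1])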
def linear_independent_features(features, labels=None):
'''
indices = linear_independent_features(features, labels=None)
Returns the indices of a set of linearly independent features (columns).
Parameters
----------
features : ndarray
labels : ignored
This argument is only here to conform to the learner interface.
Returns
-------
indices : ndarray of integers
indices of features to keep
See Also
--------
`linearly_independent_subset` :
this function is equivalent to `linearly_independent_subset(features.T)`
'''
return linearly_independent_subset(features.T)
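# Usage sketch (illustrative): with features as columns, a duplicated column
# is detected and dropped:
#
#   import numpy as np
#   features = np.array([[1., 2., 2.],
#                        [3., 4., 4.],
#                        [5., 6., 6.]])
#   keep = linear_independent_features(features)   # -> array([0, 1])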
class filterfeatures(object):
'''
selector = filterfeatures(idxs)
Returns a transformer which selects the features given by idxs. I.e.,
``apply(features)`` is equivalent to ``features[idxs]``
Parameters
----------
idxs : ndarray
This can be either an array of integers (positions) or an array of booleans
'''
def __init__(self, idxs):
self.idxs = idxs
def apply(self, features):
return features[self.idxs]
def apply_many(self, features):
if len(features) == 0:
return features
features = np.asanyarray(features)
return features[:,self.idxs]
def __repr__(self):
return 'filterfeatures(%s)' % self.idxs
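# Usage sketch (illustrative): `apply` indexes a single feature vector,
# `apply_many` indexes the columns of a feature matrix:
#
#   import numpy as np
#   f = filterfeatures(np.array([0, 2]))
#   f.apply(np.array([10., 11., 12.]))              # -> array([10., 12.])
#   f.apply_many(np.array([[10., 11., 12.],
#                          [20., 21., 22.]]))       # -> columns 0 and 2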
class featureselector(object):
'''
selector = featureselector(function)
Returns a transformer which selects features according to
selected_idxs = function(features,labels)
'''
def __init__(self, selector):
self.selector = selector
def train(self, features, labels, **kwargs):
idxs = self.selector(features, labels)
if len(idxs) == 0:
import warnings
warnings.warn('milk.featureselection: No features selected! Using all features as fall-back.')
idxs = np.arange(len(features[0]))
return filterfeatures(idxs)
def __repr__(self):
return 'featureselector(%s)' % self.selector
def sda_filter():
return featureselector(sda)
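# Usage sketch (illustrative): `featureselector` wraps any function with the
# signature `function(features, labels) -> indices` into a trainable
# transformer, and `sda_filter()` is simply that wrapper around `sda`:
#
#   selector = sda_filter()
#   # given some `features` (2-d array) and `labels` (1-d array):
#   # model = selector.train(features, labels)
#   # reduced = model.apply_many(features)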
def rank_corr(features, labels):
'''
rs = rank_corr(features, labels)
    Computes the following expression::

        rs[i] = max_e corr(rank(features[:,i]), labels == e)**2
This is appropriate for numeric features and categorical labels.
Parameters
----------
features : ndarray
feature matrix
labels : sequence
Returns
-------
rs : ndarray of float
rs are the rank correlations
'''
features = np.asanyarray(features)
labels = np.asanyarray(labels)
n = len(features)
    # argsort gives the sorting permutation; applying it twice gives the ranks
    ranks = features.argsort(0).argsort(0)
    ranks = ranks.astype(float)
binlabels = np.array([(labels == ell) for ell in set(labels)], dtype=float)
mx = ranks.mean(0)
my = binlabels.mean(1)
sx = ranks.std(0)
sy = binlabels.std(1)
r = np.dot(binlabels,ranks)
r -= np.outer(n*my, mx)
r /= np.outer(sy, sx)
r /= n # Use n [instead of n-1] to match numpy's corrcoef
r **= 2
return r.max(0)
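# Usage sketch (illustrative; the synthetic data is an assumption): higher
# values of `rs` mark features whose ranks track one of the class indicators:
#
#   import numpy as np
#   np.random.seed(0)
#   features = np.random.randn(60, 4)
#   labels = np.repeat([0, 1], 30)
#   features[:, 2] += 3*labels        # feature 2 separates the classes
#   rs = rank_corr(features, labels)  # rs[2] should be the largest entry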
class select_n_best(object):
'''
    selector = select_n_best(n, measure)
Selects the `n` features that score the highest in `measure`
'''
def __init__(self, n, measure):
self.n = n
self.measure = measure
def train(self, features, labels, **kwargs):
        values = self.measure(features, labels)
        # argsort is ascending, so reverse it before taking the first n
        # indices; otherwise the n lowest-scoring features would be kept
        idxs = values.argsort()[::-1][:self.n]
        return filterfeatures(idxs)
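# Usage sketch (illustrative): combining `select_n_best` with `rank_corr`
# keeps the 2 features with the highest rank-correlation scores:
#
#   selector = select_n_best(2, rank_corr)
#   # given `features` and `labels` as in the rank_corr sketch above:
#   # model = selector.train(features, labels)
#   # reduced = model.apply_many(features)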