Ramp: Python Library for Prototyping of Machine Learning Solutions

0关注
6粉丝

已卖：1556份资源

副教授

31%

还不是VIP/贵宾

-

TA的文库 其他...

Complex Data Analysis

东西方金融数据分析

eBook with Data and Code

0%

威望: 0 级
论坛币: 11779 个
通用积分: 2.3950
学术水平: 119 点
热心指数: 115 点
信用等级: 114 点
经验: 8940 点
帖子: 173
精华: 10
在线时间: 30 小时
注册时间: 2006-9-19
最后登录: 2022-11-3

楼主

NewOccidental 发表于 2016-4-25 03:12:28 |AI写论文

是否 +2 论坛币

k人参与回答

经管之家送您一份

应届毕业生专属福利!

求职就业群

赵安豆老师微信：zhaoandou666

经管之家联合CDA

送您一个全额奖学金名额~ !

立即领取

感谢您参与论坛问题回答

经管之家送您两个论坛币！

+2 论坛币

Ramp

Ramp is a Python library for rapid prototyping of machine learning solutions. It is a lightweight, pandas-based machine learning framework that plugs into existing Python machine learning and statistics tools (scikit-learn, rpy2, etc.). Ramp provides a simple, declarative syntax for exploring features, algorithms and transformations quickly and efficiently.

ramp-master.zip (70.55 KB)

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

分享0 收藏0 回帖

关键词：solutions Solution Learning machine Library framework learning existing features provides

相关帖子

沙发

NewOccidental 发表于 2016-4-25 03:13:42

import urllib2
import pandas as pd
import sklearn
from sklearn import decomposition
import ramp
from ramp.features import *
from ramp.metrics import PositiveRate, Recall
import logging
# Set the root logger's threshold to DEBUG.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# fetch and clean iris data from UCI
# NOTE(review): read_csv is called without header=None, so the first line of
# the download is presumably consumed as the header row before the columns
# are renamed below -- confirm against the raw file.
data = pd.read_csv(urllib2.urlopen(
"http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"))
data = data.drop([149]) # bad line
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
data.columns = columns
# one FillMissing(column, 0) feature per column, excluding the final
# 'class' column (which is used as the target below)
features = [FillMissing(f, 0) for f in columns[:-1]]
# features, log transformed features, and interaction terms
expanded_features = (
features +
[Log(F(f) + 1) for f in features] +
[
F('sepal_width') ** 2,
combo.Interactions(features),
]
)
# Reporter factories passed to cv_factory via reporter_factories.
reporters = [
ramp.reporters.MetricReporter.factory(Recall(.4)),
ramp.reporters.DualThresholdMetricReporter.factory(Recall(), PositiveRate())
]
# Define several models and feature sets to explore and run
# cross-validation with folds=10 on each.
# Two estimators and three active feature sets are listed (a fourth,
# feature selection, is commented out), so by the original arithmetic
# this is 3 * 2 = 6 combinations.
outcomes = ramp.shortcuts.cv_factory(
data=data,
folds=10,
target=[AsFactor('class')],
reporter_factories=reporters,
# Try out two algorithms
estimator=[
sklearn.ensemble.RandomForestClassifier(
n_estimators=20),
sklearn.linear_model.LogisticRegression(),
],
# and 3 feature sets (the feature-selection set is disabled)
features=[
expanded_features,
# Feature selection
# [trained.FeatureSelector(
# expanded_features,
# # use random forest's importance to trim
# ramp.selectors.BinaryFeatureSelector(),
# target=AsFactor('class'), # target to use
# data=data,
# n_keep=5, # keep top 5 features
# )],
# Reduce feature dimension (pointless on this dataset)
[combo.DimensionReduction(expanded_features,
decomposer=decomposition.PCA(n_components=4))],
# Normalized features
[Normalize(f) for f in expanded_features],
]
)
# Only the first reporter of the first outcome is printed
# (Python 2 print statement; values() is indexed as a list).
print outcomes.values()[0]['reporters'][0]

复制代码

藤椅

NewOccidental 发表于 2016-4-25 03:14:16

import pandas
from ramp import *
from ramp.estimators.sk import BinaryProbabilities
import sklearn
from sklearn import naive_bayes
import gensim
import tempfile
# Load the training set from the working directory. The file is not
# shipped with the example, so a missing file is re-raised with a pointer
# to the download page.
try:
    training_data = pandas.read_csv('train.csv')
except IOError:
    raise IOError(
        "You need to download the 'Detecting Insults' dataset "
        "from Kaggle to run this example. "
        "http://www.kaggle.com/c/detecting-insults-in-social-commentary")

# Fresh temporary directory handed to the DataContext as its store.
tmpdir = tempfile.mkdtemp()
context = DataContext(store=tmpdir, data=training_data)

# Settings shared by every configuration the factory produces.
base_config = Configuration(target='Insult', metrics=[metrics.AUC()])

# Feature set 1: basic attributes of the comment text.
base_features = [
    Length('Comment'),
    Log(Length('Comment') + 1),
]

# Feature set 2: basic attributes plus word features.
word_features = base_features + [
    text.NgramCounts(text.Tokenizer('Comment'), mindocs=5, bool_=True),
]

# Feature set 3: character 5-grams, trimmed by the selector to
# n_keep=1000 features.
char_5gram_counts = text.NgramCounts(
    text.CharGrams('Comment', chars=5),
    bool_=True,
    mindocs=30)
selected_chargram_features = base_features + [
    trained.FeatureSelector(
        [char_5gram_counts],
        selector=selectors.BinaryFeatureSelector(),
        n_keep=1000,
        target=F('Insult')),
]

# Feature set 4: LSI with num_topics=100 over the character 5-grams.
latent_chargram_features = base_features + [
    text.LSI(
        text.CharGrams('Comment', chars=5),
        mindocs=30,
        num_topics=100),
]

# Two estimators, each wrapped so that class probabilities are the output.
candidate_models = [
    BinaryProbabilities(sklearn.linear_model.LogisticRegression()),
    BinaryProbabilities(naive_bayes.GaussianNB()),
]

factory = ConfigFactory(
    base_config,
    features=[
        base_features,
        word_features,
        selected_chargram_features,
        latent_chargram_features,
    ],
    model=candidate_models,
)

# Cross-validate every configuration the factory yields.
for config in factory:
    models.cv(config, context, folds=5, repeat=2, print_results=True)
def probability_of_insult(config, ctx, txt):
    """Return the model output for a single new comment.

    The comment is added to ``ctx.data`` under an integer index derived
    from the MD5 digest of its text, and ``models.predict`` is then asked
    to predict only that index.

    Args:
        config: configuration passed through to ``models.predict``.
        ctx: data context. Side effect: ``ctx.data`` is replaced by a
            frame that additionally contains the new comment row.
        txt: the comment text (text or bytes).

    Returns:
        The entry of the prediction result stored under the new index.
    """
    # Imported locally so the function does not rely on a star import
    # happening to provide ``md5``.
    import hashlib

    # md5 works on bytes; encode text so this also runs where str is unicode.
    raw = txt if isinstance(txt, bytes) else txt.encode('utf-8')
    # Index built from the first 10 hex digits (40 bits) of the digest.
    idx = int(hashlib.md5(raw).hexdigest()[:10], 16)

    # One-row frame holding the new comment under that index.
    new_row = pandas.DataFrame(
        {'Comment': [txt]},
        index=pandas.Index([idx]))
    # pandas.concat replaces DataFrame.append, which newer pandas removed.
    ctx.data = pandas.concat([ctx.data, new_row])

    # Restrict prediction to the new row via predict_index; only the first
    # element of the returned triple is needed here.
    pred, _, _ = models.predict(
        config,
        ctx,
        predict_index=pandas.Index([idx]))
    return pred[idx]

复制代码

板凳

NewOccidental 发表于 2016-4-25 03:14:58

# class Filter(Storable):
# def __init__(self, exclude_func=None, include_func=None):
# self.exclude_func = exclude_func
# self.include_func = include_func
# def filter(self, df):
# if self.include_func is not None:
# df =
def filter_incomplete(df):
    """Return the result of ``df.dropna()``; ``df`` itself is not modified."""
    return df.dropna()

复制代码

报纸

NewOccidental 发表于 2016-4-25 03:16:36

import types
import numpy as np
# Public API of this module: the names exported by a star import.
__all__ = ['Wrapper', 'Estimator', 'ConstantClassifier', 'ConstantRegressor',
'Probabilities', 'BinaryProbabilities', 'wrap_sklearn_like_estimator']
class Wrapper(object):
    """Proxy that forwards unknown attribute lookups to a wrapped object.

    Attributes found on the wrapper itself (or its class) win; anything
    else is looked up on the wrapped object stored in ``_obj``.
    """

    def __init__(self, obj):
        """Store ``obj`` as the delegation target."""
        self._obj = obj

    def __getattr__(self, attr):
        """Look ``attr`` up on the wrapped object.

        Only called when normal attribute lookup on the wrapper fails.

        Raises:
            AttributeError: if the wrapped object has no such attribute,
                or if the wrapper has no wrapped object yet.
        """
        # If ``_obj`` itself is missing (instance built without __init__,
        # e.g. during copy or unpickling), reading ``self._obj`` below would
        # re-enter this method without end; fail fast instead.
        if attr == '_obj':
            raise AttributeError(attr)
        try:
            # Bound methods are returned as-is; they are already callable.
            return getattr(self._obj, attr)
        except AttributeError:
            raise AttributeError(
                "%r object has no attribute %r"
                % (type(self).__name__, attr))

    # Explicit pickle hooks, so pickling does not go through __getattr__
    # to find them on the wrapped object.
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)
class Estimator(Wrapper):
    """Adapter between objects exposing ``.values`` and a wrapped estimator.

    ``fit`` and the predict methods pass ``x.values`` / ``y.values`` to the
    wrapped estimator; any other attribute is delegated through ``Wrapper``.
    """

    def __init__(self, estimator):
        super(Estimator, self).__init__(estimator)
        # Kept under its own name in addition to Wrapper's ``_obj``.
        self.base_estimator_ = estimator

    def __repr__(self):
        return "%r" % (self.base_estimator_,)

    def fit(self, x, y, **kwargs):
        """Fit the wrapped estimator on ``x.values`` and ``y.values``.

        Returns whatever the wrapped estimator's ``fit`` returns.
        """
        model = self.base_estimator_
        return model.fit(x.values, y.values, **kwargs)

    def predict_maxprob(self, x, **kwargs):
        """
        Most likely value: the wrapped estimator's ``predict`` on
        ``x.values``.
        """
        model = self.base_estimator_
        return model.predict(x.values, **kwargs)

    def predict(self, x, **kwargs):
        """
        Model output. In this class it is the wrapped estimator's
        ``predict`` on ``x.values``; subclasses may override it to return
        something else, e.g. the probability of each outcome.
        """
        model = self.base_estimator_
        return model.predict(x.values, **kwargs)
class Probabilities(Estimator):
    """
    Wraps a scikit-learn-like estimator so that ``predict`` returns the
    output of the estimator's ``predict_proba``.
    """

    def __init__(self, estimator, binary=False):
        """
        binary: If True, predict returns only column 1 of the
        ``predict_proba`` output (the positive class). If False, the full
        output is returned, except that an output with exactly two columns
        is still reduced to column 1.
        """
        self.binary = binary
        super(Probabilities, self).__init__(estimator)

    def __str__(self):
        return u"Probabilites for %s" % self.base_estimator_

    def predict(self, x, **kwargs):
        """Return class probabilities from the wrapped estimator.

        ``x`` is handed to ``predict_proba`` unchanged. Extra keyword
        arguments are forwarded, matching ``Estimator.predict``.
        """
        probs = self.base_estimator_.predict_proba(x, **kwargs)
        if probs.shape[1] == 2 or self.binary:
            # Two-class output (or binary mode): keep the second column only.
            return probs[:, 1]
        return probs
class BinaryProbabilities(Probabilities):
    """``Probabilities`` with ``binary`` fixed to True."""

    def __init__(self, estimator):
        super(BinaryProbabilities, self).__init__(estimator, True)
class ConstantClassifier(object):
    """Two-class classifier that predicts the same thing for every row.

    ``func`` is applied to the training targets in ``fit``; its result is
    stored as ``constant`` and used as the probability of class 1.
    """

    def __init__(self, func):
        self.func = func
        # Set by fit(); None until then.
        self.constant = None

    def fit(self, x, y, **kwargs):
        """Compute the constant from ``y`` (``x`` is ignored).

        Returns ``self`` so calls can be chained.
        """
        self.constant = self.func(y)
        return self

    def predict(self, x, **kwargs):
        """Return one label per row of ``x``: 1 if the constant exceeds
        0.5, otherwise 0."""
        label = int(self.constant > .5)
        # Explicit dtype: labels are integers on every NumPy version.
        return np.full((x.shape[0],), label, dtype=int)

    def predict_proba(self, x, **kwargs):
        """Return an (n_rows, 2) array: column 0 is ``1 - constant``,
        column 1 is ``constant``."""
        p = np.zeros((x.shape[0], 2))
        p[:, 0] = 1 - self.constant
        p[:, 1] = self.constant
        return p
class ConstantRegressor(object):
    """Regressor that predicts one constant value for every row.

    ``func`` is applied to the training targets in ``fit``; its result is
    stored as ``constant`` and returned for each row by ``predict``.
    """

    def __init__(self, func):
        self.func = func
        # Set by fit(); None until then.
        self.constant = None

    def fit(self, x, y, **kwargs):
        """Compute the constant from ``y`` (``x`` is ignored).

        Returns ``self`` so calls can be chained.
        """
        self.constant = self.func(y)
        return self

    def predict(self, x, **kwargs):
        """Return a 1-D array with one copy of the constant per row of ``x``."""
        return np.full((x.shape[0],), self.constant)
def wrap_sklearn_like_estimator(estimator):
    """Wrap a scikit-learn-like estimator in the matching adapter class.

    Args:
        estimator: ``None``, an ``Estimator`` instance, or any object with
            a ``fit`` method and a ``predict`` or ``predict_proba`` method.

    Returns:
        ``None`` for ``None``; the argument itself if it already is an
        ``Estimator``; a ``Probabilities`` wrapper if the object has
        ``predict_proba``; otherwise an ``Estimator`` wrapper.

    Raises:
        ValueError: if the object lacks ``fit`` or has neither ``predict``
            nor ``predict_proba``.
    """
    if estimator is None:
        return None

    can_predict = (hasattr(estimator, "predict")
                   or hasattr(estimator, "predict_proba"))
    # Estimator instances define fit and predict themselves, so running
    # this check before the isinstance test never rejects one.
    if not (hasattr(estimator, "fit") and can_predict):
        # One-element tuple keeps the formatting safe when the offending
        # object is itself a tuple.
        raise ValueError("Invalid estimator: %s" % (estimator,))

    if isinstance(estimator, Estimator):
        # Already wrapped: return unchanged.
        return estimator
    if hasattr(estimator, "predict_proba"):
        return Probabilities(estimator)
    return Estimator(estimator)

复制代码

Ramp:Python Library for Prototyping of Machine Learning Solutions [推广有奖]

经管之家送您一份

经管之家联合CDA

感谢您参与论坛问题回答

扫码加我拉你入群

相关帖子

浏览过的帖子

浏览过的版块

本版微信群

Ramp:Python Library for Prototyping of Machine Learning Solutions [推广有奖]

经管之家送您一份

经管之家联合CDA

感谢您参与论坛问题回答

扫码加我 拉你入群

相关帖子

浏览过的帖子

浏览过的版块

本版微信群

扫码加我拉你入群