楼主: Lisrelchen
862 1

How To Implement Naive Bayes From Scratch in Python [推广有奖]

  • 0关注
  • 62粉丝

VIP

院士

67%

还不是VIP/贵宾

-

TA的文库  其他...

Bayesian NewOccidental

Spatial Data Analysis

东西方数据挖掘

威望
0
论坛币
49957 个
通用积分
79.5487
学术水平
253 点
热心指数
300 点
信用等级
208 点
经验
41518 点
帖子
3256
精华
14
在线时间
766 小时
注册时间
2006-5-4
最后登录
2022-11-6

+2 论坛币
k人 参与回答

经管之家送您一份

应届毕业生专属福利!

求职就业群
赵安豆老师微信:zhaoandou666

经管之家联合CDA

送您一个全额奖学金名额~ !

感谢您参与论坛问题回答

经管之家送您两个论坛币!

+2 论坛币
  1. # Example of Naive Bayes implemented from Scratch in Python
  2. import csv
  3. import random
  4. import math

  5. def loadCsv(filename):
  6.         lines = csv.reader(open(filename, "rb"))
  7.         dataset = list(lines)
  8.         for i in range(len(dataset)):
  9.                 dataset[i] = [float(x) for x in dataset[i]]
  10.         return dataset

  11. def splitDataset(dataset, splitRatio):
  12.         trainSize = int(len(dataset) * splitRatio)
  13.         trainSet = []
  14.         copy = list(dataset)
  15.         while len(trainSet) < trainSize:
  16.                 index = random.randrange(len(copy))
  17.                 trainSet.append(copy.pop(index))
  18.         return [trainSet, copy]

  19. def separateByClass(dataset):
  20.         separated = {}
  21.         for i in range(len(dataset)):
  22.                 vector = dataset[i]
  23.                 if (vector[-1] not in separated):
  24.                         separated[vector[-1]] = []
  25.                 separated[vector[-1]].append(vector)
  26.         return separated

  27. def mean(numbers):
  28.         return sum(numbers)/float(len(numbers))

  29. def stdev(numbers):
  30.         avg = mean(numbers)
  31.         variance = sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1)
  32.         return math.sqrt(variance)

  33. def summarize(dataset):
  34.         summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
  35.         del summaries[-1]
  36.         return summaries

  37. def summarizeByClass(dataset):
  38.         separated = separateByClass(dataset)
  39.         summaries = {}
  40.         for classValue, instances in separated.iteritems():
  41.                 summaries[classValue] = summarize(instances)
  42.         return summaries

  43. def calculateProbability(x, mean, stdev):
  44.         exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))
  45.         return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent

  46. def calculateClassProbabilities(summaries, inputVector):
  47.         probabilities = {}
  48.         for classValue, classSummaries in summaries.iteritems():
  49.                 probabilities[classValue] = 1
  50.                 for i in range(len(classSummaries)):
  51.                         mean, stdev = classSummaries[i]
  52.                         x = inputVector[i]
  53.                         probabilities[classValue] *= calculateProbability(x, mean, stdev)
  54.         return probabilities
  55.                        
  56. def predict(summaries, inputVector):
  57.         probabilities = calculateClassProbabilities(summaries, inputVector)
  58.         bestLabel, bestProb = None, -1
  59.         for classValue, probability in probabilities.iteritems():
  60.                 if bestLabel is None or probability > bestProb:
  61.                         bestProb = probability
  62.                         bestLabel = classValue
  63.         return bestLabel

  64. def getPredictions(summaries, testSet):
  65.         predictions = []
  66.         for i in range(len(testSet)):
  67.                 result = predict(summaries, testSet[i])
  68.                 predictions.append(result)
  69.         return predictions

  70. def getAccuracy(testSet, predictions):
  71.         correct = 0
  72.         for i in range(len(testSet)):
  73.                 if testSet[i][-1] == predictions[i]:
  74.                         correct += 1
  75.         return (correct/float(len(testSet))) * 100.0

  76. def main():
  77.         filename = 'pima-indians-diabetes.data.csv'
  78.         splitRatio = 0.67
  79.         dataset = loadCsv(filename)
  80.         trainingSet, testSet = splitDataset(dataset, splitRatio)
  81.         print('Split {0} rows into train={1} and test={2} rows').format(len(dataset), len(trainingSet), len(testSet))
  82.         # prepare model
  83.         summaries = summarizeByClass(trainingSet)
  84.         # test model
  85.         predictions = getPredictions(summaries, testSet)
  86.         accuracy = getAccuracy(testSet, predictions)
  87.         print('Accuracy: {0}%').format(accuracy)

  88. main()
复制代码
http://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

关键词:implement scratch python Bayes naive 文山

沙发
Lisrelchen 发表于 2016-12-11 04:01:24 |只看作者 |坛友微信交流群
  1. >>> from sklearn import datasets
  2. >>> iris = datasets.load_iris()
  3. >>> from sklearn.naive_bayes import GaussianNB
  4. >>> gnb = GaussianNB()
  5. >>> y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
  6. >>> print("Number of mislabeled points out of a total %d points : %d"
  7. ...       % (iris.data.shape[0],(iris.target != y_pred).sum()))
复制代码
http://scikit-learn.org/stable/modules/naive_bayes.html

使用道具

您需要登录后才可以回帖 登录 | 我要注册

本版微信群
加好友,备注jltj
拉您入交流群

京ICP备16021002-2号 京B2-20170662号 京公网安备 11010802022788号 论坛法律顾问:王进律师 知识产权保护声明   免责及隐私声明

GMT+8, 2024-4-30 16:42