经管之家送您一份
应届毕业生专属福利!
求职就业群
感谢您参与论坛问题回答
经管之家送您两个论坛币!
+2 论坛币
- # Example of Naive Bayes implemented from Scratch in Python
- import csv
- import random
- import math
-
def loadCsv(filename):
    """Load a purely numeric CSV file into a list of float rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list containing one list of floats per non-empty CSV row.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If any field cannot be converted to ``float``.
    """
    # Text mode with newline="" is what the csv module expects on Python 3
    # (binary mode makes csv.reader fail); the context manager also closes
    # the handle, which was previously leaked.
    with open(filename, newline="") as handle:
        # csv.reader yields [] for blank lines; skip them so later code that
        # reads row[-1] as the class label does not hit an empty row.
        return [[float(x) for x in row] for row in csv.reader(handle) if row]
-
def splitDataset(dataset, splitRatio):
    """Randomly split a dataset into a training part and a test part.

    Rows are drawn without replacement using the ``random`` module, so the
    split is reproducible after ``random.seed``. The input is not modified.

    Args:
        dataset: Sequence of rows.
        splitRatio: Fraction of rows to put in the training set; the
            training size is ``int(len(dataset) * splitRatio)``.

    Returns:
        A two-element list ``[trainSet, testSet]``.

    Raises:
        ValueError: If the requested training size exceeds the dataset size.
    """
    trainSize = int(len(dataset) * splitRatio)
    if trainSize > len(dataset):
        # Fail up front with a clear message instead of the opaque
        # "empty range" error randrange raises once the pool is exhausted.
        raise ValueError(
            "splitRatio {0} requests {1} training rows but only {2} are "
            "available".format(splitRatio, trainSize, len(dataset))
        )
    remaining = list(dataset)  # work on a copy; the caller's list is untouched
    trainSet = []
    while len(trainSet) < trainSize:
        trainSet.append(remaining.pop(random.randrange(len(remaining))))
    return [trainSet, remaining]
-
def separateByClass(dataset):
    """Group rows by their class label.

    Args:
        dataset: Iterable of rows whose last element is the class label.

    Returns:
        A dict mapping each class label to the list of rows (in input
        order) that carry that label.
    """
    separated = {}
    for vector in dataset:
        # The last column is the class label used as the grouping key.
        separated.setdefault(vector[-1], []).append(vector)
    return separated
-
def mean(numbers):
    """Return the arithmetic mean of a non-empty sequence of numbers.

    Args:
        numbers: Sized sequence of numeric values.

    Returns:
        The mean as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    # float() keeps the division true division even on Python 2.
    return sum(numbers) / float(len(numbers))
-
def stdev(numbers):
    """Return the sample standard deviation of a sequence of numbers.

    Uses the n-1 denominator (sample variance).

    Args:
        numbers: Sized sequence holding at least two numeric values.

    Returns:
        The sample standard deviation as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` has fewer than two elements.
    """
    avg = mean(numbers)
    squaredDeviations = sum(pow(x - avg, 2) for x in numbers)
    variance = squaredDeviations / float(len(numbers) - 1)
    return math.sqrt(variance)
-
def summarize(dataset):
    """Compute the (mean, stdev) pair of every attribute column.

    The last column of each row is treated as the class label and is
    excluded from the summary.

    Args:
        dataset: List of equally long rows; the last element of each row is
            the class label.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per attribute column.
    """
    columns = list(zip(*dataset))  # transpose rows into columns
    # Drop the label column before computing statistics instead of computing
    # them for it and deleting the result afterwards.
    return [(mean(column), stdev(column)) for column in columns[:-1]]
-
def summarizeByClass(dataset):
    """Compute per-class attribute summaries for a labelled dataset.

    Args:
        dataset: List of rows whose last element is the class label.

    Returns:
        A dict mapping each class label to the list of ``(mean, stdev)``
        tuples produced by ``summarize`` for the rows of that class.
    """
    separated = separateByClass(dataset)
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    return {
        classValue: summarize(instances)
        for classValue, instances in separated.items()
    }
-
def calculateProbability(x, mean, stdev):
    """Evaluate the Gaussian probability density function at ``x``.

    Args:
        x: Value at which to evaluate the density.
        mean: Mean of the Gaussian distribution.
        stdev: Standard deviation of the Gaussian distribution.

    Returns:
        The density of the normal distribution N(mean, stdev**2) at ``x``.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
-
def calculateClassProbabilities(summaries, inputVector):
    """Compute the product of Gaussian densities of a vector for each class.

    Args:
        summaries: Dict mapping class label to a list of ``(mean, stdev)``
            tuples, one per attribute.
        inputVector: Sequence of attribute values; element ``i`` is matched
            with summary ``i``. Extra trailing elements are ignored.

    Returns:
        A dict mapping each class label to the product of the per-attribute
        densities returned by ``calculateProbability``.
    """
    probabilities = {}
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for classValue, classSummaries in summaries.items():
        likelihood = 1
        # mu/sigma avoid shadowing the module-level mean() and stdev().
        for i, (mu, sigma) in enumerate(classSummaries):
            likelihood *= calculateProbability(inputVector[i], mu, sigma)
        probabilities[classValue] = likelihood
    return probabilities
-
def predict(summaries, inputVector):
    """Return the class label with the highest probability for a vector.

    Args:
        summaries: Per-class attribute summaries as produced by
            ``summarizeByClass``.
        inputVector: Sequence of attribute values to classify.

    Returns:
        The label whose value in ``calculateClassProbabilities`` is largest
        (the first such label on ties), or ``None`` if ``summaries`` is
        empty.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    if not probabilities:
        return None
    # max() keeps the first item and only replaces it on a strictly greater
    # value, the same tie-breaking as a manual "probability > best" scan.
    return max(probabilities, key=probabilities.get)
-
def getPredictions(summaries, testSet):
    """Predict a class label for every row of a test set.

    Args:
        summaries: Per-class attribute summaries as produced by
            ``summarizeByClass``.
        testSet: Iterable of rows to classify.

    Returns:
        A list of predicted labels, in the same order as ``testSet``.
    """
    return [predict(summaries, row) for row in testSet]
-
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testSet: Sized sequence of rows whose last element is the true label.
        predictions: Sequence of predicted labels, indexed like ``testSet``.

    Returns:
        The accuracy as a float percentage in the range 0.0 to 100.0.

    Raises:
        ZeroDivisionError: If ``testSet`` is empty.
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    correct = sum(
        1 for i, row in enumerate(testSet) if row[-1] == predictions[i]
    )
    return (correct / float(len(testSet))) * 100.0
-
def main():
    """Train and evaluate the Naive Bayes model on the Pima diabetes CSV.

    Loads 'pima-indians-diabetes.data.csv' from the current directory,
    splits it 67/33 into training and test rows, fits per-class Gaussian
    summaries on the training rows and prints the split sizes and the
    accuracy measured on the test rows.
    """
    filename = 'pima-indians-diabetes.data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    # .format() must be applied to the string, not to print()'s return value
    # (None on Python 3, which raised AttributeError).
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))
-
# Run the demo only when executed as a script, so importing this module for
# its functions does not trigger file loading and training.
if __name__ == "__main__":
    main()
复制代码
来源：http://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
扫码加我 拉你入群
请注明:姓名-公司-职位
以便审核进群资格,未注明则拒绝
|