楼主: Limdep
1155 1

[问答] kNN Code? [推广有奖]

  • 0关注
  • 2粉丝

已卖:117份资源

本科生

98%

还不是VIP/贵宾

-

TA的文库  其他...

Java资源全汇

Data Science NewOccidental

Database NewOccidental

威望
0
论坛币
4718 个
通用积分
4.2550
学术水平
8 点
热心指数
3 点
信用等级
3 点
经验
1089 点
帖子
133
精华
0
在线时间
20 小时
注册时间
2006-5-15
最后登录
2017-10-27

楼主
Limdep 发表于 2015-2-28 23:07:25 |AI写论文

+2 论坛币
k人 参与回答

经管之家送您一份

应届毕业生专属福利!

求职就业群
赵安豆老师微信:zhaoandou666

经管之家联合CDA

送您一个全额奖学金名额~ !

感谢您参与论坛问题回答

经管之家送您两个论坛币!

+2 论坛币
Attached please find the kNN Python code from "Machine Learning in Action" by Peter Harrington. Unfortunately, it does not run. Could anybody take a look and tell me how to fix it? (I use Spyder with Python 2.7.)
Thanks
  1. '''
  2. Created on Sep 16, 2010
  3. kNN: k Nearest Neighbors

  4. Input:      inX: vector to compare to existing dataset (1xN)
  5.             dataSet: size m data set of known vectors (NxM)
  6.             labels: data set labels (1xM vector)
  7.             k: number of neighbors to use for comparison (should be an odd number)
  8.             
  9. Output:     the most popular class label

  10. @author: pbharrin
  11. '''
  12. from numpy import *
  13. import operator
  14. from os import listdir

  def classify0(inX, dataSet, labels, k):
      """Return the majority label among the k rows of dataSet nearest to inX.

      inX     -- vector to classify (length N, or shape (1, N))
      dataSet -- numpy array of known vectors, one per row (M x N)
      labels  -- sequence of M labels, labels[i] belonging to dataSet[i]
      k       -- number of nearest neighbours that vote

      Distance is Euclidean. Raises IndexError when k exceeds the number of
      rows in dataSet or when k is 0 (no votes to rank).
      """
      # Broadcasting subtracts every row of dataSet from inX, which is what
      # the explicit tile(inX, (m, 1)) - dataSet did.
      diffMat = inX - dataSet
      sqDistances = (diffMat ** 2).sum(axis=1)
      distances = sqDistances ** 0.5
      sortedDistIndicies = distances.argsort()

      # Tally the labels of the k closest rows.
      classCount = {}
      for i in range(k):
          voteIlabel = labels[sortedDistIndicies[i]]
          classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

      # items() exists on both Python 2 and Python 3; iteritems() is
      # Python 2 only and raises AttributeError on Python 3.
      sortedClassCount = sorted(classCount.items(),
                                key=operator.itemgetter(1), reverse=True)
      return sortedClassCount[0][0]

  def createDataSet():
      """Return a tiny demo data set: a 4x2 array of points and their labels.

      The first two points carry label 'A', the last two label 'B'.
      """
      group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
      labels = ['A', 'A', 'B', 'B']
      return group, labels

  def file2matrix(filename):
      """Load a tab-separated data file into a feature matrix and label list.

      Each non-blank line must hold at least three numeric fields followed by
      an integer class label in the last field. The first three fields become
      one row of the returned matrix.

      Returns (returnMat, classLabelVector): a (rows x 3) float array and a
      list of int labels in file order.
      """
      # Read the file once and close it deterministically; blank lines
      # (e.g. a trailing newline at the end of the file) are skipped instead
      # of crashing the int()/float() conversions below.
      with open(filename) as fr:
          rows = [line.strip().split('\t') for line in fr if line.strip()]

      returnMat = zeros((len(rows), 3))
      classLabelVector = []
      for index, listFromLine in enumerate(rows):
          # Convert explicitly rather than relying on numpy to coerce strings.
          returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
          classLabelVector.append(int(listFromLine[-1]))
      return returnMat, classLabelVector
  46.    
  def autoNorm(dataSet):
      """Rescale every column of dataSet to the range [0, 1].

      dataSet -- 2-D numpy array, one sample per row.

      Returns (normDataSet, ranges, minVals) where
      normDataSet = (dataSet - minVals) / ranges column by column, and
      ranges / minVals are the per-column (max - min) and min.

      A column whose values are all equal has range 0; it is normalised to
      0.0 instead of producing NaN/inf from a division by zero. The returned
      ranges still reports the true value (0) for such a column.
      """
      minVals = dataSet.min(0)
      maxVals = dataSet.max(0)
      ranges = maxVals - minVals

      # Float copy used only as the divisor: avoids integer floor division
      # on Python 2 and lets zero ranges be replaced without touching the
      # ranges value handed back to the caller.
      divisors = ranges.astype(float)
      divisors[divisors == 0] = 1.0

      # Broadcasting applies the per-column min and divisor to every row.
      normDataSet = (dataSet - minVals) / divisors
      return normDataSet, ranges, minVals
  56.    
  def datingClassTest(hoRatio=0.50, filename='datingTestSet2.txt', k=3):
      """Measure the kNN error rate on the dating data set and print it.

      hoRatio  -- fraction of the rows (taken from the start of the file)
                  held out as test vectors; the remaining rows are the
                  training set. The default holds out 50%.
      filename -- tab-separated data file understood by file2matrix.
      k        -- number of neighbours passed to classify0.

      Prints one line per test vector, then the error rate and the error
      count. Returns None.
      """
      datingDataMat, datingLabels = file2matrix(filename)
      normMat, ranges, minVals = autoNorm(datingDataMat)
      m = normMat.shape[0]
      numTestVecs = int(m * hoRatio)
      errorCount = 0.0
      for i in range(numTestVecs):
          # Rows [0, numTestVecs) are tested against rows [numTestVecs, m).
          classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                       datingLabels[numTestVecs:m], k)
          # print(...) with a single pre-formatted string behaves the same
          # on Python 2 and Python 3.
          print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
          if classifierResult != datingLabels[i]:
              errorCount += 1.0
      print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
      print(errorCount)
  70.    
  def img2vector(filename):
      """Read a 32x32 text image of digit characters into a 1x1024 vector.

      The file must contain at least 32 lines, each starting with 32
      characters that int() can parse (e.g. '0' / '1'). Character j of line
      i is stored at index 32*i + j of the returned (1, 1024) float array.
      """
      returnVect = zeros((1, 1024))
      # The with-block closes the file even if a line is short or malformed.
      with open(filename) as fr:
          for i in range(32):
              lineStr = fr.readline()
              for j in range(32):
                  returnVect[0, 32*i + j] = int(lineStr[j])
      return returnVect

  def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits', k=3):
      """Run kNN digit recognition over a test directory and print the errors.

      trainingDir -- directory of training images readable by img2vector
      testDir     -- directory of test images readable by img2vector
      k           -- number of neighbours passed to classify0

      File names must look like '<digit>_<anything>.txt'; the integer before
      the first underscore is taken as the true class. Prints one line per
      test file, then the number of errors and the error rate. Returns None.
      """
      # Load the training set: one 1024-element row per file.
      hwLabels = []
      trainingFileList = listdir(trainingDir)
      m = len(trainingFileList)
      trainingMat = zeros((m, 1024))
      for i in range(m):
          fileNameStr = trainingFileList[i]
          fileStr = fileNameStr.split('.')[0]     # take off .txt
          classNumStr = int(fileStr.split('_')[0])
          hwLabels.append(classNumStr)
          trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))

      # Classify every file in the test set and count the mismatches.
      testFileList = listdir(testDir)
      errorCount = 0.0
      mTest = len(testFileList)
      for i in range(mTest):
          fileNameStr = testFileList[i]
          fileStr = fileNameStr.split('.')[0]     # take off .txt
          classNumStr = int(fileStr.split('_')[0])
          vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
          classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, k)
          # print(...) with a single pre-formatted string behaves the same
          # on Python 2 and Python 3.
          print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
          if classifierResult != classNumStr:
              errorCount += 1.0
      print("\nthe total number of errors is: %d" % errorCount)
      print("\nthe total error rate is: %f" % (errorCount/float(mTest)))
复制代码


二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

关键词:code COD knn ODE handwriting comparison existing running compare please

沙发
zouguangyong 在职认证  发表于 2015-3-1 09:53:42

您需要登录后才可以回帖 登录 | 我要注册

本版微信群
加好友,备注cda
拉您进交流群
GMT+8, 2025-12-26 16:34