K-Nearest Neighbors (KNN) for recognizing handwritten digits
Beijing Institute of Technology | Ming-Jian Li
The following Python code is a companion code for the course on Artificial Intelligence and Simulation Science. It converts an image of a handwritten digit into a 32x32 text bitmap and uses a KNN model to recognize the number. The dataset is in data.zip (click to download).
x
1# convert image into txt2import os3from PIL import Image4
def imgtotext(imgfile, txtfile, size=(32, 32)):
    """Convert an image into a text bitmap made of '0' and '1' characters.

    The image is resized to ``size``, converted to 8-bit grayscale ('L'),
    and written to ``txtfile`` with one text line per pixel row. Pixels
    darker than 128 are written as '1', all others as '0'.

    Args:
        imgfile: Path of the image to read (passed to ``Image.open``).
        txtfile: Path of the text file to create or overwrite.
        size: Target ``(width, height)`` in pixels; defaults to 32x32.
    """
    # Index 0 (pixel value < 128, dark) -> '1'; index 1 (light) -> '0'.
    ascii_char = '10'
    rows = []
    # The context manager releases the image file handle even if resizing
    # or pixel access raises.
    with Image.open(imgfile) as source:
        bitmap = source.resize(size, Image.LANCZOS).convert('L')
        width, height = bitmap.size
        for y in range(height):
            row = ''.join(
                ascii_char[bitmap.getpixel((x, y)) // 128]
                for x in range(width)
            )
            rows.append(row + '\n')
    # Write everything in one batch; the file is closed on any exit path.
    with open(txtfile, 'w') as f:
        f.writelines(rows)
# Convert the sample photo test.jpg into the text bitmap test.txt.
imgtotext(imgfile='test.jpg', txtfile='test.txt')
24from os import listdir25from numpy import *26import numpy as np27import operator28
def KNN(test_data, train_data, train_label, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. All distances are printed before voting.

    Args:
        test_data: Feature vector of the sample to classify (length d).
        train_data: Training features, one row per sample (n x d); an
            ndarray or anything ``np.asarray`` can convert.
        train_label: Sequence of n labels; ``train_label[i]`` belongs to
            row i of ``train_data``.
        k: Number of nearest neighbours that vote; must satisfy
            1 <= k <= n.

    Returns:
        The label with the most votes among the k nearest rows. On a tie,
        the label whose first vote came from the nearer neighbour wins.

    Raises:
        ValueError: If k is smaller than 1 or larger than the number of
            training rows.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from collections import Counter

    train_data = np.asarray(train_data, dtype=float)
    test_data = np.asarray(test_data, dtype=float)

    # rows of dataset
    dataSetSize = train_data.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )

    # Broadcasting subtracts the test vector from every training row, so
    # no explicit tiling of the test vector is needed.
    all_distances = np.sqrt(np.sum(np.square(test_data - train_data), axis=1))
    print("all distances: ",all_distances)

    # A stable sort keeps equidistant rows in training order, which makes
    # the choice of neighbours deterministic.
    sort_distance_index = np.argsort(all_distances, kind="stable")

    # Vote among the k closest rows; Counter keeps first-seen order for
    # equal counts, so ties go to the label seen nearest first.
    votes = Counter(train_label[i] for i in sort_distance_index[:k])
    return votes.most_common(1)[0][0]
# turn txt into vector 32x32 -> 1x1024
def img2vector(filename, size=(32, 32)):
    """Read a text bitmap and flatten it row by row into a list of ints.

    Args:
        filename: Path of a text file holding at least ``height`` lines,
            each starting with at least ``width`` digit characters.
        size: ``(width, height)`` of the bitmap; defaults to 32x32, which
            yields a 1024-element vector.

    Returns:
        A list of ``width * height`` ints, rows concatenated top to bottom.

    Raises:
        IndexError: If a line is shorter than ``width`` characters or the
            file has fewer than ``height`` lines.
        ValueError: If a character in the bitmap area is not a digit.
    """
    width, height = size
    returnVect = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for _ in range(height):
            lineStr = fr.readline()
            # Index explicitly so a short line raises instead of silently
            # producing a truncated vector.
            returnVect.extend(int(lineStr[j]) for j in range(width))
    return returnVect
# get label from file name
def classnumCut(fileName):
    """Return the integer class label encoded at the start of a file name.

    The label is the text before the first '_' within the part of the
    name that precedes the first '.', e.g. '0_3.txt' -> 0.
    """
    stem, _, _ = fileName.partition('.')
    label_text, _, _ = stem.partition('_')
    return int(label_text)
# training set
def trainingDataSet(data_dir='data/trainingDigits'):
    """Load every training bitmap in a directory into labels and a matrix.

    Args:
        data_dir: Directory holding the training ``.txt`` bitmaps; the
            label is parsed from each file name by ``classnumCut``.
            Defaults to 'data/trainingDigits'.

    Returns:
        A tuple ``(train_label, train_data)``: a list of m integer labels
        and an (m, 1024) array whose row i is the vector of file i.
    """
    # listdir returns names in arbitrary order, so sort for a reproducible
    # row order. Skip entries that are not .txt samples (e.g. .DS_Store),
    # which would otherwise fail label parsing.
    trainingFileList = sorted(
        name for name in listdir(data_dir) if name.endswith('.txt')
    )
    m = len(trainingFileList)
    train_data = np.zeros((m, 1024))
    train_label = []
    for i, fileNameStr in enumerate(trainingFileList):
        # get label
        train_label.append(classnumCut(fileNameStr))
        train_data[i, :] = img2vector(os.path.join(data_dir, fileNameStr))
    return train_label, train_data
75# test number76Nearest_Neighbor_number = 377train_label,train_data = trainingDataSet()78test_data = img2vector('test.txt')79# predict80classifierResult = KNN(test_data, train_data, train_label, Nearest_Neighbor_number)81print ("I guess the number is: ", classifierResult)The following figure is a handwritten digit:

The prediction by KNN is:
I guess the number is: 6