{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# http://cs231n.github.io/classification/#nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive exists, proceeding: /Users/eryn/Code/learnmeamachine/scratch/cifar10.tar.gz\n"
     ]
    }
   ],
   "source": [
    "cifar10 = datasets.cifar10()"
   ]
  },
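  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: peek at what the loader returns before building arrays.\n",
    "# Assumes the attributes used in the next cell (all_data_batches, test_batch)\n",
    "# and the standard CIFAR-10 pickle keys b'data' and b'labels'.\n",
    "first_batch = cifar10.all_data_batches[0]\n",
    "print(list(first_batch.keys()))\n",
    "print(np.asarray(first_batch[b'data']).shape, len(first_batch[b'labels']))"
   ]
  },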
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create two arrays: one containing images, one containing labels.\n",
    "# Partition those arrays into a training set, *tr, and a test set, *te.\n",
    "\n",
    "# Training set: every training batch returned by the loader.\n",
    "Xtr = np.concatenate([b[b'data'] for b in cifar10.all_data_batches])\n",
    "Ytr = np.concatenate([b[b'labels'] for b in cifar10.all_data_batches])\n",
    "\n",
    "# Test set: the held-out test batch.\n",
    "Xte = cifar10.test_batch[b'data']\n",
    "Yte = cifar10.test_batch[b'labels']\n",
    "\n",
    "# Flatten each image into a 1D array of 32*32*3 = 3072 values.\n",
    "Xtr = Xtr.reshape(Xtr.shape[0], 32*32*3)\n",
    "Xte = Xte.reshape(Xte.shape[0], 32*32*3)"
   ]
  },
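  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: sanity-check the arrays built above. Row counts depend on\n",
    "# how datasets.cifar10() splits its batches, so only the pairing of images\n",
    "# and labels is asserted here.\n",
    "assert Xtr.shape[0] == len(Ytr)\n",
    "assert Xte.shape[0] == len(Yte)\n",
    "print(Xtr.shape, Xte.shape, Xtr.dtype)\n",
    "\n",
    "# Caveat: the standard CIFAR-10 pickles store pixels as uint8, and uint8\n",
    "# subtraction wraps around, which would distort the L1/L2 distances computed\n",
    "# below. The cs231n loader casts to float first; the equivalent here would be\n",
    "# Xtr = Xtr.astype(np.float32) and Xte = Xte.astype(np.float32)."
   ]
  },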
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a NearestNeighbor classifier. Copied from the lecture material above.\n",
    "\n",
    "class NearestNeighbor(object):\n",
    "    def __init__(self):\n",
    "        pass\n",
    "\n",
    "    def train(self, X, y):\n",
    "        \"\"\" X is N x D where each row is an example. Y is 1-dimension of size N \"\"\"\n",
    "        # the nearest neighbor classifier simply remembers all the training data\n",
    "        self.Xtr = X\n",
    "        self.ytr = y\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\" X is N x D where each row is an example we wish to predict label for \"\"\"\n",
    "        num_test = X.shape[0]\n",
    "        # lets make sure that the output type matches the input type\n",
    "        Ypred = np.zeros(num_test, dtype = self.ytr.dtype)\n",
    "\n",
    "        # loop over all test rows\n",
    "        for i in range(num_test):\n",
    "            # find the nearest training image to the i'th test image\n",
    "            # using the L1 distance (sum of absolute value differences)\n",
    "            distances = np.sum(np.abs(self.Xtr - X[i,:]), axis = 1)\n",
    "            min_index = np.argmin(distances) # get the index with smallest distance\n",
    "            Ypred[i] = self.ytr[min_index] # predict the label of the nearest example\n",
    "\n",
    "        return Ypred"
   ]
  },
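  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch (not part of the copied lecture code): the same classifier\n",
    "# using the L2 (Euclidean) distance that the cs231n notes discuss as an\n",
    "# alternative to L1. Only predict() changes; train() is inherited.\n",
    "\n",
    "class NearestNeighborL2(NearestNeighbor):\n",
    "    def predict(self, X):\n",
    "        num_test = X.shape[0]\n",
    "        Ypred = np.zeros(num_test, dtype=self.ytr.dtype)\n",
    "        for i in range(num_test):\n",
    "            # L2 distance: square root of the sum of squared differences.\n",
    "            # sqrt is monotonic, so dropping it would give the same argmin.\n",
    "            distances = np.sqrt(np.sum(np.square(self.Xtr - X[i, :]), axis=1))\n",
    "            min_index = np.argmin(distances)\n",
    "            Ypred[i] = self.ytr[min_index]\n",
    "        return Ypred"
   ]
  },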
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the NN classifier on the data.\n",
    "nn = NearestNeighbor()\n",
    "nn.train(Xtr, Ytr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy: 0.2492\n"
     ]
    }
   ],
   "source": [
    "# Predict labels for the test set and report the fraction that match.\n",
    "Yte_predict = nn.predict(Xte)\n",
    "print('accuracy: {}'.format(np.mean(Yte_predict == Yte)))"
   ]
  },
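  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: each prediction scans the whole training set, so evaluating\n",
    "# all of Xte is slow. Scoring a small slice first makes it cheaper to iterate\n",
    "# (for example, when trying the L2 variant above). The slice size is arbitrary.\n",
    "subset = 500\n",
    "Yte_subset_predict = nn.predict(Xte[:subset])\n",
    "print('subset accuracy: {}'.format(np.mean(Yte_subset_predict == Yte[:subset])))"
   ]
  },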
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}