{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# http://cs231n.github.io/classification/#nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive exists, proceeding: /Users/eryn/Code/learnmeamachine/scratch/cifar10.tar.gz\n"
     ]
    }
   ],
   "source": [
    "cifar10 = datasets.cifar10()"
   ]
  },
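  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: peek at what the loader returns before building arrays.\n",
    "# Assumes the attributes used in the next cell (all_data_batches, test_batch)\n",
    "# and the standard CIFAR-10 pickle keys b'data' and b'labels'.\n",
    "first_batch = cifar10.all_data_batches[0]\n",
    "print(list(first_batch.keys()))\n",
    "print(np.asarray(first_batch[b'data']).shape, len(first_batch[b'labels']))"
   ]
  },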
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create two arrays: one containing images, one containing labels.\n",
    "# Partition those arrays into a training set, *tr, and a test set, *te.\n",
    "\n",
    "# Training set: every training batch returned by the loader.\n",
    "Xtr = np.concatenate([b[b'data'] for b in cifar10.all_data_batches])\n",
    "Ytr = np.concatenate([b[b'labels'] for b in cifar10.all_data_batches])\n",
    "\n",
    "# Test set: the held-out test batch.\n",
    "Xte = cifar10.test_batch[b'data']\n",
    "Yte = cifar10.test_batch[b'labels']\n",
    "\n",
    "# Flatten each image into a 1D array of 32*32*3 = 3072 values.\n",
    "Xtr = Xtr.reshape(Xtr.shape[0], 32*32*3)\n",
    "Xte = Xte.reshape(Xte.shape[0], 32*32*3)"
   ]
  },
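  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: sanity-check the arrays built above. Row counts depend on\n",
    "# how datasets.cifar10() splits its batches, so only the pairing of images\n",
    "# and labels is asserted here.\n",
    "assert Xtr.shape[0] == len(Ytr)\n",
    "assert Xte.shape[0] == len(Yte)\n",
    "print(Xtr.shape, Xte.shape, Xtr.dtype)\n",
    "\n",
    "# Caveat: the standard CIFAR-10 pickles store pixels as uint8, and uint8\n",
    "# subtraction wraps around, which would distort the L1/L2 distances computed\n",
    "# below. The cs231n loader casts to float first; the equivalent here would be\n",
    "# Xtr = Xtr.astype(np.float32) and Xte = Xte.astype(np.float32)."
   ]
  },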
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a NearestNeighbor classifier. Copied from the lecture material above.\n",
    "\n",
    "class NearestNeighbor(object):\n",
    "    def __init__(self):\n",
    "        pass\n",
    "\n",
    "    def train(self, X, y):\n",
    "        \"\"\" X is N x D where each row is an example. Y is 1-dimension of size N \"\"\"\n",
    "        # the nearest neighbor classifier simply remembers all the training data\n",
    "        self.Xtr = X\n",
    "        self.ytr = y\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\" X is N x D where each row is an example we wish to predict label for \"\"\"\n",
    "        num_test = X.shape[0]\n",
    "        # lets make sure that the output type matches the input type\n",
    "        Ypred = np.zeros(num_test, dtype = self.ytr.dtype)\n",
    "\n",
    "        # loop over all test rows\n",
    "        for i in range(num_test):\n",
    "            # find the nearest training image to the i'th test image\n",
    "            # using the L1 distance (sum of absolute value differences)\n",
    "            distances = np.sum(np.abs(self.Xtr - X[i,:]), axis = 1)\n",
    "            min_index = np.argmin(distances) # get the index with smallest distance\n",
    "            Ypred[i] = self.ytr[min_index] # predict the label of the nearest example\n",
    "\n",
    "        return Ypred"
   ]
  },
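  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch (not part of the copied lecture code): the same classifier\n",
    "# using the L2 (Euclidean) distance that the cs231n notes discuss as an\n",
    "# alternative to L1. Only predict() changes; train() is inherited.\n",
    "\n",
    "class NearestNeighborL2(NearestNeighbor):\n",
    "    def predict(self, X):\n",
    "        num_test = X.shape[0]\n",
    "        Ypred = np.zeros(num_test, dtype=self.ytr.dtype)\n",
    "        for i in range(num_test):\n",
    "            # L2 distance: square root of the sum of squared differences.\n",
    "            # sqrt is monotonic, so dropping it would give the same argmin.\n",
    "            distances = np.sqrt(np.sum(np.square(self.Xtr - X[i, :]), axis=1))\n",
    "            min_index = np.argmin(distances)\n",
    "            Ypred[i] = self.ytr[min_index]\n",
    "        return Ypred"
   ]
  },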
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the NN classifier on the data.\n",
    "nn = NearestNeighbor()\n",
    "nn.train(Xtr, Ytr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy: 0.2492\n"
     ]
    }
   ],
   "source": [
    "# Predict labels for the test set and report the fraction that match.\n",
    "Yte_predict = nn.predict(Xte)\n",
    "print('accuracy: {}'.format(np.mean(Yte_predict == Yte)))"
   ]
  },
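  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: each prediction scans the whole training set, so evaluating\n",
    "# all of Xte is slow. Scoring a small slice first makes it cheaper to iterate\n",
    "# (for example, when trying the L2 variant above). The slice size is arbitrary.\n",
    "subset = 500\n",
    "Yte_subset_predict = nn.predict(Xte[:subset])\n",
    "print('subset accuracy: {}'.format(np.mean(Yte_subset_predict == Yte[:subset])))"
   ]
  },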
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}