dataviz-solutions.ipynb 71.5 KB
Newer Older
George Mount's avatar
George Mount committed

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "In C:\\Users\\User\\Anaconda3\\lib\\site-packages\\matplotlib\\mpl-data\\stylelib\\_classic_test.mplstyle: \n",
      "The savefig.frameon rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.\n",
      "In C:\\Users\\User\\Anaconda3\\lib\\site-packages\\matplotlib\\mpl-data\\stylelib\\_classic_test.mplstyle: \n",
      "The verbose.level rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.\n",
      "In C:\\Users\\User\\Anaconda3\\lib\\site-packages\\matplotlib\\mpl-data\\stylelib\\_classic_test.mplstyle: \n",
      "The verbose.fileo rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>lotsize</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrms</th>\n",
       "      <th>stories</th>\n",
       "      <th>driveway</th>\n",
       "      <th>recroom</th>\n",
       "      <th>fullbase</th>\n",
       "      <th>gashw</th>\n",
       "      <th>airco</th>\n",
       "      <th>garagepl</th>\n",
       "      <th>prefarea</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>42000.0</td>\n",
       "      <td>5850</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38500.0</td>\n",
       "      <td>4000</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>49500.0</td>\n",
       "      <td>3060</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60500.0</td>\n",
       "      <td>6650</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>61000.0</td>\n",
       "      <td>6360</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     price  lotsize  bedrooms  bathrms  stories driveway recroom fullbase  \\\n",
       "0  42000.0     5850         3        1        2      yes      no      yes   \n",
       "1  38500.0     4000         2        1        1      yes      no       no   \n",
       "2  49500.0     3060         3        1        1      yes      no       no   \n",
       "3  60500.0     6650         3        1        2      yes     yes       no   \n",
       "4  61000.0     6360         2        1        1      yes      no       no   \n",
       "\n",
       "  gashw airco  garagepl prefarea  \n",
       "0    no    no         1       no  \n",
       "1    no    no         0       no  \n",
       "2    no    no         0       no  \n",
       "3    no    no         0       no  \n",
       "4    no    no         0       no  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Load the housing data and preview its first five rows\n",
    "housing = pd.read_csv(filepath_or_buffer='data/housing.csv')\n",
    "housing.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DRILLS\n",
    "\n",
    "For this exercise, analyze the `housing` dataset from the `data` folder: \n",
    "\n",
    "1. Plot a histogram of the `price` variable using 50 bins. \n",
    "2. Count the number of observations by number of `stories`.\n",
    "\n",
    "You can code this from scratch, or fill in the code below. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A bar chart (count plot) shows how many observations fall into each value of a categorical variable. For example, we could count how many houses in the dataset have each number of `stories`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x252dd672668>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot a histogram of price with 50 bins\n",
    "# histplot (seaborn >= 0.11) replaces distplot, which was deprecated in 0.11 and removed in 0.14\n",
    "sns.histplot(data=housing, x='price', bins=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x252dde80668>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOxElEQVR4nO3df6xfd13H8eeLbgz56ZZ2c7QLbbQqRWXItQ4XFTYCFZQOIqRLIBWXlJhhmJLoZoigZhEioASBpMLYkLk5HbCpqCx1QFTcuMMJ3cakoZXV1vWOoRsQpi1v//iefnbpbtsv7T3fc388H8nN93s+55xvX/cku6+dn99UFZIkATxu6ACSpIXDUpAkNZaCJKmxFCRJjaUgSWpOGTrAyVi5cmWtXbt26BiStKjccccdD1TVqrnmLepSWLt2LdPT00PHkKRFJcl/HG2eh48kSY2lIElqLAVJUmMpSJIaS0GS1FgKkqTGUpAkNZaCJKmxFCRJzaK+o1mTtW7dnqEjLBi7d68dOoLUC/cUJEmNpSBJaiwFSVJjKUiSGktBktRYCpKkxlKQJDWWgiSpWfI3r3nD1aO84UrS8binIElqLAVJUmMpSJIaS0GS1FgKkqTGUpAkNZaCJKmxFCRJjaUgSWosBUlSYylIkhpLQZLUWAqSpMZSkCQ1loIkqbEUJEmNpSBJaiwFSVJjKUiSGktBktT0VgpJzklya5J7ktyV5A3d+BlJbknype719FnrXJFkV5J7k7y4r2ySpLn1uadwEHhjVT0TOA+4NMkG4HJgR1WtB3Z003TztgDPAjYB702yosd8kqQj9FYKVbW/qj7XvX8YuAdYDWwGrukWuwa4qHu/Gbi+qh6pqt3ALmBjX/kkSY81kXMKSdYCzwFuA86qqv0wKg7gzG6x1cB9s1bb240d+VnbkkwnmZ6ZmekztiQtO72XQpInAzcCl1XVQ8dadI6xesxA1faqmqqqqVWrVs1XTEkSPZdCklMZFcK1VfWRbvj+JGd3888GDnTje4FzZq2+BtjXZz5J0nfq8+qjAB8A7qmqd86adTOwtXu/Fbhp1viWJKclWQesB27vK58k6bFO6fGzzwdeA3whyZ3d2G8BbwVuSHIJ8BXglQBVdVeSG4C7GV25dGlVHeoxnyTpCL2VQlX9I3OfJwC48CjrXAlc2VcmSdKxeUezJKmxFCRJjaUgSWosBUlSYylIkhpLQZLUWAqSpMZSkCQ1loIkqbEUJEmNpSBJaiwFSVJjKUiSGktBktRYCpKkxlKQJDWWgiSpsRQkSY2lIElqLAVJUmMpSJIaS0GS1FgKkqTGUpAkNZaCJKmxFCRJjaUgSWosBUlSYylIkhpLQZLUWAqSpMZSkCQ1loIkqbEUJElNb6WQ5KokB5LsnDX2liT/meTO7ucls+ZdkWRXknuTvLivXJKko+tzT+FqYNMc439YVed2Px8HSLIB2AI8q1vnvUlW9JhNkjSH3kqhqj4NPDjm4puB66vqkaraDewCNvaVTZI0tyHOKbw+yee7w0und2OrgftmLbO3G3uMJNuSTCeZnpmZ6TurJC0rky6F9wHfD5wL7Afe0Y1njmVrrg+oqu1VNVVVU6tWreonpSQtUxMthaq6v6oOVdW3gT/h0UNEe4FzZi26Btg3yWySpAmXQpKzZ02+HDh8ZdLNwJYkpyVZB6wHbp9kNkkSnNLXBye5Dng+sDLJXuDNwPOTnMvo0NAe4HUAVXVXkhuAu4GDwKVVdaivbJKkufVWClV18RzDHzjG8lcCV/aVR5J0fN7RLElqLAVJUmMpSJIaS0GS1FgKkqRmrFJIsmOcMUnS4nbMS1KTPAF4IqN7DU7n0cdRPBV4es/ZJEkTdrz7FF4HXMaoAO7g0VJ4CHhPj7kkSQM4ZilU1buAdyX51ap694QySZIGMtYdzVX17iQ/BaydvU5VfainXJKkAYxVCkn+lNEj
r+8EDj+TqABLQZKWkHGffTQFbKiqOb/jQJK0NIx7n8JO4Pv6DCJJGt64eworgbuT3A48cniwql7WSypJ0iDGLYW39BlCkrQwjHv10af6DiJJGt64Vx89zOhqI4DHA6cC36iqp/YVTJI0eePuKTxl9nSSi4CNvSSSJA3mhJ6SWlUfAy6Y5yySpIGNe/joFbMmH8fovgXvWZCkJWbcq49+Ydb7g8AeYPO8p5EkDWrccwqv7TuIJGl4437JzpokH01yIMn9SW5MsqbvcJKkyRr3RPMHgZsZfa/CauCvujFJ0hIybimsqqoPVtXB7udqYFWPuSRJAxi3FB5I8uokK7qfVwNf7TOYJGnyxi2FXwZeBfwXsB/4RcCTz5K0xIx7ServAVur6msASc4A3s6oLCRJS8S4ewo/drgQAKrqQeA5/USSJA1l3FJ4XJLTD090ewrj7mVIkhaJcf+wvwP45yR/yejxFq8CruwtlSRpEOPe0fyhJNOMHoIX4BVVdXevySRJEzf2IaCuBCwCSVrCTujR2ZKkpclSkCQ1vZVCkqu6B+jtnDV2RpJbknype519RdMVSXYluTfJi/vKJUk6uj73FK4GNh0xdjmwo6rWAzu6aZJsALYAz+rWeW+SFT1mkyTNobdSqKpPAw8eMbwZuKZ7fw1w0azx66vqkaraDezC74CWpImb9DmFs6pqP0D3emY3vhq4b9Zye7uxx0iyLcl0kumZmZlew0rScrNQTjRnjrE5vwO6qrZX1VRVTa1a5dO7JWk+TboU7k9yNkD3eqAb3wucM2u5NcC+CWeTpGVv0qVwM7C1e78VuGnW+JYkpyVZB6wHbp9wNkla9np7qF2S64DnAyuT7AXeDLwVuCHJJcBXgFcCVNVdSW5gdMf0QeDSqjrUVzZJ0tx6K4Wquvgosy48yvJX4kP2JGlQC+VEsyRpAbAUJEmNpSBJaiwFSVJjKUiSGktBktRYCpKkxlKQJDWWgiSpsRQkSY2lIElqLAVJUmMpSJIaS0GS1FgKkqTGUpAkNZaCJKmxFCRJjaUgSWosBUlSYylIkhpLQZLUWAqSpMZSkCQ1loIkqbEUJEmNpSBJaiwFSVJjKUiSGktBktRYCpKkxlKQJDWWgiSpsRQkSc0pQ/yjSfYADwOHgINVNZXkDODPgbXAHuBVVfW1IfJJ0nI15J7CC6rq3Kqa6qYvB3ZU1XpgRzctSZqghXT4aDNwTff+GuCiAbNI0rI0VCkU8IkkdyTZ1o2dVVX7AbrXM+daMcm2JNNJpmdmZiYUV5KWh0HOKQDnV9W+JGcCtyT54rgrVtV2YDvA1NRU9RVQkpajQfYUqmpf93oA+CiwEbg/ydkA3euBIbJJ0nI28VJI8qQkTzn8HngRsBO4GdjaLbYVuGnS2SRpuRvi8NFZwEeTHP73/6yq/i7JZ4EbklwCfAV45QDZJGlZm3gpVNWXgWfPMf5V4MJJ55EkPWohXZIqSRqYpSBJaoa6JFWS5tW6dXuGjrBg7N699oTXtRSkgfhH7FEn80dM88vDR5KkxlKQJDWWgiSpsRQkSY2lIElqLAVJUmMpSJIaS0GS1FgKkqTGUpAkNZaCJKmxFCRJjaUgSWosBUlSYylIkhpLQZLUWAqSpMZSkCQ1loIkqbEUJEmNpSBJaiwFSVJjKUiSGktBktRYCpKkxlKQJDWWgiSpsRQkSY2lIElqLAVJUmMpSJKaBVcKSTYluTfJriSXD51HkpaTBVUKSVYA7wF+DtgAXJxkw7CpJGn5WFClAGwEdlXVl6vqf4Hrgc0DZ5KkZeOUoQMcYTVw36zpvcBPzl4gyTZgWzf59ST3TijbyVgJPDB0iGToBPNm8O25hLYluD3n0+DbEsbans842oyFVgpz/Sr1HRNV24Htk4kzP5JMV9XU0DmWCrfn/HJ7zp+lsC0X2uGjvcA5s6bXAPsGyiJJy85CK4XPAuuTrEvyeGALcPPAmSRp2VhQh4+q6mCS1wN/D6wArqqquwaO
NR8W1eGuRcDtOb/cnvNn0W/LVNXxl5IkLQsL7fCRJGlAloIkqbEUepTkqiQHkuwcOstil+ScJLcmuSfJXUneMHSmxSzJE5LcnuTfuu35O0NnWgqSrEjyr0n+eugsJ8pS6NfVwKahQywRB4E3VtUzgfOAS30Eykl5BLigqp4NnAtsSnLewJmWgjcA9wwd4mRYCj2qqk8DDw6dYymoqv1V9bnu/cOM/sNbPWyqxatGvt5Nntr9eNXJSUiyBngp8P6hs5wMS0GLTpK1wHOA24ZNsrh1hzruBA4At1SV2/Pk/BHwG8C3hw5yMiwFLSpJngzcCFxWVQ8NnWcxq6pDVXUuoycHbEzyI0NnWqyS/DxwoKruGDrLybIUtGgkOZVRIVxbVR8ZOs9SUVX/DXwSz3+djPOBlyXZw+jpzhck+fCwkU6MpaBFIUmADwD3VNU7h86z2CVZleR7u/ffA7wQ+OKwqRavqrqiqtZU1VpGj+f5h6p69cCxToil0KMk1wGfAX4oyd4klwydaRE7H3gNo/8Du7P7ecnQoRaxs4Fbk3ye0TPHbqmqRXsZpeaPj7mQJDXuKUiSGktBktRYCpKkxlKQJDWWgiSpsRSk70KSy5I88QTWe78P8NNi4CWp0nehu2N1qqoe+C7WWVFVh/pLJc0f9xSko0jypCR/033nwM4kbwaezuimr1u7ZS5O8oVu/ttmrfv1JL+b5DbgeUk+mWSqm/eiJJ9J8rkkf9E9z4kkb01yd5LPJ3n7AL+yxClDB5AWsE3Avqp6KUCSpwGvBV5QVQ8keTrwNuC5wNeATyS5qKo+BjwJ2FlVv92tS/e6EngT8MKq+kaS3wR+PckfAy8Hfriq6vAjKKRJc09BOrovAC9M8rYkP11V/3PE/J8APllVM1V1ELgW+Jlu3iFGD+870nnABuCfusdWbwWeATwEfAt4f5JXAN+c/19HOj73FKSjqKp/T/Jc4CXA7yf5xBGL5Birf+so5xHC6DlDFz9mRrIRuJDRA9VeD1xwYsmlE+eegnQU3eGhb1bVh4G3Az8OPAw8pVvkNuBnk6xMsgK4GPjUcT72X4Dzk/xA9288MckPducVnlZVHwcuY/QVmdLEuacgHd2PAn+Q5NvA/wG/AjwP+Nsk+6vqBUmuAG5ltAfw8aq66VgfWFUzSX4JuC7Jad3wmxiVzU1JntB91q/18htJx+ElqZKkxsNHkqTGUpAkNZaCJKmxFCRJjaUgSWosBUlSYylIkpr/BxB+Z+Fq9qW6AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count the number of observations by number of `stories`\n",
    "# Name the column with x= and pass the frame as data=: from seaborn 0.12 the only\n",
    "# positional argument is `data`, so a positionally passed column is misinterpreted\n",
    "sns.countplot(x='stories', data=housing, color='blue')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DRILLS\n",
    "\n",
    "For this exercise, analyze the `housing` dataset from the `data` folder: \n",
    "\n",
    "1. Make a box plot of the distribution of `lotsize` for each value of the `prefarea` category.  \n",
    "2. Make a scatterplot of `lotsize` versus `price`. Add a custom title to the plot.\n",
    "\n",
    "You can code this from scratch, or fill in the code below. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x252ddeed908>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Compare the distribution of `lotsize` across the `prefarea` categories with a box plot\n",
    "sns.boxplot(data=housing, x='prefarea', y='lotsize')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'Lot size versus sales price of houses')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatterplot of `lotsize` versus `price`, with a custom title.\n",
    "\n",
    "# Import pyplot: its title() function sets the title of the current axes\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Draw the scatterplot with lot size on the x-axis and price on the y-axis\n",
    "sns.scatterplot(x='lotsize', y='price', data=housing)\n",
    "\n",
    "# Give the plot a custom title\n",
    "plt.title('Lot size versus sales price of houses')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}