Commit 2f978b60 authored by George Mount
Browse files

update files

parent f6d3aae4
......@@ -56,47 +56,23 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[47. 21. 23. 24. 45. 6. 30. 43. 45. 23. 2. 46. 4. 34. 42. 2. 47. 14.\n",
" 18. 9. 50. 34. 12. 24. 42. 24. 3. 39. 17. 15. 37. 18. 46. 25. 9. 41.\n",
" 45. 34. 22. 26. 27. 44. 28. 4. 15. 31. 3. 39. 15. 23. 5. 27. 11. 25.\n",
" 16. 11. 2. 43. 35. 45. 27. 48. 44. 20. 4. 21. 8. 48. 29. 20. 15. 20.\n",
" 37. 17. 6. 13. 39. 25. 5. 11. 4. 20. 47. 9. 2. 8. 44. 40. 8. 1.\n",
" 45. 26. 43. 10. 22. 24. 3. 48. 29. 49.]\n"
]
}
],
"outputs": [],
"source": [
"# Don't worry about this part -- I am reading the file into Python.\n",
"# You will learn how to read files into Python in the next unit. \n",
"my_array = np.genfromtxt('numpy-drill.csv')\n",
"print(my_array)"
"my_array"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# What is the shape of this array?\n",
"# This also tells us how many dimensions there are --\n",
......@@ -106,20 +82,9 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# What is its datatype?\n",
"my_array.dtype"
......@@ -127,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
......@@ -137,20 +102,9 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 10)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# What is the shape of our array now?\n",
"my_array.shape"
......@@ -158,41 +112,11 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([[2.6183305 , 2.14069514, 2.1899387 , 2.21336384, 2.59002006,\n",
" 1.56508458, 2.34034732, 2.5607496 , 2.59002006, 2.1899387 ],\n",
" [1.18920712, 2.60429069, 1.41421356, 2.4147364 , 2.5457299 ,\n",
" 1.18920712, 2.6183305 , 1.93433642, 2.05976714, 1.73205081],\n",
" [2.65914795, 2.4147364 , 1.86120972, 2.21336384, 2.5457299 ,\n",
" 2.21336384, 1.31607401, 2.4989994 , 2.03054318, 1.96798967],\n",
" [2.46632571, 2.05976714, 2.60429069, 2.23606798, 1.73205081,\n",
" 2.53043953, 2.59002006, 2.4147364 , 2.16573677, 2.25810086],\n",
" [2.27950706, 2.57550958, 2.30032663, 1.41421356, 1.96798967,\n",
" 2.35961106, 1.31607401, 2.4989994 , 1.96798967, 2.1899387 ],\n",
" [1.49534878, 2.27950706, 1.82116029, 2.23606798, 2. ,\n",
" 1.82116029, 1.18920712, 2.5607496 , 2.43229928, 2.59002006],\n",
" [2.27950706, 2.63214803, 2.57550958, 2.11474253, 1.41421356,\n",
" 2.14069514, 1.68179283, 2.63214803, 2.32059579, 2.11474253],\n",
" [1.96798967, 2.11474253, 2.46632571, 2.03054318, 1.56508458,\n",
" 1.89882892, 2.4989994 , 2.23606798, 1.49534878, 1.82116029],\n",
" [1.41421356, 2.11474253, 2.6183305 , 1.73205081, 1.18920712,\n",
" 1.68179283, 2.57550958, 2.51486686, 1.68179283, 1. ],\n",
" [2.59002006, 2.25810086, 2.5607496 , 1.77827941, 2.16573677,\n",
" 2.21336384, 1.31607401, 2.63214803, 2.32059579, 2.64575131]])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Take the sqrt of this array\n",
"my_array = np.sqrt(my_array)\n",
......@@ -201,20 +125,9 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.0597671439071177"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Access the element in the fourth row\n",
"# and second column of the array\n",
......@@ -299,88 +212,50 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"city object\n",
"population int64\n",
"pop_change float64\n",
"land_area float64\n",
"dtype: object\n",
"(10, 4)\n",
" city population pop_change land_area\n",
"0 New York 8336817 0.0198 301.5\n",
"1 Los Angeles 3979576 0.0493 468.7\n",
"2 Chicago 2693976 -0.0006 227.3\n",
"3 Houston 2320268 0.1048 637.5\n",
"4 Phoenix 1680992 0.1628 517.6\n"
]
}
],
"outputs": [],
"source": [
"big_cities = pd.read_csv('practice/largest-us-cities.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"big_cities = pd.read_csv('practice/largest-us-cities.csv')\n",
"\n",
"# Data types\n",
"print(big_cities.dtypes)\n",
"\n",
"big_cities.dtypes\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dimensions\n",
"print(big_cities.shape)\n",
"\n",
"big_cities.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# First five rows\n",
"print(big_cities.head())"
"big_cities.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GO BUCKEYES!</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [GO BUCKEYES!]\n",
"Index: []"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"chicago = pd.read_excel('practice/chicago-big-ten.xlsx')\n",
"chicago"
......@@ -388,133 +263,9 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>school</th>\n",
" <th>school_state</th>\n",
" <th>alumni</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>77873</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Northwestern</td>\n",
" <td>IL</td>\n",
" <td>37326</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Purdue</td>\n",
" <td>IN</td>\n",
" <td>24395</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Indiana</td>\n",
" <td>IN</td>\n",
" <td>22060</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Wisconsin</td>\n",
" <td>WI</td>\n",
" <td>17669</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>Iowa</td>\n",
" <td>IO</td>\n",
" <td>15930</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>Michigan State</td>\n",
" <td>MI</td>\n",
" <td>14525</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>Michigan</td>\n",
" <td>MI</td>\n",
" <td>14167</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>Ohio State</td>\n",
" <td>OH</td>\n",
" <td>5481</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>Minnesota</td>\n",
" <td>MI</td>\n",
" <td>3975</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>Penn State</td>\n",
" <td>PA</td>\n",
" <td>3167</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>Nebraska</td>\n",
" <td>NE</td>\n",
" <td>1800</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" school school_state alumni\n",
"0 Illinois IL 77873\n",
"1 Northwestern IL 37326\n",
"2 Purdue IN 24395\n",
"3 Indiana IN 22060\n",
"4 Wisconsin WI 17669\n",
"5 Iowa IO 15930\n",
"6 Michigan State MI 14525\n",
"7 Michigan MI 14167\n",
"8 Ohio State OH 5481\n",
"9 Minnesota MI 3975\n",
"10 Penn State PA 3167\n",
"11 Nebraska NE 1800"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Whoops! Make sure we specify the sheet:\n",
"chicago = pd.read_excel('practice/chicago-big-ten.xlsx', sheet_name='alumni')\n",
......@@ -523,34 +274,24 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['school', 'school_state', 'alumni'], dtype='object')\n",
" alumni\n",
"count 12.000000\n",
"mean 19864.000000\n",
"std 20978.347989\n",
"min 1800.000000\n",
"25% 5104.500000\n",
"50% 15227.500000\n",
"75% 22643.750000\n",
"max 77873.000000\n"
]
}
],
"outputs": [],
"source": [
"# Column names\n",
"print(chicago.columns)\n",
"\n",
"chicago.columns\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Descriptive stats\n",
"print(chicago.describe())"
"chicago.describe()"
]
},
{
......@@ -577,7 +318,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.7.4"
}
},
"nbformat": 4,
......
......@@ -18,7 +18,7 @@
"outputs": [],
"source": [
"# It's hard to calculate on lists!\n",
"my_list = [4,1,5,2]\n",
"my_list = [4, 1, 5, 2]\n",
"my_list * 2"
]
},
......@@ -174,7 +174,7 @@
"outputs": [],
"source": [
"# Create another array...\n",
"my_other_array = numpy.array([4,16,25,100])\n",
"my_other_array = numpy.array([4, 16, 25, 100])\n",
"\n",
"# numpy has a square root function of its own...\n",
"numpy.sqrt(my_other_array)"
......@@ -264,7 +264,7 @@
},
"outputs": [],
"source": [
"my_array = np.array([4,1,5,2])"
"my_array = np.array([4, 1, 5, 2])"
]
},
{
......@@ -310,10 +310,7 @@
"Source: Nunez-Iglesias, Juan, Stéfan Van Der Walt, and Harriet Dashnow. *Elegant SciPy: The Art of Scientific Python.* O'Reilly Media, 2017.\n",
"\n",
"\n",
"`numpy` can create up to _n_-dimensional arrays, but let's focus on two: this is a familiar way to shape data as it's how data is often stored in spreadsheets (as rows and columns).\n",
"\n",
"\n",
"We can create a two-dimensional array in `numpy` with the `array()` function. This time we will place each 'row' of the array inside its own set of brackets."
"`numpy` can create up to _n_-dimensional arrays. You can think of one-, two-, and three-dimensional arrays as being like individual ranges, tables, and worksheets in Excel."
]
},
{
......@@ -326,7 +323,7 @@
"source": [
"# Create a two-dimensional array with `np.array()`\n",
"\n",
"my_2d_array = np.array([[3,4,1],[2,5,0]])\n",
"my_2d_array = np.array([[3, 4, 1], [2, 5, 0]])\n",
"type(my_2d_array)"
]
},
......@@ -346,7 +343,7 @@
"outputs": [],
"source": [
"# One-dimensional array\n",
"my_array = np.array([1,2,3,4,5,6])\n",
"my_array = np.array([1, 2, 3, 4, 5, 6])\n",
"my_array\n"
]
},
......@@ -381,8 +378,50 @@
"\n",
"Some attributes we can use to learn more about our `numpy` arrays are:\n",
"\n",
"`shape`: gives us the dimensions of the array. \n",
"`size`: gives us the number of elements of the array. \n",
"`shape`: gives us the dimensions of the array. \n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'my_reshaped_array' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-1-35d51d988aea>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmy_reshaped_array\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'my_reshaped_array' is not defined"
]
}
],
"source": [
"my_reshaped_array.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`size`: gives us the number of elements of the array. \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"my_reshaped_array.size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`dtype`: gives us the data type of the elements of the array. Remember that all elements of a `numpy` array must be of the same type."
]
},
......@@ -394,9 +433,7 @@
},
"outputs": [],
"source": [
"print(my_reshaped_array.shape)\n",
"print(my_reshaped_array.size)\n",
"print(my_reshaped_array.dtype)"
"my_reshaped_array.dtype"
]
},
{
......@@ -423,11 +460,9 @@
},
"outputs": [],
"source": [
"print(my_reshaped_array)\n",
"\n",
"# Get the value in first row, first column\n",
"# Never forget zero-based indexing!\n",
"my_reshaped_array[0,0]"
"my_reshaped_array[0, 0]"
]
},
{
......@@ -437,7 +472,7 @@
"outputs": [],
"source": [
"# What about the second-last row/second-last column?\n",
"my_reshaped_array[-2,-2]"
"my_reshaped_array[-2, -2]"
]
},
{
......@@ -454,7 +489,7 @@
"outputs": [],
"source": [
"# Get data from first through second rows and columns\n",
"my_reshaped_array[0:2,0:2]"
"my_reshaped_array[0:2, 0:2]"