{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example on Data Preparation: US States Data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 57935 100 57935 0 0 115k 0 --:--:-- --:--:-- --:--:-- 115k\n"
]
}
],
"source": [
"# --fail: stop on an HTTP error instead of saving the error page as the CSV.\n",
"!curl --fail --location --remote-name https://raw.githubusercontent.com/jakevdp/data-USstates/master/state-population.csv"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population\n",
"0 AL under18 2012 1117489.0\n",
"1 AL total 2012 4817528.0\n",
"2 AL under18 2010 1130966.0\n",
"3 AL total 2010 4785570.0\n",
"4 AL under18 2011 1125763.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the downloaded population table and preview its first rows.\n",
"pop = pd.read_csv('state-population.csv')\n",
"pop.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
"
\n",
" \n",
" \n",
" \n",
" 500 | \n",
" GA | \n",
" total | \n",
" 2004 | \n",
" 8769252.0 | \n",
"
\n",
" \n",
" 501 | \n",
" GA | \n",
" under18 | \n",
" 2004 | \n",
" 2308855.0 | \n",
"
\n",
" \n",
" 502 | \n",
" GA | \n",
" total | \n",
" 2001 | \n",
" 8377038.0 | \n",
"
\n",
" \n",
" 503 | \n",
" GA | \n",
" under18 | \n",
" 2001 | \n",
" 2215390.0 | \n",
"
\n",
" \n",
" 504 | \n",
" GA | \n",
" total | \n",
" 2002 | \n",
" 8508256.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 695 | \n",
" IN | \n",
" under18 | \n",
" 2001 | \n",
" 1579527.0 | \n",
"
\n",
" \n",
" 696 | \n",
" IN | \n",
" total | \n",
" 2002 | \n",
" 6155967.0 | \n",
"
\n",
" \n",
" 697 | \n",
" IN | \n",
" under18 | \n",
" 2002 | \n",
" 1580814.0 | \n",
"
\n",
" \n",
" 698 | \n",
" IN | \n",
" total | \n",
" 1999 | \n",
" 6044970.0 | \n",
"
\n",
" \n",
" 699 | \n",
" IN | \n",
" under18 | \n",
" 1999 | \n",
" 1566079.0 | \n",
"
\n",
" \n",
"
\n",
"
200 rows × 4 columns
\n",
"
"
],
"text/plain": [
" state/region ages year population\n",
"500 GA total 2004 8769252.0\n",
"501 GA under18 2004 2308855.0\n",
"502 GA total 2001 8377038.0\n",
"503 GA under18 2001 2215390.0\n",
"504 GA total 2002 8508256.0\n",
".. ... ... ... ...\n",
"695 IN under18 2001 1579527.0\n",
"696 IN total 2002 6155967.0\n",
"697 IN under18 2002 1580814.0\n",
"698 IN total 1999 6044970.0\n",
"699 IN under18 1999 1566079.0\n",
"\n",
"[200 rows x 4 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at rows 500-699, selected by position.\n",
"pop.iloc[500:700]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 835 100 835 0 0 2221 0 --:--:-- --:--:-- --:--:-- 2220\n"
]
}
],
"source": [
"# --fail: stop on an HTTP error instead of saving the error page as the CSV.\n",
"!curl --fail --location --remote-name https://raw.githubusercontent.com/jakevdp/data-USstates/master/state-areas.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" area (sq. mi) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Alabama | \n",
" 52423 | \n",
"
\n",
" \n",
" 1 | \n",
" Alaska | \n",
" 656425 | \n",
"
\n",
" \n",
" 2 | \n",
" Arizona | \n",
" 114006 | \n",
"
\n",
" \n",
" 3 | \n",
" Arkansas | \n",
" 53182 | \n",
"
\n",
" \n",
" 4 | \n",
" California | \n",
" 163707 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state area (sq. mi)\n",
"0 Alabama 52423\n",
"1 Alaska 656425\n",
"2 Arizona 114006\n",
"3 Arkansas 53182\n",
"4 California 163707"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the downloaded state-area table and preview its first rows.\n",
"areas = pd.read_csv('state-areas.csv')\n",
"areas.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 872 100 872 0 0 2273 0 --:--:-- --:--:-- --:--:-- 2276\n"
]
}
],
"source": [
"# --fail: stop on an HTTP error instead of saving the error page as the CSV.\n",
"!curl --fail --location --remote-name https://raw.githubusercontent.com/jakevdp/data-USstates/master/state-abbrevs.csv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" abbreviation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
" 1 | \n",
" Alaska | \n",
" AK | \n",
"
\n",
" \n",
" 2 | \n",
" Arizona | \n",
" AZ | \n",
"
\n",
" \n",
" 3 | \n",
" Arkansas | \n",
" AR | \n",
"
\n",
" \n",
" 4 | \n",
" California | \n",
" CA | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state abbreviation\n",
"0 Alabama AL\n",
"1 Alaska AK\n",
"2 Arizona AZ\n",
"3 Arkansas AR\n",
"4 California CA"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the downloaded name-to-abbreviation table and preview its first rows.\n",
"abbrevs = pd.read_csv('state-abbrevs.csv')\n",
"abbrevs.head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merge Population Data with State Abbreviations"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
" abbreviation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
" Alabama | \n",
" AL | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state abbreviation\n",
"0 AL under18 2012 1117489.0 Alabama AL\n",
"1 AL total 2012 4817528.0 Alabama AL\n",
"2 AL under18 2010 1130966.0 Alabama AL\n",
"3 AL total 2010 4785570.0 Alabama AL\n",
"4 AL under18 2011 1125763.0 Alabama AL"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Outer join on the state code: rows from both tables are kept, matched or not.\n",
"pop_ab = pop.merge(\n",
"    abbrevs,\n",
"    how='outer',\n",
"    left_on='state/region',\n",
"    right_on='abbreviation',\n",
")\n",
"pop_ab.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
" Alabama | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
" Alabama | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
" Alabama | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
" Alabama | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state\n",
"0 AL under18 2012 1117489.0 Alabama\n",
"1 AL total 2012 4817528.0 Alabama\n",
"2 AL under18 2010 1130966.0 Alabama\n",
"3 AL total 2010 4785570.0 Alabama\n",
"4 AL under18 2011 1125763.0 Alabama"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 'abbreviation' was only the right-hand merge key for 'state/region', so drop it.\n",
"# errors='ignore' keeps this cell re-runnable once the column is already gone.\n",
"pop_ab = pop_ab.drop(columns='abbreviation', errors='ignore')\n",
"pop_ab.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fill in Missing Values for States"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"state/region False\n",
"ages False\n",
"year False\n",
"population True\n",
"state True\n",
"dtype: bool"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Which columns contain at least one missing value?\n",
"pop_ab.isna().any(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
"
\n",
" \n",
" \n",
" \n",
" 2448 | \n",
" PR | \n",
" under18 | \n",
" 1990 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2449 | \n",
" PR | \n",
" total | \n",
" 1990 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2450 | \n",
" PR | \n",
" total | \n",
" 1991 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2451 | \n",
" PR | \n",
" under18 | \n",
" 1991 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2452 | \n",
" PR | \n",
" total | \n",
" 1993 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state\n",
"2448 PR under18 1990 NaN NaN\n",
"2449 PR total 1990 NaN NaN\n",
"2450 PR total 1991 NaN NaN\n",
"2451 PR under18 1991 NaN NaN\n",
"2452 PR total 1993 NaN NaN"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First few rows whose full state name is missing.\n",
"missing_state = pop_ab['state'].isna()\n",
"pop_ab[missing_state].head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['PR', 'USA'], dtype=object)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct 'state/region' codes among the rows that lack a full state name.\n",
"no_name = pop_ab['state'].isna()\n",
"pop_ab.loc[no_name, 'state/region'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"state/region False\n",
"ages False\n",
"year False\n",
"population True\n",
"state False\n",
"dtype: bool"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pattern: data.loc[row condition, variable_to_be_updated] = 'new value'\n",
"# Fill in the full name for each code that had no match in the abbreviation table.\n",
"for code, name in {'PR': 'Puerto Rico', 'USA': 'United States'}.items():\n",
"    pop_ab.loc[pop_ab['state/region'] == code, 'state'] = name\n",
"\n",
"pop_ab.isnull().any()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merge the Third Dataset: State Areas"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
" area (sq. mi) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state area (sq. mi)\n",
"0 AL under18 2012 1117489.0 Alabama 52423.0\n",
"1 AL total 2012 4817528.0 Alabama 52423.0\n",
"2 AL under18 2010 1130966.0 Alabama 52423.0\n",
"3 AL total 2010 4785570.0 Alabama 52423.0\n",
"4 AL under18 2011 1125763.0 Alabama 52423.0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Left join on the full state name: every row of pop_ab is kept.\n",
"final = pop_ab.merge(areas, how='left', on='state')\n",
"final.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"state/region False\n",
"ages False\n",
"year False\n",
"population True\n",
"state False\n",
"area (sq. mi) True\n",
"dtype: bool"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Which columns of the merged table contain at least one missing value?\n",
"final.isna().any(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
" area (sq. mi) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state area (sq. mi)\n",
"0 AL under18 2012 1117489.0 Alabama 52423.0\n",
"1 AL total 2012 4817528.0 Alabama 52423.0\n",
"2 AL under18 2010 1130966.0 Alabama 52423.0\n",
"3 AL total 2010 4785570.0 Alabama 52423.0\n",
"4 AL under18 2011 1125763.0 Alabama 52423.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop every row that still has a missing value. Reassigning the result\n",
"# (rather than inplace=True) avoids mutating the frame in place.\n",
"final = final.dropna()\n",
"final.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating New Variables"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
" area (sq. mi) | \n",
" density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" under18 | \n",
" 2012 | \n",
" 1117489.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 21.316769 | \n",
"
\n",
" \n",
" 1 | \n",
" AL | \n",
" total | \n",
" 2012 | \n",
" 4817528.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 91.897221 | \n",
"
\n",
" \n",
" 2 | \n",
" AL | \n",
" under18 | \n",
" 2010 | \n",
" 1130966.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 21.573851 | \n",
"
\n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 91.287603 | \n",
"
\n",
" \n",
" 4 | \n",
" AL | \n",
" under18 | \n",
" 2011 | \n",
" 1125763.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 21.474601 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state area (sq. mi) density\n",
"0 AL under18 2012 1117489.0 Alabama 52423.0 21.316769\n",
"1 AL total 2012 4817528.0 Alabama 52423.0 91.897221\n",
"2 AL under18 2010 1130966.0 Alabama 52423.0 21.573851\n",
"3 AL total 2010 4785570.0 Alabama 52423.0 91.287603\n",
"4 AL under18 2011 1125763.0 Alabama 52423.0 21.474601"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Density = population divided by area in square miles.\n",
"final['density'] = final['population'].div(final['area (sq. mi)'])\n",
"final.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Subsetting the Data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state/region | \n",
" ages | \n",
" year | \n",
" population | \n",
" state | \n",
" area (sq. mi) | \n",
" density | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" AL | \n",
" total | \n",
" 2010 | \n",
" 4785570.0 | \n",
" Alabama | \n",
" 52423.0 | \n",
" 91.287603 | \n",
"
\n",
" \n",
" 91 | \n",
" AK | \n",
" total | \n",
" 2010 | \n",
" 713868.0 | \n",
" Alaska | \n",
" 656425.0 | \n",
" 1.087509 | \n",
"
\n",
" \n",
" 101 | \n",
" AZ | \n",
" total | \n",
" 2010 | \n",
" 6408790.0 | \n",
" Arizona | \n",
" 114006.0 | \n",
" 56.214497 | \n",
"
\n",
" \n",
" 189 | \n",
" AR | \n",
" total | \n",
" 2010 | \n",
" 2922280.0 | \n",
" Arkansas | \n",
" 53182.0 | \n",
" 54.948667 | \n",
"
\n",
" \n",
" 197 | \n",
" CA | \n",
" total | \n",
" 2010 | \n",
" 37333601.0 | \n",
" California | \n",
" 163707.0 | \n",
" 228.051342 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state/region ages year population state area (sq. mi) \\\n",
"3 AL total 2010 4785570.0 Alabama 52423.0 \n",
"91 AK total 2010 713868.0 Alaska 656425.0 \n",
"101 AZ total 2010 6408790.0 Arizona 114006.0 \n",
"189 AR total 2010 2922280.0 Arkansas 53182.0 \n",
"197 CA total 2010 37333601.0 California 163707.0 \n",
"\n",
" density \n",
"3 91.287603 \n",
"91 1.087509 \n",
"101 56.214497 \n",
"189 54.948667 \n",
"197 228.051342 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows for year 2010 with ages == 'total'.\n",
"is_2010_total = (final['year'] == 2010) & (final['ages'] == 'total')\n",
"data2010 = final[is_2010_total]\n",
"data2010.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Index the 2010 totals by state name under a new variable: the cell can be\n",
"# re-run safely and the frame shown by the previous cell is not mutated.\n",
"data2010_by_state = data2010.set_index('state')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"data2010_by_state.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}