aboutsummaryrefslogtreecommitdiff
path: root/CS105MiniProject.ipynb
diff options
context:
space:
mode:
authorGravatar Anshul Gupta <ansg191@anshulg.com> 2024-02-22 22:15:21 -0800
committerGravatar Anshul Gupta <ansg191@anshulg.com> 2024-02-22 22:15:21 -0800
commit53863f0c1c5bf6f31c50b1efb2c4d00df811a095 (patch)
tree093fc4569ae453f37fc4a1ae0e4de8344c0adc38 /CS105MiniProject.ipynb
parent431bff4fe84778d8bfa70ee7f712ec892883b969 (diff)
parent3036e854b98315b7086bc06a3c585645a46a17b6 (diff)
downloadCS105MiniProject-53863f0c1c5bf6f31c50b1efb2c4d00df811a095.tar.gz
CS105MiniProject-53863f0c1c5bf6f31c50b1efb2c4d00df811a095.tar.zst
CS105MiniProject-53863f0c1c5bf6f31c50b1efb2c4d00df811a095.zip
Merge branch 'main' into pie-chart
Diffstat (limited to 'CS105MiniProject.ipynb')
-rw-r--r--CS105MiniProject.ipynb172
1 files changed, 101 insertions, 71 deletions
diff --git a/CS105MiniProject.ipynb b/CS105MiniProject.ipynb
index dd77c8f..35ebf16 100644
--- a/CS105MiniProject.ipynb
+++ b/CS105MiniProject.ipynb
@@ -1,6 +1,30 @@
{
"cells": [
{
+ "cell_type": "markdown",
+ "source": [
+ "<div>\n",
+ " <h1><center>CS105 Mini-Project</center></h1>\n",
+ " <h2><center>Does who a student is living with affect if and how they work jobs?</center></h2>\n",
+ " <p>By: <b>NAMES HERE</b></p>\n",
+ "</div>"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "21abd26c73fd0070"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Data Loading & Preprocessing"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "69d8e8ad7c61ba61"
+ },
+ {
"cell_type": "code",
"execution_count": 54,
"id": "daa13044",
@@ -316,13 +340,29 @@
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
+ "import seaborn as sns\n",
+ "import matplotlib\n",
+ "import matplotlib.pyplot as plt\n",
"\n",
+ "# Load dataframe from data.csv\n",
"df = pd.read_csv(\"data.csv\")\n",
- "df = df.iloc[:, [0, 2, 7, 8, 9, 58, 59, 60, 61, 26]]\n",
+ "\n",
+ "# Select relevant columns\n",
+ "df = df.iloc[:, [2, 3, 7, 8, 9, 58, 59, 60, 61, 26]]\n",
"df"
]
},
{
+ "cell_type": "markdown",
+ "source": [
+ "## Preprocessing"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "3f7614a5665d55b6"
+ },
+ {
"cell_type": "code",
"execution_count": 55,
"id": "29889175",
@@ -630,6 +670,10 @@
}
],
"source": [
+ "# Fixes empty values\n",
+ "df['Do you currently work?'] = df['Do you currently work?'].fillna('No')\n",
+ "\n",
+ "# Replaces custom text answers with appropriate values\n",
"df['How many people live in your household?'] = (df['How many people live in your household?']\n",
" .fillna(0)\n",
" .replace('4 in total', '4')\n",
@@ -640,9 +684,15 @@
" .replace('North District 4 bed 2 bath', '4')\n",
" .replace('3 (room), 8 (hall), ~70 (building)', '3')\n",
" .astype(int))\n",
+ "df['Who do you live with? '] = df['Who do you live with? '].replace('Family, Friends', 'Both').replace(\n",
+ " 'Family, Friends, Both', 'Both')\n",
+ "df['Do you currently live in a house, apartnment, or dorm? '] = (\n",
+ " df['Do you currently live in a house, apartnment, or dorm? ']\n",
+ " .replace('house (renting)', 'House'))\n",
+ "# Normalizes non-applicable answers\n",
"df.loc[df['Do you currently work?'] == 'No', 'How many hours do you work per week on average?'] = 0\n",
- "df['Who do you live with? '] = df['Who do you live with? '].replace('Family, Friends', 'Both').replace('Family, Friends, Both', 'Both')\n",
"df.loc[df['Do you currently work?'] == 'No', 'Do you work in a department related to your major?'] = np.nan\n",
+ "\n",
"df"
]
},
@@ -1279,41 +1329,13 @@
"collapsed": false
},
"source": [
- "<div>\n",
- " <h1>CS105 Project</h2>\n",
- " <p>Ali Naqvi, ...</p>\n",
- " <p>Topic: Does who a student is living with effect if and how they work jobs?</p>\n",
- "</div>\n"
+ "# Analysis"
]
},
{
"cell_type": "code",
- "execution_count": 58,
- "id": "bfa40c9e9693481d",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2024-02-23T01:01:41.526696Z",
- "start_time": "2024-02-23T01:01:41.430135Z"
- },
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "<Figure size 800x800 with 1 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Assuming 'df' is your DataFrame\n",
- "\n",
"# Count the number of people who work and don't work\n",
"work_counts = df['Do you currently work?'].value_counts()\n",
"\n",
@@ -1321,58 +1343,62 @@
"plt.figure(figsize=(8, 8))\n",
"plt.pie(work_counts, labels=work_counts.index, autopct='%1.1f%%', startangle=90, colors=['lightblue', 'lightcoral'])\n",
"plt.title('Distribution of People Who Work and Don\\'t Work')\n",
- "plt.show()\n"
- ]
+ "plt.show()"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "6bc50ddc195d88a"
},
{
"cell_type": "code",
- "execution_count": 65,
- "id": "9c830283e9b26466",
+ "outputs": [],
+ "source": [
+ "df_2dhist = pd.DataFrame({\n",
+ " x_label: grp['Do you currently work?'].value_counts()\n",
+ " for x_label, grp in df.groupby('Do you currently live in a house, apartnment, or dorm? ')\n",
+ "})\n",
+ "\n",
+ "# Plot heatmap\n",
+ "plt.subplots(figsize=(8, 8))\n",
+ "sns.heatmap(df_2dhist, cmap='viridis')\n",
+ "plt.xlabel('Do you currently live in a house, apartnment, or dorm? ')\n",
+ "_ = plt.ylabel('Do you currently work?')"
+ ],
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-02-23T01:01:41.532148Z",
- "start_time": "2024-02-23T01:01:41.528825Z"
- },
"collapsed": false
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Do you have roommates that are part of your major? No Yes Total\n",
- "Do you currently live in a house, apartnment, o... \n",
- "Apartment 83 44 127\n",
- "Dorm 17 11 28\n",
- "House 77 21 98\n",
- "Room 1 0 1\n",
- "house (renting) 1 0 1\n",
- "Total 179 76 255\n"
- ]
- }
- ],
+ "id": "15f1e14311b1b17f"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "2b499b750ea3aec9"
+ },
+ {
+ "cell_type": "code",
+ "outputs": [],
"source": [
+ "df.groupby('Do you currently live in a house, apartnment, or dorm? ').size().plot(kind='barh',\n",
+ " color=sns.palettes.mpl_palette(\n",
+ " 'Dark2'))\n",
+ "plt.gca().spines[['top', 'right', ]].set_visible(False)\n",
"roommates_major_table = pd.crosstab(df.iloc[:, 3], df.iloc[:, 9], margins=True, margins_name='Total')\n",
"\n",
"# Print the table\n",
"print(roommates_major_table)\n"
- ]
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "2ee7f39b5d8df8de"
},
{
"cell_type": "code",
- "execution_count": 66,
- "id": "aef1e802",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Chi-squared Value: 6.761293513057266\n",
- "Degrees of Freedom: 10\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Extract the observed values from the contingency table\n",
"observed_values = roommates_major_table.iloc[:-1, :-1].values\n",
@@ -1392,7 +1418,11 @@
"\n",
"# Print results\n",
"print(f\"Chi-squared Value: {chi2_statistic}\\nDegrees of Freedom: {degrees_of_freedom}\")\n"
- ]
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "957406c164cf2ef1"
}
],
"metadata": {