{ "cells": [ { "cell_type": "markdown", "source": [ "
\n", "

CS105 Mini-Project

\n", "

By: NAMES HERE

\n", "
" ], "metadata": { "collapsed": false }, "id": "845bdbd833f03cba" },
{ "cell_type": "markdown", "source": [ "# Data Loading & Preprocessing" ], "metadata": { "collapsed": false }, "id": "d720609d765d221b" },
{ "cell_type": "code", "outputs": [], "source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Positions (in data.csv's column order) of the survey columns used by this analysis\n",
"RELEVANT_COLUMNS = [0, 2, 7, 8, 9, 58, 59, 60, 61, 26]\n",
"\n",
"# Load dataframe from data.csv\n",
"raw_df = pd.read_csv(\"data.csv\")\n",
"\n",
"# Select relevant columns. The explicit .copy() makes df an independent frame, so the\n",
"# column assignments made during preprocessing write to df itself rather than to a\n",
"# slice of the loaded frame (this is what avoids pandas' SettingWithCopyWarning).\n",
"df = raw_df.iloc[:, RELEVANT_COLUMNS].copy()\n",
"df"
], "metadata": { "collapsed": false }, "id": "3bea6ea662d6c063" },
{ "cell_type": "markdown", "source": [ "## Preprocessing" ], "metadata": { "collapsed": false }, "id": "7e69a5a21a9de4ee" },
{ "cell_type": "code", "outputs": [], "source": [
"# Fixes empty values\n",
"df['Do you currently work?'] = df['Do you currently work?'].fillna('No')\n",
"\n",
"# Replaces custom text answers with appropriate values.\n",
"# Keys are matched against the whole answer (exact match, not substring).\n",
"HOUSEHOLD_SIZE_FIXES = {\n",
"    '4 in total': '4',\n",
"    '4 (Including me)': '4',\n",
"    'at school 4 including me ': '4',\n",
"    '3 excluding me': '4',\n",
"    '5 including me': '5',\n",
"    'North District 4 bed 2 bath': '4',\n",
"    '3 (room), 8 (hall), ~70 (building)': '3',\n",
"}\n",
"df['How many people live in your household?'] = (\n",
"    df['How many people live in your household?']\n",
"    .fillna(0)  # an unanswered question is recorded as 0\n",
"    .replace(HOUSEHOLD_SIZE_FIXES)\n",
"    .astype(int)  # fails loudly if a free-text answer is not covered by the mapping above\n",
")\n",
"\n",
"# NOTE(review): the header of this survey column appears to carry trailing whitespace,\n",
"# so it is resolved once by its stripped text and then read and written under that one\n",
"# name -- confirm against data.csv.\n",
"live_with_col = df.columns[df.columns.str.strip() == 'Who do you live with?'][0]\n",
"df[live_with_col] = df[live_with_col].replace({\n",
"    'Family, Friends': 'Both',\n",
"    'Family, Friends, Both': 'Both',\n",
"})\n",
"\n",
"# Normalizes non-applicable answers: respondents who do not work get 0 hours and no\n",
"# answer for the department question\n",
"not_working = df['Do you currently work?'] == 'No'\n",
"df.loc[not_working, 'How many hours do you work per week on average?'] = 0\n",
"df.loc[not_working, 'Do you work in a department related to your major?'] = np.nan\n",
"\n",
"df"
], "metadata": { "collapsed": false }, "id": "f71f8085d5f66b0" },
{ "cell_type": "code", "outputs": [], "source": [
"# Working DataFrame (copied so later edits to it cannot touch df)\n",
"w_df = df[df['Do you currently work?'] == 'Yes'].copy()\n",
"# Not working DataFrame (copied for the same reason)\n",
"nw_df = df[df['Do you currently work?'] == 'No'].copy()\n",
"w_df"
], "metadata": { "collapsed": false }, "id": "6c1d9ee7948e6b9a" },
{ "cell_type": "code", "outputs": [], "source": [ "nw_df" ], "metadata": { "collapsed": false }, "id": "34f69a756f513fb7" },
{ "cell_type": "markdown", "source": [ "
\n", "

CS105 Project

\n", "

Ali Naqvi, ...

\n", "

Topic: Does who a student lives with affect whether and how they work?

\n", "
\n" ], "metadata": { "collapsed": false }, "id": "d5c1424ddd30ca97" },
{ "cell_type": "code", "outputs": [], "source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Tally respondents by their answer to the employment question\n",
"work_counts = df['Do you currently work?'].value_counts()\n",
"\n",
"# Draw the tally as a pie chart, one wedge per answer, labelled with its percentage\n",
"fig, ax = plt.subplots(figsize=(8, 8))\n",
"ax.pie(\n",
"    work_counts,\n",
"    labels=work_counts.index,\n",
"    autopct='%1.1f%%',\n",
"    startangle=90,\n",
"    colors=['lightblue', 'lightcoral'],\n",
")\n",
"ax.set_title('Distribution of People Who Work and Don\\'t Work')\n",
"plt.show()\n"
], "metadata": { "collapsed": false }, "id": "da1811cc63b41845" }, { "cell_type": "code", "outputs": [], "source": [], "metadata": { "collapsed": false }, "id": "201db70188d3e778" }, { "cell_type": "markdown", "source": [], "metadata": { "collapsed": false }, "id": "8d65fec230193b72" } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }