aboutsummaryrefslogtreecommitdiff
path: root/CS105MiniProject.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'CS105MiniProject.ipynb')
-rw-r--r--CS105MiniProject.ipynb200
1 files changed, 147 insertions, 53 deletions
diff --git a/CS105MiniProject.ipynb b/CS105MiniProject.ipynb
index 9203683..0663bc0 100644
--- a/CS105MiniProject.ipynb
+++ b/CS105MiniProject.ipynb
@@ -12,7 +12,7 @@
"metadata": {
"collapsed": false
},
- "id": "845bdbd833f03cba"
+ "id": "21abd26c73fd0070"
},
{
"cell_type": "markdown",
@@ -22,10 +22,24 @@
"metadata": {
"collapsed": false
},
- "id": "d720609d765d221b"
+ "id": "69d8e8ad7c61ba61"
},
{
"cell_type": "code",
+ "execution_count": 1,
+ "id": "daa13044",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 614
+ },
+ "id": "daa13044",
+ "outputId": "4d440aaa-1ee7-4771-c526-f55e9458ca8a",
+ "ExecuteTime": {
+ "end_time": "2024-02-23T06:18:35.652667Z",
+ "start_time": "2024-02-23T06:18:35.617295Z"
+ }
+ },
"outputs": [
{
"data": {
@@ -51,16 +65,7 @@
"# Select relevant columns\n",
"df = df.iloc[:, [2, 3, 7, 8, 9, 58, 59, 60, 61, 26]]\n",
"df"
- ],
- "metadata": {
- "collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-02-23T02:35:17.657074Z",
- "start_time": "2024-02-23T02:35:17.625812Z"
- }
- },
- "id": "3bea6ea662d6c063",
- "execution_count": 1
+ ]
},
{
"cell_type": "markdown",
@@ -70,10 +75,19 @@
"metadata": {
"collapsed": false
},
- "id": "7e69a5a21a9de4ee"
+ "id": "3f7614a5665d55b6"
},
{
"cell_type": "code",
+ "execution_count": 2,
+ "id": "29889175",
+ "metadata": {
+ "id": "29889175",
+ "ExecuteTime": {
+ "end_time": "2024-02-23T06:18:35.665240Z",
+ "start_time": "2024-02-23T06:18:35.654717Z"
+ }
+ },
"outputs": [
{
"data": {
@@ -110,19 +124,19 @@
"df.loc[df['Do you currently work?'] == 'No', 'Do you work in a department related to your major?'] = np.nan\n",
"\n",
"df"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "de4448fd64205d85",
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-02-23T02:35:17.671536Z",
- "start_time": "2024-02-23T02:35:17.657952Z"
+ "end_time": "2024-02-23T06:18:35.672377Z",
+ "start_time": "2024-02-23T06:18:35.665944Z"
}
},
- "id": "f71f8085d5f66b0",
- "execution_count": 2
- },
- {
- "cell_type": "code",
"outputs": [
{
"data": {
@@ -140,19 +154,19 @@
"# Not working DataFrame\n",
"nw_df = df[df['Do you currently work?'] == 'No']\n",
"w_df"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5fe8ec7f22878e60",
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-02-23T02:35:17.682158Z",
- "start_time": "2024-02-23T02:35:17.673510Z"
+ "end_time": "2024-02-23T06:18:35.679365Z",
+ "start_time": "2024-02-23T06:18:35.673583Z"
}
},
- "id": "6c1d9ee7948e6b9a",
- "execution_count": 3
- },
- {
- "cell_type": "code",
"outputs": [
{
"data": {
@@ -166,26 +180,17 @@
],
"source": [
"nw_df"
- ],
- "metadata": {
- "collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-02-23T02:35:17.690742Z",
- "start_time": "2024-02-23T02:35:17.683032Z"
- }
- },
- "id": "34f69a756f513fb7",
- "execution_count": 4
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "# Analysis"
- ],
+ "id": "899d85626b77db20",
"metadata": {
"collapsed": false
},
- "id": "d5c1424ddd30ca97"
+ "source": [
+ "# Analysis"
+ ]
},
{
"cell_type": "code",
@@ -212,11 +217,11 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-02-23T02:35:17.897912Z",
- "start_time": "2024-02-23T02:35:17.691750Z"
+ "end_time": "2024-02-23T06:18:35.781584Z",
+ "start_time": "2024-02-23T06:18:35.680024Z"
}
},
- "id": "da1811cc63b41845",
+ "id": "6bc50ddc195d88a",
"execution_count": 5
},
{
@@ -246,11 +251,11 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-02-23T02:35:18.116782Z",
- "start_time": "2024-02-23T02:35:17.901109Z"
+ "end_time": "2024-02-23T06:18:36.021224Z",
+ "start_time": "2024-02-23T06:18:35.783111Z"
}
},
- "id": "201db70188d3e778",
+ "id": "15f1e14311b1b17f",
"execution_count": 6
},
{
@@ -259,7 +264,7 @@
"metadata": {
"collapsed": false
},
- "id": "8d65fec230193b72"
+ "id": "2b499b750ea3aec9"
},
{
"cell_type": "code",
@@ -282,12 +287,101 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-02-23T02:35:18.248057Z",
- "start_time": "2024-02-23T02:35:18.118242Z"
+ "end_time": "2024-02-23T06:18:36.170073Z",
+ "start_time": "2024-02-23T06:18:36.024936Z"
}
},
- "id": "5e460707e32c4a2a",
+ "id": "a3d9a4a3b5eba149",
"execution_count": 7
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Hypotheses"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "4df3824f641fb18b"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Hypothesis 2: Students who live on campus are more likely to have roommates of the same major."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "796d474b4650e712"
+ },
+ {
+ "cell_type": "code",
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "Do you have roommates that are part of your major? No Yes Total\nDo you currently live in a house, apartnment, o... \nApartment 83 44 127\nDorm 17 11 28\nHouse 78 21 99\nRoom 1 0 1\nTotal 179 76 255",
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Do you have roommates that are part of your major?</th>\n <th>No</th>\n <th>Yes</th>\n <th>Total</th>\n </tr>\n <tr>\n <th>Do you currently live in a house, apartnment, or dorm?</th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Apartment</th>\n <td>83</td>\n <td>44</td>\n <td>127</td>\n </tr>\n <tr>\n <th>Dorm</th>\n <td>17</td>\n <td>11</td>\n <td>28</td>\n </tr>\n <tr>\n <th>House</th>\n <td>78</td>\n <td>21</td>\n <td>99</td>\n </tr>\n <tr>\n <th>Room</th>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>Total</th>\n <td>179</td>\n <td>76</td>\n <td>255</td>\n </tr>\n </tbody>\n</table>\n</div>"
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Contingency table: rows come from df column 3, columns from df column 9;\n",
+ "# margins=True appends a 'Total' row and a 'Total' column.\n",
+ "roommates_major_table = pd.crosstab(\n",
+ "    index=df.iloc[:, 3],\n",
+ "    columns=df.iloc[:, 9],\n",
+ "    margins=True,\n",
+ "    margins_name='Total',\n",
+ ")\n",
+ "roommates_major_table"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-02-23T06:18:36.201284Z",
+ "start_time": "2024-02-23T06:18:36.179437Z"
+ }
+ },
+ "id": "2ee7f39b5d8df8de",
+ "execution_count": 8
+ },
+ {
+ "cell_type": "code",
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Chi-squared Value: 6.54402786926266\n",
+ "Degrees of Freedom: 8\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Extract the observed values from the contingency table\n",
+ "observed_values = roommates_major_table.iloc[:-1, :-1].values\n",
+ "\n",
+ "# Calculate expected values\n",
+ "row_totals = roommates_major_table.iloc[:-1, -1].values\n",
+ "col_totals = roommates_major_table.iloc[-1, :-1].values\n",
+ "total = np.sum(row_totals)\n",
+ "\n",
+ "expected_values = np.outer(row_totals, col_totals) / total\n",
+ "\n",
+ "# Calculate chi-squared statistic\n",
+ "chi2_statistic = np.sum((observed_values - expected_values)**2 / expected_values)\n",
+ "\n",
+ "# Degrees of freedom\n",
+ "degrees_of_freedom = (roommates_major_table.shape[0] - 1) * (roommates_major_table.shape[1] - 1)\n",
+ "\n",
+ "# Print results\n",
+ "print(f\"Chi-squared Value: {chi2_statistic}\\nDegrees of Freedom: {degrees_of_freedom}\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-02-23T06:18:36.219063Z",
+ "start_time": "2024-02-23T06:18:36.205767Z"
+ }
+ },
+ "id": "957406c164cf2ef1",
+ "execution_count": 9
}
],
"metadata": {