diff options
author | 2024-02-22 22:24:15 -0800 | |
---|---|---|
committer | 2024-02-22 22:24:15 -0800 | |
commit | bb248b94c47e0128910fb29b4bd5991bab759929 (patch) | |
tree | 16f1aa6e9a365df6876d5646f7056be49c147ba3 /CS105MiniProject.ipynb | |
parent | 3036e854b98315b7086bc06a3c585645a46a17b6 (diff) | |
parent | 9b066744bfcc7992917f93d6d0c2cea2bd6afb6d (diff) | |
download | CS105MiniProject-bb248b94c47e0128910fb29b4bd5991bab759929.tar.gz CS105MiniProject-bb248b94c47e0128910fb29b4bd5991bab759929.tar.zst CS105MiniProject-bb248b94c47e0128910fb29b4bd5991bab759929.zip |
Merge pull request #18 from ansg191/pie-chart
Chi-square equation
Diffstat (limited to 'CS105MiniProject.ipynb')
-rw-r--r-- | CS105MiniProject.ipynb | 200 |
1 files changed, 147 insertions, 53 deletions
diff --git a/CS105MiniProject.ipynb b/CS105MiniProject.ipynb index 9203683..0663bc0 100644 --- a/CS105MiniProject.ipynb +++ b/CS105MiniProject.ipynb @@ -12,7 +12,7 @@ "metadata": { "collapsed": false }, - "id": "845bdbd833f03cba" + "id": "21abd26c73fd0070" }, { "cell_type": "markdown", @@ -22,10 +22,24 @@ "metadata": { "collapsed": false }, - "id": "d720609d765d221b" + "id": "69d8e8ad7c61ba61" }, { "cell_type": "code", + "execution_count": 1, + "id": "daa13044", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 614 + }, + "id": "daa13044", + "outputId": "4d440aaa-1ee7-4771-c526-f55e9458ca8a", + "ExecuteTime": { + "end_time": "2024-02-23T06:18:35.652667Z", + "start_time": "2024-02-23T06:18:35.617295Z" + } + }, "outputs": [ { "data": { @@ -51,16 +65,7 @@ "# Select relevant columns\n", "df = df.iloc[:, [2, 3, 7, 8, 9, 58, 59, 60, 61, 26]]\n", "df" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-02-23T02:35:17.657074Z", - "start_time": "2024-02-23T02:35:17.625812Z" - } - }, - "id": "3bea6ea662d6c063", - "execution_count": 1 + ] }, { "cell_type": "markdown", @@ -70,10 +75,19 @@ "metadata": { "collapsed": false }, - "id": "7e69a5a21a9de4ee" + "id": "3f7614a5665d55b6" }, { "cell_type": "code", + "execution_count": 2, + "id": "29889175", + "metadata": { + "id": "29889175", + "ExecuteTime": { + "end_time": "2024-02-23T06:18:35.665240Z", + "start_time": "2024-02-23T06:18:35.654717Z" + } + }, "outputs": [ { "data": { @@ -110,19 +124,19 @@ "df.loc[df['Do you currently work?'] == 'No', 'Do you work in a department related to your major?'] = np.nan\n", "\n", "df" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "de4448fd64205d85", "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-23T02:35:17.671536Z", - "start_time": "2024-02-23T02:35:17.657952Z" + "end_time": "2024-02-23T06:18:35.672377Z", + "start_time": "2024-02-23T06:18:35.665944Z" } }, - "id": "f71f8085d5f66b0", 
- "execution_count": 2 - }, - { - "cell_type": "code", "outputs": [ { "data": { @@ -140,19 +154,19 @@ "# Not working DataFrame\n", "nw_df = df[df['Do you currently work?'] == 'No']\n", "w_df" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5fe8ec7f22878e60", "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-23T02:35:17.682158Z", - "start_time": "2024-02-23T02:35:17.673510Z" + "end_time": "2024-02-23T06:18:35.679365Z", + "start_time": "2024-02-23T06:18:35.673583Z" } }, - "id": "6c1d9ee7948e6b9a", - "execution_count": 3 - }, - { - "cell_type": "code", "outputs": [ { "data": { @@ -166,26 +180,17 @@ ], "source": [ "nw_df" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-02-23T02:35:17.690742Z", - "start_time": "2024-02-23T02:35:17.683032Z" - } - }, - "id": "34f69a756f513fb7", - "execution_count": 4 + ] }, { "cell_type": "markdown", - "source": [ - "# Analysis" - ], + "id": "899d85626b77db20", "metadata": { "collapsed": false }, - "id": "d5c1424ddd30ca97" + "source": [ + "# Analysis" + ] }, { "cell_type": "code", @@ -212,11 +217,11 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-23T02:35:17.897912Z", - "start_time": "2024-02-23T02:35:17.691750Z" + "end_time": "2024-02-23T06:18:35.781584Z", + "start_time": "2024-02-23T06:18:35.680024Z" } }, - "id": "da1811cc63b41845", + "id": "6bc50ddc195d88a", "execution_count": 5 }, { @@ -246,11 +251,11 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-23T02:35:18.116782Z", - "start_time": "2024-02-23T02:35:17.901109Z" + "end_time": "2024-02-23T06:18:36.021224Z", + "start_time": "2024-02-23T06:18:35.783111Z" } }, - "id": "201db70188d3e778", + "id": "15f1e14311b1b17f", "execution_count": 6 }, { @@ -259,7 +264,7 @@ "metadata": { "collapsed": false }, - "id": "8d65fec230193b72" + "id": "2b499b750ea3aec9" }, { "cell_type": "code", @@ -282,12 +287,101 @@ "metadata": { "collapsed": false, "ExecuteTime": { 
- "end_time": "2024-02-23T02:35:18.248057Z", - "start_time": "2024-02-23T02:35:18.118242Z" + "end_time": "2024-02-23T06:18:36.170073Z", + "start_time": "2024-02-23T06:18:36.024936Z" } }, - "id": "5e460707e32c4a2a", + "id": "a3d9a4a3b5eba149", "execution_count": 7 + }, + { + "cell_type": "markdown", + "source": [ + "## Hypotheses" + ], + "metadata": { + "collapsed": false + }, + "id": "4df3824f641fb18b" + }, + { + "cell_type": "markdown", + "source": [ + "### Hypothesis 2: Students who live on-campus are more likely to have roommates of the same major." + ], + "metadata": { + "collapsed": false + }, + "id": "796d474b4650e712" + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "Do you have roommates that are part of your major? No Yes Total\nDo you currently live in a house, apartnment, o... \nApartment 83 44 127\nDorm 17 11 28\nHouse 78 21 99\nRoom 1 0 1\nTotal 179 76 255", + "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Do you have roommates that are part of your major?</th>\n <th>No</th>\n <th>Yes</th>\n <th>Total</th>\n </tr>\n <tr>\n <th>Do you currently live in a house, apartnment, or dorm?</th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Apartment</th>\n <td>83</td>\n <td>44</td>\n <td>127</td>\n </tr>\n <tr>\n <th>Dorm</th>\n <td>17</td>\n <td>11</td>\n <td>28</td>\n </tr>\n <tr>\n <th>House</th>\n <td>78</td>\n <td>21</td>\n <td>99</td>\n </tr>\n <tr>\n <th>Room</th>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>Total</th>\n <td>179</td>\n <td>76</td>\n <td>255</td>\n </tr>\n </tbody>\n</table>\n</div>" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"roommates_major_table = pd.crosstab(df.iloc[:, 3], df.iloc[:, 9], margins=True, margins_name='Total')\n", + "roommates_major_table" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T06:18:36.201284Z", + "start_time": "2024-02-23T06:18:36.179437Z" + } + }, + "id": "2ee7f39b5d8df8de", + "execution_count": 8 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-squared Value: 6.54402786926266\n", + "Degrees of Freedom: 3\n" + ] + } + ], + "source": [ + "# Extract the observed values from the contingency table (drop the 'Total' margin row and column)\n", + "observed_values = roommates_major_table.iloc[:-1, :-1].values\n", + "\n", + "# Calculate expected values under independence: row_total * col_total / grand_total\n", + "row_totals = roommates_major_table.iloc[:-1, -1].values\n", + "col_totals = roommates_major_table.iloc[-1, :-1].values\n", + "total = np.sum(row_totals)\n", + "\n", + "expected_values = np.outer(row_totals, col_totals) / total\n", + "\n", + "# Calculate chi-squared statistic: sum of (observed - expected)^2 / expected over all cells\n", + "chi2_statistic = np.sum((observed_values - expected_values)**2 / expected_values)\n", + "\n", + "# Degrees of freedom: (rows - 1) * (cols - 1) of the observed cells only; the table's own shape also counts the 'Total' margins\n", + "degrees_of_freedom = (observed_values.shape[0] - 1) * (observed_values.shape[1] - 1)\n", + "\n", + "# Print results\n", + "print(f\"Chi-squared Value: {chi2_statistic}\\nDegrees of Freedom: {degrees_of_freedom}\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T06:18:36.219063Z", + "start_time": "2024-02-23T06:18:36.205767Z" + } + }, + "id": "957406c164cf2ef1", + "execution_count": 9 } ], "metadata": { |