[go: up one dir, main page]

0% found this document useful (0 votes)
11 views23 pages

House Prices.ipynb

The document contains Python code for loading and displaying datasets related to house prices using the Kaggle platform. It lists the available input data files and reads the training and testing datasets from specified CSV files. The output includes a preview of the training dataset in a tabular format.

Uploaded by

xoyon67607
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views23 pages

House Prices.ipynb

The document contains Python code for loading and displaying datasets related to house prices using the Kaggle platform. It lists the available input data files and reads the training and testing datasets from specified CSV files. The output includes a preview of the training dataset in a tabular format.

Uploaded by

xoyon67607
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 23

{

"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "445a57d6",
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2024-11-27T04:51:06.787885Z",
"iopub.status.busy": "2024-11-27T04:51:06.787491Z",
"iopub.status.idle": "2024-11-27T04:51:07.529462Z",
"shell.execute_reply": "2024-11-27T04:51:07.528596Z"
},
"papermill": {
"duration": 0.749425,
"end_time": "2024-11-27T04:51:07.531460",
"exception": false,
"start_time": "2024-11-27T04:51:06.782035",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/kaggle/input/house-prices-advanced-regression-techniques/sample_submission.csv\n",
"/kaggle/input/house-prices-advanced-regression-techniques/data_description.txt\n",
"/kaggle/input/house-prices-advanced-regression-techniques/train.csv\n",
"/kaggle/input/house-prices-advanced-regression-techniques/test.csv\n"
]
}
],
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
"# For example, here's several helpful packages to load\n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
"    for filename in filenames:\n",
"        print(os.path.join(dirname, filename))\n",
"\n",
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "09923a7c",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.539105Z",
"iopub.status.busy": "2024-11-27T04:51:07.538698Z",
"iopub.status.idle": "2024-11-27T04:51:07.602042Z",
"shell.execute_reply": "2024-11-27T04:51:07.601045Z"
},
"papermill": {
"duration": 0.069457,
"end_time": "2024-11-27T04:51:07.604181",
"exception": false,
"start_time": "2024-11-27T04:51:07.534724",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"train = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv')\n",
"test = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/test.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a9f32841",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.611940Z",
"iopub.status.busy": "2024-11-27T04:51:07.611652Z",
"iopub.status.idle": "2024-11-27T04:51:07.650769Z",
"shell.execute_reply": "2024-11-27T04:51:07.649820Z"
},
"papermill": {
"duration": 0.04498,
"end_time": "2024-11-27T04:51:07.652593",
"exception": false,
"start_time": "2024-11-27T04:51:07.607613",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>MSSubClass</th>\n",
" <th>MSZoning</th>\n",
" <th>LotFrontage</th>\n",
" <th>LotArea</th>\n",
" <th>Street</th>\n",
" <th>Alley</th>\n",
" <th>LotShape</th>\n",
" <th>LandContour</th>\n",
" <th>Utilities</th>\n",
" <th>...</th>\n",
" <th>PoolArea</th>\n",
" <th>PoolQC</th>\n",
" <th>Fence</th>\n",
" <th>MiscFeature</th>\n",
" <th>MiscVal</th>\n",
" <th>MoSold</th>\n",
" <th>YrSold</th>\n",
" <th>SaleType</th>\n",
" <th>SaleCondition</th>\n",
" <th>SalePrice</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>65.0</td>\n",
" <td>8450</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>208500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>80.0</td>\n",
" <td>9600</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2007</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>181500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>68.0</td>\n",
" <td>11250</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>223500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>70</td>\n",
" <td>RL</td>\n",
" <td>60.0</td>\n",
" <td>9550</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2006</td>\n",
" <td>WD</td>\n",
" <td>Abnorml</td>\n",
" <td>140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>84.0</td>\n",
" <td>14260</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>12</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1455</th>\n",
" <td>1456</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>62.0</td>\n",
" <td>7917</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>2007</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>175000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1456</th>\n",
" <td>1457</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>85.0</td>\n",
" <td>13175</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>MnPrv</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2010</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>210000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1457</th>\n",
" <td>1458</td>\n",
" <td>70</td>\n",
" <td>RL</td>\n",
" <td>66.0</td>\n",
" <td>9042</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>GdPrv</td>\n",
" <td>Shed</td>\n",
" <td>2500</td>\n",
" <td>5</td>\n",
" <td>2010</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>266500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1458</th>\n",
" <td>1459</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>68.0</td>\n",
" <td>9717</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>2010</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>142125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1459</th>\n",
" <td>1460</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>75.0</td>\n",
" <td>9937</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>147500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1460 rows × 81 columns</p>\n",
"</div>"
],
"text/plain": [
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
"... ... ... ... ... ... ... ... ... \n",
"1455 1456 60 RL 62.0 7917 Pave NaN Reg \n",
"1456 1457 20 RL 85.0 13175 Pave NaN Reg \n",
"1457 1458 70 RL 66.0 9042 Pave NaN Reg \n",
"1458 1459 20 RL 68.0 9717 Pave NaN Reg \n",
"1459 1460 20 RL 75.0 9937 Pave NaN Reg \n",
"\n",
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"... ... ... ... ... ... ... ... ... \n",
"1455 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"1456 Lvl AllPub ... 0 NaN MnPrv NaN 0 \n",
"1457 Lvl AllPub ... 0 NaN GdPrv Shed 2500 \n",
"1458 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"1459 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"\n",
" MoSold YrSold SaleType SaleCondition SalePrice \n",
"0 2 2008 WD Normal 208500 \n",
"1 5 2007 WD Normal 181500 \n",
"2 9 2008 WD Normal 223500 \n",
"3 2 2006 WD Abnorml 140000 \n",
"4 12 2008 WD Normal 250000 \n",
"... ... ... ... ... ... \n",
"1455 8 2007 WD Normal 175000 \n",
"1456 2 2010 WD Normal 210000 \n",
"1457 5 2010 WD Normal 266500 \n",
"1458 4 2010 WD Normal 142125 \n",
"1459 6 2008 WD Normal 147500 \n",
"\n",
"[1460 rows x 81 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5930ea17",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.661364Z",
"iopub.status.busy": "2024-11-27T04:51:07.661065Z",
"iopub.status.idle": "2024-11-27T04:51:07.668890Z",
"shell.execute_reply": "2024-11-27T04:51:07.668205Z"
},
"papermill": {
"duration": 0.013753,
"end_time": "2024-11-27T04:51:07.670425",
"exception": false,
"start_time": "2024-11-27T04:51:07.656672",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"nulls = train.columns[train.isnull().sum() != 0]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4f916b9e",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.678031Z",
"iopub.status.busy": "2024-11-27T04:51:07.677720Z",
"iopub.status.idle": "2024-11-27T04:51:07.683036Z",
"shell.execute_reply": "2024-11-27T04:51:07.682218Z"
},
"papermill": {
"duration": 0.011108,
"end_time": "2024-11-27T04:51:07.684750",
"exception": false,
"start_time": "2024-11-27T04:51:07.673642",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['LotFrontage', 'Alley', 'MasVnrType', 'MasVnrArea', 'BsmtQual',\n",
" 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',\n",
" 'Electrical', 'FireplaceQu', 'GarageType', 'GarageYrBlt',\n",
" 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence',\n",
" 'MiscFeature'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nulls"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9e0e963f",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.692515Z",
"iopub.status.busy": "2024-11-27T04:51:07.692269Z",
"iopub.status.idle": "2024-11-27T04:51:07.712945Z",
"shell.execute_reply": "2024-11-27T04:51:07.711979Z"
},
"papermill": {
"duration": 0.026557,
"end_time": "2024-11-27T04:51:07.714671",
"exception": false,
"start_time": "2024-11-27T04:51:07.688114",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>LotFrontage</th>\n",
" <th>Alley</th>\n",
" <th>MasVnrType</th>\n",
" <th>MasVnrArea</th>\n",
" <th>BsmtQual</th>\n",
" <th>BsmtCond</th>\n",
" <th>BsmtExposure</th>\n",
" <th>BsmtFinType1</th>\n",
" <th>BsmtFinType2</th>\n",
" <th>Electrical</th>\n",
" <th>FireplaceQu</th>\n",
" <th>GarageType</th>\n",
" <th>GarageYrBlt</th>\n",
" <th>GarageFinish</th>\n",
" <th>GarageQual</th>\n",
" <th>GarageCond</th>\n",
" <th>PoolQC</th>\n",
" <th>Fence</th>\n",
" <th>MiscFeature</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>65.0</td>\n",
" <td>NaN</td>\n",
" <td>BrkFace</td>\n",
" <td>196.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>NaN</td>\n",
" <td>Attchd</td>\n",
" <td>2003.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>80.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1976.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>68.0</td>\n",
" <td>NaN</td>\n",
" <td>BrkFace</td>\n",
" <td>162.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>Mn</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>2001.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>60.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>No</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>Gd</td>\n",
" <td>Detchd</td>\n",
" <td>1998.0</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>84.0</td>\n",
" <td>NaN</td>\n",
" <td>BrkFace</td>\n",
" <td>350.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>Av</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>2000.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1455</th>\n",
" <td>62.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>Unf</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1999.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1456</th>\n",
" <td>85.0</td>\n",
" <td>NaN</td>\n",
" <td>Stone</td>\n",
" <td>119.0</td>\n",
" <td>Gd</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>ALQ</td>\n",
" <td>Rec</td>\n",
" <td>SBrkr</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1978.0</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>MnPrv</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1457</th>\n",
" <td>66.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>No</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>Gd</td>\n",
" <td>Attchd</td>\n",
" <td>1941.0</td>\n",
" <td>RFn</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>GdPrv</td>\n",
" <td>Shed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1458</th>\n",
" <td>68.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>Mn</td>\n",
" <td>GLQ</td>\n",
" <td>Rec</td>\n",
" <td>FuseA</td>\n",
" <td>NaN</td>\n",
" <td>Attchd</td>\n",
" <td>1950.0</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1459</th>\n",
" <td>75.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>BLQ</td>\n",
" <td>LwQ</td>\n",
" <td>SBrkr</td>\n",
" <td>NaN</td>\n",
" <td>Attchd</td>\n",
" <td>1965.0</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1460 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" LotFrontage Alley MasVnrType MasVnrArea BsmtQual BsmtCond BsmtExposure \\\n",
"0 65.0 NaN BrkFace 196.0 Gd TA No \n",
"1 80.0 NaN NaN 0.0 Gd TA Gd \n",
"2 68.0 NaN BrkFace 162.0 Gd TA Mn \n",
"3 60.0 NaN NaN 0.0 TA Gd No \n",
"4 84.0 NaN BrkFace 350.0 Gd TA Av \n",
"... ... ... ... ... ... ... ... ... \n",
"1455 62.0 NaN NaN 0.0 Gd TA No \n",
"1456 85.0 NaN Stone 119.0 Gd TA No \n",
"1457 66.0 NaN NaN 0.0 TA Gd No \n",
"1458 68.0 NaN NaN 0.0 TA TA Mn \n",
"1459 75.0 NaN NaN 0.0 TA TA No \n",
"\n",
" BsmtFinType1 BsmtFinType2 Electrical FireplaceQu GarageType GarageYrBlt \\\n",
"0 GLQ Unf SBrkr NaN Attchd 2003.0 \n",
"1 ALQ Unf SBrkr TA Attchd 1976.0 \n",
"2 GLQ Unf SBrkr TA Attchd 2001.0 \n",
"3 ALQ Unf SBrkr Gd Detchd 1998.0 \n",
"4 GLQ Unf SBrkr TA Attchd 2000.0 \n",
"... ... ... ... ... ... ... ... \n",
"1455 Unf Unf SBrkr TA Attchd 1999.0 \n",
"1456 ALQ Rec SBrkr TA Attchd 1978.0 \n",
"1457 GLQ Unf SBrkr Gd Attchd 1941.0 \n",
"1458 GLQ Rec FuseA NaN Attchd 1950.0 \n",
"1459 BLQ LwQ SBrkr NaN Attchd 1965.0 \n",
"\n",
" GarageFinish GarageQual GarageCond PoolQC Fence MiscFeature \n",
"0 RFn TA TA NaN NaN NaN \n",
"1 RFn TA TA NaN NaN NaN \n",
"2 RFn TA TA NaN NaN NaN \n",
"3 Unf TA TA NaN NaN NaN \n",
"4 RFn TA TA NaN NaN NaN \n",
"... ... ... ... ... ... ... \n",
"1455 RFn TA TA NaN NaN NaN \n",
"1456 Unf TA TA NaN MnPrv NaN \n",
"1457 RFn TA TA NaN GdPrv Shed \n",
"1458 Unf TA TA NaN NaN NaN \n",
"1459 Fin TA TA NaN NaN NaN \n",
"\n",
"[1460 rows x 19 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train[nulls]"
]
},
{
"cell_type": "markdown",
"id": "cfb9a125",
"metadata": {
"papermill": {
"duration": 0.003754,
"end_time": "2024-11-27T04:51:07.722566",
"exception": false,
"start_time": "2024-11-27T04:51:07.718812",
"status": "completed"
},
"tags": []
},
"source": [
"# EDA"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4097a012",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.731532Z",
"iopub.status.busy": "2024-11-27T04:51:07.731239Z",
"iopub.status.idle": "2024-11-27T04:51:07.753152Z",
"shell.execute_reply": "2024-11-27T04:51:07.752118Z"
},
"papermill": {
"duration": 0.029006,
"end_time": "2024-11-27T04:51:07.755351",
"exception": false,
"start_time": "2024-11-27T04:51:07.726345",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1460 entries, 0 to 1459\n",
"Data columns (total 81 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Id 1460 non-null int64 \n",
" 1 MSSubClass 1460 non-null int64 \n",
" 2 MSZoning 1460 non-null object \n",
" 3 LotFrontage 1201 non-null float64\n",
" 4 LotArea 1460 non-null int64 \n",
" 5 Street 1460 non-null object \n",
" 6 Alley 91 non-null object \n",
" 7 LotShape 1460 non-null object \n",
" 8 LandContour 1460 non-null object \n",
" 9 Utilities 1460 non-null object \n",
" 10 LotConfig 1460 non-null object \n",
" 11 LandSlope 1460 non-null object \n",
" 12 Neighborhood 1460 non-null object \n",
" 13 Condition1 1460 non-null object \n",
" 14 Condition2 1460 non-null object \n",
" 15 BldgType 1460 non-null object \n",
" 16 HouseStyle 1460 non-null object \n",
" 17 OverallQual 1460 non-null int64 \n",
" 18 OverallCond 1460 non-null int64 \n",
" 19 YearBuilt 1460 non-null int64 \n",
" 20 YearRemodAdd 1460 non-null int64 \n",
" 21 RoofStyle 1460 non-null object \n",
" 22 RoofMatl 1460 non-null object \n",
" 23 Exterior1st 1460 non-null object \n",
" 24 Exterior2nd 1460 non-null object \n",
" 25 MasVnrType 588 non-null object \n",
" 26 MasVnrArea 1452 non-null float64\n",
" 27 ExterQual 1460 non-null object \n",
" 28 ExterCond 1460 non-null object \n",
" 29 Foundation 1460 non-null object \n",
" 30 BsmtQual 1423 non-null object \n",
" 31 BsmtCond 1423 non-null object \n",
" 32 BsmtExposure 1422 non-null object \n",
" 33 BsmtFinType1 1423 non-null object \n",
" 34 BsmtFinSF1 1460 non-null int64 \n",
" 35 BsmtFinType2 1422 non-null object \n",
" 36 BsmtFinSF2 1460 non-null int64 \n",
" 37 BsmtUnfSF 1460 non-null int64 \n",
" 38 TotalBsmtSF 1460 non-null int64 \n",
" 39 Heating 1460 non-null object \n",
" 40 HeatingQC 1460 non-null object \n",
" 41 CentralAir 1460 non-null object \n",
" 42 Electrical 1459 non-null object \n",
" 43 1stFlrSF 1460 non-null int64 \n",
" 44 2ndFlrSF 1460 non-null int64 \n",
" 45 LowQualFinSF 1460 non-null int64 \n",
" 46 GrLivArea 1460 non-null int64 \n",
" 47 BsmtFullBath 1460 non-null int64 \n",
" 48 BsmtHalfBath 1460 non-null int64 \n",
" 49 FullBath 1460 non-null int64 \n",
" 50 HalfBath 1460 non-null int64 \n",
" 51 BedroomAbvGr 1460 non-null int64 \n",
" 52 KitchenAbvGr 1460 non-null int64 \n",
" 53 KitchenQual 1460 non-null object \n",
" 54 TotRmsAbvGrd 1460 non-null int64 \n",
" 55 Functional 1460 non-null object \n",
" 56 Fireplaces 1460 non-null int64 \n",
" 57 FireplaceQu 770 non-null object \n",
" 58 GarageType 1379 non-null object \n",
" 59 GarageYrBlt 1379 non-null float64\n",
" 60 GarageFinish 1379 non-null object \n",
" 61 GarageCars 1460 non-null int64 \n",
" 62 GarageArea 1460 non-null int64 \n",
" 63 GarageQual 1379 non-null object \n",
" 64 GarageCond 1379 non-null object \n",
" 65 PavedDrive 1460 non-null object \n",
" 66 WoodDeckSF 1460 non-null int64 \n",
" 67 OpenPorchSF 1460 non-null int64 \n",
" 68 EnclosedPorch 1460 non-null int64 \n",
" 69 3SsnPorch 1460 non-null int64 \n",
" 70 ScreenPorch 1460 non-null int64 \n",
" 71 PoolArea 1460 non-null int64 \n",
" 72 PoolQC 7 non-null object \n",
" 73 Fence 281 non-null object \n",
" 74 MiscFeature 54 non-null object \n",
" 75 MiscVal 1460 non-null int64 \n",
" 76 MoSold 1460 non-null int64 \n",
" 77 YrSold 1460 non-null int64 \n",
" 78 SaleType 1460 non-null object \n",
" 79 SaleCondition 1460 non-null object \n",
" 80 SalePrice 1460 non-null int64 \n",
"dtypes: float64(3), int64(35), object(43)\n",
"memory usage: 924.0+ KB\n"
]
}
],
"source": [
"train.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8c92a060",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.765459Z",
"iopub.status.busy": "2024-11-27T04:51:07.764683Z",
"iopub.status.idle": "2024-11-27T04:51:07.771340Z",
"shell.execute_reply": "2024-11-27T04:51:07.770613Z"
},
"papermill": {
"duration": 0.013471,
"end_time": "2024-11-27T04:51:07.773118",
"exception": false,
"start_time": "2024-11-27T04:51:07.759647",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Dropping the Id column\n",
"train = train.drop(columns='Id',axis= 1)\n",
"object_train = train.columns[train.dtypes == 'object']\n",
"num_train = train.columns[train.dtypes == 'int64']"
]
},
{
"cell_type": "markdown",
"id": "8d902bdf",
"metadata": {
"papermill": {
"duration": 0.004051,
"end_time": "2024-11-27T04:51:07.781310",
"exception": false,
"start_time": "2024-11-27T04:51:07.777259",
"status": "completed"
},
"tags": []
},
"source": [
"### EDA of Numerical"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "7db83e19",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.790676Z",
"iopub.status.busy": "2024-11-27T04:51:07.790096Z",
"iopub.status.idle": "2024-11-27T04:51:07.795599Z",
"shell.execute_reply": "2024-11-27T04:51:07.794730Z"
},
"papermill": {
"duration": 0.012245,
"end_time": "2024-11-27T04:51:07.797383",
"exception": false,
"start_time": "2024-11-27T04:51:07.785138",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',\n",
" 'YearRemodAdd', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',\n",
" '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath',\n",
" 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',\n",
" 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars', 'GarageArea', 'WoodDeckSF',\n",
" 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',\n",
" 'MiscVal', 'MoSold', 'YrSold', 'SalePrice'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num_train"
]
},
{
"cell_type": "markdown",
"id": "5edbbea4",
"metadata": {
"papermill": {
"duration": 0.003834,
"end_time": "2024-11-27T04:51:07.805328",
"exception": false,
"start_time": "2024-11-27T04:51:07.801494",
"status": "completed"
},
"tags": []
},
"source": [
"#### MSSubClass\n"
]
},
{
"cell_type": "markdown",
"id": "0a097e4e",
"metadata": {
"papermill": {
"duration": 0.003748,
"end_time": "2024-11-27T04:51:07.813178",
"exception": false,
"start_time": "2024-11-27T04:51:07.809430",
"status": "completed"
},
"tags": []
},
"source": [
"### EDA of Object"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "764609a2",
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-27T04:51:07.822471Z",
"iopub.status.busy": "2024-11-27T04:51:07.822194Z",
"iopub.status.idle": "2024-11-27T04:51:07.827729Z",
"shell.execute_reply": "2024-11-27T04:51:07.826869Z"
},
"papermill": {
"duration": 0.01221,
"end_time": "2024-11-27T04:51:07.829445",
"exception": false,
"start_time": "2024-11-27T04:51:07.817235",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities',\n",
" 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',\n",
" 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',\n",
" 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',\n",
" 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',\n",
" 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',\n",
" 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',\n",
" 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature',\n",
" 'SaleType', 'SaleCondition'],\n",
" dtype='object')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"object_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9a86d2c",
"metadata": {
"papermill": {
"duration": 0.003814,
"end_time": "2024-11-27T04:51:07.837249",
"exception": false,
"start_time": "2024-11-27T04:51:07.833435",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": []
}
],
"metadata": {
"kaggle": {
"accelerator": "none",
"dataSources": [
{
"databundleVersionId": 868283,
"sourceId": 5407,
"sourceType": "competition"
}
],
"dockerImageVersionId": 30761,
"isGpuEnabled": false,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
},
"papermill": {
"default_parameters": {},
"duration": 3.725654,
"end_time": "2024-11-27T04:51:08.159480",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2024-11-27T04:51:04.433826",
"version": "2.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

You might also like