import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Load the dataset from 'user.csv' (path is relative to the working directory)
# and preview the first rows to check that the columns were parsed as expected.
df = pd.read_csv('user.csv')
df.head()
User ID Device Model Operating System App Usage Time (min/day) \
0 1 Google Pixel 5 Android 393
1 2 OnePlus 9 Android 268
2 3 Xiaomi Mi 11 Android 154
3 4 Google Pixel 5 Android 239
4 5 iPhone 12 iOS 187
Screen On Time (hours/day) Battery Drain (mAh/day) \
0 6.4 1872
1 4.7 1331
2 4.0 761
3 4.8 1676
4 4.3 1367
Number of Apps Installed Data Usage (MB/day) Age Gender \
0 67 1122 40 Male
1 42 944 47 Female
2 32 322 42 Male
3 56 871 20 Male
4 58 988 31 Female
User Behavior Class
0 4
1 3
2 2
3 3
4 3
# Preview the last rows of the dataset.
df.tail()
User ID Device Model Operating System App Usage Time (min/day) \
695 696 iPhone 12 iOS 92
696 697 Xiaomi Mi 11 Android 316
697 698 Google Pixel 5 Android 99
698 699 Samsung Galaxy S21 Android 62
699 700 OnePlus 9 Android 212
Screen On Time (hours/day) Battery Drain (mAh/day) \
695 3.9 1082
696 6.8 1965
697 3.1 942
698 1.7 431
699 5.4 1306
Number of Apps Installed Data Usage (MB/day) Age Gender \
695 26 381 22 Male
696 68 1201 59 Male
697 22 457 50 Female
698 13 224 44 Male
699 49 828 23 Female
User Behavior Class
695 2
696 4
697 2
698 1
699 3
# Count the missing values in each column.
df.isnull().sum()
User ID 0
Device Model 0
Operating System 0
App Usage Time (min/day) 0
Screen On Time (hours/day) 0
Battery Drain (mAh/day) 0
Number of Apps Installed 0
Data Usage (MB/day) 0
Age 0
Gender 0
User Behavior Class 0
dtype: int64
# Print the column names, non-null counts, dtypes, and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 User ID 700 non-null int64
1 Device Model 700 non-null object
2 Operating System 700 non-null object
3 App Usage Time (min/day) 700 non-null int64
4 Screen On Time (hours/day) 700 non-null float64
5 Battery Drain (mAh/day) 700 non-null int64
6 Number of Apps Installed 700 non-null int64
7 Data Usage (MB/day) 700 non-null int64
8 Age 700 non-null int64
9 Gender 700 non-null object
10 User Behavior Class 700 non-null int64
dtypes: float64(1), int64(7), object(3)
memory usage: 60.3+ KB
# Count the rows that are exact duplicates of an earlier row.
df.duplicated().sum()
Device Model vs. Gender
# Each entry: (seaborn plotting function, figure size, column mapping, title).
# The plots are drawn in this order, one figure per entry.
plot_specs = [
    (sns.countplot, (12, 5), {'x': 'Device Model', 'hue': 'Gender'},
     'Device Model by Gender '),
    (sns.barplot, (12, 6), {'x': 'Gender', 'y': 'Data Usage (MB/day)'},
     'Data Usage by Gender'),
    (sns.boxplot, (12, 6), {'x': 'Gender', 'y': 'App Usage Time (min/day)'},
     'App Usage Time by Gender'),
    (sns.countplot, (12, 6), {'x': 'Operating System', 'hue': 'Gender'},
     'Operating System by Gender'),
    (sns.lineplot, (12, 6), {'x': 'Age', 'y': 'Screen On Time (hours/day)'},
     'Screen On Time by Age'),
    (sns.lineplot, (12, 6), {'x': 'Age', 'y': 'App Usage Time (min/day)'},
     'App Usage Time by Age'),
]

for draw, size, columns, heading in plot_specs:
    plt.figure(figsize=size)  # start a fresh figure so plots do not overlap
    draw(data=df, **columns)
    plt.title(heading)
    plt.show()
# Remove the "User ID" column and re-inspect the remaining columns.
# NOTE(review): in the previews above "User ID" looks like a sequential row
# identifier (index + 1) — confirm it carries no other information.
df = df.drop(columns = ["User ID"])
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Device Model 700 non-null object
1 Operating System 700 non-null object
2 App Usage Time (min/day) 700 non-null int64
3 Screen On Time (hours/day) 700 non-null float64
4 Battery Drain (mAh/day) 700 non-null int64
5 Number of Apps Installed 700 non-null int64
6 Data Usage (MB/day) 700 non-null int64
7 Age 700 non-null int64
8 Gender 700 non-null object
9 User Behavior Class 700 non-null int64
dtypes: float64(1), int64(6), object(3)
memory usage: 54.8+ KB
# Count how many rows belong to each device model.
df['Device Model'].value_counts()
Device Model
Xiaomi Mi 11 146
iPhone 12 146
Google Pixel 5 142
OnePlus 9 133
Samsung Galaxy S21 133
Name: count, dtype: int64
# Count how many rows belong to each operating system.
df['Operating System'].value_counts()
Operating System
Android 554
iOS 146
Name: count, dtype: int64
Which device model drains the battery the fastest?
# Bar plot with battery drain on the x-axis, split by device model via hue.
# NOTE(review): no y variable is passed; presumably seaborn draws one
# aggregated bar per hue level — confirm against the rendered figure.
sns.barplot(data=df, x='Battery Drain (mAh/day)', hue='Device Model')
<Axes: xlabel='Battery Drain (mAh/day)'>
# Bar plot with age on the x-axis, split by gender via hue.
# NOTE(review): no y variable is passed; presumably seaborn draws one
# aggregated bar per hue level — confirm against the rendered figure.
sns.barplot(x='Age', hue="Gender", data=df)
<Axes: xlabel='Age'>
# Bar plot with the number of installed apps on the x-axis, split by gender via hue.
# NOTE(review): no y variable is passed; presumably seaborn draws one
# aggregated bar per hue level — confirm against the rendered figure.
sns.barplot(x='Number of Apps Installed', hue="Gender", data=df)
<Axes: xlabel='Number of Apps Installed'>
# Bar plot with age on the x-axis, split by device model via hue.
# NOTE(review): no y variable is passed; presumably seaborn draws one
# aggregated bar per hue level — confirm against the rendered figure.
sns.barplot(x='Age', hue="Device Model", data=df)
<Axes: xlabel='Age'>
# https://www.kaggle.com/code/khairnaratharva/eda-and-accuracy-of-99
# EDA & Accuracy of 99% #
https://www.kaggle.com/code/khairnaratharva/eda-and-accuracy-of-99#Model-Trainig