import pandas as pd
import numpy as np
import seaborn as sns
# Load the transaction records from the Excel workbook into a DataFrame.
transaction_data = pd.read_excel(io='/content/QVI_transaction_data.xlsx')
# Preview the first five rows (the default for head()).
transaction_data.head(n=5)
{"type":"dataframe","variable_name":"transaction_data"}
# Load the customer purchase-behaviour table from CSV into a DataFrame.
customer_data = pd.read_csv(filepath_or_buffer='/content/QVI_purchase_behaviour.csv')
# Preview the first five rows (the default for head()).
customer_data.head(n=5)
{"summary":"{\n \"name\": \"customer_data\",\n \"rows\": 72637,\n
\"fields\": [\n {\n \"column\": \"LYLTY_CARD_NBR\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
89892,\n \"min\": 1000,\n \"max\": 2373711,\n
\"num_unique_values\": 72637,\n \"samples\": [\n
34250,\n 224159,\n 107092\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"LIFESTAGE\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 7,\n \"samples\": [\n \"YOUNG
SINGLES/COUPLES\",\n \"YOUNG FAMILIES\",\n \"OLDER
FAMILIES\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PREMIUM_CUSTOMER\",\n \"properties\": {\n \"dtype\":
\"category\",\n \"num_unique_values\": 3,\n \"samples\":
[\n \"Premium\",\n \"Mainstream\",\n
\"Budget\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n }\n ]\
n}","type":"dataframe","variable_name":"customer_data"}
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of transaction_data; the object column PROD_NAME is not included.
# NOTE(review): DATE is summarised as a plain number here, so it is presumably
# stored as an Excel serial day count rather than a datetime - confirm and
# convert before any date-based analysis.
transaction_data.describe()
{"summary":"{\n \"name\": \"transaction_data\",\n \"rows\": 8,\n
\"fields\": [\n {\n \"column\": \"DATE\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
81874.91928740985,\n \"min\": 105.38928199808275,\n
\"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 43464.03626017611,\n 43464.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"STORE_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93588.85886517387,\n \"min\":
1.0,\n \"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 135.08010995483997,\n 130.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"LYLTY_CARD_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 798650.3268791955,\n \"min\":
1000.0,\n \"max\": 2373711.0,\n \"num_unique_values\":
8,\n \"samples\": [\n 135549.47640426527,\n
130357.5,\n 264836.0\n ],\n \"semantic_type\":
\"\",\n \"description\": \"\"\n }\n },\n {\n
\"column\": \"TXN_ID\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 813629.9030235903,\n \"min\":
1.0,\n \"max\": 2415841.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 135158.31081499494,\n 135137.5,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PROD_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93614.81219548377,\n \"min\":
1.0,\n \"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 56.58315712365388,\n 56.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PROD_QTY\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93623.10757379017,\n \"min\":
0.6436539890116252,\n \"max\": 264836.0,\n
\"num_unique_values\": 6,\n \"samples\": [\n
264836.0,\n 1.907308674047335,\n 200.0\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"TOT_SALES\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
93599.39583319426,\n \"min\": 1.5,\n \"max\": 264836.0,\
n \"num_unique_values\": 8,\n \"samples\": [\n
7.3041995801175075,\n 7.4,\n 264836.0\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe"}
# Count missing values per column (isna is the canonical name for isnull).
transaction_data.isna().sum()
DATE 0
STORE_NBR 0
LYLTY_CARD_NBR 0
TXN_ID 0
PROD_NBR 0
PROD_NAME 0
PROD_QTY 0
TOT_SALES 0
dtype: int64
# Capture the dtype of every column, then print the listing.
data_type = transaction_data.dtypes
print(data_type)
DATE int64
STORE_NBR int64
LYLTY_CARD_NBR int64
TXN_ID int64
PROD_NBR int64
PROD_NAME object
PROD_QTY int64
TOT_SALES float64
dtype: object
import matplotlib.pyplot as plt
import seaborn as sns
# Histogram of TOT_SALES over all transactions, with a KDE curve overlaid.
sns.displot(data=transaction_data['TOT_SALES'], kde=True)
<seaborn.axisgrid.FacetGrid at 0x7cbba5d46440>
# Keep only the float and int columns of transaction_data.
numericdata = transaction_data.select_dtypes(include=['float', 'int'])
# Preview the first five rows of the numeric-only frame.
numericdata.head(n=5)
{"type":"dataframe","variable_name":"numericdata"}
# Restrict to rows whose TOT_SALES is strictly below 8, then plot the
# distribution of that subset (histogram with a KDE curve).
x = numericdata.loc[numericdata['TOT_SALES'].lt(8.000)]
sns.displot(data=x['TOT_SALES'], kde=True)
<seaborn.axisgrid.FacetGrid at 0x7cbbe4e08730>
# Box plot of TOT_SALES for the filtered subset held in x.
sns.boxplot(data=x['TOT_SALES'])
<Axes: ylabel='TOT_SALES'>