""".211

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
"""
# All imports live in one place: standard library first, then third-party.
# The original imported numpy and pandas twice and placed the second batch
# of imports after executable code.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Print the full path of every file found below the Kaggle input directory.
# os.walk ignores errors by default, so nothing is printed when that
# directory does not exist.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# --- Load the dataset and print a quick overview ---
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

# First rows of the frame.
print("Dataset Preview:", data.head(), sep="\n")

# Summary statistics for every column (include='all' covers non-numeric
# columns as well).
print("\nDescriptive Statistics:", data.describe(include='all'), sep="\n")

# Number of null entries in each column.
print("\nMissing Values:", data.isnull().sum(), sep="\n")
# --- Revenue over time ---
# Convert Transaction_Date to datetime in place (a later step uses its .dt
# accessor), then total Revenue for each distinct Transaction_Date value.
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()

# Line chart of the totals against their dates.
ax = plt.figure(figsize=(10, 5)).gca()
ax.plot(daily_revenue, label='Daily Revenue')
ax.set(title='Revenue Over Time', xlabel='Date', ylabel='Revenue')
ax.legend()
plt.show()
# --- Top 10 products by revenue ---
# Total Revenue per Product_ID, largest first, keeping the first ten.
top_products = (
    data.groupby('Product_ID')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
# The original left the y-axis unlabelled, unlike the other bar charts in
# this script; the bars show summed Revenue.
plt.ylabel('Revenue')
plt.show()
# --- Ad spend vs revenue ---
# Total Revenue per Category.
# NOTE(review): no later line in this script reads category_revenue; it is
# kept so the name stays defined for interactive use.
category_revenue = data.groupby('Category')['Revenue'].sum()

plt.figure(figsize=(10, 5))
# Fixed: the original read `x=data=['Ad_Spend']`, which is a SyntaxError and
# prevented the whole script from being parsed.
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()
# --- Distribution of ad click-through rate ---
# Histogram of Ad_CTR in 20 bins with a kernel density estimate overlaid.
plt.figure(figsize=(10, 5))
ax = sns.histplot(data=data, x='Ad_CTR', bins=20, kde=True)
ax.set(
    title='Distribution of Ad Click-Through Rate (CTR)',
    xlabel='CTR',
    ylabel='Frequency',
)
plt.show()
# --- Revenue by region ---
# Total Revenue for each Region, drawn as one bar per region.
region_revenue = data.groupby('Region')['Revenue'].sum()

plt.figure(figsize=(10, 5))
ax = region_revenue.plot.bar()
ax.set(title='Revenue by Region', xlabel='Region', ylabel='Revenue')
plt.show()
# --- Monthly revenue ---
# Requires Transaction_Date to already be a datetime column (.dt accessor).
# dt.month keeps only the month number, so the year is discarded.
# NOTE(review): if the data covers more than one year, the same month of
# different years is summed into one bar -- confirm that is intended.
data['Month'] = data['Transaction_Date'].dt.month
monthly_revenue = data.groupby('Month')['Revenue'].sum()

plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
# Fixed the misspelled chart title (was 'Monthly Reveneu Trend').
plt.title('Monthly Revenue Trend')
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()
# --- Discount vs revenue ---
# One point per row: Discount_Applied on x, Revenue on y.
plt.figure(figsize=(10, 5))
ax = sns.scatterplot(data=data, x='Discount_Applied', y='Revenue')
ax.set(
    title='Discount Applied vs Revenue',
    xlabel='Discount (%)',
    ylabel='Revenue',
)
plt.show()
# --- Clicks vs revenue ---
# One point per row: Clicks on x, Revenue on y. The x-axis label is not set
# here, so whatever label seaborn assigns by default is shown.
plt.figure(figsize=(10, 5))
ax = sns.scatterplot(data=data, x='Clicks', y='Revenue')
ax.set(title='Clicks vs Revenue', ylabel='Revenue')
plt.show()
# --- Distribution of conversion rate ---
# Histogram of Conversion_Rate in 20 bins with a kernel density estimate
# overlaid.
plt.figure(figsize=(10, 5))
ax = sns.histplot(data=data, x='Conversion_Rate', bins=20, kde=True)
ax.set(
    title='Distribution of Conversion Rate',
    xlabel='Conversion Rate',
    ylabel='Frequency',
)
plt.show()
# --- Conversion rate vs revenue ---
# One point per row: Conversion_Rate on x, Revenue on y.
plt.figure(figsize=(10, 5))
ax = sns.scatterplot(data=data, x='Conversion_Rate', y='Revenue')
ax.set(
    title='Conversion Rate vs Revenue',
    xlabel='Conversion Rate',
    ylabel='Revenue',
)
plt.show()
# --- Revenue contribution by region ---
# Each region's share of the overall revenue, expressed as a percentage.
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100

plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
# Fixed the misspelled chart title (was '... by Reigion (%)').
plt.title('Revenue Contribution by Region (%)')
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()
# --- Ad spend efficiency by category ---
# Revenue earned per unit of ad spend, stored as a new column.
# Rows whose Ad_Spend is 0 are masked to NaN before dividing, so they yield
# NaN instead of +/-inf (the original divided directly and could pass
# infinite values to the box plot).
data['Ad_Efficiency'] = data['Revenue'] / data['Ad_Spend'].where(
    data['Ad_Spend'] != 0
)

plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()
# --- Distribution of units sold ---
# Histogram of Units_Sold in 20 bins with a kernel density estimate overlaid.
plt.figure(figsize=(10, 5))
ax = sns.histplot(data=data, x='Units_Sold', bins=20, kde=True)
ax.set(
    title='Distribution of Units Sold',
    xlabel='Units Sold',
    ylabel='Frequency',
)
plt.show()
# --- Units sold vs revenue ---
# One point per row: Units_Sold on x, Revenue on y.
plt.figure(figsize=(10, 5))
ax = sns.scatterplot(data=data, x='Units_Sold', y='Revenue')
ax.set(title='Units Sold vs Revenue', xlabel='Units Sold', ylabel='Revenue')
plt.show()
# --- Units sold by category ---
# Total Units_Sold for each Category, drawn as one bar per category.
units_by_category = data.groupby('Category')['Units_Sold'].sum()

plt.figure(figsize=(10, 5))
ax = units_by_category.plot.bar()
ax.set(title='Units Sold by Category', xlabel='Category', ylabel='Units Sold')
plt.show()
# --- Revenue per impression by category ---
# Revenue divided by the number of impressions, stored as a new column.
# Impressions is cast to float as in the original; rows with 0 impressions
# are masked to NaN before dividing, so they yield NaN instead of +/-inf
# (the original divided directly and could pass infinite values to the box
# plot).
impressions = data['Impressions'].astype(float)
data['Revenue_per_Impression'] = data['Revenue'] / impressions.where(
    impressions != 0
)

plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()