# NOTE: This listing shows the source code of 1-analyze_sales.py. It was generated automatically from the original Python file for a browser-friendly HTML page.
"""
Sales Data Analysis and Visualization
Demonstrates: pandas, matplotlib, seaborn for business analytics
"""
import sys

# Switch both standard streams to UTF-8 text encoding.  The hasattr() guard
# skips any stream object that does not provide reconfigure().
# NOTE(review): presumably this avoids encoding errors on consoles whose
# default encoding is not UTF-8 - confirm against the target platform.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')
# import: Imports a module or library
# as: Creates an alias for an import
import pandas as pd
# import: Imports a module or library
# as: Creates an alias for an import
import matplotlib.pyplot as plt
# import: Imports a module or library
# as: Creates an alias for an import
import seaborn as sns
# import: Imports a module or library
# as: Creates an alias for an import
import numpy as np
# Load the data and derive the columns that the charts and summaries rely on.
df = pd.read_csv('dataset1_sales.csv')
df['Date'] = pd.to_datetime(df['Date'])
# Monthly period of each record; used as the grouping key for the trend charts.
df['Month'] = df['Date'].dt.to_period('M')
df['Profit'] = df['Revenue'] - df['Cost']
# Profit as a percentage of revenue.  A row with zero revenue has no defined
# margin: dividing by 0 would produce +/-inf (or NaN for 0/0), and a single
# inf makes the column mean infinite and cannot be binned by a histogram.
# Masking zero denominators to NaN leaves those margins as NaN, which pandas
# aggregations skip by default.
nonzero_revenue = df['Revenue'].where(df['Revenue'] != 0)
df['Profit_Margin'] = (df['Profit'] / nonzero_revenue) * 100
# Display basic statistics: a banner, the frame's structure, then the
# numeric summary produced by describe().
banner = "=" * 60
print(banner)
print("SALES DATA ANALYSIS")
print(banner)
print("\nDataset Overview:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\nBasic Statistics:")
print(df.describe())
# Create a comprehensive visualization dashboard: a single 16x12 figure that
# the following sections fill as a 3x3 grid of panels.
fig = plt.figure(figsize=(16, 12))

# 1. Revenue Trend Over Time - summed revenue per 'Month' group as a line.
plt.subplot(3, 3, 1)
monthly_revenue = df.groupby('Month')['Revenue'].sum()
# The index is converted to strings so the months are used as x tick labels.
month_labels = monthly_revenue.index.astype(str)
plt.plot(
    month_labels,
    monthly_revenue.values,
    marker='o',
    linewidth=2,
    color='#2E86AB',
)
plt.title('Revenue Trend Over Time', fontsize=12, fontweight='bold')
plt.xlabel('Month')
plt.ylabel('Revenue ($)')
plt.xticks(rotation=45, ha='right')
plt.grid(True, alpha=0.3)
# 2. Revenue by Region - horizontal bars of summed revenue, sorted ascending.
plt.subplot(3, 3, 2)
region_totals = df.groupby('Region')['Revenue'].sum()
region_revenue = region_totals.sort_values()
# One evenly spaced sample of the Spectral colormap per bar.
bar_count = len(region_revenue)
colors = plt.cm.Spectral(np.linspace(0, 1, bar_count))
plt.barh(region_revenue.index, region_revenue.values, color=colors)
plt.title('Total Revenue by Region', fontsize=12, fontweight='bold')
plt.xlabel('Revenue ($)')
# 3. Product Category Performance - each category's share of summed profit.
plt.subplot(3, 3, 3)
category_profit = df.groupby('Product_Category')['Profit'].sum()
# NOTE(review): pie() rejects negative wedge sizes, so this assumes every
# category has a non-negative total profit - confirm against the data.
pie_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']
plt.pie(
    category_profit.values,
    labels=category_profit.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=pie_colors,
)
plt.title('Profit Share by Category', fontsize=12, fontweight='bold')
# 4. Units Sold vs Revenue Scatter - one scatter series per product category
# so that each category gets its own colour and legend entry.
plt.subplot(3, 3, 4)
for category in df['Product_Category'].unique():
    in_category = df['Product_Category'] == category
    cat_data = df[in_category]
    plt.scatter(
        cat_data['Units_Sold'],
        cat_data['Revenue'],
        label=category,
        alpha=0.6,
        s=100,
    )
plt.title('Units Sold vs Revenue', fontsize=12, fontweight='bold')
plt.xlabel('Units Sold')
plt.ylabel('Revenue ($)')
plt.legend()
plt.grid(True, alpha=0.3)
# 5. Profit Margin Distribution - 20-bin histogram with the mean marked by a
# dashed red vertical line.
plt.subplot(3, 3, 5)
profit_margin = df['Profit_Margin']
# Computed once and reused for both the line position and its legend label.
mean_margin = profit_margin.mean()
plt.hist(profit_margin, bins=20, color='#95E1D3', edgecolor='black', alpha=0.7)
plt.axvline(
    mean_margin,
    color='red',
    linestyle='--',
    label=f'Mean: {mean_margin:.1f}%',
)
plt.title('Profit Margin Distribution', fontsize=12, fontweight='bold')
plt.xlabel('Profit Margin (%)')
plt.ylabel('Frequency')
plt.legend()
# 6. Promotion Impact on Revenue - average revenue per 'Promotion' group.
plt.subplot(3, 3, 6)
promo_revenue = df.groupby('Promotion')['Revenue'].mean()
# Build each bar label from the group's own key rather than from a fixed
# two-item list: a hard-coded list fails with a shape mismatch when the data
# holds a different number of groups, and ties labels to position only.
# 'Yes'/'No' are the keys the summary section of this script looks up; any
# other key is shown as-is.
promo_labels = {'No': 'No Promotion', 'Yes': 'With Promotion'}
bar_labels = [promo_labels.get(key, str(key)) for key in promo_revenue.index]
plt.bar(bar_labels, promo_revenue.values, color=['#E07A5F', '#81B29A'])
plt.title('Average Revenue: Promotion Impact', fontsize=12, fontweight='bold')
plt.ylabel('Average Revenue ($)')
# Annotate each bar with its value, placed 500 data units above the bar top.
for i, v in enumerate(promo_revenue.values):
    plt.text(i, v + 500, f'${v:,.0f}', ha='center', fontweight='bold')
# 7. Heatmap: Region vs Category Revenue - summed revenue for every
# (region, category) pair, annotated with whole-number values.
plt.subplot(3, 3, 7)
pivot_table = df.pivot_table(
    values='Revenue',
    index='Region',
    columns='Product_Category',
    aggfunc='sum',
)
colorbar_options = {'label': 'Revenue ($)'}
sns.heatmap(
    pivot_table,
    annot=True,
    fmt='.0f',
    cmap='YlOrRd',
    cbar_kws=colorbar_options,
)
plt.title('Revenue Heatmap: Region vs Category', fontsize=12, fontweight='bold')
# 8. Monthly Profit Trend - summed profit per 'Month' group, drawn as a line
# over a shaded area.  The x axis is the 0-based position of each month.
plt.subplot(3, 3, 8)
monthly_profit = df.groupby('Month')['Profit'].sum()
profit_values = monthly_profit.values
month_positions = range(len(monthly_profit))
plt.fill_between(month_positions, profit_values, alpha=0.3, color='#F38181')
# plot() is given y values only, so it uses the same 0..n-1 x positions.
plt.plot(profit_values, marker='o', color='#AA4465', linewidth=2)
plt.title('Monthly Profit Trend', fontsize=12, fontweight='bold')
plt.xlabel('Month Index')
plt.ylabel('Profit ($)')
plt.grid(True, alpha=0.3)
# 9. Customer Count by Product Category - each category's share of the summed
# 'Customer_Count' column.
plt.subplot(3, 3, 9)
category_customers = df.groupby('Product_Category')['Customer_Count'].sum()
slice_colors = ['#F4A261', '#E76F51', '#2A9D8F']
# With autopct set, pie() returns three lists: wedges, labels, percent texts.
wedges, texts, autotexts = plt.pie(
    category_customers.values,
    labels=category_customers.index,
    autopct='%1.1f%%',
    startangle=45,
    colors=slice_colors,
)
plt.title('Customer Distribution by Category', fontsize=12, fontweight='bold')
# Finalise the dashboard: tidy the panel spacing, write the image to disk at
# 300 dpi, report the file name, then open the interactive window.
dashboard_path = 'sales_analysis_dashboard.png'
plt.tight_layout()
plt.savefig(dashboard_path, dpi=300, bbox_inches='tight')
print("\n[OK] Dashboard saved as 'sales_analysis_dashboard.png'")
plt.show()
# Additional Statistical Analysis: print the headline figures to the console.
print("\n" + "=" * 60)
print("KEY INSIGHTS")
print("=" * 60)

print(f"\n1. Total Revenue: ${df['Revenue'].sum():,.2f}")
print(f"2. Total Profit: ${df['Profit'].sum():,.2f}")
print(f"3. Average Profit Margin: {df['Profit_Margin'].mean():.2f}%")

# Aggregate each grouping once and reuse it for both the winner's name and
# its value, instead of repeating the same groupby for every print.
revenue_by_region = df.groupby('Region')['Revenue'].sum()
print(f"\n4. Best Performing Region: {revenue_by_region.idxmax()}")
print(f" Revenue: ${revenue_by_region.max():,.2f}")

profit_by_category = df.groupby('Product_Category')['Profit'].sum()
print(f"\n5. Most Profitable Category: {profit_by_category.idxmax()}")
print(f" Profit: ${profit_by_category.max():,.2f}")

# Relative change in average revenue between promoted ('Yes') and
# non-promoted ('No') rows.  Series.get() returns None for a missing group,
# so data lacking either group (or with a zero baseline, which would divide
# by zero) reports N/A instead of raising KeyError or printing inf.
promo_impact = df.groupby('Promotion')['Revenue'].mean()
promo_avg = promo_impact.get('Yes')
baseline_avg = promo_impact.get('No')
if promo_avg is None or baseline_avg is None or baseline_avg == 0:
    print("\n6. Promotion Effectiveness: N/A (requires both 'Yes' and 'No' groups and a non-zero 'No' average)")
else:
    print(f"\n6. Promotion Effectiveness: {((promo_avg - baseline_avg) / baseline_avg * 100):.1f}% increase")
print("\n" + "=" * 60)