3-analyze_weather

This page shows the source code for 3-analyze_weather.py in browser-friendly HTML format. It was generated automatically from the original Python file.

Source file: 3-analyze_weather.py
Folder: Chapter-4-Datasets-Medium
"""
Weather & Climate Analysis
Demonstrates: time series analysis, geographic comparisons, matplotlib/seaborn dashboard plots
"""

# import: Imports a module or library
import sys

# Switch both console streams to UTF-8 when they support reconfigure().
# Presumably this keeps the non-ASCII output printed later (the degree sign
# in "°C") from failing on consoles with a narrower default encoding.
# Stream objects without a reconfigure() method are left untouched.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

# import: Imports a module or library
# as: Creates an alias for an import
import pandas as pd

# import: Imports a module or library
# as: Creates an alias for an import
import matplotlib.pyplot as plt

# import: Imports a module or library
# as: Creates an alias for an import
import seaborn as sns

# import: Imports a module or library
# as: Creates an alias for an import
import numpy as np

# import: Imports a module or library
# from: Imports specific items from a module
from datetime import datetime

# Load data
# Read the raw observations and derive the calendar columns that the plots
# and text summaries below group by.
df = pd.read_csv('dataset3_weather.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['Month'] = df['Date'].dt.month

# month_name() returns English names unless a locale is passed explicitly,
# so the values keep matching the English month list used to order the
# precipitation chart regardless of the LC_TIME setting of the process
# (strftime('%B') follows the process locale instead).
df['Month_Name'] = df['Date'].dt.month_name()

# Meteorological seasons keyed by month number.
# NOTE(review): this is the northern-hemisphere convention - confirm that it
# is appropriate for every city in the dataset.
SEASON_BY_MONTH = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}

# map() leaves rows whose date is missing as NaN. A chained if/else with a
# catch-all final branch would silently file those rows under 'Fall' and
# skew the seasonal averages.
df['Season'] = df['Month'].map(SEASON_BY_MONTH)

# Report banner followed by a quick structural overview of the dataset.
print("=" * 60)
print("WEATHER & CLIMATE ANALYSIS")
print("=" * 60)

print("\nDataset Overview:")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it must not be wrapped in print() (that would emit a stray "None" line).
df.info()

print("\nCities in dataset:", df['City'].unique())
print("\nDate range:", df['Date'].min(), "to", df['Date'].max())

# Create comprehensive visualization
# A single 18x14 inch figure; the panels below fill a 4-row by 3-column grid.
fig = plt.figure(figsize=(18, 14))

# 1. Temperature Trends by City
# One line per city, each drawn in chronological order.
ax = plt.subplot(4, 3, 1)
for city in df['City'].unique():
    city_data = df.loc[df['City'] == city].sort_values('Date')
    ax.plot(
        city_data['Date'],
        city_data['Temperature_C'],
        marker='o',
        linewidth=2,
        label=city,
    )
ax.set_xlabel('Date')
ax.set_ylabel('Temperature (°C)')
ax.set_title('Temperature Trends by City', fontsize=12, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)

# 2. Average Temperature by City
# Horizontal bars sorted ascending by mean temperature, coloured with evenly
# spaced samples (0.2 to 0.8) of the reversed RdYlBu colormap.
ax = plt.subplot(4, 3, 2)
avg_temp = df.groupby('City')['Temperature_C'].mean().sort_values()
colors_temp = plt.cm.RdYlBu_r(np.linspace(0.2, 0.8, len(avg_temp)))
ax.barh(avg_temp.index, avg_temp.values, color=colors_temp)
ax.set_xlabel('Average Temperature (°C)')
ax.set_title('Average Temperature by City', fontsize=12, fontweight='bold')

# Write each bar's value just past its end (0.5 data units to the right).
for row, mean_temp in enumerate(avg_temp.values):
    ax.text(mean_temp + 0.5, row, f'{mean_temp:.1f}°C', va='center')

# 3. Seasonal Temperature Variation
# Grouped bars: one cluster per city, one bar per season.
ax = plt.subplot(4, 3, 3)
seasonal_temp = (
    df.groupby(['City', 'Season'])['Temperature_C']
    .mean()
    .unstack()  # seasons become columns, cities stay on the index
)
seasonal_temp.plot.bar(ax=ax, width=0.8)
ax.set_ylabel('Average Temperature (°C)')
ax.set_title('Seasonal Temperature by City', fontsize=12, fontweight='bold')
ax.legend(title='Season')
plt.xticks(rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

# 4. Precipitation Patterns
# Mean precipitation per calendar month, one line per city.
ax = plt.subplot(4, 3, 4)

# Grouping by month name sorts alphabetically, so the rows are put back into
# calendar order afterwards; months absent from the data become NaN rows.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
monthly_precip = (
    df.groupby(['Month_Name', 'City'])['Precipitation_mm']
    .mean()
    .unstack()
)
monthly_precip = monthly_precip.reindex(month_order)

monthly_precip.plot.line(marker='o', ax=ax, linewidth=2)
ax.set_xlabel('Month')
ax.set_ylabel('Average Precipitation (mm)')
ax.set_title('Monthly Precipitation Patterns', fontsize=12, fontweight='bold')
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(title='City', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45, ha='right')
ax.grid(True, alpha=0.3)

# 5. Humidity Distribution
# One box per city showing the spread of its humidity readings.
ax = plt.subplot(4, 3, 5)
cities = df['City'].unique()

# Drop missing readings per city: NaN values would turn the box statistics
# into NaN and leave that city's box blank.
humidity_data = [
    df.loc[df['City'] == city, 'Humidity_Percent'].dropna().values
    for city in cities
]

# The `labels=` keyword of boxplot() is deprecated since Matplotlib 3.9 (it
# was renamed `tick_labels`), so the city names are applied through xticks()
# below, which works on both old and new Matplotlib versions.
bp = ax.boxplot(humidity_data, patch_artist=True)

# Cycle through the palette so every box is coloured even when there are more
# cities than palette entries.
colors_box = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']
for idx, patch in enumerate(bp['boxes']):
    patch.set_facecolor(colors_box[idx % len(colors_box)])

ax.set_ylabel('Humidity (%)')
ax.set_title('Humidity Distribution by City', fontsize=12, fontweight='bold')
# boxplot() places the boxes at x = 1..N by default.
plt.xticks(range(1, len(cities) + 1), cities, rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

# 6. Air Quality Comparison
# Mean AQI per city, sorted ascending, with bars coloured by AQI band.
ax = plt.subplot(4, 3, 6)
aqi_avg = df.groupby('City')['Air_Quality_Index'].mean().sort_values()

# Band limits are inclusive so the bar colours agree with the legend drawn
# below ("Good (0-50)", "Moderate (51-100)"): a mean of exactly 50 is Good
# (green) and exactly 100 is Moderate (orange); anything above is red.
colors_aqi = [
    '#2ECC71' if x <= 50 else '#F39C12' if x <= 100 else '#E74C3C'
    for x in aqi_avg.values
]

positions = range(len(aqi_avg))
ax.bar(positions, aqi_avg.values, color=colors_aqi)
plt.xticks(positions, aqi_avg.index, rotation=45, ha='right')
ax.set_ylabel('Air Quality Index')
ax.set_title('Average Air Quality by City', fontsize=12, fontweight='bold')

# Dashed reference lines at the upper limit of each band.
ax.axhline(y=50, color='green', linestyle='--', label='Good (0-50)')
ax.axhline(y=100, color='orange', linestyle='--', label='Moderate (51-100)')
ax.legend()
ax.grid(axis='y', alpha=0.3)

# 7. Temperature vs Humidity Scatter
# One semi-transparent point cloud per city.
ax = plt.subplot(4, 3, 7)
for city in df['City'].unique():
    city_data = df.loc[df['City'] == city]
    ax.scatter(
        city_data['Temperature_C'],
        city_data['Humidity_Percent'],
        label=city,
        alpha=0.6,
        s=80,
    )
ax.set_xlabel('Temperature (°C)')
ax.set_ylabel('Humidity (%)')
ax.set_title('Temperature vs Humidity', fontsize=12, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

# 8. Wind Speed Patterns
# Mean wind speed per month number across all cities, drawn as a line with a
# translucent fill underneath.
ax = plt.subplot(4, 3, 8)
wind_monthly = df.groupby('Month')['Wind_Speed_kmh'].mean()

wind_color = '#3498DB'
ax.plot(
    wind_monthly.index,
    wind_monthly.values,
    marker='o',
    markersize=8,
    linewidth=2,
    color=wind_color,
)
ax.fill_between(wind_monthly.index, wind_monthly.values, alpha=0.3, color=wind_color)
ax.set_xlabel('Month')
ax.set_ylabel('Wind Speed (km/h)')
ax.set_title('Monthly Wind Speed Average', fontsize=12, fontweight='bold')
# Always show ticks for all twelve months, whether or not each has data.
plt.xticks(range(1, 13))
ax.grid(True, alpha=0.3)

# 9. UV Index by Season
# Mean UV index per season, shown in a fixed Spring -> Winter order.
ax = plt.subplot(4, 3, 9)
season_order = ['Spring', 'Summer', 'Fall', 'Winter']
seasonal_uv = df.groupby('Season')['UV_Index'].mean()
# reindex() imposes the display order; a season with no rows becomes NaN.
seasonal_uv = seasonal_uv.reindex(season_order)

colors_uv = ['#A8E6CF', '#FFD3B6', '#FFAAA5', '#B4D4FF']
ax.bar(seasonal_uv.index, seasonal_uv.values, color=colors_uv)
ax.set_ylabel('Average UV Index')
ax.set_title('UV Index by Season', fontsize=12, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# 10. Heatmap: City Weather Metrics
# Per-city means of four metrics; transposed so that metrics are the rows and
# cities the columns. All cells share one colour scale.
ax = plt.subplot(4, 3, 10)
metric_columns = ['Temperature_C', 'Humidity_Percent',
                  'Precipitation_mm', 'Wind_Speed_kmh']
city_metrics = df.groupby('City')[metric_columns].mean()
sns.heatmap(
    city_metrics.T,
    annot=True,
    fmt='.1f',
    cmap='YlOrRd',
    cbar_kws={'label': 'Value'},
    ax=ax,
)
ax.set_title('Weather Metrics by City', fontsize=12, fontweight='bold')
ax.set_xlabel('City')

# 11. Pressure Trends
# Mean pressure per date, pivoted so each city is a column, then drawn as one
# line per city.
ax = plt.subplot(4, 3, 11)
pressure_city = (
    df.groupby(['Date', 'City'])['Pressure_hPa']
    .mean()
    .unstack()
)
for city, pressure in pressure_city.items():
    ax.plot(pressure_city.index, pressure, label=city, linewidth=1.5)
ax.set_xlabel('Date')
ax.set_ylabel('Pressure (hPa)')
ax.set_title('Atmospheric Pressure Trends', fontsize=12, fontweight='bold')
ax.legend()
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)

# 12. Extreme Weather Days
# Per-city counts of rows above 30 °C and below 5 °C, as side-by-side bars.
ax = plt.subplot(4, 3, 12)

# Summing a boolean mask per city counts the rows that satisfy the condition.
extreme_hot = df['Temperature_C'].gt(30).groupby(df['City']).sum()
extreme_cold = df['Temperature_C'].lt(5).groupby(df['City']).sum()

# Both series are grouped by the same column, so they share one city index;
# the two bars of a pair are shifted half a bar width left and right of it.
x = np.arange(len(extreme_hot.index))
width = 0.35
ax.bar(x - width / 2, extreme_hot.values, width,
       label='Hot Days (>30°C)', color='#E74C3C')
ax.bar(x + width / 2, extreme_cold.values, width,
       label='Cold Days (<5°C)', color='#3498DB')
ax.set_xlabel('City')
ax.set_ylabel('Number of Days')
ax.set_title('Extreme Weather Days', fontsize=12, fontweight='bold')
plt.xticks(x, extreme_hot.index, rotation=45, ha='right')
ax.legend()
ax.grid(axis='y', alpha=0.3)

# Tidy the panel spacing, write the dashboard to disk at 300 dpi, then open
# it in the interactive viewer. The file is saved before show() is called.
fig.tight_layout()
fig.savefig('weather_analysis_dashboard.png', dpi=300, bbox_inches='tight')
print("\n[OK] Dashboard saved as 'weather_analysis_dashboard.png'")
plt.show()

# Statistical Summary
# Text report that complements the dashboard.
print("\n" + "=" * 60)
print("CLIMATE INSIGHTS")
print("=" * 60)

# Highest and lowest recorded temperature per city, and the spread between.
print("\n1. TEMPERATURE EXTREMES:")
for city in df['City'].unique():
    temps = df.loc[df['City'] == city, 'Temperature_C']
    hottest = temps.max()
    coldest = temps.min()
    print(f"   {city}:")
    print(f"      Max: {hottest:.1f}°C")
    print(f"      Min: {coldest:.1f}°C")
    print(f"      Range: {hottest - coldest:.1f}°C")

# Total precipitation per city, wettest city first.
print("\n2. PRECIPITATION LEADERS:")
total_precip = (
    df.groupby('City')['Precipitation_mm']
    .sum()
    .sort_values(ascending=False)
)
for city_name, total_mm in total_precip.items():
    print(f"   {city_name}: {total_mm:.1f} mm")

# Cities ranked by mean Air Quality Index, lowest (cleanest) first.
print("\n3. AIR QUALITY RANKING (Best to Worst):")
aqi_ranking = df.groupby('City')['Air_Quality_Index'].mean().sort_values()

for i, (city, aqi) in enumerate(aqi_ranking.items(), 1):
    # Band limits are inclusive, matching the bands the dashboard legend
    # advertises ("Good (0-50)", "Moderate (51-100)"): exactly 50 is still
    # Good and exactly 100 is still Moderate.
    if aqi <= 50:
        status = "Good"
    elif aqi <= 100:
        status = "Moderate"
    else:
        status = "Unhealthy"
    print(f"   {i}. {city}: {aqi:.1f} ({status})")

# Mean temperature and precipitation per season, pooled over all cities.
print("\n4. SEASONAL PATTERNS:")
for season in ['Winter', 'Spring', 'Summer', 'Fall']:
    season_data = df.loc[df['Season'] == season]
    mean_temp = season_data['Temperature_C'].mean()
    mean_precip = season_data['Precipitation_mm'].mean()
    print(f"   {season}: Avg Temp {mean_temp:.1f}°C, "
          f"Avg Precip {mean_precip:.1f}mm")

# Closing rule for the report.
print("\n" + "=" * 60)