# This page shows the source code for 4-analyze_health.py in browser-friendly HTML format. It was generated automatically from the original Python file.
"""
Health & Fitness Analysis
Demonstrates: regression analysis, advanced correlations, health metrics visualization
"""
import sys
# Switch stdout, then stderr, to UTF-8 when the stream object offers a
# `reconfigure` method; streams without it are left untouched.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
# Load data
df = pd.read_csv('dataset4_health.csv')

# Calculate additional metrics
# Calorie balance is consumed minus burned, so a positive value is a surplus.
df['Calorie_Balance'] = df['Calories_Consumed'] - df['Calories_Burned']

# Bucket daily exercise minutes into four ordered levels.
# pd.cut intervals are right-closed by default, so without include_lowest a
# value of exactly 0 minutes would fall outside the first bin and become NaN.
# The open-ended last bin keeps values above 100 minutes in 'Very High'
# instead of silently dropping them from every per-category summary.
df['Exercise_Category'] = pd.cut(
    df['Exercise_Minutes_Daily'],
    bins=[0, 30, 45, 60, np.inf],
    labels=['Low', 'Moderate', 'High', 'Very High'],
    include_lowest=True,
)

print("=" * 60)
print("HEALTH & FITNESS ANALYSIS")
print("=" * 60)
print("\nDataset Overview:")
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() would emit an extra "None" line after the report.
df.info()
print("\nBasic Statistics:")
print(df.describe())
# Create comprehensive visualization: every panel below is placed on this
# figure through plt.subplot(4, 3, k).
fig = plt.figure(figsize=(18, 14))

# 1. Comprehensive Correlation Heatmap
heat_ax = plt.subplot(4, 3, 1)
health_metrics = [
    'Age',
    'BMI',
    'Exercise_Minutes_Daily',
    'Steps_Daily',
    'Sleep_Hours',
    'Water_Intake_Liters',
    'Heart_Rate_Avg',
    'Blood_Pressure_Sys',
    'Stress_Level',
]
# Pairwise correlation between the selected numeric columns.
corr_matrix = df.loc[:, health_metrics].corr()
heatmap_options = dict(
    annot=True,
    fmt='.2f',
    cmap='RdYlGn_r',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={'label': 'Correlation'},
)
sns.heatmap(corr_matrix, ax=heat_ax, **heatmap_options)
heat_ax.set_title('Health Metrics Correlation Matrix', fontsize=12, fontweight='bold')
# Slant the column labels so the long metric names stay readable.
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
# 2. Exercise vs Calories Burned
exercise_ax = plt.subplot(4, 3, 2)
exercise_minutes = df['Exercise_Minutes_Daily']
calories_burned = df['Calories_Burned']
# Points are coloured by the Age column; the colorbar is the key for that.
age_points = plt.scatter(exercise_minutes, calories_burned,
                         c=df['Age'], cmap='viridis', alpha=0.6, s=100)
plt.colorbar(age_points, label='Age')

# Linear regression: calories burned as a function of exercise minutes.
X = exercise_minutes.values.reshape(-1, 1)
y = calories_burned.values
model = LinearRegression().fit(X, y)

# Predict on the sorted x values so the fitted line is drawn left to right.
sorted_minutes = exercise_minutes.sort_values()
fitted_calories = model.predict(sorted_minutes.values.reshape(-1, 1))
r_squared = model.score(X, y)
exercise_ax.plot(sorted_minutes, fitted_calories,
                 'r--', linewidth=2, label=f'R² = {r_squared:.3f}')

exercise_ax.set_xlabel('Exercise Minutes Daily')
exercise_ax.set_ylabel('Calories Burned')
exercise_ax.set_title('Exercise vs Calories Burned', fontsize=12, fontweight='bold')
exercise_ax.legend()
exercise_ax.grid(True, alpha=0.3)
# 3. BMI Distribution
plt.subplot(4, 3, 3)
# Category -> colour. Colours are looked up by label below, so a wedge keeps
# its colour no matter how the counts are ordered.
bmi_palette = {
    'Underweight': '#74B9FF',
    'Normal': '#55EFC4',
    'Overweight': '#FDCB6E',
    'Obese': '#FF7675',
}
# right=False makes the bins left-closed: [18.5, 25) Normal, [25, 30)
# Overweight, [30, inf) Obese, following the WHO adult cut-offs where 25 and
# 30 are lower bounds. The open-ended top bin keeps BMI above 40 in 'Obese'
# instead of dropping it as NaN.
bmi_categories = pd.cut(
    df['BMI'],
    bins=[0, 18.5, 25, 30, np.inf],
    labels=list(bmi_palette),
    right=False,
)
# value_counts() sorts by frequency; sort_index() restores category order so
# wedges run Underweight -> Obese.
bmi_counts = bmi_categories.value_counts().sort_index()
# Empty categories would only add overlapping "0.0%" labels to the pie.
bmi_counts = bmi_counts[bmi_counts > 0]
colors_bmi = [bmi_palette[label] for label in bmi_counts.index]
plt.pie(bmi_counts.values, labels=bmi_counts.index, autopct='%1.1f%%',
        colors=colors_bmi, startangle=90)
plt.title('BMI Distribution', fontsize=12, fontweight='bold')
# 4. Steps vs Heart Rate
steps_ax = plt.subplot(4, 3, 4)
point_style = dict(alpha=0.6, s=80)
# One scatter series per distinct Gender value, in order of first appearance,
# so each gets its own colour and legend entry.
for gender in df['Gender'].unique():
    is_gender = df['Gender'] == gender
    gender_data = df[is_gender]
    steps_ax.scatter(gender_data['Steps_Daily'], gender_data['Heart_Rate_Avg'],
                     label=gender, **point_style)
steps_ax.set_xlabel('Steps Daily')
steps_ax.set_ylabel('Average Heart Rate')
steps_ax.set_title('Daily Steps vs Heart Rate by Gender', fontsize=12, fontweight='bold')
steps_ax.legend()
steps_ax.grid(True, alpha=0.3)
# 5. Sleep Hours Impact on Stress
sleep_ax = plt.subplot(4, 3, 5)
# Mean stress level for each distinct Sleep_Hours value.
sleep_stress = df.groupby('Sleep_Hours')['Stress_Level'].mean()
sleep_hours = sleep_stress.index
mean_stress = sleep_stress.values
stress_color = '#6C5CE7'
sleep_ax.plot(sleep_hours, mean_stress, marker='o',
              linewidth=2, markersize=10, color=stress_color)
# Shade the area between the line and zero.
sleep_ax.fill_between(sleep_hours, mean_stress, alpha=0.3, color=stress_color)
sleep_ax.set_xlabel('Sleep Hours')
sleep_ax.set_ylabel('Average Stress Level')
sleep_ax.set_title('Sleep Duration vs Stress Level', fontsize=12, fontweight='bold')
sleep_ax.grid(True, alpha=0.3)
# 6. Activity Type Comparison
ax = plt.subplot(4, 3, 6)
activity_metrics = df.groupby('Activity_Type')[['Calories_Burned', 'Steps_Daily']].mean()
x = np.arange(activity_metrics.shape[0])
width = 0.35
half_width = width / 2
calories_color = '#FF6B6B'
steps_color = '#4ECDC4'

# The two metrics are drawn against separate y-axes: calories on the left
# axis, steps on a twin right axis, with the bars placed side by side.
ax2 = ax.twinx()
bars1 = ax.bar(x - half_width, activity_metrics['Calories_Burned'], width,
               label='Calories Burned', color=calories_color)
bars2 = ax2.bar(x + half_width, activity_metrics['Steps_Daily'], width,
                label='Steps Daily', color=steps_color)

ax.set_xlabel('Activity Type')
# Each y-label is tinted to match the bars it measures.
ax.set_ylabel('Calories Burned', color=calories_color)
ax2.set_ylabel('Steps Daily', color=steps_color)
ax.set_xticks(x)
ax.set_xticklabels(activity_metrics.index, rotation=45, ha='right')
plt.title('Activity Type Performance', fontsize=12, fontweight='bold')
# One legend per axis, in opposite corners.
ax.legend(loc='upper left')
ax2.legend(loc='upper right')
# 7. Water Intake vs Performance
plt.subplot(4, 3, 7)
# The top bin is open-ended so intakes above 4 L land in 'Excellent (>3L)',
# as its label promises, instead of being dropped as NaN. include_lowest
# keeps a recorded 0 L in the first bin.
water_groups = pd.cut(
    df['Water_Intake_Liters'],
    bins=[0, 2, 2.5, 3, np.inf],
    labels=['Low (<2L)', 'Moderate (2-2.5L)', 'Good (2.5-3L)', 'Excellent (>3L)'],
    include_lowest=True,
)
# observed=False keeps all four categories in the result, even empty ones, so
# the bars stay aligned with colors_water. Passing it explicitly also avoids
# the pandas FutureWarning about the changing default.
water_perf = df.groupby(water_groups, observed=False)['Calories_Burned'].mean()
colors_water = ['#FF6B6B', '#FFA07A', '#98D8C8', '#6BCB77']
plt.bar(range(len(water_perf)), water_perf.values, color=colors_water)
plt.xticks(range(len(water_perf)), water_perf.index, rotation=45, ha='right')
plt.ylabel('Average Calories Burned')
plt.title('Hydration Impact on Performance', fontsize=12, fontweight='bold')
plt.grid(axis='y', alpha=0.3)
# 8. Blood Pressure Analysis
plt.subplot(4, 3, 8)
# Mean systolic and diastolic pressure per exercise level.
# Exercise_Category is categorical; observed=False keeps every level on the
# x-axis, even one with no rows, and makes the result independent of the
# changing pandas default (which otherwise raises a FutureWarning).
exercise_bp = df.groupby('Exercise_Category', observed=False)[
    ['Blood_Pressure_Sys', 'Blood_Pressure_Dia']
].mean()
x = np.arange(len(exercise_bp))
width = 0.35
# Systolic and diastolic bars sit side by side around each tick.
plt.bar(x - width/2, exercise_bp['Blood_Pressure_Sys'], width,
        label='Systolic', color='#E74C3C')
plt.bar(x + width/2, exercise_bp['Blood_Pressure_Dia'], width,
        label='Diastolic', color='#3498DB')
plt.xlabel('Exercise Level')
plt.ylabel('Blood Pressure (mmHg)')
plt.title('Exercise Level vs Blood Pressure', fontsize=12, fontweight='bold')
plt.xticks(x, exercise_bp.index)
plt.legend()
plt.grid(axis='y', alpha=0.3)
# 9. Age vs BMI with Exercise Overlay
bmi_ax = plt.subplot(4, 3, 9)
# Point colour encodes daily exercise minutes; the colorbar is the key.
scatter = plt.scatter(
    df['Age'],
    df['BMI'],
    c=df['Exercise_Minutes_Daily'],
    cmap='RdYlGn',
    alpha=0.6,
    s=100,
)
plt.colorbar(scatter, label='Exercise Minutes')
bmi_ax.set_xlabel('Age')
bmi_ax.set_ylabel('BMI')
bmi_ax.set_title('Age vs BMI (colored by Exercise)', fontsize=12, fontweight='bold')
# Horizontal reference line at BMI 25.
bmi_ax.axhline(y=25, color='orange', linestyle='--', label='Overweight threshold')
bmi_ax.legend()
bmi_ax.grid(True, alpha=0.3)
# 10. Calorie Balance Distribution
balance_ax = plt.subplot(4, 3, 10)
calorie_balance = df['Calorie_Balance']
mean_balance = calorie_balance.mean()
balance_ax.hist(calorie_balance, bins=30, color='#95E1D3', edgecolor='black', alpha=0.7)
# Two vertical markers: the zero (balanced) point and the sample mean.
marker_style = dict(linestyle='--', linewidth=2)
balance_ax.axvline(0, color='red', label='Balanced', **marker_style)
balance_ax.axvline(mean_balance, color='blue',
                   label=f'Mean: {mean_balance:.0f}', **marker_style)
balance_ax.set_xlabel('Calorie Balance (Consumed - Burned)')
balance_ax.set_ylabel('Frequency')
balance_ax.set_title('Calorie Balance Distribution', fontsize=12, fontweight='bold')
balance_ax.legend()
# 11. Multi-metric Performance by Gender
gender_ax = plt.subplot(4, 3, 11)
gender_columns = ['Exercise_Minutes_Daily', 'Steps_Daily',
                  'Sleep_Hours', 'Water_Intake_Liters']
gender_metrics = df.groupby('Gender')[gender_columns].mean()

# Normalize for comparison: each metric column is standardised across the
# gender rows so metrics on different scales can share one axis.
# NOTE(review): if the data holds only two Gender groups, every standardised
# column reduces to -1/+1 - confirm this is the comparison intended.
scaler = StandardScaler()
gender_scaled = scaler.fit_transform(gender_metrics)
gender_metrics_norm = pd.DataFrame(
    gender_scaled,
    index=gender_metrics.index,
    columns=gender_metrics.columns,
)

# Transpose so metrics run along the x-axis with one bar per gender.
gender_metrics_norm.transpose().plot(kind='bar', ax=plt.gca(), width=0.7)
gender_ax.set_ylabel('Normalized Score')
gender_ax.set_title('Health Metrics Comparison by Gender', fontsize=12, fontweight='bold')
plt.xticks(rotation=45, ha='right')
gender_ax.legend(title='Gender')
gender_ax.grid(axis='y', alpha=0.3)
# 12. Stress Level Analysis
stress_ax = plt.subplot(4, 3, 12)
stress_columns = ['Exercise_Minutes_Daily', 'Sleep_Hours', 'BMI']
# Mean of each factor for every distinct stress level.
stress_factors = df.groupby('Stress_Level')[stress_columns].mean()
# Re-fit the existing module-level scaler on this table: each factor column
# is standardised across the stress levels so the three lines share a scale.
stress_scaled = scaler.fit_transform(stress_factors)
stress_factors_norm = pd.DataFrame(
    stress_scaled,
    index=stress_factors.index,
    columns=stress_factors.columns,
)
stress_factors_norm.plot(marker='o', linewidth=2, ax=plt.gca())
stress_ax.set_xlabel('Stress Level (1-7)')
stress_ax.set_ylabel('Normalized Score')
stress_ax.set_title('Factors Contributing to Stress', fontsize=12, fontweight='bold')
stress_ax.legend(title='Metric')
stress_ax.grid(True, alpha=0.3)
# Finalise the dashboard: tighten the layout, write the PNG, then display.
dashboard_path = 'health_analysis_dashboard.png'
plt.tight_layout()
plt.savefig(dashboard_path, dpi=300, bbox_inches='tight')
print(f"\n[OK] Dashboard saved as '{dashboard_path}'")
plt.show()
# Statistical Analysis
section_rule = "=" * 60
print("\n" + section_rule)
print("HEALTH INSIGHTS & CORRELATIONS")
print(section_rule)

# Key correlations: each predictor against Calories_Burned, strongest
# positive correlation first.
print("\n1. KEY CORRELATIONS WITH CALORIES BURNED:")
calorie_predictors = ['Exercise_Minutes_Daily', 'Steps_Daily', 'Sleep_Hours',
                      'Water_Intake_Liters']
calorie_corr = (
    df[calorie_predictors]
    .corrwith(df['Calories_Burned'])
    .sort_values(ascending=False)
)
for metric, corr in calorie_corr.items():
    print(f" {metric}: {corr:.3f}")
# Exercise impact: average BMI, stress and heart rate for each exercise level.
print("\n2. EXERCISE LEVEL IMPACT:")
for level in ('Low', 'Moderate', 'High', 'Very High'):
    in_level = df['Exercise_Category'] == level
    level_data = df[in_level]
    avg_bmi = level_data['BMI'].mean()
    avg_stress = level_data['Stress_Level'].mean()
    avg_heart_rate = level_data['Heart_Rate_Avg'].mean()
    print(f" {level}: Avg BMI {avg_bmi:.1f}, "
          f"Avg Stress {avg_stress:.1f}, "
          f"Avg Heart Rate {avg_heart_rate:.1f}")
# Gender comparison: per-gender means of four headline metrics.
print("\n3. GENDER HEALTH METRICS:")
gender_stat_columns = ['BMI', 'Exercise_Minutes_Daily', 'Sleep_Hours', 'Stress_Level']
gender_stats = df.groupby('Gender')[gender_stat_columns].mean()
print(gender_stats)
# Optimal ranges: profile of the top quarter of rows by calories burned.
print("\n4. OPTIMAL HEALTH PROFILE (Top 25% Performers):")
# Size the group as 25% of the dataset (rounded up, at least one row) so the
# selection matches the heading for any dataset size. A fixed nlargest(25)
# is the top 25% only when the file has exactly 100 rows.
top_count = max(1, int(np.ceil(len(df) * 0.25)))
top_performers = df.nlargest(top_count, 'Calories_Burned')
print(f" Exercise: {top_performers['Exercise_Minutes_Daily'].mean():.1f} min/day")
print(f" Steps: {top_performers['Steps_Daily'].mean():.0f} steps/day")
print(f" Sleep: {top_performers['Sleep_Hours'].mean():.1f} hours")
print(f" Water: {top_performers['Water_Intake_Liters'].mean():.1f} liters")
print(f" Average BMI: {top_performers['BMI'].mean():.1f}")
print(f" Average Stress: {top_performers['Stress_Level'].mean():.1f}")
# Activity type effectiveness: per-activity means rounded to one decimal,
# listed from highest to lowest average calories burned.
print("\n5. MOST EFFECTIVE ACTIVITY TYPES:")
effectiveness_aggs = {
    'Calories_Burned': 'mean',
    'Steps_Daily': 'mean',
    'Stress_Level': 'mean',
}
activity_effectiveness = df.groupby('Activity_Type').agg(effectiveness_aggs).round(1)
ranked_activities = activity_effectiveness.sort_values('Calories_Burned', ascending=False)
print(ranked_activities)
# Stress analysis: profile of rows whose stress level is below 3.
print("\n6. LOW STRESS INDIVIDUALS (<3) CHARACTERISTICS:")
is_low_stress = df['Stress_Level'] < 3
low_stress = df[is_low_stress]
avg_exercise = low_stress['Exercise_Minutes_Daily'].mean()
avg_sleep = low_stress['Sleep_Hours'].mean()
avg_bmi = low_stress['BMI'].mean()
# Share of the low-stress rows exercising more than 45 minutes a day.
high_activity_count = (low_stress['Exercise_Minutes_Daily'] > 45).sum()
high_activity_pct = high_activity_count / len(low_stress) * 100
print(f" Average Exercise: {avg_exercise:.1f} min")
print(f" Average Sleep: {avg_sleep:.1f} hours")
print(f" Average BMI: {avg_bmi:.1f}")
print(f" Percentage with High Activity: {high_activity_pct:.1f}%")
print("\n" + "=" * 60)