dotfiles/dot_config/private_Code/User/History/-7d2a273a/bWGM.py
2026-04-29 11:50:42 +08:00

77 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import pandas as pd
import numpy as np
# Read the study data set and report its overall size.
df = pd.read_csv('organization_happiness_study_data.csv')

banner = "=" * 70
print(banner)
print("DATA GENERATION IMPROVEMENTS VERIFICATION")
print(banner)

row_total = len(df)
print(f"\n✓ Dataset shape: {df.shape}")
print(f"✓ Total rows: {row_total} (20 participants × 30 days × 2 groups = 1200 expected)")
print("\n--- Intervention Group Statistics ---")
# Take an explicit copy of the filtered rows: later sections add derived
# columns (Habits_Count, DayOfWeek) to this frame, and assigning into a
# boolean-mask slice of `df` triggers pandas' SettingWithCopyWarning.
intervention = df[df['Group'] == 'Intervention'].copy()
print(f"Participants: {intervention['Participant_ID'].nunique()}")
print(f"Mean Happiness: {intervention['Happiness'].mean():.2f}")
print(f"Happiness Std Dev: {intervention['Happiness'].std():.2f}")
# Share of intervention rows in which each habit is recorded as 'Yes'.
for habit in ('Calendar', 'Cleanliness', 'Punctuality'):
    adherence_rate = (intervention[f'{habit}_Adherence'] == 'Yes').mean()
    print(f"{habit} Adherence Rate: {adherence_rate:.1%}")
print("\n--- Control Group Statistics ---")
control = df[df['Group'] == 'Control']
control_happiness = control['Happiness']
print(f"Participants: {control['Participant_ID'].nunique()}")
print(f"Mean Happiness: {control_happiness.mean():.2f}")
print(f"Happiness Std Dev: {control_happiness.std():.2f}")
# Share of control rows that record each habit as 'Yes'.
for habit in ('Calendar', 'Cleanliness', 'Punctuality'):
    reported_rate = control[f'{habit}_Adherence'].eq('Yes').mean()
    print(f"Reported {habit}: {reported_rate:.1%} (should be ~0%)")
print("\n--- Natural Data Patterns ---")
# Per-row count (0-3) of the three tracked habits recorded as 'Yes'.
intervention['Habits_Count'] = (
    (intervention['Calendar_Adherence'] == 'Yes').astype(int)
    + (intervention['Cleanliness_Adherence'] == 'Yes').astype(int)
    + (intervention['Punctuality_Adherence'] == 'Yes').astype(int)
)
print("Habit completion rates by number completed:")
# For each possible count, report the mean happiness of the matching rows
# and how many rows fell into that bucket.
for count in [0, 1, 2, 3]:
    subset = intervention[intervention['Habits_Count'] == count]
    happiness = subset['Happiness'].mean()
    print(f" {count} habits: Happiness = {happiness:.2f} (n={len(subset)})")
# Weekend effect: rows whose Day modulo 7 is 0 or 6 are treated as weekend.
intervention['DayOfWeek'] = intervention['Day'] % 7
is_weekend = intervention['DayOfWeek'].isin([0, 6])
weekend = intervention[is_weekend]
weekday = intervention[~is_weekend]
print("\nWeekend vs Weekday Adherence:")
# Habits_Count already holds the per-row number of habits recorded as 'Yes'.
for label, rows in (("Weekday", weekday), ("Weekend", weekend)):
    avg_habits = rows['Habits_Count'].mean()
    print(f" {label} avg habits: {avg_habits:.2f}")
# Habit formation over time: compare three windows of the Day column.
study_day = intervention['Day']
first_week = intervention[study_day <= 7]
mid_month = intervention[(study_day > 14) & (study_day <= 21)]
last_week = intervention[study_day > 23]
print("\nHabit Formation Over Time:")
# Habits_Count already holds the per-row number of habits recorded as 'Yes'.
for label, window in (
    ("Days 1-7 (Starting)", first_week),
    ("Days 15-21 (Momentum)", mid_month),
    ("Days 24-30 (Late)", last_week),
):
    print(f" {label}: Avg habits = {window['Habits_Count'].mean():.2f}")
print("\nHappiness Persistence (day-to-day correlation):")
# Order rows by Day within each participant so that shift(1) pairs every row
# with the same participant's preceding row.
intervention_sorted = intervention.sort_values(['Participant_ID', 'Day'])
previous_happiness = intervention_sorted.groupby('Participant_ID')['Happiness'].shift(1)
intervention_sorted['Happiness_prev'] = previous_happiness
# Keep only rows that have a previous happiness value to compare against.
valid = intervention_sorted.dropna(subset=['Happiness_prev'])
corr_matrix = valid[['Happiness', 'Happiness_prev']].corr()
corr = corr_matrix.loc['Happiness', 'Happiness_prev']
print(f" Correlation between today and yesterday's happiness: {corr:.3f}")
print("\n✓ Data generation complete with natural patterns!")
print("\nKey improvements:")
# One bullet per pattern listed in the closing summary.
key_improvements = (
    "Habit momentum: doing it yesterday makes it more likely today",
    "Weekly patterns: lower adherence weekends vs weekdays",
    "Habit formation: initial difficulty, momentum building, slight fatigue",
    "Individual variation: each person has unique habit profiles",
    "Happiness persistence: today's mood influenced by yesterday's",
    "Control group realism: still report 'No' but data shows natural variation",
)
for improvement in key_improvements:
    print(f" • {improvement}")