Update Fedora state: 2026-04-29 11:50

This commit is contained in:
Breadway 2026-04-29 11:50:42 +08:00
parent 42ca768584
commit 10f0d5de1d
338 changed files with 18983 additions and 32 deletions

View file

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import pandas as pd
import numpy as np
# Read the study dataset (path is relative to the current working directory)
# and print the report banner followed by a basic size summary.
df = pd.read_csv('organization_happiness_study_data.csv')

banner = "=" * 70
print(banner, "DATA GENERATION IMPROVEMENTS VERIFICATION", banner, sep="\n")

# The expected row count is stated in the message only; it is not asserted.
print(f"\n✓ Dataset shape: {df.shape}")
print(f"✓ Total rows: {df.shape[0]} (20 participants × 30 days × 2 groups = 1200 expected)")
# Intervention arm summary: participant count, happiness mean and standard
# deviation, and the fraction of rows marked 'Yes' for each tracked habit.
print("\n--- Intervention Group Statistics ---")
intervention = df[df['Group'] == 'Intervention']
intervention_happiness = intervention['Happiness']
print(f"Participants: {intervention['Participant_ID'].nunique()}")
print(f"Mean Happiness: {intervention_happiness.mean():.2f}")
print(f"Happiness Std Dev: {intervention_happiness.std():.2f}")
for habit in ('Calendar', 'Cleanliness', 'Punctuality'):
    # Mean of a boolean mask == share of rows where the habit was reported.
    adherence_rate = (intervention[f'{habit}_Adherence'] == 'Yes').mean()
    print(f"{habit} Adherence Rate: {adherence_rate:.1%}")
# Control arm summary: same happiness statistics, plus the share of 'Yes'
# answers per habit column. The "(should be ~0%)" hint is informational only;
# nothing here enforces it.
print("\n--- Control Group Statistics ---")
control = df[df['Group'] == 'Control']
control_happiness = control['Happiness']
print(f"Participants: {control['Participant_ID'].nunique()}")
print(f"Mean Happiness: {control_happiness.mean():.2f}")
print(f"Happiness Std Dev: {control_happiness.std():.2f}")
for habit in ('Calendar', 'Cleanliness', 'Punctuality'):
    reported_rate = (control[f'{habit}_Adherence'] == 'Yes').mean()
    print(f"Reported {habit}: {reported_rate:.1%} (should be ~0%)")
print("\n--- Natural Data Patterns ---")
# `intervention` was produced by boolean-indexing `df`. Take an explicit copy
# before adding columns (here and further down the script) so pandas does not
# raise SettingWithCopyWarning about writing to a possible view of `df`.
intervention = intervention.copy()

# Habits_Count: how many of the three habits (0-3) are marked 'Yes' on each row.
habit_columns = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
intervention['Habits_Count'] = (intervention[habit_columns] == 'Yes').sum(axis=1)

# Mean happiness broken down by the number of habits completed that day.
print("Habit completion rates by number completed:")
for count in [0, 1, 2, 3]:
    subset = intervention[intervention['Habits_Count'] == count]
    # An empty subset yields NaN here and is printed as "nan" with n=0.
    happiness = subset['Happiness'].mean()
    print(f" {count} habits: Happiness = {happiness:.2f} (n={len(subset)})")
# Weekend effect: bucket rows by Day modulo 7 and treat remainders 0 and 6 as
# weekend days (NOTE(review): assumes the generator uses the same day-of-week
# mapping — confirm). Compare the mean number of habits completed per row,
# reusing the Habits_Count column computed earlier in the script.
intervention['DayOfWeek'] = intervention['Day'] % 7
is_weekend = intervention['DayOfWeek'].isin([0, 6])
weekend = intervention[is_weekend]
weekday = intervention[~is_weekend]
print("\nWeekend vs Weekday Adherence:")
for label, rows in (("Weekday", weekday), ("Weekend", weekend)):
    print(f" {label} avg habits: {rows['Habits_Count'].mean():.2f}")
# Habit formation over time: mean habits completed per row in three windows
# of the study (days 1-7, 15-21 and 24-30), reusing the Habits_Count column
# computed earlier in the script.
study_day = intervention['Day']
first_week = intervention[study_day <= 7]
mid_month = intervention[(study_day > 14) & (study_day <= 21)]
last_week = intervention[study_day > 23]
print("\nHabit Formation Over Time:")
for label, window in (
    ("Days 1-7 (Starting)", first_week),
    ("Days 15-21 (Momentum)", mid_month),
    ("Days 24-30 (Late)", last_week),
):
    print(f" {label}: Avg habits = {window['Habits_Count'].mean():.2f}")
print("\nHappiness Persistence (day-to-day correlation):")
# Pair every row with the same participant's preceding row (ordered by Day).
# A participant's first row has no predecessor and is dropped before the
# correlation is computed.
# NOTE(review): "preceding row" equals "yesterday" only if no days are
# missing for a participant — confirm against the generator.
intervention_sorted = intervention.sort_values(['Participant_ID', 'Day'])
previous_happiness = intervention_sorted.groupby('Participant_ID')['Happiness'].shift(1)
intervention_sorted['Happiness_prev'] = previous_happiness
valid = intervention_sorted.dropna(subset=['Happiness_prev'])
corr = valid[['Happiness', 'Happiness_prev']].corr().loc['Happiness', 'Happiness_prev']
print(f" Correlation between today and yesterday's happiness: {corr:.3f}")
# Closing summary. These lines are fixed text printed unconditionally; they
# are not derived from the statistics computed above.
print("\n✓ Data generation complete with natural patterns!")
print("\nKey improvements:")
key_improvements = (
    "Habit momentum: doing it yesterday makes it more likely today",
    "Weekly patterns: lower adherence weekends vs weekdays",
    "Habit formation: initial difficulty, momentum building, slight fatigue",
    "Individual variation: each person has unique habit profiles",
    "Happiness persistence: today's mood influenced by yesterday's",
    "Control group realism: still report 'No' but data shows natural variation",
)
for improvement in key_improvements:
    print(f" • {improvement}")