"""Generate a synthetic 30-day "organization habits vs. happiness" dataset.

Two groups of participants are simulated day by day:

* Intervention - three tracked habits (calendar, cleanliness, punctuality)
  are drawn each day and feed into that day's happiness score.
* Control - the same habits are drawn but always reported as 'No'; they
  still give a smaller boost to happiness.

Running the script seeds NumPy's global RNG, builds the combined DataFrame,
writes it to 'organization_happiness_study_data.csv' and prints a preview.
"""
import pandas as pd
import numpy as np

np.random.seed(64)  # ensures you get exactly the same data every time

N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 31))

# Outcome labels, in the order their probabilities are passed to np.random.choice.
_YES_NO = ['Yes', 'No']


def clip_yes_prob(prob, ceiling):
    """Return *prob* raised to at least 0.05 and then capped at *ceiling*.

    The cap is applied last, so a ceiling below 0.05 wins over the floor.
    """
    return min(ceiling, max(0.05, prob))


def _draw_yes_no(yes_prob):
    """Draw 'Yes' with probability *yes_prob*, otherwise 'No' (one RNG draw)."""
    return np.random.choice(_YES_NO, p=[yes_prob, 1 - yes_prob])


def _is_weekend(day):
    """Return True when *day* counts as a weekend day (day % 7 is 0 or 6)."""
    return day % 7 in (0, 6)


def _intervention_time_factor(day):
    """Return the adherence multiplier for *day* in the intervention group."""
    if day < 7:
        return 0.85  # Getting started is harder
    if day < 20:
        return 1.1  # Momentum builds
    return 0.98  # Slight fatigue


def generate_intervention_group(start_participant_id=1):
    """Simulate the intervention group and return one row per participant-day.

    Args:
        start_participant_id: ID given to the first participant; the others
            are numbered consecutively after it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` where the three habit fields are
        'Yes'/'No' and happiness is an int in the range 1-10.

    Random numbers come from NumPy's global RNG, so the result depends on
    the current seed/state. The order of draws is significant for
    reproducibility and must not be changed.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Each person has their own persistent organization tendency.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Personal baselines per habit (people are naturally better or
        # worse at specific habits).
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        # Baseline happiness for this participant.
        person_happiness_baseline = np.random.normal(5.5, 1.2)
        current_happiness = person_happiness_baseline

        # Yesterday's outcomes drive the momentum bonus below.
        prev_calendar, prev_clean, prev_ontime = 'No', 'No', 'No'

        for day in DAYS:
            # Weekends are harder: adherence is scaled down.
            week_difficulty = 0.75 if _is_weekend(day) else 1.0
            # Early days are harder, then easier, with a slight late decline.
            time_factor = _intervention_time_factor(day)

            # Momentum: doing a habit yesterday makes it more likely today.
            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor
                + (0.15 if prev_calendar == 'Yes' else 0),
                0.95,
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor
                + (0.15 if prev_clean == 'Yes' else 0),
                0.90,
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor
                + (0.12 if prev_ontime == 'Yes' else 0),
                0.93,
            )

            calendar = _draw_yes_no(calendar_prob)
            clean = _draw_yes_no(clean_prob)
            ontime = _draw_yes_no(ontime_prob)

            # Each habit completed today adds a fixed amount to the boost.
            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            habit_boost = adherence_count * 1.2

            # Happiness has persistence but is also affected by habits.
            happiness_noise = np.random.normal(0, 1.3)
            current_happiness = np.clip(
                current_happiness * 0.4  # Previous day influences today
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9  # Habits have a strong effect
                + happiness_noise,
                1,
                10,
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

            # Update for the next iteration.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime

    return rows


def generate_control_group(start_participant_id):
    """Simulate the control group and return one row per participant-day.

    Args:
        start_participant_id: ID given to the first participant; the others
            are numbered consecutively after it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``. Habits are always reported as 'No' because this group
        does not track them; happiness is an int in the range 1-10.

    Random numbers come from NumPy's global RNG, so the result depends on
    the current seed/state. The order of draws is significant for
    reproducibility and must not be changed.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized;
        # the mean is lower than the intervention group's.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)

        # Slightly lower happiness baseline than the intervention group.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Weekend effect is milder than in the intervention group.
            week_factor = 0.9 if _is_weekend(day) else 1.0
            # Tiny, weak habituation over the course of the study.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits: they happen but are not reported. There is
            # no momentum term, so each day is independent of the last.
            calendar_prob = clip_yes_prob(
                natural_org * 0.8 * week_factor * time_factor, 0.4
            )
            clean_prob = clip_yes_prob(
                natural_org * 0.75 * week_factor * time_factor, 0.35
            )
            ontime_prob = clip_yes_prob(
                natural_org * 0.85 * week_factor * time_factor, 0.45
            )
            calendar_untracked = _draw_yes_no(calendar_prob)
            clean_untracked = _draw_yes_no(clean_prob)
            ontime_untracked = _draw_yes_no(ontime_prob)

            # Habits are reported as 'No', but they still subtly affect
            # happiness, with a smaller effect than when tracked.
            untracked_count = sum(
                x == 'Yes'
                for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.5

            # Happiness here is noisier and less driven by daily habits.
            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1,
                10,
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows


# Intervention participants come first; control IDs continue after them.
data = []
data.extend(generate_intervention_group(start_participant_id=1))
data.extend(generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1))

df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
)

# Save the combined dataset
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # shows first 10 rows