"""Generate a synthetic 'organization habits vs. happiness' study dataset.

Two groups of participants are simulated over a fixed number of days:

* Intervention: each day records Yes/No adherence for three habits
  (calendar, cleanliness, punctuality) plus a 1-10 happiness score that is
  shifted upward by the number of habits adhered to that day.
* Control: all habit columns are 'No' and only happiness is simulated.

The combined rows are written to 'organization_happiness_study_data.csv'.
"""

import pandas as pd
import numpy as np

np.random.seed(42)  # ensures you get exactly the same data every time

N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 31))


def clip_yes_prob(prob: float, ceiling: float, floor: float = 0.05) -> float:
    """Clamp *prob* into the closed interval [floor, ceiling].

    Args:
        prob: Raw probability of a 'Yes' answer (may fall outside [0, 1]).
        ceiling: Largest probability allowed.
        floor: Smallest probability allowed (defaults to 0.05).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``.
    """
    return min(ceiling, max(floor, prob))


def _draw_yes_no(yes_prob: float):
    """Draw 'Yes' with probability *yes_prob*, otherwise 'No' (one RNG call)."""
    return np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])


def generate_intervention_group(start_participant_id: int = 1) -> list:
    """Simulate daily habit adherence and happiness for the intervention group.

    Args:
        start_participant_id: ID given to the first participant; the rest
            are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with one row per participant per day.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Each person has their own organization tendency.
        org_bias = np.random.normal(0.7, 0.15)

        # The probabilities depend only on the participant, so compute them
        # once here instead of twice per draw on every day. Clamping uses no
        # random numbers, so the seeded output is unaffected.
        calendar_prob = clip_yes_prob(org_bias + 0.1, 0.95)
        clean_prob = clip_yes_prob(org_bias, 0.90)
        ontime_prob = clip_yes_prob(org_bias + 0.05, 0.92)

        for day in DAYS:
            # Draw order (calendar, clean, ontime, baseline, happiness) is
            # kept fixed so a given seed always yields the same dataset.
            calendar = _draw_yes_no(calendar_prob)
            clean = _draw_yes_no(clean_prob)
            ontime = _draw_yes_no(ontime_prob)

            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            baseline_happiness = np.random.normal(5.5, 1.0)
            # Clip to the 1-10 scale; int() then truncates the fractional
            # part (e.g. 7.9 is recorded as 7).
            happiness = int(
                np.clip(
                    np.random.normal(
                        baseline_happiness + adherence_count * 1.1, 1.2
                    ),
                    1,
                    10,
                )
            )

            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows


def generate_control_group(start_participant_id: int) -> list:
    """Simulate daily happiness for the control group.

    Args:
        start_participant_id: ID given to the first participant; the rest
            are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` with one row per participant per day.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        for day in DAYS:
            # Control group only records happiness; all habit columns are No.
            calendar = 'No'
            clean = 'No'
            ontime = 'No'

            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # Clip to the 1-10 scale, then truncate to an integer score.
            happiness = int(np.clip(baseline_happiness + control_noise, 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows


# Intervention participants are numbered first; control IDs follow on directly.
data = []
data.extend(generate_intervention_group(start_participant_id=1))
data.extend(generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1))

# Column order must match the field order of the rows built above.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
)

# Save the combined dataset
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # shows first 10 rows