dotfiles/dot_config/private_Code/User/History/6c11eec7/PpFf.py
2026-04-29 11:50:42 +08:00

92 lines
No EOL
2.9 KiB
Python

import pandas as pd
import numpy as np
# Number of simulated participants generated for each study arm.
N_PARTICIPANTS_PER_GROUP = 20

# Study days, numbered 1 through 30 inclusive.
DAYS = [*range(1, 31)]

# Seed NumPy's global random state so repeated runs draw the same numbers.
np.random.seed(42)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw 'Yes' probability so it is usable as a sampling weight.

    Args:
        prob: Unbounded probability estimate; may lie outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low.
            Defaults to 0.05.

    Returns:
        ``prob`` raised to at least ``floor`` and then capped at ``ceiling``.
        The cap is applied last, so ``ceiling`` wins if it is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1, n_participants=None, days=None):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets one organization tendency (``org_bias``) that sets
    the 'Yes' probability of the three daily habits. Happiness for a day is
    drawn around a freshly sampled baseline plus 1.1 points per habit kept.

    Args:
        start_participant_id: ID assigned to the first participant; later
            participants count up from it.
        n_participants: Number of participants to simulate. Defaults to the
            module-level ``N_PARTICIPANTS_PER_GROUP``.
        days: Iterable of day labels to simulate for every participant.
            Defaults to the module-level ``DAYS``.

    Returns:
        A list of rows, one per participant per day, each of the form
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]`` where the habit fields are 'Yes'/'No' and ``happiness``
        is an int from 1 to 10.
    """
    if n_participants is None:
        n_participants = N_PARTICIPANTS_PER_GROUP
    # Materialize so a one-shot iterable is replayed for every participant.
    days = DAYS if days is None else list(days)

    answers = ['Yes', 'No']
    rows = []
    for offset in range(n_participants):
        participant_id = start_participant_id + offset
        # Each person has their own organization tendency, drawn once.
        org_bias = np.random.normal(0.7, 0.15)

        # The habit probabilities depend only on org_bias, so they are
        # computed once per participant instead of inside the day loop.
        p_calendar = clip_yes_prob(org_bias + 0.1, 0.95)
        p_clean = clip_yes_prob(org_bias, 0.90)
        p_ontime = clip_yes_prob(org_bias + 0.05, 0.92)

        for day in days:
            # Draw order (calendar, clean, ontime, baseline, happiness) is
            # kept fixed so a seeded run reproduces the same dataset.
            calendar = np.random.choice(answers, p=[p_calendar, 1 - p_calendar])
            clean = np.random.choice(answers, p=[p_clean, 1 - p_clean])
            ontime = np.random.choice(answers, p=[p_ontime, 1 - p_ontime])
            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])

            baseline_happiness = np.random.normal(5.5, 1.0)
            raw_happiness = np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2)
            # NOTE(review): int() truncates rather than rounds, so 9.9 is
            # stored as 9 and a 10 only appears when the draw was clipped.
            happiness = int(np.clip(raw_happiness, 1, 10))

            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows
def generate_control_group(start_participant_id, n_participants=None, days=None):
    """Simulate daily happiness scores for the control arm.

    Control participants only record happiness; all three habit columns are
    fixed to 'No'. Happiness is a freshly sampled baseline plus noise,
    clipped to the 1-10 scale.

    Args:
        start_participant_id: ID assigned to the first participant; later
            participants count up from it.
        n_participants: Number of participants to simulate. Defaults to the
            module-level ``N_PARTICIPANTS_PER_GROUP``.
        days: Iterable of day labels to simulate for every participant.
            Defaults to the module-level ``DAYS``.

    Returns:
        A list of rows, one per participant per day, each of the form
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int from 1 to 10.
    """
    if n_participants is None:
        n_participants = N_PARTICIPANTS_PER_GROUP
    # Materialize so a one-shot iterable is replayed for every participant.
    days = DAYS if days is None else list(days)

    rows = []
    for offset in range(n_participants):
        participant_id = start_participant_id + offset
        for day in days:
            # Control group only records happiness; all habit columns are No.
            calendar = 'No'
            clean = 'No'
            ontime = 'No'

            # Baseline is drawn before the noise term; the order is kept
            # fixed so a seeded run reproduces the same dataset.
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # NOTE(review): int() truncates rather than rounds, so 9.9 is
            # stored as 9 and a 10 only appears when the value was clipped.
            happiness = int(np.clip(baseline_happiness + control_noise, 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows
# Intervention rows are generated first, then control rows; control
# participant IDs continue directly after the intervention IDs.
data = generate_intervention_group(start_participant_id=1) + generate_control_group(
    start_participant_id=N_PARTICIPANTS_PER_GROUP + 1
)

# One DataFrame row per participant per day, for both groups.
df = pd.DataFrame(
    data,
    columns=[
        "Participant_ID",
        "Group",
        "Day",
        "Calendar_Adherence",
        "Cleanliness_Adherence",
        "Punctuality_Adherence",
        "Happiness",
    ],
)

# Write the combined dataset to disk without the DataFrame index column.
df.to_csv("organization_happiness_study_data.csv", index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview the first 10 rows