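# Code-viewer residue from the original paste (file summary): 93 lines, no EOL, 3 KiB, Python.
# The next line appears to be a stale snippet title; it is kept for reference only.
# The DataFrame actually used is built at the end of the script and also has a 'Group' column.
# df = pd.DataFrame(data, columns=['Participant_ID', 'Day', 'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'])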
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so every run generates exactly the same data.
np.random.seed(42)

# Number of participants simulated in each study group.
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))


def clip_yes_prob(prob: float, ceiling: float, floor: float = 0.05) -> float:
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability; may fall outside [0, 1] because it is derived
            from an unbounded random draw.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned for a low ``prob`` (default 0.05).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so if ``ceiling`` is below ``floor`` the result is ``ceiling``.
    """
    return min(ceiling, max(floor, prob))


def generate_intervention_group(start_participant_id=1, *, n_participants=None, days=None):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant draws one organization tendency from a normal
    distribution (mean 0.7, sd 0.15) that is reused for all of their days.
    On each day three Yes/No habits are drawn with probabilities derived from
    that tendency and clamped by ``clip_yes_prob``:

    * calendar:    tendency + 0.10, capped at 0.95
    * cleanliness: tendency,        capped at 0.90
    * punctuality: tendency + 0.05, capped at 0.92

    Happiness is a normal draw (sd 1.2) centred on a fresh daily baseline
    (mean 5.5, sd 1.0) plus 1.1 per habit kept, clipped to [1, 10] and then
    truncated to an int.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.
        n_participants: Number of participants to simulate. Defaults to
            ``N_PARTICIPANTS_PER_GROUP``.
        days: Iterable of day labels to simulate for each participant.
            Defaults to ``DAYS``.

    Returns:
        A list of rows, one per participant-day, each of the form
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    if n_participants is None:
        n_participants = N_PARTICIPANTS_PER_GROUP
    if days is None:
        days = DAYS

    rows = []
    for offset in range(n_participants):
        participant_id = start_participant_id + offset
        # Each person has their own organization tendency.
        org_bias = np.random.normal(0.7, 0.15)

        # The habit probabilities depend only on the participant, so compute
        # them once here instead of twice per draw on every day.
        p_calendar = clip_yes_prob(org_bias + 0.1, 0.95)
        p_clean = clip_yes_prob(org_bias, 0.90)
        p_ontime = clip_yes_prob(org_bias + 0.05, 0.92)

        for day in days:
            # Draw order (calendar, clean, ontime, baseline, happiness) is
            # kept fixed so a seeded run reproduces the same dataset.
            calendar = np.random.choice(['Yes', 'No'], p=[p_calendar, 1 - p_calendar])
            clean = np.random.choice(['Yes', 'No'], p=[p_clean, 1 - p_clean])
            ontime = np.random.choice(['Yes', 'No'], p=[p_ontime, 1 - p_ontime])

            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            baseline_happiness = np.random.normal(5.5, 1.0)
            # int() truncates rather than rounds, so 10 is only reached when
            # the clipped value is exactly 10.
            happiness = int(np.clip(np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2), 1, 10))

            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

    return rows


def generate_control_group(start_participant_id, *, n_participants=None, days=None):
    """Simulate daily happiness for the control group.

    Control participants only record happiness; all three habit columns are
    fixed to ``'No'``. Happiness is a daily baseline (normal, mean 5.5,
    sd 1.0) plus independent noise (normal, mean 0.0, sd 1.1), clipped to
    [1, 10] and then truncated to an int.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.
        n_participants: Number of participants to simulate. Defaults to
            ``N_PARTICIPANTS_PER_GROUP``.
        days: Iterable of day labels to simulate for each participant.
            Defaults to ``DAYS``.

    Returns:
        A list of rows, one per participant-day, each of the form
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    if n_participants is None:
        n_participants = N_PARTICIPANTS_PER_GROUP
    if days is None:
        days = DAYS

    rows = []
    for offset in range(n_participants):
        participant_id = start_participant_id + offset

        for day in days:
            # Control group only records happiness; all habit columns are No.
            calendar = 'No'
            clean = 'No'
            ontime = 'No'

            # Two draws per row, baseline first, so a seeded run reproduces
            # the same dataset.
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # int() truncates rather than rounds.
            happiness = int(np.clip(baseline_happiness + control_noise, 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

    return rows


# Build the combined dataset. The intervention group is generated first
# (IDs starting at 1) and the control group's IDs continue from
# N_PARTICIPANTS_PER_GROUP + 1. The generation order also fixes the order of
# RNG draws, so it must not be swapped if seeded output is to stay the same.
data = []
data.extend(generate_intervention_group(start_participant_id=1))
data.extend(generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1))

# Column order must match the order of the values in each generated row.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
)

# Save the combined dataset
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # shows first 10 rows