# Listing metadata: Python source, 180 lines, 7.5 KiB, no trailing end-of-line.
import pandas as pd
|
|
import numpy as np
|
|
|
|
|
|
# Seed NumPy's global generator once so every run reproduces the same dataset.
np.random.seed(64)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = [*range(1, 31)]
|
|
|
|
|
|
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range ``[floor, ceiling]``.

    Args:
        prob: Unclamped probability estimate; it may lie outside ``[0, 1]``.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned. Defaults to 0.05, the
            lower bound that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The lower bound is applied
        first and the upper bound last, so if ``ceiling`` is below ``floor``
        the result is ``ceiling``.
    """
    return min(ceiling, max(floor, prob))
|
|
|
|
|
|
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention arm.

    One record is produced per participant per entry in ``DAYS``, for
    ``N_PARTICIPANTS_PER_GROUP`` participants. All randomness comes from
    NumPy's global generator, so results depend on the seed set beforehand.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of rows shaped
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]`` where the three habit fields are ``'Yes'``/``'No'`` and
        ``happiness`` is an int in 1..10.
    """
    # Per habit, in draw order (calendar, cleanliness, punctuality):
    # mean offset added to the personal organization tendency.
    ease_offsets = (0.05, -0.02, 0.02)
    # Per habit, in the same order: (bonus to the "Yes" probability when the
    # habit was done the previous day, ceiling of that probability).
    habit_settings = ((0.15, 0.95), (0.15, 0.90), (0.12, 0.93))

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + index

        # Persistent personal organization tendency, kept within [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Habit-specific baselines: the tendency plus a small personal offset.
        eases = [tendency + np.random.normal(shift, 0.08) for shift in ease_offsets]

        # Personal happiness baseline; the running value starts there.
        baseline_mood = np.random.normal(5.5, 1.2)
        mood = baseline_mood

        # Nothing counts as done before the first day.
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as the weekend,
            # when adherence is scaled down.
            if (day % 7) in (0, 6):
                week_difficulty = 0.75
            else:
                week_difficulty = 1.0

            # Phase of the study: harder start, easier middle, slight late dip.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # Draw each habit in turn; having done it yesterday raises today's
            # probability (momentum).
            today = []
            for ease, (bonus, ceiling), done_before in zip(eases, habit_settings, yesterday):
                momentum = bonus if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(ease * week_difficulty * time_factor + momentum, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))
            calendar, clean, ontime = today

            # Each habit completed today contributes to the happiness boost.
            adherence_count = sum(answer == 'Yes' for answer in today)
            habit_boost = 0
            if adherence_count > 0:
                habit_boost = adherence_count * 1.2

            # Happiness blends yesterday's value, the personal baseline, the
            # habit boost and daily noise, then is limited to the 1..10 scale.
            noise = np.random.normal(0, 1.3)
            blended = mood * 0.4 + baseline_mood * 0.4 + habit_boost * 0.9 + noise
            mood = np.clip(blended, 1, 10)

            records.append(
                [pid, 'Intervention', day, calendar, clean, ontime, int(np.round(mood))]
            )

            # Today's answers become the momentum input for the next day.
            yesterday = today

    return records
|
|
|
|
|
|
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    One record is produced per participant per entry in ``DAYS``, for
    ``N_PARTICIPANTS_PER_GROUP`` participants. Habits are still simulated
    internally because they nudge happiness, but they are never reported:
    the three habit columns are always ``'No'``. All randomness comes from
    NumPy's global generator, so results depend on the seed set beforehand.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of rows shaped
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int in 1..10.
    """
    # Per untracked habit, in draw order (calendar, cleanliness, punctuality):
    # (scale applied to the natural organization level, probability ceiling).
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized.
        # The mean here (0.3) is lower than the intervention group's (0.65).
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)

        # Personal happiness baseline; the running value starts there.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild penalty.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9

            # Weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits: they happen but are not reported. Each
            # probability is computed once and reused for both outcomes.
            untracked = []
            for scale, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * scale * week_factor * time_factor, ceiling
                )
                untracked.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )

            # Unreported habits still affect happiness, subtly: the boost per
            # habit (0.5) is smaller than what the intervention group gets.
            untracked_count = sum(outcome == 'Yes' for outcome in untracked)
            subtle_boost = untracked_count * 0.5

            # Happiness leans on yesterday's value and the baseline, with
            # noisier day-to-day swings, then is limited to the 1..10 scale.
            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
|
|
|
|
|
# Assemble the combined dataset: intervention rows first (IDs start at 1),
# then control rows, whose IDs continue right after the intervention group.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# Column order matches the order of the fields in each generated row.
column_names = [
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
]
df = pd.DataFrame(data, columns=column_names)

# Write the combined dataset to disk without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")

# Preview the first 10 rows.
print(df.head(10))