Update Fedora state: 2026-04-29 11:50

This commit is contained in:
Breadway 2026-04-29 11:50:42 +08:00
parent 42ca768584
commit 10f0d5de1d
338 changed files with 18983 additions and 32 deletions

View file

@ -0,0 +1,196 @@
import pandas as pd
import numpy as np
# Study design: 30 consecutive study days and 20 participants in each arm.
DAYS = [*range(1, 31)]
N_PARTICIPANTS_PER_GROUP = 20

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant gets a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are drawn as 'Yes'/'No', a running habit strength is updated, and
    a 1-10 happiness score is built from baseline, habit strength and noise.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    # Habit-strength change indexed by how many habits were completed today:
    # a decay for zero, then increasing gains for one, two and three habits.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        # Persistent per-person tendency, kept inside [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # The order of these draws must not change, or the seeded data changes.
        calendar_ease = tendency + np.random.normal(0.05, 0.08)
        clean_ease = tendency + np.random.normal(-0.02, 0.08)
        ontime_ease = tendency + np.random.normal(0.02, 0.08)
        baseline = np.random.normal(4.8, 1.1)
        # (ease, next-day momentum bonus, probability ceiling) per habit,
        # listed in output-column order: calendar, clean, on-time.
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        strength = 0.0
        yesterday = ['No', 'No', 'No']
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend: lower adherence.
            weekend_scale = 0.75 if day % 7 in (0, 6) else 1.0
            # Three phases: slow start, momentum in the middle, mild fatigue late.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85
            today = []
            for (ease, momentum, ceiling), previous in zip(habits, yesterday):
                # A habit done yesterday is more likely to be done again today.
                carry = momentum if previous == 'Yes' else 0
                p_yes = clip_yes_prob(ease * weekend_scale * phase_scale + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            done = sum(answer == 'Yes' for answer in today)
            # Cumulative habit strength, bounded to [0, 4].
            strength = np.clip(strength + strength_delta[done], 0, 4)
            progress = day / 30.0
            noise = np.random.normal(0, 0.7)
            # The weight on habit strength grows from about 0.53 to 1.5 over the study.
            raw = baseline + strength * (0.5 + progress) + noise
            records.append(
                [pid, 'Intervention', day, *today, int(np.clip(np.round(raw), 1, 10))]
            )
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate the daily records of every control-group participant.

    Control participants do not track habits, so all three habit columns are
    always reported as 'No'. Hidden ("untracked") habits are still simulated
    each day, but only to add a small boost to that day's happiness score.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    # (multiplier on natural organisation, probability ceiling) for the
    # untracked calendar, clean and on-time habits, in RNG draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Per-person natural organisation, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Participant-level happiness baseline, constant across days.
        person_happiness_baseline = np.random.normal(4.8, 1.3)
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend (mild reduction).
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak habituation: the factor rises from 1.001 to 1.03.
            time_factor = 1.0 + (day / 100) * 0.1
            outcomes = []
            for multiplier, ceiling in untracked_habits:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(
                    natural_org * multiplier * week_factor * time_factor, ceiling
                )
                outcomes.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            untracked_count = sum(outcome == 'Yes' for outcome in outcomes)
            subtle_boost = untracked_count * 0.1  # at most 0.3 per day
            daily_noise = np.random.normal(0, 1.2)
            # No cumulative term: today's score ignores earlier days.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            # Habit columns are always 'No' because this group does not track.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble the dataset: intervention rows first, then control rows whose
# participant IDs continue right after the intervention group's.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write both groups to a single CSV (no index column) and preview the result.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Study design: 30 consecutive study days and 40 participants in each arm.
DAYS = [*range(1, 31)]
N_PARTICIPANTS_PER_GROUP = 40

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(3)
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant gets a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are drawn as 'Yes'/'No', a running habit strength is updated, and
    a 1-10 happiness score is built from the baseline, an immediate bonus for
    today's habits, a cumulative habit-strength bonus and noise.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    # Habit-strength change indexed by how many habits were completed today:
    # a decay for zero, then increasing gains for one, two and three habits.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        # Persistent per-person tendency, kept inside [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # The order of these draws must not change, or the seeded data changes.
        calendar_ease = tendency + np.random.normal(0.05, 0.08)
        clean_ease = tendency + np.random.normal(-0.02, 0.08)
        ontime_ease = tendency + np.random.normal(0.02, 0.08)
        baseline = np.random.normal(4.0, 1.0)
        # (ease, next-day momentum bonus, probability ceiling) per habit,
        # listed in output-column order: calendar, clean, on-time.
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        strength = 0.0
        yesterday = ['No', 'No', 'No']
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend: lower adherence.
            weekend_scale = 0.75 if day % 7 in (0, 6) else 1.0
            # Three phases: slow start, momentum in the middle, mild fatigue late.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85
            today = []
            for (ease, momentum, ceiling), previous in zip(habits, yesterday):
                # A habit done yesterday is more likely to be done again today.
                carry = momentum if previous == 'Yes' else 0
                p_yes = clip_yes_prob(ease * weekend_scale * phase_scale + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            done = sum(answer == 'Yes' for answer in today)
            # Cumulative habit strength, bounded to [0, 5].
            strength = np.clip(strength + strength_delta[done], 0, 5)
            progress = day / 30.0
            noise = np.random.normal(0, 0.35)
            # Immediate dose-response: 0.6 points per habit done today (0 to 1.8).
            daily_bonus = done * 0.6
            # Cumulative part: the weight rises toward 0.6, so at most 5 * 0.6 = 3.0.
            cumulative = strength * (0.4 + progress * 0.2)
            raw = baseline + daily_bonus + cumulative + noise
            records.append(
                [pid, 'Intervention', day, *today, int(np.clip(np.round(raw), 1, 10))]
            )
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate the daily records of every control-group participant.

    Control participants do not track habits, so all three habit columns are
    always reported as 'No'. Hidden ("untracked") habits are still simulated
    each day, but only to add a small boost to that day's happiness score.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    # (multiplier on natural organisation, probability ceiling) for the
    # untracked calendar, clean and on-time habits, in RNG draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Per-person natural organisation, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Participant-level happiness baseline centred on 5.1, constant across days.
        person_happiness_baseline = np.random.normal(5.1, 0.9)
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend (mild reduction).
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak habituation: the factor rises from 1.001 to 1.03.
            time_factor = 1.0 + (day / 100) * 0.1
            outcomes = []
            for multiplier, ceiling in untracked_habits:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(
                    natural_org * multiplier * week_factor * time_factor, ceiling
                )
                outcomes.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            untracked_count = sum(outcome == 'Yes' for outcome in outcomes)
            subtle_boost = untracked_count * 0.1  # at most 0.3 per day
            daily_noise = np.random.normal(0, 1.0)
            # No cumulative term: today's score ignores earlier days.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            # Habit columns are always 'No' because this group does not track.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble the dataset: intervention rows first, then control rows whose
# participant IDs continue right after the intervention group's.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write both groups to a single CSV (no index column) and preview the result.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,93 @@
import pandas as pd
import numpy as np

# NOTE: a stray `df = pd.DataFrame(data, ...)` statement used to sit above the
# imports. It ran before `pd` was imported and before `data` existed, so the
# script raised NameError on its first line. The DataFrame is built once, at
# the bottom of the script, after the rows have been generated.

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(42)

# Study design: 20 participants in each arm, 30 consecutive study days.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant has a fixed organisation tendency that sets the chance
    of a 'Yes' for each habit. Every day the three habits are drawn, and a
    happiness score is sampled around a freshly drawn daily baseline plus
    1.1 points per completed habit.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        tendency = np.random.normal(0.7, 0.15)
        # The habit probabilities depend only on the participant, so they are
        # computed once here; order is calendar, clean, on-time.
        yes_probs = (
            clip_yes_prob(tendency + 0.1, 0.95),
            clip_yes_prob(tendency, 0.90),
            clip_yes_prob(tendency + 0.05, 0.92),
        )
        for day in DAYS:
            today = [np.random.choice(['Yes', 'No'], p=[p, 1 - p]) for p in yes_probs]
            done = sum(answer == 'Yes' for answer in today)
            # The "baseline" is redrawn every day, not held per participant.
            day_baseline = np.random.normal(5.5, 1.0)
            score = np.random.normal(day_baseline + done * 1.1, 1.2)
            # int() truncates toward zero after clipping to [1, 10].
            records.append([pid, 'Intervention', day, *today, int(np.clip(score, 1, 10))])
    return records
def generate_control_group(start_participant_id):
    """Simulate the daily records of every control-group participant.

    The control group records happiness only: all three habit columns are
    always 'No', and each day's score is a freshly drawn baseline plus
    independent noise.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        for day in DAYS:
            # Two independent draws per day: baseline first, then noise.
            day_baseline = np.random.normal(5.5, 1.0)
            noise = np.random.normal(0.0, 1.1)
            # int() truncates toward zero after clipping to [1, 10].
            score = int(np.clip(day_baseline + noise, 1, 10))
            records.append([pid, 'Control', day, 'No', 'No', 'No', score])
    return records
# Assemble the dataset: intervention rows first, then control rows whose
# participant IDs continue right after the intervention group's.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write both groups to a single CSV (no index column) and preview the result.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,180 @@
import pandas as pd
import numpy as np
# Study design: 30 consecutive study days and 20 participants in each arm.
DAYS = [*range(1, 31)]
N_PARTICIPANTS_PER_GROUP = 20

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(64)
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant gets a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are drawn as 'Yes'/'No'. Happiness is a running value: each day it
    mixes yesterday's value, the personal baseline, a boost from today's
    habits and noise, then is clipped to [1, 10].

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        # Persistent per-person tendency, kept inside [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # The order of these draws must not change, or the seeded data changes.
        calendar_ease = tendency + np.random.normal(0.05, 0.08)
        clean_ease = tendency + np.random.normal(-0.02, 0.08)
        ontime_ease = tendency + np.random.normal(0.02, 0.08)
        baseline = np.random.normal(5.5, 1.2)
        mood = baseline  # running happiness carried from day to day
        # (ease, next-day momentum bonus, probability ceiling) per habit,
        # listed in output-column order: calendar, clean, on-time.
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        yesterday = ['No', 'No', 'No']
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend: lower adherence.
            weekend_scale = 0.75 if day % 7 in (0, 6) else 1.0
            # Three phases: slow start, momentum in the middle, mild fatigue late.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85
            today = []
            for (ease, momentum, ceiling), previous in zip(habits, yesterday):
                # A habit done yesterday is more likely to be done again today.
                carry = momentum if previous == 'Yes' else 0
                p_yes = clip_yes_prob(ease * weekend_scale * phase_scale + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            done = sum(answer == 'Yes' for answer in today)
            # 1.2 points per completed habit; zero habits contribute nothing.
            boost = done * 1.2
            noise = np.random.normal(0, 1.3)
            # Persistence: 40% of yesterday's value plus 40% of the baseline.
            mood = np.clip(mood * 0.4 + baseline * 0.4 + boost * 0.9 + noise, 1, 10)
            records.append([pid, 'Intervention', day, *today, int(np.round(mood))])
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate the daily records of every control-group participant.

    Control participants do not track habits, so all three habit columns are
    always reported as 'No'. Hidden ("untracked") habits are still simulated
    each day and subtly raise happiness. Happiness is a running value: each
    day it mixes yesterday's value, the personal baseline, the untracked
    boost and noise, then is clipped to [1, 10].

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    # (multiplier on natural organisation, probability ceiling) for the
    # untracked calendar, clean and on-time habits, in RNG draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Per-person natural organisation, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline  # carried from day to day
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend (mild reduction).
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak habituation: the factor rises from 1.001 to 1.03.
            time_factor = 1.0 + (day / 100) * 0.1
            outcomes = []
            for multiplier, ceiling in untracked_habits:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(
                    natural_org * multiplier * week_factor * time_factor, ceiling
                )
                outcomes.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            untracked_count = sum(outcome == 'Yes' for outcome in outcomes)
            # Untracked habits still help, but only subtly: 0.5 per habit.
            subtle_boost = untracked_count * 0.5
            happiness_noise = np.random.normal(0, 1.6)
            # Persistence: half of yesterday's value plus half of the baseline.
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))
            # Habit columns are always 'No' because this group does not track.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble the dataset: intervention rows first, then control rows whose
# participant IDs continue right after the intervention group's.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write both groups to a single CSV (no index column) and preview the result.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Study design: 30 consecutive study days and 40 participants in each arm.
DAYS = [*range(1, 31)]
N_PARTICIPANTS_PER_GROUP = 40

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant gets a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are drawn as 'Yes'/'No', a running habit strength is updated, and
    a 1-10 happiness score is built from the baseline, an immediate bonus for
    today's habits, a cumulative habit-strength bonus and noise.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    # Habit-strength change indexed by how many habits were completed today:
    # a decay for zero, then increasing gains for one, two and three habits.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        # Persistent per-person tendency, kept inside [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # The order of these draws must not change, or the seeded data changes.
        calendar_ease = tendency + np.random.normal(0.05, 0.08)
        clean_ease = tendency + np.random.normal(-0.02, 0.08)
        ontime_ease = tendency + np.random.normal(0.02, 0.08)
        baseline = np.random.normal(4.0, 1.0)
        # (ease, next-day momentum bonus, probability ceiling) per habit,
        # listed in output-column order: calendar, clean, on-time.
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        strength = 0.0
        yesterday = ['No', 'No', 'No']
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend: lower adherence.
            weekend_scale = 0.75 if day % 7 in (0, 6) else 1.0
            # Three phases: slow start, momentum in the middle, mild fatigue late.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85
            today = []
            for (ease, momentum, ceiling), previous in zip(habits, yesterday):
                # A habit done yesterday is more likely to be done again today.
                carry = momentum if previous == 'Yes' else 0
                p_yes = clip_yes_prob(ease * weekend_scale * phase_scale + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            done = sum(answer == 'Yes' for answer in today)
            # Cumulative habit strength, bounded to [0, 5].
            strength = np.clip(strength + strength_delta[done], 0, 5)
            progress = day / 30.0
            noise = np.random.normal(0, 0.35)
            # Immediate dose-response: 0.6 points per habit done today (0 to 1.8).
            daily_bonus = done * 0.6
            # Cumulative part: the weight rises toward 0.6, so at most 5 * 0.6 = 3.0.
            cumulative = strength * (0.4 + progress * 0.2)
            raw = baseline + daily_bonus + cumulative + noise
            records.append(
                [pid, 'Intervention', day, *today, int(np.clip(np.round(raw), 1, 10))]
            )
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate the daily records of every control-group participant.

    Control participants do not track habits, so all three habit columns are
    always reported as 'No'. Hidden ("untracked") habits are still simulated
    each day, but only to add a small boost to that day's happiness score.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    # (multiplier on natural organisation, probability ceiling) for the
    # untracked calendar, clean and on-time habits, in RNG draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Per-person natural organisation, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Participant-level happiness baseline centred on 4.9, constant across days.
        person_happiness_baseline = np.random.normal(4.9, 0.9)
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend (mild reduction).
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak habituation: the factor rises from 1.001 to 1.03.
            time_factor = 1.0 + (day / 100) * 0.1
            outcomes = []
            for multiplier, ceiling in untracked_habits:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(
                    natural_org * multiplier * week_factor * time_factor, ceiling
                )
                outcomes.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            untracked_count = sum(outcome == 'Yes' for outcome in outcomes)
            subtle_boost = untracked_count * 0.1  # at most 0.3 per day
            daily_noise = np.random.normal(0, 1.0)
            # No cumulative term: today's score ignores earlier days.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            # Habit columns are always 'No' because this group does not track.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble the dataset: intervention rows first, then control rows whose
# participant IDs continue right after the intervention group's.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write both groups to a single CSV (no index column) and preview the result.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Study design: 30 consecutive study days and 40 participants in each arm.
DAYS = [*range(1, 31)]
N_PARTICIPANTS_PER_GROUP = 40

# Seed NumPy's global RNG so every run regenerates the identical dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability from below by 0.05 and from above by ``ceiling``.

    The floor is applied first and the ceiling last, so when ``ceiling`` is
    itself below 0.05 the ceiling wins.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate the daily records of every intervention-group participant.

    Each participant gets a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are drawn as 'Yes'/'No', a running habit strength is updated, and
    a 1-10 happiness score is built from the baseline, an immediate bonus for
    today's habits, a cumulative habit-strength bonus and noise.

    Args:
        start_participant_id: ID of the first participant; the following
            participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``.
    """
    # Habit-strength change indexed by how many habits were completed today:
    # a decay for zero, then increasing gains for one, two and three habits.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)
    records = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + offset
        # Persistent per-person tendency, kept inside [0.1, 0.95].
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # The order of these draws must not change, or the seeded data changes.
        calendar_ease = tendency + np.random.normal(0.05, 0.08)
        clean_ease = tendency + np.random.normal(-0.02, 0.08)
        ontime_ease = tendency + np.random.normal(0.02, 0.08)
        baseline = np.random.normal(4.0, 1.0)
        # (ease, next-day momentum bonus, probability ceiling) per habit,
        # listed in output-column order: calendar, clean, on-time.
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        strength = 0.0
        yesterday = ['No', 'No', 'No']
        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 count as weekend: lower adherence.
            weekend_scale = 0.75 if day % 7 in (0, 6) else 1.0
            # Three phases: slow start, momentum in the middle, mild fatigue late.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85
            today = []
            for (ease, momentum, ceiling), previous in zip(habits, yesterday):
                # A habit done yesterday is more likely to be done again today.
                carry = momentum if previous == 'Yes' else 0
                p_yes = clip_yes_prob(ease * weekend_scale * phase_scale + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))
            done = sum(answer == 'Yes' for answer in today)
            # Cumulative habit strength, bounded to [0, 5].
            strength = np.clip(strength + strength_delta[done], 0, 5)
            progress = day / 30.0
            noise = np.random.normal(0, 0.35)
            # Immediate dose-response: 0.6 points per habit done today (0 to 1.8).
            daily_bonus = done * 0.6
            # Cumulative part: the weight rises toward 0.6, so at most 5 * 0.6 = 3.0.
            cumulative = strength * (0.4 + progress * 0.2)
            raw = baseline + daily_bonus + cumulative + noise
            records.append(
                [pid, 'Intervention', day, *today, int(np.clip(np.round(raw), 1, 10))]
            )
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
        The three habit columns are always ``'No'`` because this arm does not
        track habits; ``happiness`` is an int clipped to the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline.
        # NOTE: the intervention generator in this script draws its baseline
        # from N(4.8, 1.1), so the two arms do not start from the same mean.
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1
            # Each probability is computed once and reused for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)
            # Untracked habits: they happen but are never reported.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            # Tiny same-day effect; nothing accumulates from day to day.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Intervention rows are generated first (IDs from 1), then control rows
# (IDs continuing after the intervention arm), so the seeded RNG stream is
# consumed in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,92 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(42)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability: at least 0.05, then at most ``ceiling``.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``. The habit values are ``'Yes'``/``'No'`` and
        ``happiness`` is an int in the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Each person has their own organisation tendency.
        org_bias = np.random.normal(0.7, 0.15)
        # The "Yes" probabilities depend only on org_bias, so they are
        # computed once per participant instead of twice on every day.
        calendar_prob = clip_yes_prob(org_bias + 0.1, 0.95)
        clean_prob = clip_yes_prob(org_bias, 0.90)
        ontime_prob = clip_yes_prob(org_bias + 0.05, 0.92)
        for day in DAYS:
            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            # The baseline is redrawn every day, not once per participant.
            baseline_happiness = np.random.normal(5.5, 1.0)
            # Each completed habit shifts the mean by 1.1 points.
            # NOTE: int() truncates toward zero rather than rounding; this is
            # kept so the value is handled the same way as in the control arm.
            happiness = int(np.clip(
                np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2), 1, 10
            ))
            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control arm.

    The control arm records happiness only, so every habit column is 'No'.
    Returns one row per participant per day in ``DAYS``:
    ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    def _draw_happiness():
        # A fresh baseline and a fresh noise term are drawn for every day;
        # int() truncates the clipped sum to a whole score in 1..10.
        level = np.random.normal(5.5, 1.0)
        jitter = np.random.normal(0.0, 1.1)
        return int(np.clip(level + jitter, 1, 10))

    return [
        [start_participant_id + offset, 'Control', day, 'No', 'No', 'No', _draw_happiness()]
        for offset in range(N_PARTICIPANTS_PER_GROUP)
        for day in DAYS
    ]
# Intervention participants take IDs from 1; control participants continue
# from N_PARTICIPANTS_PER_GROUP + 1.  The left operand is evaluated first, so
# the intervention arm consumes the seeded RNG before the control arm.
data = (
    generate_intervention_group(start_participant_id=1)
    + generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1)
)
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(43)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 40
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Return ``prob`` raised to at least 0.05 and then capped at ``ceiling``."""
    bounded = 0.05
    if prob > bounded:
        bounded = prob
    if bounded < ceiling:
        return bounded
    return ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets a persistent organisation tendency, a per-habit
    ease, a happiness baseline and a cumulative habit strength. Returns one
    row per participant per day in ``DAYS``:
    ``[participant_id, 'Intervention', day, calendar, clean, ontime,
    happiness]`` with ``happiness`` an int in 1..10.
    """
    # Change in cumulative habit strength, keyed by habits completed that day.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Persistent organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease; drawn in calendar, cleanliness, punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # (ease, momentum bonus after a "Yes" yesterday, probability ceiling)
        habit_specs = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        yesterday = ('No', 'No', 'No')
        for day in DAYS:
            # Days with day % 7 in {0, 6} count as weekend days (harder).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Harder start (days 1-6), easier middle (7-19), slight fatigue after.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)
            today = []
            for (ease, momentum, ceiling), done_before in zip(habit_specs, yesterday):
                carry = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(ease * week_difficulty * time_factor + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))
            calendar, clean, ontime = today
            adherence_count = today.count('Yes')
            # Strength grows with completed habits, decays on a fully missed
            # day, and stays inside [0, 5].
            habit_strength = np.clip(habit_strength + strength_step[adherence_count], 0, 5)
            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)
            # Same-day effect: 0.6 points per completed habit (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect: at most 5 * 0.6 = 3.0 on the final day.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([participant_id, 'Intervention', day, calendar, clean, ontime, happiness])
            # Today's outcomes feed tomorrow's momentum bonus.
            yesterday = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
        The three habit columns are always ``'No'`` because this arm does not
        track habits; ``happiness`` is an int clipped to the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline, centred near 5.
        # NOTE: the intervention generator in this script starts from
        # N(4.0, 1.0), so the control arm begins about one point higher.
        person_happiness_baseline = np.random.normal(5.1, 0.9)
        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1
            # Each probability is computed once and reused for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)
            # Untracked habits: they happen but are never reported.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            # Tiny same-day effect; nothing accumulates from day to day.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Intervention rows are generated first (IDs from 1), then control rows
# (IDs continuing after the intervention arm), so the seeded RNG stream is
# consumed in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,180 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(42)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability: at least 0.05, then at most ``ceiling``.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Happiness follows a persistent process: each day blends the previous
    day's value, the participant's baseline, a boost from the habits
    completed that day, and noise. Returns one row per participant per day in
    ``DAYS``: ``[participant_id, 'Intervention', day, calendar, clean,
    ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Persistent organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease; drawn in calendar, cleanliness, punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        person_happiness_baseline = np.random.normal(5.5, 1.2)
        current_happiness = person_happiness_baseline
        prev_calendar = prev_clean = prev_ontime = 'No'
        for day in DAYS:
            # Days with day % 7 in {0, 6} count as weekend days (harder).
            is_weekend = day % 7 in (0, 6)
            week_difficulty = 0.75 if is_weekend else 1.0
            # Harder start (days 1-6), easier middle (7-19), slight fatigue after.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85
            # Momentum: a habit done yesterday is more likely to be done today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0
            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )
            # The generator is consumed left to right, preserving draw order.
            calendar, clean, ontime = (
                np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes])
                for p_yes in (calendar_prob, clean_prob, ontime_prob)
            )
            adherence_count = sum(1 for habit in (calendar, clean, ontime) if habit == 'Yes')
            # 1.2 per completed habit; zero habits gives a zero boost.
            habit_boost = adherence_count * 1.2
            happiness_noise = np.random.normal(0, 1.3)
            blended = (
                current_happiness * 0.4            # carry-over from yesterday
                + person_happiness_baseline * 0.4  # pull toward the baseline
                + habit_boost * 0.9                # effect of today's habits
                + happiness_noise
            )
            current_happiness = np.clip(blended, 1, 10)
            happiness = int(np.round(current_happiness))
            rows.append([participant_id, 'Intervention', day, calendar, clean, ontime, happiness])
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
        The three habit columns are always ``'No'`` because this arm does not
        track habits; ``happiness`` is an int in the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant baseline; the mean (5.0) is below the 5.5 used by
        # the intervention generator in this script.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline
        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1
            # Each probability is computed once and reused for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)
            # Untracked habits: they happen but are never reported.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            # Unreported habits still affect happiness subtly.
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.5
            # Larger noise than the intervention arm's N(0, 1.3).
            happiness_noise = np.random.normal(0, 1.6)
            # Happiness persists: half yesterday's value, half the baseline.
            current_happiness = np.clip(
                current_happiness * 0.5 +
                person_happiness_baseline * 0.5 +
                subtle_boost +
                happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Intervention participants take IDs from 1; control participants continue
# from N_PARTICIPANTS_PER_GROUP + 1.  The left operand is evaluated first, so
# the intervention arm consumes the seeded RNG before the control arm.
data = (
    generate_intervention_group(start_participant_id=1)
    + generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1)
)
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(42)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 40
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Return ``prob`` raised to at least 0.05 and then capped at ``ceiling``."""
    bounded = 0.05
    if prob > bounded:
        bounded = prob
    if bounded < ceiling:
        return bounded
    return ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Happiness is the participant's baseline plus a term driven only by the
    cumulative habit strength (weighted more heavily as the study progresses)
    plus small daily noise. Returns one row per participant per day in
    ``DAYS``: ``[participant_id, 'Intervention', day, calendar, clean,
    ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    # Habit-strength change indexed by the number of habits completed (0..3).
    strength_change = (-0.2, 0.15, 0.35, 0.6)
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Persistent organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease; drawn in calendar, cleanliness, punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # (ease, momentum bonus after a "Yes" yesterday, probability ceiling)
        habit_specs = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        yesterday = ('No', 'No', 'No')
        for day in DAYS:
            # Days with day % 7 in {0, 6} count as weekend days (harder).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Harder start (days 1-6), easier middle (7-19), slight fatigue after.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)
            today = []
            for (ease, momentum, ceiling), done_before in zip(habit_specs, yesterday):
                carry = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(ease * week_difficulty * time_factor + carry, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))
            calendar, clean, ontime = today
            adherence_count = today.count('Yes')
            # Strength grows with completed habits, decays on a fully missed
            # day, and stays inside [0, 5].
            habit_strength = np.clip(habit_strength + strength_change[adherence_count], 0, 5)
            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.4)
            # The strength multiplier rises from about 0.91 to 1.2, so this
            # term can reach 5 * 1.2 = 6.0 before the final 1..10 clip.
            habit_effect = habit_strength * (0.9 + study_progress * 0.3)
            happiness_value = person_happiness_baseline + habit_effect + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([participant_id, 'Intervention', day, calendar, clean, ontime, happiness])
            # Today's outcomes feed tomorrow's momentum bonus.
            yesterday = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
        The three habit columns are always ``'No'`` because this arm does not
        track habits; ``happiness`` is an int clipped to the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Same N(4.0, 1.0) baseline as the intervention generator in this
        # script, so neither arm starts with an advantage.
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1
            # Each probability is computed once and reused for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)
            # Untracked habits: they happen but are never reported.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            # Tiny same-day effect; nothing accumulates from day to day.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Intervention rows are generated first (IDs from 1), then control rows
# (IDs continuing after the intervention arm), so the seeded RNG stream is
# consumed in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(64)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Bound a "Yes" probability: at least 0.05, then at most ``ceiling``.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Happiness is the participant's baseline plus a term driven by the
    cumulative habit strength (weighted more heavily as the study progresses)
    plus daily noise. Returns one row per participant per day in ``DAYS``:
    ``[participant_id, 'Intervention', day, calendar, clean, ontime,
    happiness]`` with ``happiness`` an int in 1..10.
    """
    # Change in cumulative habit strength, keyed by habits completed that day.
    strength_step = {0: -0.2, 1: 0.15, 2: 0.35, 3: 0.6}
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Persistent organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease; drawn in calendar, cleanliness, punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        person_happiness_baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0
        prev_calendar = prev_clean = prev_ontime = 'No'
        for day in DAYS:
            # Days with day % 7 in {0, 6} count as weekend days (harder).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Harder start (days 1-6), easier middle (7-19), slight fatigue after.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85
            # Momentum: a habit done yesterday is more likely to be done today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0
            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )
            # The generator is consumed left to right, preserving draw order.
            calendar, clean, ontime = (
                np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes])
                for p_yes in (calendar_prob, clean_prob, ontime_prob)
            )
            adherence_count = [calendar, clean, ontime].count('Yes')
            # Strength grows with completed habits, decays on a fully missed
            # day, and stays inside [0, 4].
            habit_strength = np.clip(habit_strength + strength_step[adherence_count], 0, 4)
            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.7)
            # The strength multiplier rises from about 0.53 to 1.5 over the study.
            habit_effect = habit_strength * (0.5 + study_progress)
            happiness_value = person_happiness_baseline + habit_effect + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([participant_id, 'Intervention', day, calendar, clean, ontime, happiness])
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    Args:
        start_participant_id: ID assigned to the first simulated participant;
            the remaining participants receive consecutive IDs.

    Returns:
        A list with one row per participant per day in ``DAYS``:
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
        The three habit columns are always ``'No'`` because this arm does not
        track habits; ``happiness`` is an int in the range 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline.
        # NOTE: the mean here (5.0) is slightly above the 4.8 used by the
        # intervention generator in this script, not below it.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline
        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1
            # Each probability is computed once and reused for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)
            # Untracked habits: they happen but are never reported.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            # Unreported habits still affect happiness subtly.
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.5
            # Larger noise than the intervention arm's N(0, 0.7).
            happiness_noise = np.random.normal(0, 1.6)
            # Happiness persists: half yesterday's value, half the baseline.
            current_happiness = np.clip(
                current_happiness * 0.5 +
                person_happiness_baseline * 0.5 +
                subtle_boost +
                happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Intervention participants take IDs from 1; control participants continue
# from N_PARTICIPANTS_PER_GROUP + 1.  The left operand is evaluated first, so
# the intervention arm consumes the seeded RNG before the control arm.
data = (
    generate_intervention_group(start_participant_id=1)
    + generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1)
)
df = pd.DataFrame(
    data,
    columns=['Participant_ID', 'Group', 'Day', 'Calendar_Adherence',
             'Cleanliness_Adherence', 'Punctuality_Adherence', 'Happiness'],
)
# Write the combined dataset to the current working directory and preview it.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,144 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run reproduces exactly the same dataset.
np.random.seed(42)
# Number of simulated participants in each arm (intervention and control).
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Return ``prob`` raised to at least 0.05 and then capped at ``ceiling``."""
    bounded = 0.05
    if prob > bounded:
        bounded = prob
    if bounded < ceiling:
        return bounded
    return ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit reports and happiness for the intervention arm.

    Each participant draws a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are sampled as 'Yes'/'No', and happiness is a blend of the previous
    day's value, the personal baseline, a habit bonus and random noise.

    Args:
        start_participant_id: ID of the first participant; the remaining
            participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` row per participant per day.
    """
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # (ease, momentum bonus after a 'Yes' day, probability ceiling) for
        # calendar, clean and on-time, in that order. The random draws must
        # stay in this order to reproduce the seeded dataset.
        habits = (
            (org_bias + np.random.normal(0.05, 0.08), 0.15, 0.95),
            (org_bias + np.random.normal(-0.02, 0.08), 0.15, 0.90),
            (org_bias + np.random.normal(0.02, 0.08), 0.12, 0.93),
        )

        person_happiness_baseline = np.random.normal(5.5, 1.2)
        mood = person_happiness_baseline
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence is harder in the first week, easiest mid-study,
            # and dips slightly from day 20 onward.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, yesterday):
                bonus = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + bonus, ceiling
                )
                today.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )

            # Each completed habit adds 1.2 to the raw boost (0 when none done).
            habit_boost = today.count('Yes') * 1.2

            happiness_noise = np.random.normal(0, 1.3)
            mood = np.clip(
                mood * 0.4
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9
                + happiness_noise,
                1, 10
            )

            records.append(
                [participant_id, 'Intervention', day, *today, int(np.round(mood))]
            )
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate the control arm, which records happiness only.

    All three habit columns are always reported as 'No'. Happiness is the sum
    of two normal draws, rounded to the nearest integer and limited to 1..10.

    Args:
        start_participant_id: ID of the first control participant; the
            remaining participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` row per participant per day.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        for day in DAYS:
            # NOTE(review): the "baseline" is redrawn every day, so there is no
            # per-participant persistence in this group - confirm this is intended.
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # Round before converting. A bare int() truncates toward zero and
            # pulled the control mean down by about half a point, while the
            # intervention group rounds its scores.
            happiness = int(
                np.clip(np.round(baseline_happiness + control_noise), 1, 10)
            )
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: the intervention arm is generated first with
# IDs starting at 1, and the control arm continues the ID sequence after it.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; the three adherence columns hold 'Yes'/'No'.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the CSV without the DataFrame index, then show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/Data%20Gen.py","entries":[{"id":"54EK.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345349390},{"id":"PpFf.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345378739},{"id":"cTNf.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347044805},{"id":"WSl3.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347057825},{"id":"9dqp.py","timestamp":1774347206509},{"id":"blt8.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347345483},{"id":"3jGE.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? 
at the moment, the intervention group is happier after a single day.'","timestamp":1774347365731},{"id":"sBVR.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347432858},{"id":"j9Wc.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347711480},{"id":"bLJN.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347763541},{"id":"MJ5p.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347783690},{"id":"EkUx.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347933805},{"id":"u91r.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347955983},{"id":"QTk6.py","timestamp":1774348022105},{"id":"o2Y7.py","timestamp":1774348397371},{"id":"46oA.py","timestamp":1774352345991}]}

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so every run produces the same data.
np.random.seed(42)
# Number of simulated participants generated for each group.
N_PARTICIPANTS_PER_GROUP = 40
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it can be passed to a random draw.

    Args:
        prob: Unclamped probability. Callers build it from products and sums,
            so it may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned. Defaults to 0.05, the
            value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. If ``ceiling`` is below
        ``floor`` the ceiling wins.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit reports and happiness for the intervention arm.

    Each participant draws a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are sampled as 'Yes'/'No', a running ``habit_strength`` is updated
    from how many were completed, and happiness is baseline + a habit term
    that grows with study progress + noise, rounded and limited to 1..10.

    Args:
        start_participant_id: ID of the first participant; the remaining
            participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` row per participant per day.
    """
    # Change in habit_strength keyed by the number of habits completed that day.
    strength_delta = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Study length comes from DAYS rather than a hard-coded 30, so the
    # progress ratio still ends at 1.0 if the study length changes.
    final_day = max(DAYS, default=1)

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # (ease, momentum bonus after a 'Yes' day, probability ceiling) for
        # calendar, clean and on-time, in that order. The random draws must
        # stay in this order to reproduce the seeded dataset.
        habits = (
            (org_bias + np.random.normal(0.05, 0.08), 0.15, 0.95),
            (org_bias + np.random.normal(-0.02, 0.08), 0.15, 0.90),
            (org_bias + np.random.normal(0.02, 0.08), 0.12, 0.93),
        )

        # Per-person starting happiness (mean 4.0, standard deviation 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        previous = ['No', 'No', 'No']

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence is harder in the first week, easiest mid-study,
            # and dips slightly from day 20 onward.
            if day < 7:
                time_factor = 0.85
            elif day < 20:
                time_factor = 1.1
            else:
                time_factor = 0.98

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, previous):
                bonus = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + bonus, ceiling
                )
                today.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            adherence_count = today.count('Yes')

            # Strength builds with completed habits, decays on a day with
            # none, and is kept within [0, 3].
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 3
            )

            study_progress = day / final_day
            daily_noise = np.random.normal(0, 0.5)
            # The habit term peaks at 3 * (0.4 + 0.5) = 2.7 on the final day.
            happiness_value = (
                person_happiness_baseline
                + habit_strength * (0.4 + study_progress * 0.5)
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            previous = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate the control arm, which reports happiness but no habits.

    Habits still occur behind the scenes with low probability and nudge
    happiness slightly, but every habit column is reported as 'No'.

    Args:
        start_participant_id: ID of the first control participant; the
            remaining participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` row per participant per day.
    """
    # (weight on natural organisation, probability ceiling) for the untracked
    # calendar, cleaning and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-person happiness baseline (mean 4.0, standard deviation 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 get a mild weekend penalty.
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak upward drift in habit probability across the study.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the clipped probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Each untracked habit adds only 0.1 and nothing accumulates.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are always reported as "No" because this group does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: the intervention arm is generated first with
# IDs starting at 1, and the control arm continues the ID sequence after it.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; the three adherence columns hold 'Yes'/'No'.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the CSV without the DataFrame index, then show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so every run produces the same data.
np.random.seed(42)
# Number of simulated participants generated for each group.
N_PARTICIPANTS_PER_GROUP = 40
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it can be passed to a random draw.

    Args:
        prob: Unclamped probability. Callers build it from products and sums,
            so it may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned. Defaults to 0.05, the
            value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. If ``ceiling`` is below
        ``floor`` the ceiling wins.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit reports and happiness for the intervention arm.

    Each participant draws a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are sampled as 'Yes'/'No'. Happiness is baseline + an immediate
    bonus for today's habits + a cumulative bonus from ``habit_strength`` +
    noise, rounded and limited to 1..10.

    Args:
        start_participant_id: ID of the first participant; the remaining
            participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` row per participant per day.
    """
    # Change in habit_strength keyed by the number of habits completed that day.
    strength_delta = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Study length comes from DAYS rather than a hard-coded 30, so the
    # progress ratio still ends at 1.0 if the study length changes.
    final_day = max(DAYS, default=1)

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # (ease, momentum bonus after a 'Yes' day, probability ceiling) for
        # calendar, clean and on-time, in that order. The random draws must
        # stay in this order to reproduce the seeded dataset.
        habits = (
            (org_bias + np.random.normal(0.05, 0.08), 0.15, 0.95),
            (org_bias + np.random.normal(-0.02, 0.08), 0.15, 0.90),
            (org_bias + np.random.normal(0.02, 0.08), 0.12, 0.93),
        )

        # Per-person starting happiness (mean 4.0, standard deviation 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        previous = ['No', 'No', 'No']

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence is harder in the first week, easiest mid-study,
            # and dips slightly from day 20 onward.
            if day < 7:
                time_factor = 0.85
            elif day < 20:
                time_factor = 1.1
            else:
                time_factor = 0.98

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, previous):
                bonus = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + bonus, ceiling
                )
                today.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            adherence_count = today.count('Yes')

            # Strength builds with completed habits, decays on a day with
            # none, and is kept within [0, 5].
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 5
            )

            study_progress = day / final_day
            daily_noise = np.random.normal(0, 0.35)
            # Immediate dose-response: 0.6 per habit done today (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect, at most 5 * (0.4 + 0.2) = 3.0 on the final day.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            previous = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate the control arm, which reports happiness but no habits.

    Habits still occur behind the scenes with low probability and nudge
    happiness slightly, but every habit column is reported as 'No'.

    Args:
        start_participant_id: ID of the first control participant; the
            remaining participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` row per participant per day.
    """
    # (weight on natural organisation, probability ceiling) for the untracked
    # calendar, cleaning and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-person happiness baseline (mean 5.1, standard deviation 0.9),
        # chosen so the control group centres around 5.
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 get a mild weekend penalty.
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak upward drift in habit probability across the study.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the clipped probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Each untracked habit adds only 0.1 and nothing accumulates.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are always reported as "No" because this group does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: the intervention arm is generated first with
# IDs starting at 1, and the control arm continues the ID sequence after it.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; the three adherence columns hold 'Yes'/'No'.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the CSV without the DataFrame index, then show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,196 @@
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so every run produces the same data.
np.random.seed(42)
# Number of simulated participants generated for each group.
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it can be passed to a random draw.

    Args:
        prob: Unclamped probability. Callers build it from products and sums,
            so it may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned. Defaults to 0.05, the
            value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. If ``ceiling`` is below
        ``floor`` the ceiling wins.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit reports and happiness for the intervention arm.

    Each participant draws a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are sampled as 'Yes'/'No', a running ``habit_strength`` is updated
    from how many were completed, and happiness is baseline + a habit term
    that grows with study progress + noise, rounded and limited to 1..10.

    Args:
        start_participant_id: ID of the first participant; the remaining
            participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` row per participant per day.
    """
    # Change in habit_strength keyed by the number of habits completed that day.
    strength_delta = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Study length comes from DAYS rather than a hard-coded 30, so the
    # progress ratio still ends at 1.0 if the study length changes.
    final_day = max(DAYS, default=1)

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # (ease, momentum bonus after a 'Yes' day, probability ceiling) for
        # calendar, clean and on-time, in that order. The random draws must
        # stay in this order to reproduce the seeded dataset.
        habits = (
            (org_bias + np.random.normal(0.05, 0.08), 0.15, 0.95),
            (org_bias + np.random.normal(-0.02, 0.08), 0.15, 0.90),
            (org_bias + np.random.normal(0.02, 0.08), 0.12, 0.93),
        )

        # Per-person starting happiness (mean 4.8, standard deviation 1.1).
        person_happiness_baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0
        previous = ['No', 'No', 'No']

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence is harder in the first week, easiest mid-study,
            # and dips slightly from day 20 onward.
            if day < 7:
                time_factor = 0.85
            elif day < 20:
                time_factor = 1.1
            else:
                time_factor = 0.98

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, previous):
                bonus = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + bonus, ceiling
                )
                today.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            adherence_count = today.count('Yes')

            # Strength builds with completed habits, decays on a day with
            # none, and is kept within [0, 4].
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 4
            )

            study_progress = day / final_day
            daily_noise = np.random.normal(0, 0.5)
            # The habit term can reach 4 * (0.5 + 1.0) = 6.0 on the final day,
            # before the 1..10 limit is applied.
            happiness_value = (
                person_happiness_baseline
                + habit_strength * (0.5 + study_progress)
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            previous = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate the control arm, which reports happiness but no habits.

    Habits still occur behind the scenes with low probability and nudge
    happiness slightly, but every habit column is reported as 'No'.

    Args:
        start_participant_id: ID of the first control participant; the
            remaining participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` row per participant per day.
    """
    # (weight on natural organisation, probability ceiling) for the untracked
    # calendar, cleaning and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-person happiness baseline (mean 4.8, standard deviation 1.3).
        person_happiness_baseline = np.random.normal(4.8, 1.3)

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 get a mild weekend penalty.
            week_factor = 0.9 if day % 7 in (0, 6) else 1.0
            # Very weak upward drift in habit probability across the study.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the clipped probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Each untracked habit adds only 0.1 and nothing accumulates.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are always reported as "No" because this group does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: the intervention arm is generated first with
# IDs starting at 1, and the control arm continues the ID sequence after it.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; the three adherence columns hold 'Yes'/'No'.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the CSV without the DataFrame index, then show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so every run produces the same data.
np.random.seed(42)
# Number of simulated participants generated for each group.
N_PARTICIPANTS_PER_GROUP = 40
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it can be passed to a random draw.

    Args:
        prob: Unclamped probability. Callers build it from products and sums,
            so it may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned. Defaults to 0.05, the
            value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. If ``ceiling`` is below
        ``floor`` the ceiling wins.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit reports and happiness for the intervention arm.

    Each participant draws a persistent organisation tendency, three per-habit
    ease values and a happiness baseline. For every day in ``DAYS`` the three
    habits are sampled as 'Yes'/'No'. Happiness is baseline + an immediate
    bonus for today's habits + a cumulative bonus from ``habit_strength`` +
    noise, rounded and limited to 1..10.

    Args:
        start_participant_id: ID of the first participant; the remaining
            participants are numbered consecutively.

    Returns:
        list: One ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` row per participant per day.
    """
    # Change in habit_strength keyed by the number of habits completed that day.
    strength_delta = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Study length comes from DAYS rather than a hard-coded 30, so the
    # progress ratio still ends at 1.0 if the study length changes.
    final_day = max(DAYS, default=1)

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # (ease, momentum bonus after a 'Yes' day, probability ceiling) for
        # calendar, clean and on-time, in that order. The random draws must
        # stay in this order to reproduce the seeded dataset.
        habits = (
            (org_bias + np.random.normal(0.05, 0.08), 0.15, 0.95),
            (org_bias + np.random.normal(-0.02, 0.08), 0.15, 0.90),
            (org_bias + np.random.normal(0.02, 0.08), 0.12, 0.93),
        )

        # Per-person starting happiness (mean 4.0, standard deviation 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        previous = ['No', 'No', 'No']

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence is harder in the first week, easiest mid-study,
            # and dips slightly from day 20 onward.
            if day < 7:
                time_factor = 0.85
            elif day < 20:
                time_factor = 1.1
            else:
                time_factor = 0.98

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, previous):
                bonus = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + bonus, ceiling
                )
                today.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            adherence_count = today.count('Yes')

            # Strength builds with completed habits, decays on a day with
            # none, and is kept within [0, 5].
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 5
            )

            study_progress = day / final_day
            daily_noise = np.random.normal(0, 0.35)
            # Immediate dose-response: 0.6 per habit done today (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect, at most 5 * (0.4 + 0.2) = 3.0 on the final day.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            previous = today
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily records for the control (non-tracking) participants.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants and each day in
    ``DAYS``, three untracked habits are drawn at random and a 1-10
    happiness score is computed from a per-person baseline, a small boost
    per untracked habit, and Gaussian noise. The three adherence columns
    are always recorded as ``'No'`` because this group does not track.

    All draws come from NumPy's global RNG in a fixed order (two draws per
    participant, then four per day), so the output depends on the RNG state
    at call time.

    Args:
        start_participant_id: ID given to the first participant; the
            following participants receive consecutive IDs.

    Returns:
        A list of rows of the form
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``.
    """
    # (multiplier on natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    habit_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized.
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Per-person happiness baseline, drawn around 5.1 (sd 0.9).
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days and
            # get a mild 10% reduction in habit probability.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9

            # Very weak habituation: grows linearly by 0.001 per day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits: they happen but are never reported.
            # The multiplication order is kept as-is so probabilities are
            # bit-for-bit stable, and each probability is computed once.
            untracked_count = 0
            for scale, ceiling in habit_params:
                yes_prob = clip_yes_prob(
                    natural_org * scale * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(
                    ['Yes', 'No'], p=[yes_prob, 1 - yes_prob]
                )
                if outcome == 'Yes':
                    untracked_count += 1

            # Tiny effect per habit since it is unaware/untracked.
            subtle_boost = untracked_count * 0.1

            # Day-to-day variability only: there is no cumulative term, so
            # happiness has no systematic growth over the study.
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = (
                person_happiness_baseline
                + subtle_boost
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate both groups. Intervention rows come first and control participant
# IDs continue right after the intervention IDs. The call order matters because
# both generators draw from the same seeded NumPy RNG.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; column order matches the row layout
# produced by the two generator functions.
df = pd.DataFrame(data, columns=[
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to disk (no index column) and show a preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows