Update Fedora state: 2026-04-29 11:50

This commit is contained in:
Breadway 2026-04-29 11:50:42 +08:00
parent 42ca768584
commit 10f0d5de1d
338 changed files with 18983 additions and 32 deletions

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Study design: 40 participants in each arm, observed on days 1 through 30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = [*range(1, 31)]

# Fixed seed for the global NumPy RNG so every run regenerates the same data.
np.random.seed(42)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability; it may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``floor <= ceiling``.
            Defaults to 0.05.

    Returns:
        ``prob`` when it already lies between ``floor`` and ``ceiling``,
        otherwise the nearer bound. The ceiling is applied last, so it wins
        if ``ceiling`` is smaller than ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention arm.

    One row is produced per participant per day, laid out as
    ``[participant_id, 'Intervention', day, calendar, clean, ontime,
    happiness]``. Participant ids run consecutively upward from
    ``start_participant_id``; the three habit fields are 'Yes'/'No' and
    happiness is an integer from 1 to 10.

    All randomness comes from the global ``np.random`` state, so the order
    of the draws below determines the generated data.

    Args:
        start_participant_id: Id assigned to the first participant.

    Returns:
        A list of row lists, N_PARTICIPANTS_PER_GROUP * len(DAYS) long.
    """
    # Change in habit strength keyed by the number of habits done that day:
    # it grows with any completion and shrinks only when all three are missed.
    strength_change = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent, person-level tendency to be organised.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease: the shared tendency plus a habit-specific offset.
        ease_calendar = tendency + np.random.normal(0.05, 0.08)
        ease_clean = tendency + np.random.normal(-0.02, 0.08)
        ease_ontime = tendency + np.random.normal(0.02, 0.08)

        # Personal happiness level before any habit effects are added.
        baseline = np.random.normal(4.0, 1.0)

        strength = 0.0  # cumulative habit strength, kept within [0, 5]
        last_calendar = last_clean = last_ontime = 'No'

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as weekends,
            # which lowers adherence.
            is_weekend = day % 7 in (0, 6)
            weekday_scale = 0.75 if is_weekend else 1.0

            # Study phase: a slow start, a momentum stretch, then mild fatigue.
            if day >= 20:
                phase_scale = 0.98
            elif day >= 7:
                phase_scale = 1.1
            else:
                phase_scale = 0.85

            # Doing a habit yesterday raises the chance of doing it today.
            momentum_calendar = 0.15 if last_calendar == 'Yes' else 0
            momentum_clean = 0.15 if last_clean == 'Yes' else 0
            momentum_ontime = 0.12 if last_ontime == 'Yes' else 0

            p_calendar = clip_yes_prob(
                ease_calendar * weekday_scale * phase_scale + momentum_calendar,
                0.95,
            )
            p_clean = clip_yes_prob(
                ease_clean * weekday_scale * phase_scale + momentum_clean,
                0.90,
            )
            p_ontime = clip_yes_prob(
                ease_ontime * weekday_scale * phase_scale + momentum_ontime,
                0.93,
            )

            # Draw order (calendar, clean, ontime) is part of the seeded output.
            calendar = np.random.choice(['Yes', 'No'], p=[p_calendar, 1 - p_calendar])
            clean = np.random.choice(['Yes', 'No'], p=[p_clean, 1 - p_clean])
            ontime = np.random.choice(['Yes', 'No'], p=[p_ontime, 1 - p_ontime])

            done_today = sum(answer == 'Yes' for answer in (calendar, clean, ontime))
            strength = np.clip(strength + strength_change[done_today], 0, 5)

            progress = day / 30.0
            noise = np.random.normal(0, 0.35)

            # Immediate effect of today's habits: 0.6 per completed habit.
            today_bonus = done_today * 0.6
            # Cumulative effect; its weight rises with study progress, so it
            # is at most 5 * (0.4 + 0.2) = 3.0.
            accumulated_bonus = strength * (0.4 + progress * 0.2)

            raw_score = baseline + today_bonus + accumulated_bonus + noise
            happiness = int(np.clip(np.round(raw_score), 1, 10))

            records.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

            # Remember today's outcomes for tomorrow's momentum bonus.
            last_calendar, last_clean, last_ontime = calendar, clean, ontime
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control (non-tracking) arm.

    One row is produced per participant per day, laid out as
    ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``. The
    three habit fields are always reported as 'No'. Untracked habits are
    still simulated, but they only add a small boost to that day's
    happiness; there is no cumulative term.

    All randomness comes from the global ``np.random`` state, so the order
    of the draws below determines the generated data.

    Args:
        start_participant_id: Id assigned to the first participant.

    Returns:
        A list of row lists, N_PARTICIPANTS_PER_GROUP * len(DAYS) long.
    """
    # (probability multiplier, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    habit_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Natural organisation tendency of this participant.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Personal happiness level, drawn from the same distribution as the
        # intervention group's baseline.
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild penalty.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9
            # Weak habituation: rises from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Each clipped probability is computed once and reused for both
            # entries of the 'Yes'/'No' distribution.
            untracked_count = 0
            for multiplier, ceiling in habit_params:
                yes_prob = clip_yes_prob(
                    natural_org * multiplier * week_factor * time_factor,
                    ceiling,
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Small effect: 0.1 per untracked habit.
            subtle_boost = untracked_count * 0.1

            # Wider day-to-day noise than the intervention arm, no trend term.
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Calendar adherence: reported as "No" (not tracking)
                'No',  # Cleanliness adherence: reported as "No" (not tracking)
                'No',  # Punctuality adherence: reported as "No" (not tracking)
                happiness,
            ])
    return rows
# Simulate both arms. The intervention arm is generated first and takes ids
# starting at 1; the control arm continues from N_PARTICIPANTS_PER_GROUP + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day, in the column order the generators emit.
df = pd.DataFrame(data, columns=[
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to disk, without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows