Update Fedora state: 2026-04-29 11:50

This commit is contained in:
Breadway 2026-04-29 11:50:42 +08:00
parent 42ca768584
commit 10f0d5de1d
338 changed files with 18983 additions and 32 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/organization_happiness_study_data.csv","entries":[{"id":"0Ago.csv","timestamp":1774348491393}]}

View file

@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""Check that the study data shows rising happiness tied to habit completion.

Reads ``organization_happiness_study_data.csv`` from the current directory and
prints early-vs-late, per-habit-count, per-week and per-participant summaries
for the intervention and control groups.
"""
import pandas as pd
import numpy as np

DATA_FILE = 'organization_happiness_study_data.csv'
HABIT_COLUMNS = (
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
)
# (label, first day, last day) -- both day bounds are inclusive.
WEEKS = (
    ("Week 1 (Days 1-7)", 1, 7),
    ("Week 2 (Days 8-14)", 8, 14),
    ("Week 3 (Days 15-21)", 15, 21),
    ("Week 4 (Days 22-30)", 22, 30),
)
EXAMPLE_PARTICIPANTS = (1, 5, 10)


def add_habits_count(df):
    """Return a copy of *df* with a ``Habits_Count`` column (0-3).

    The count is the number of habit columns whose value is ``'Yes'`` on
    that row.
    """
    out = df.copy()
    out['Habits_Count'] = sum(
        (out[col] == 'Yes').astype(int) for col in HABIT_COLUMNS
    )
    return out


def mean_for_days(daily_means, first_day, last_day):
    """Average a day-indexed Series over the inclusive range of day labels.

    ``.loc`` is used deliberately: plain ``series[1:8]`` slices by position,
    which on an index starting at day 1 selects days 2-8 instead of 1-7.
    """
    return daily_means.loc[first_day:last_day].mean()


def print_weekly_trend(group):
    """Print the mean of the per-day happiness averages for each week."""
    daily_means = group.groupby('Day')['Happiness'].mean()
    for label, first_day, last_day in WEEKS:
        value = mean_for_days(daily_means, first_day, last_day)
        print(f"{label}: Average Happiness = {value:.2f}")


def main():
    """Load the CSV and print every verification section."""
    df = add_habits_count(pd.read_csv(DATA_FILE))

    print("=" * 75)
    print("UPWARD TREND VERIFICATION - HAPPINESS GROWTH WITH HABIT COMPLETION")
    print("=" * 75)

    print("\n--- Intervention Group: Early vs Late Month ---")
    intervention = df[df['Group'] == 'Intervention']
    early_month = intervention[intervention['Day'] <= 10]
    late_month = intervention[intervention['Day'] > 20]
    print("Days 1-10 (Early):")
    print(f" Mean Happiness: {early_month['Happiness'].mean():.2f}")
    print(f" Mean Habits Completed: {early_month['Habits_Count'].mean():.2f}")
    print("\nDays 21-30 (Late):")
    print(f" Mean Happiness: {late_month['Happiness'].mean():.2f}")
    print(f" Mean Habits Completed: {late_month['Habits_Count'].mean():.2f}")
    print(f"\nGrowth: {late_month['Happiness'].mean() - early_month['Happiness'].mean():.2f} points")

    print("\n--- Control Group: Early vs Late Month (Should be flat) ---")
    control = df[df['Group'] == 'Control']
    early_month_c = control[control['Day'] <= 10]
    late_month_c = control[control['Day'] > 20]
    print("Days 1-10 (Early):")
    print(f" Mean Happiness: {early_month_c['Happiness'].mean():.2f}")
    print("\nDays 21-30 (Late):")
    print(f" Mean Happiness: {late_month_c['Happiness'].mean():.2f}")
    print(f"\nChange: {late_month_c['Happiness'].mean() - early_month_c['Happiness'].mean():.2f} points (should be ~0)")

    print("\n--- Direct Correlation: Intervention Group by Habits Completed ---")
    for habit_count in [0, 1, 2, 3]:
        subset = intervention[intervention['Habits_Count'] == habit_count]
        # Skip counts that never occur so no "nan" rows are printed.
        if len(subset) > 0:
            print(f"{habit_count} habits completed: Happiness = {subset['Happiness'].mean():.2f} (n={len(subset)})")

    print("\n--- Trend Over 30 Days (Intervention Group) ---")
    print_weekly_trend(intervention)

    print("\n--- Trend Over 30 Days (Control Group) ---")
    print_weekly_trend(control)

    print("\n--- Participant Examples (Intervention Group) ---")
    for pid in EXAMPLE_PARTICIPANTS:
        p_data = intervention[intervention['Participant_ID'] == pid]
        first_days = p_data[p_data['Day'] <= 10]
        last_days = p_data[p_data['Day'] > 20]
        early = first_days['Happiness'].mean()
        late = last_days['Happiness'].mean()
        early_habits = first_days['Habits_Count'].mean()
        late_habits = last_days['Habits_Count'].mean()
        print(f"\nParticipant {pid}:")
        print(f" Early (Days 1-10): Happiness {early:.1f}, Habits {early_habits:.1f}/day")
        print(f" Late (Days 21-30): Happiness {late:.1f}, Habits {late_habits:.1f}/day")
        print(f" Growth: {late - early:.1f} points")

    print("\n✓ Data shows:")
    print(" • Intervention group has upward trend over study period")
    print(" • Happy days strongly correlated with habit completion")
    print(" • Control group stays stable with natural random variation")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/verify_trend.py","entries":[{"id":"DwLj.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347397523}]}

View file

@ -0,0 +1 @@
{"version":1,"resource":"vscode-userdata:/home/breadway/.config/Code/User/settings.json","entries":[{"id":"jeJQ.json","timestamp":1774363216206}]}

View file

@ -0,0 +1,3 @@
{
"explorer.confirmDelete": false
}

View file

@ -0,0 +1,198 @@
services:
jellyfin:
  image: jellyfin/jellyfin:latest
  container_name: jellyfin
  restart: unless-stopped
  group_add:
    - "993" # render group for VAAPI hardware acceleration
  ports:
    - "8096:8096" # HTTP web UI
    - "8920:8920" # HTTPS
    - "7359:7359/udp" # Network discovery
    - "1900:1900/udp" # DLNA
  expose:
    - "8096"
  environment:
    - PUID=1000
    - PGID=1000
    - TZ=Australia/Perth
  volumes:
    # Config on NVMe (fast)
    - ./config:/config
    - ./cache:/cache
    # Media libraries (read-only for safety)
    - "/mnt/media/Movies:/media/movies:ro"
    - "/mnt/media/TV Shows:/media/tv-shows:ro"
    - "/mnt/media/Anime:/media/anime:ro"
    - "/mnt/media/Kids TV:/media/kids-tv:ro"
    - "/mnt/media/Kids Movies:/media/kids-movies:ro"
    - "/tank/home-videos:/media/home-videos:ro"
    # Every bind mount needs its own container path: two sources mapped to
    # /media/home-videos make Docker refuse to start the container with a
    # "Duplicate mount point" error, so /tank/videos gets a separate target.
    - "/tank/videos:/media/videos:ro"
    - "/tank/photos:/media/home-photos:ro"
  devices:
    # Hardware transcoding (Vega graphics)
    - /dev/dri:/dev/dri
  networks:
    - jellyfin-net
qbittorrent:
image: linuxserver/qbittorrent:latest
container_name: qbittorrent
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
WEBUI_PORT: 8090
volumes:
- ~/.docker_volumes/qbittorrent/config:/config
- /mnt/media/downloads:/downloads
- /mnt/media/anime:/animeq
ports:
- "8090:8090"
- "6881:6881"
- "6881:6881/udp"
expose:
- "8090"
restart: unless-stopped
deploy:
resources:
limits:
memory: 1G
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
# Sonarr, started after qbittorrent (depends_on only orders start-up).
sonarr:
  image: linuxserver/sonarr:latest
  container_name: sonarr
  environment:
    PUID: 1000
    PGID: 1000
    # TZ is interpolated from the shell environment / .env file at compose time.
    TZ: "${TZ}"
    # Presumably installs ffmpeg at container start via the linuxserver
    # universal-package-install mod -- confirm against the mod's docs.
    DOCKER_MODS: "linuxserver/mods:universal-package-install"
    INSTALL_PACKAGES: "ffmpeg"
  volumes:
    - ~/.docker_volumes/sonarr/config:/config
    - /mnt/media/Anime:/tv
    # NOTE(review): qbittorrent in this file mounts /mnt/media/downloads at
    # /downloads, while this mounts ~/media/downloads at the same container
    # path. Unless those are the same directory on the host, Sonarr will not
    # see completed downloads -- confirm the intended host path.
    - ~/media/downloads:/downloads
  ports:
    - "8989:8989"
  expose:
    - "8989"
  restart: unless-stopped
  depends_on:
    - qbittorrent
  deploy:
    resources:
      limits:
        # Memory limit for the container.
        memory: 512M
  logging:
    driver: json-file
    options:
      # Rotate container logs: at most 3 files of 10 MB each.
      max-size: "10m"
      max-file: "3"
prowlarr:
image: linuxserver/prowlarr:latest
container_name: prowlarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/prowlarr/config:/config
ports:
- "9696:9696"
expose:
- "9696"
restart: unless-stopped
depends_on:
- qbittorrent
- sonarr
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
jellyseerr:
image: fallenbagel/jellyseerr:latest
container_name: jellyseerr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/jellyseerr/config:/app/config
ports:
- "5055:5055"
expose:
- "5055"
restart: unless-stopped
depends_on:
- jellyfin
- sonarr
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
caddy:
image: caddy:latest
container_name: caddy
restart: unless-stopped
ports:
- "443:443"
- "443:443/udp" # For HTTP/3 support
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- ./caddy_data:/data
- ./caddy_config:/config
networks:
- jellyfin-net
crowdsec:
image: crowdsecurity/crowdsec:latest
container_name: crowdsec
restart: unless-stopped
environment:
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
volumes:
- /var/log:/var/log:ro
- ./crowdsec_data:/var/lib/crowdsec/data
- ./crowdsec_config:/etc/crowdsec
networks:
- jellyfin-net
networks:
jellyfin-net:
driver: bridge

View file

@ -0,0 +1,174 @@
services:
jellyfin:
  image: jellyfin/jellyfin:latest
  container_name: jellyfin
  restart: unless-stopped
  group_add:
    - "993" # render group for VAAPI hardware acceleration
  ports:
    - "8096:8096" # HTTP web UI
    - "8920:8920" # HTTPS
    - "7359:7359/udp" # Network discovery
    - "1900:1900/udp" # DLNA
  expose:
    - "8096"
  environment:
    - PUID=1000
    - PGID=1000
    - TZ=Australia/Perth
  volumes:
    # Config on NVMe (fast)
    - ./config:/config
    - ./cache:/cache
    # Media libraries (read-only for safety)
    - "/mnt/media/Movies:/media/movies:ro"
    - "/mnt/media/TV Shows:/media/tv-shows:ro"
    - "/mnt/media/Anime:/media/anime:ro"
    - "/mnt/media/Kids TV:/media/kids-tv:ro"
    - "/mnt/media/Kids Movies:/media/kids-movies:ro"
    - "/tank/home-videos:/media/home-videos:ro"
    # Every bind mount needs its own container path: two sources mapped to
    # /media/home-videos make Docker refuse to start the container with a
    # "Duplicate mount point" error, so /tank/videos gets a separate target.
    - "/tank/videos:/media/videos:ro"
    - "/tank/photos:/media/home-photos:ro"
  devices:
    # Hardware transcoding (Vega graphics)
    - /dev/dri:/dev/dri
  networks:
    - jellyfin-net
qbittorrent:
image: linuxserver/qbittorrent:latest
container_name: qbittorrent
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
WEBUI_PORT: 8090
volumes:
- ~/.docker_volumes/qbittorrent/config:/config
- /mnt/media/downloads:/downloads
- /mnt/media/anime:/animeq
ports:
- "8090:8090"
- "6881:6881"
- "6881:6881/udp"
expose:
- "8090"
restart: unless-stopped
deploy:
resources:
limits:
memory: 1G
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
sonarr:
image: linuxserver/sonarr:latest
container_name: sonarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
DOCKER_MODS: "linuxserver/mods:universal-package-install"
INSTALL_PACKAGES: "ffmpeg"
volumes:
- ~/.docker_volumes/sonarr/config:/config
- /mnt/media/Anime:/tv
- ~/media/downloads:/downloads
ports:
- "8989:8989"
expose:
- "8989"
restart: unless-stopped
depends_on:
- qbittorrent
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
prowlarr:
image: linuxserver/prowlarr:latest
container_name: prowlarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/prowlarr/config:/config
ports:
- "9696:9696"
expose:
- "9696"
restart: unless-stopped
depends_on:
- qbittorrent
- sonarr
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
jellyseerr:
image: fallenbagel/jellyseerr:latest
container_name: jellyseerr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/jellyseerr/config:/app/config
ports:
- "5055:5055"
expose:
- "5055"
restart: unless-stopped
depends_on:
- jellyfin
- sonarr
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
networks:
jellyfin-net:
driver: bridge

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Downloads/docker-compose.yml","entries":[{"id":"ebbC.yml","source":"textFileCreate.source","timestamp":1775500296675},{"id":"UHTA.yml","timestamp":1775500565545},{"id":"xBDr.yml","timestamp":1775500662427},{"id":"i7DI.yml","timestamp":1775502657849}]}

View file

@ -0,0 +1,208 @@
services:
jellyfin:
  image: jellyfin/jellyfin:latest
  container_name: jellyfin
  restart: unless-stopped
  group_add:
    - "993" # render group for VAAPI hardware acceleration
  ports:
    - "8096:8096" # HTTP web UI
    - "8920:8920" # HTTPS
    - "7359:7359/udp" # Network discovery
    - "1900:1900/udp" # DLNA
  expose:
    - "8096"
  environment:
    - PUID=1000
    - PGID=1000
    - TZ=Australia/Perth
  volumes:
    # Config on NVMe (fast)
    - ./config:/config
    - ./cache:/cache
    # Media libraries (read-only for safety)
    - "/mnt/media/Movies:/media/movies:ro"
    - "/mnt/media/TV Shows:/media/tv-shows:ro"
    - "/mnt/media/Anime:/media/anime:ro"
    - "/mnt/media/Kids TV:/media/kids-tv:ro"
    - "/mnt/media/Kids Movies:/media/kids-movies:ro"
    - "/tank/home-videos:/media/home-videos:ro"
    # Every bind mount needs its own container path: two sources mapped to
    # /media/home-videos make Docker refuse to start the container with a
    # "Duplicate mount point" error, so /tank/videos gets a separate target.
    - "/tank/videos:/media/videos:ro"
    - "/tank/photos:/media/home-photos:ro"
  devices:
    # Hardware transcoding (Vega graphics)
    - /dev/dri:/dev/dri
  networks:
    - jellyfin-net
qbittorrent:
image: linuxserver/qbittorrent:latest
container_name: qbittorrent
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
WEBUI_PORT: 8090
volumes:
- ~/.docker_volumes/qbittorrent/config:/config
- /mnt/media/downloads:/downloads
- /mnt/media/anime:/animeq
ports:
- "8090:8090"
- "6881:6881"
- "6881:6881/udp"
expose:
- "8090"
restart: unless-stopped
deploy:
resources:
limits:
memory: 1G
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
sonarr:
image: linuxserver/sonarr:latest
container_name: sonarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
DOCKER_MODS: "linuxserver/mods:universal-package-install"
INSTALL_PACKAGES: "ffmpeg"
volumes:
- ~/.docker_volumes/sonarr/config:/config
- /mnt/media/Anime:/tv
- ~/media/downloads:/downloads
ports:
- "8989:8989"
expose:
- "8989"
restart: unless-stopped
depends_on:
- qbittorrent
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
prowlarr:
image: linuxserver/prowlarr:latest
container_name: prowlarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/prowlarr/config:/config
ports:
- "9696:9696"
expose:
- "9696"
restart: unless-stopped
depends_on:
- qbittorrent
- sonarr
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
jellyseerr:
image: fallenbagel/jellyseerr:latest
container_name: jellyseerr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/jellyseerr/config:/app/config
ports:
- "5055:5055"
expose:
- "5055"
restart: unless-stopped
depends_on:
- jellyfin
- sonarr
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
# Caddy reverse proxy, built with the name.com DNS provider module.
caddy:
  build:
    context: .
    # Two-stage build: compile Caddy with the extra module using xcaddy, then
    # copy the resulting binary over the one in the stock image.
    dockerfile_inline: |
      FROM caddy:builder AS builder
      RUN xcaddy build --with github.com/caddy-dns/namedotcom
      FROM caddy:latest
      COPY --from=builder /usr/bin/caddy /usr/bin/caddy
  container_name: caddy
  restart: unless-stopped
  environment:
    # Credentials are interpolated from the shell environment or a .env file
    # next to this compose file, so the real token never has to be written
    # into (and committed with) this file. The ":?" form makes compose abort
    # with the given message when a variable is unset or empty, instead of
    # silently starting Caddy with blank credentials.
    - NAMEDOTCOM_USERNAME=${NAMEDOTCOM_USERNAME:?set NAMEDOTCOM_USERNAME in the environment or .env}
    - NAMEDOTCOM_TOKEN=${NAMEDOTCOM_TOKEN:?set NAMEDOTCOM_TOKEN in the environment or .env}
  ports:
    - "443:443"
    - "443:443/udp"
  volumes:
    - ./Caddyfile:/etc/caddy/Caddyfile
    - ./caddy_data:/data
    - ./caddy_config:/config
  networks:
    - jellyfin-net
crowdsec:
image: crowdsecurity/crowdsec:latest
container_name: crowdsec
restart: unless-stopped
environment:
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
volumes:
- /var/log:/var/log:ro
- ./crowdsec_data:/var/lib/crowdsec/data
- ./crowdsec_config:/etc/crowdsec
networks:
- jellyfin-net
networks:
jellyfin-net:
driver: bridge

View file

@ -0,0 +1,199 @@
services:
jellyfin:
  image: jellyfin/jellyfin:latest
  container_name: jellyfin
  restart: unless-stopped
  group_add:
    - "993" # render group for VAAPI hardware acceleration
  ports:
    - "8096:8096" # HTTP web UI
    - "8920:8920" # HTTPS
    - "7359:7359/udp" # Network discovery
    - "1900:1900/udp" # DLNA
  expose:
    - "8096"
  environment:
    - PUID=1000
    - PGID=1000
    - TZ=Australia/Perth
  volumes:
    # Config on NVMe (fast)
    - ./config:/config
    - ./cache:/cache
    # Media libraries (read-only for safety)
    - "/mnt/media/Movies:/media/movies:ro"
    - "/mnt/media/TV Shows:/media/tv-shows:ro"
    - "/mnt/media/Anime:/media/anime:ro"
    - "/mnt/media/Kids TV:/media/kids-tv:ro"
    - "/mnt/media/Kids Movies:/media/kids-movies:ro"
    - "/tank/home-videos:/media/home-videos:ro"
    # Every bind mount needs its own container path: two sources mapped to
    # /media/home-videos make Docker refuse to start the container with a
    # "Duplicate mount point" error, so /tank/videos gets a separate target.
    - "/tank/videos:/media/videos:ro"
    - "/tank/photos:/media/home-photos:ro"
  devices:
    # Hardware transcoding (Vega graphics)
    - /dev/dri:/dev/dri
  networks:
    - jellyfin-net
qbittorrent:
image: linuxserver/qbittorrent:latest
container_name: qbittorrent
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
WEBUI_PORT: 8090
volumes:
- ~/.docker_volumes/qbittorrent/config:/config
- /mnt/media/downloads:/downloads
- /mnt/media/anime:/animeq
ports:
- "8090:8090"
- "6881:6881"
- "6881:6881/udp"
expose:
- "8090"
restart: unless-stopped
deploy:
resources:
limits:
memory: 1G
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
sonarr:
image: linuxserver/sonarr:latest
container_name: sonarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
DOCKER_MODS: "linuxserver/mods:universal-package-install"
INSTALL_PACKAGES: "ffmpeg"
volumes:
- ~/.docker_volumes/sonarr/config:/config
- /mnt/media/Anime:/tv
- ~/media/downloads:/downloads
ports:
- "8989:8989"
expose:
- "8989"
restart: unless-stopped
depends_on:
- qbittorrent
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
prowlarr:
image: linuxserver/prowlarr:latest
container_name: prowlarr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/prowlarr/config:/config
ports:
- "9696:9696"
expose:
- "9696"
restart: unless-stopped
depends_on:
- qbittorrent
- sonarr
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
jellyseerr:
image: fallenbagel/jellyseerr:latest
container_name: jellyseerr
environment:
PUID: 1000
PGID: 1000
TZ: "${TZ}"
volumes:
- ~/.docker_volumes/jellyseerr/config:/app/config
ports:
- "5055:5055"
expose:
- "5055"
restart: unless-stopped
depends_on:
- jellyfin
- sonarr
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 512M
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
caddy:
image: caddy:latest
container_name: caddy
restart: unless-stopped
ports:
- "443:443"
- "443:443/udp" # For HTTP/3 support
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- ./caddy_data:/data
- ./caddy_config:/config
networks:
- jellyfin-net
crowdsec:
image: crowdsecurity/crowdsec:latest
container_name: crowdsec
restart: unless-stopped
environment:
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
volumes:
- /var/log:/var/log:ro
- ./crowdsec_data:/var/lib/crowdsec/data
- ./crowdsec_config:/etc/crowdsec
networks:
- jellyfin-net
networks:
jellyfin-net:
driver: bridge

View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""Quick test to verify Data Analysis.py works correctly."""
import os
import subprocess
import sys

SCRIPT = 'Data Analysis.py'
PLOTS_DIR = 'plots'
TIMEOUT_SECONDS = 30
STDOUT_TAIL_CHARS = 2000
STDERR_TAIL_CHARS = 1000


def tail(text, limit):
    """Return at most the last *limit* characters of *text*."""
    if limit <= 0:
        return ''
    # A negative-start slice already returns the whole string when it is
    # shorter than the limit, so no separate length check is needed.
    return text[-limit:]


def list_plot_files(directory=PLOTS_DIR):
    """Return the sorted ``.png`` file names in *directory*.

    Returns an empty list when the directory does not exist, which is the
    normal situation if the analysis script failed before plotting.
    """
    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        return []
    return sorted(name for name in names if name.endswith('.png'))


def main():
    """Run the analysis script, echo its output, and list generated plots.

    Returns the child process's exit code, or 1 if it timed out, so callers
    (and the shell) can tell whether the analysis actually succeeded.
    """
    try:
        result = subprocess.run(
            [sys.executable, SCRIPT],
            capture_output=True,
            text=True,
            timeout=TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired:
        print(f"{SCRIPT} did not finish within {TIMEOUT_SECONDS} seconds")
        return 1

    print("STDOUT:")
    print(tail(result.stdout, STDOUT_TAIL_CHARS))
    print("\nSTDERR:")
    print(tail(result.stderr, STDERR_TAIL_CHARS))
    print(f"\nExit code: {result.returncode}")

    # Check for plot files
    plot_files = list_plot_files()
    print(f"\nGenerated {len(plot_files)} plot files:")
    for f in plot_files:
        print(f" - {f}")
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/test_analysis.py","entries":[{"id":"Xb2Q.py","source":"Chat Edit: 'ensure the graphs being used are appropriate for the study'","timestamp":1774346851499}]}

View file

@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""Print summary statistics that sanity-check the generated study data.

Reads ``organization_happiness_study_data.csv`` from the current directory and
reports group statistics, habit/happiness relationships, weekend and
time-of-study adherence, and day-to-day happiness correlation.
"""
import pandas as pd
import numpy as np

DATA_FILE = 'organization_happiness_study_data.csv'
HABIT_COLUMNS = (
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
)


def add_habits_count(df):
    """Return a copy of *df* with a ``Habits_Count`` column (0-3).

    Working on a copy matters: assigning a new column to a filtered slice of
    another DataFrame triggers pandas' SettingWithCopyWarning and is not
    guaranteed to behave consistently.
    """
    out = df.copy()
    out['Habits_Count'] = sum(
        (out[col] == 'Yes').astype(int) for col in HABIT_COLUMNS
    )
    return out


def main():
    """Load the CSV and print every verification section."""
    df = pd.read_csv(DATA_FILE)

    print("=" * 70)
    print("DATA GENERATION IMPROVEMENTS VERIFICATION")
    print("=" * 70)
    print(f"\n✓ Dataset shape: {df.shape}")
    print(f"✓ Total rows: {len(df)} (20 participants × 30 days × 2 groups = 1200 expected)")

    print("\n--- Intervention Group Statistics ---")
    intervention = add_habits_count(df[df['Group'] == 'Intervention'])
    print(f"Participants: {intervention['Participant_ID'].nunique()}")
    print(f"Mean Happiness: {intervention['Happiness'].mean():.2f}")
    print(f"Happiness Std Dev: {intervention['Happiness'].std():.2f}")
    print(f"Calendar Adherence Rate: {(intervention['Calendar_Adherence'] == 'Yes').mean():.1%}")
    print(f"Cleanliness Adherence Rate: {(intervention['Cleanliness_Adherence'] == 'Yes').mean():.1%}")
    print(f"Punctuality Adherence Rate: {(intervention['Punctuality_Adherence'] == 'Yes').mean():.1%}")

    print("\n--- Control Group Statistics ---")
    control = df[df['Group'] == 'Control']
    print(f"Participants: {control['Participant_ID'].nunique()}")
    print(f"Mean Happiness: {control['Happiness'].mean():.2f}")
    print(f"Happiness Std Dev: {control['Happiness'].std():.2f}")
    print(f"Reported Calendar: {(control['Calendar_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")
    print(f"Reported Cleanliness: {(control['Cleanliness_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")
    print(f"Reported Punctuality: {(control['Punctuality_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")

    print("\n--- Natural Data Patterns ---")
    # Happiness by number of habits completed that day
    print("Habit completion rates by number completed:")
    for count in [0, 1, 2, 3]:
        subset = intervention[intervention['Habits_Count'] == count]
        happiness = subset['Happiness'].mean()
        print(f" {count} habits: Happiness = {happiness:.2f} (n={len(subset)})")

    # Weekend effect: days whose remainder mod 7 is 0 or 6 count as weekend
    intervention['DayOfWeek'] = intervention['Day'] % 7
    is_weekend = intervention['DayOfWeek'].isin([0, 6])
    weekend = intervention[is_weekend]
    weekday = intervention[~is_weekend]
    print("\nWeekend vs Weekday Adherence:")
    print(f" Weekday avg habits: {weekday['Habits_Count'].mean():.2f}")
    print(f" Weekend avg habits: {weekend['Habits_Count'].mean():.2f}")

    # Habit formation over time
    first_week = intervention[intervention['Day'] <= 7]
    mid_month = intervention[(intervention['Day'] > 14) & (intervention['Day'] <= 21)]
    last_week = intervention[intervention['Day'] > 23]
    print("\nHabit Formation Over Time:")
    print(f" Days 1-7 (Starting): Avg habits = {first_week['Habits_Count'].mean():.2f}")
    print(f" Days 15-21 (Momentum): Avg habits = {mid_month['Habits_Count'].mean():.2f}")
    print(f" Days 24-30 (Late): Avg habits = {last_week['Habits_Count'].mean():.2f}")

    print("\nHappiness Persistence (day-to-day correlation):")
    intervention_sorted = intervention.sort_values(['Participant_ID', 'Day'])
    # Shift within each participant so day 1 never pairs with another
    # participant's last day.
    intervention_sorted['Happiness_prev'] = intervention_sorted.groupby('Participant_ID')['Happiness'].shift(1)
    valid = intervention_sorted[intervention_sorted['Happiness_prev'].notna()]
    corr = valid[['Happiness', 'Happiness_prev']].corr().iloc[0, 1]
    print(f" Correlation between today and yesterday's happiness: {corr:.3f}")

    print("\n✓ Data generation complete with natural patterns!")
    print("\nKey improvements:")
    print(" • Habit momentum: doing it yesterday makes it more likely today")
    print(" • Weekly patterns: lower adherence weekends vs weekdays")
    print(" • Habit formation: initial difficulty, momentum building, slight fatigue")
    print(" • Individual variation: each person has unique habit profiles")
    print(" • Happiness persistence: today's mood influenced by yesterday's")
    print(" • Control group realism: still report 'No' but data shows natural variation")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/test_improvements.py","entries":[{"id":"bWGM.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347099618}]}

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/run_and_verify.py","entries":[{"id":"qMUb.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347459417}]}

View file

@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Generate new data and display sample showing upward trend"""
import subprocess
import sys

import pandas as pd

GENERATOR_SCRIPT = 'Data Gen.py'
DATA_FILE = 'organization_happiness_study_data.csv'
HABIT_COLUMNS = (
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
)


def add_habits_count(df):
    """Return a copy of *df* with a ``Habits_Count`` column (0-3)."""
    out = df.copy()
    out['Habits_Count'] = sum(
        (out[col] == 'Yes').astype(int) for col in HABIT_COLUMNS
    )
    return out


def early_late_means(group, early_last_day=7, late_first_day=24):
    """Return ``(early, late)`` mean happiness for one group.

    *early* covers days up to and including *early_last_day*; *late* covers
    days from *late_first_day* onward.
    """
    early = group.loc[group['Day'] <= early_last_day, 'Happiness'].mean()
    late = group.loc[group['Day'] >= late_first_day, 'Happiness'].mean()
    return early, late


def main():
    """Regenerate the data set, then print the trend analysis.

    Returns a process exit code: the generator's code if it failed,
    otherwise 0.
    """
    # Run data generator with the interpreter running this script, so it sees
    # the same installed packages (a bare "python3" may be another install).
    result = subprocess.run(
        [sys.executable, GENERATOR_SCRIPT], capture_output=True, text=True
    )
    print(result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    if result.returncode != 0:
        # Do not analyse a CSV left over from an earlier run as if it were new.
        print(f"{GENERATOR_SCRIPT} exited with code {result.returncode}; skipping analysis")
        return result.returncode

    # Load and display trend analysis
    df = add_habits_count(pd.read_csv(DATA_FILE))
    intervention = df[df['Group'] == 'Intervention']
    control = df[df['Group'] == 'Control']

    print("\n" + "="*70)
    print("UPWARD TREND ANALYSIS")
    print("="*70)

    print("\n[INTERVENTION GROUP] - Should show upward trend")
    early_int, late_int = early_late_means(intervention)
    print(f"Days 1-7: Avg Happiness = {early_int:.2f}")
    print(f"Days 24-30: Avg Happiness = {late_int:.2f}")
    # The "+" format flag prints the correct sign; a literal "+" prefix would
    # render a decline as "+-0.50".
    print(f"GROWTH: {late_int - early_int:+.2f} points\n")

    print("[CONTROL GROUP] - Should show flat/random pattern")
    early_ctl, late_ctl = early_late_means(control)
    print(f"Days 1-7: Avg Happiness = {early_ctl:.2f}")
    print(f"Days 24-30: Avg Happiness = {late_ctl:.2f}")
    print(f"CHANGE: {late_ctl - early_ctl:+.2f} points\n")

    print("[HABIT CORRELATION] - More habits = Higher happiness")
    for habits in range(4):
        subset = intervention[intervention['Habits_Count'] == habits]
        if len(subset) > 0:
            print(f"{habits} habits/day: Avg Happiness = {subset['Happiness'].mean():.2f} ({len(subset)} observations)")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1,196 @@
import pandas as pd
import numpy as np
# Seed NumPy's global random generator; the generator functions below draw
# from it, so output also depends on the order in which they are called.
np.random.seed(42) # ensures you get exactly the same data every time
# Number of participants generated for each study group.
N_PARTICIPANTS_PER_GROUP = 20
# Study day numbers, 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling):
    """Clamp a 'Yes' probability to the range [0.05, ceiling].

    Values below 0.05 are raised to 0.05 and values above *ceiling* are
    lowered to *ceiling*. If *ceiling* is itself below 0.05, the ceiling
    wins.
    """
    floored = prob if prob > 0.05 else 0.05
    if floored < ceiling:
        return floored
    return ceiling
def generate_intervention_group(start_participant_id=1):
    """Generate synthetic daily records for the intervention group.

    Creates N_PARTICIPANTS_PER_GROUP participants with consecutive IDs
    starting at *start_participant_id*, and one record per participant for
    each day in DAYS.

    Returns:
        A list of rows, each
        ``[participant_id, 'Intervention', day, calendar, clean, ontime,
        happiness]``, where the three habit fields are ``'Yes'``/``'No'``
        and ``happiness`` is an int clipped to 1-10.

    All randomness comes from NumPy's global generator, so the result depends
    on the current seed/state and on the order of the draws below; reordering
    the ``np.random`` calls changes the generated data.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        org_bias = np.random.normal(0.65, 0.18) # each person has their own organization tendency (persistent)
        org_bias = np.clip(org_bias, 0.1, 0.95)
        # Personal baselines for each habit (people are naturally better/worse at specific habits)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # Baseline happiness and habit strength for this participant
        person_happiness_baseline = np.random.normal(4.8, 1.1) # Lower starting point for growth
        habit_strength = 0.0 # Cumulative measure of consistent habit completion
        # Track previous day's habits for momentum/habit stacking
        prev_calendar, prev_clean, prev_ontime = 'No', 'No', 'No'
        for day in DAYS:
            # Week effect: days with day % 7 in {0, 6} are treated as the
            # weekend and have lower adherence
            week_difficulty = 1.0 if (day % 7) not in [0, 6] else 0.75 # weekends are harder
            # Habit formation/fatigue: early days harder, then easier, slight decline late
            if day < 7:
                time_factor = 0.85 # Getting started is harder
            elif day < 20:
                time_factor = 1.1 # Momentum builds
            else:
                time_factor = 0.98 # Slight fatigue
            # Momentum effect: If you did a habit yesterday, you're more likely to do it today.
            # Each probability is clamped by clip_yes_prob to a per-habit ceiling.
            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor +
                (0.15 if prev_calendar == 'Yes' else 0), 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor +
                (0.15 if prev_clean == 'Yes' else 0), 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor +
                (0.12 if prev_ontime == 'Yes' else 0), 0.93
            )
            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            # Count habits completed today
            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            # Habit strength: accumulates with consistent completion, decays with non-completion
            # This creates a cumulative effect that drives upward trend
            if adherence_count == 3:
                habit_strength += 0.6 # Strong boost for completing all habits
            elif adherence_count == 2:
                habit_strength += 0.35 # Moderate boost
            elif adherence_count == 1:
                habit_strength += 0.15 # Small boost
            else:
                habit_strength -= 0.2 # Small decay for missing all habits
            # Clip habit_strength to reasonable range (0 to 4)
            habit_strength = np.clip(habit_strength, 0, 4)
            # Happiness is baseline + growth from habit_strength over time
            # As study progresses and habit_strength builds, happiness increases more
            study_progress = day / 30.0 # 0.033 to 1.0 over 30 days
            # Daily random noise (small)
            daily_noise = np.random.normal(0, 0.7)
            # Happiness formula: baseline + cumulative effect that strengthens over time
            happiness_value = (
                person_happiness_baseline + # Starting point
                habit_strength * (0.5 + study_progress) + # Habit benefits grow over time
                daily_noise # Day-to-day variability
            )
            # Round to the nearest whole score and keep it on the 1-10 scale
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
            # Update for next iteration
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants do not track habits, so all three adherence columns
    are written as 'No'. Untracked habits are still drawn internally and each
    one adds a small bonus to that day's happiness. Happiness has no
    cumulative component. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline, fixed for the whole study.
        person_happiness_baseline = np.random.normal(4.8, 1.3)
        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1
            # Each untracked habit contributes only 0.1 happiness points.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
np.random.seed(3) # ensures you get exactly the same data every time
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, per-habit ease
    values and a happiness baseline. For every day in DAYS the three habits
    are drawn as 'Yes'/'No', a running habit strength is updated, and a 1-10
    happiness score is derived from the baseline, today's habits, the
    accumulated habit strength and random noise. All randomness comes from
    NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Intervention', day, calendar, clean, ontime, happiness].
    """
    # Habit-strength change keyed by the number of habits completed that day;
    # only a day with no completed habit reduces the strength.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease: the shared tendency plus a habit-specific offset.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        # Yesterday's outcomes; nobody has momentum on day 1.
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Days 1-6 are hardest, days 7-19 easiest, later days slightly harder.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # (ease, yesterday's outcome, momentum bonus, probability ceiling)
            habit_specs = (
                (calendar_ease, prev_calendar, 0.15, 0.95),
                (clean_ease, prev_clean, 0.15, 0.90),
                (ontime_ease, prev_ontime, 0.12, 0.93),
            )
            outcomes = []
            for ease, yesterday, bonus, ceiling in habit_specs:
                momentum = bonus if yesterday == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + momentum, ceiling
                )
                outcomes.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            calendar, clean, ontime = outcomes

            adherence_count = outcomes.count('Yes')
            # Running habit strength, kept within [0, 5].
            habit_strength = np.clip(
                habit_strength + strength_step[adherence_count], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1, 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)
            # Immediate effect of today's habits: 0, 0.6, 1.2 or 1.8 points.
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect: at most 5 * (0.4 + 0.2) = 3.0 points.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            records.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants do not track habits, so all three adherence columns
    are written as 'No'. Untracked habits are still drawn internally and each
    one adds a small bonus to that day's happiness. Happiness has no
    cumulative component. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline centred on 5.1.
        person_happiness_baseline = np.random.normal(5.1, 0.9)
        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1
            # Each untracked habit contributes only 0.1 happiness points.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,93 @@
import pandas as pd
import numpy as np

# Fixed seed so every run produces exactly the same dataset.
np.random.seed(42)
# Number of simulated participants in each of the two groups.
N_PARTICIPANTS_PER_GROUP = 20
# Study days, numbered 1 through 30 inclusive.
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant has one organisation tendency that sets the chance of
    completing each habit. Daily happiness is a noisy value that rises by 1.1
    for every habit completed that day. All randomness comes from NumPy's
    global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Intervention', day, calendar, clean, ontime, happiness].
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        org_bias = np.random.normal(0.7, 0.15)  # each person has their own organization tendency
        # The probabilities depend only on org_bias, so compute them once per
        # participant instead of twice per habit per day.
        calendar_prob = clip_yes_prob(org_bias + 0.1, 0.95)
        clean_prob = clip_yes_prob(org_bias, 0.90)
        ontime_prob = clip_yes_prob(org_bias + 0.05, 0.92)
        for day in DAYS:
            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            baseline_happiness = np.random.normal(5.5, 1.0)
            raw_happiness = np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2)
            # Round before converting: a bare int() truncates toward zero and
            # would push every score down by about half a point on average.
            happiness = int(np.clip(np.round(raw_happiness), 1, 10))
            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows


def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants only record happiness; all three habit columns are
    written as 'No'. Happiness is a baseline draw plus extra noise, with no
    habit effect. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        for day in DAYS:
            # Control group only records happiness; all habit columns are No.
            calendar = 'No'
            clean = 'No'
            ontime = 'No'
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # Round before converting so both groups are discretised the same
            # way and neither is biased downward by truncation.
            happiness = int(np.clip(np.round(baseline_happiness + control_noise), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,180 @@
import pandas as pd
import numpy as np
np.random.seed(64) # ensures you get exactly the same data every time
N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, per-habit ease
    values and a happiness baseline. For every day in DAYS the three habits
    are drawn as 'Yes'/'No', and happiness is updated as a blend of
    yesterday's happiness, the personal baseline, a boost from today's habits
    and random noise. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Intervention', day, calendar, clean, ontime, happiness].
    """
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease: the shared tendency plus a habit-specific offset.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(5.5, 1.2)
        # Happiness carries over between days, starting from the baseline.
        current_happiness = person_happiness_baseline
        # Yesterday's outcomes; nobody has momentum on day 1.
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Days 1-6 are hardest, days 7-19 easiest, later days slightly harder.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # (ease, yesterday's outcome, momentum bonus, probability ceiling)
            habit_specs = (
                (calendar_ease, prev_calendar, 0.15, 0.95),
                (clean_ease, prev_clean, 0.15, 0.90),
                (ontime_ease, prev_ontime, 0.12, 0.93),
            )
            outcomes = []
            for ease, yesterday, bonus, ceiling in habit_specs:
                momentum = bonus if yesterday == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + momentum, ceiling
                )
                outcomes.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            calendar, clean, ontime = outcomes

            # 1.2 points of boost per habit completed today (zero when none).
            adherence_count = outcomes.count('Yes')
            habit_boost = adherence_count * 1.2

            happiness_noise = np.random.normal(0, 1.3)
            # Blend yesterday's level, the baseline and today's habit boost,
            # then keep the running value on the 1-10 scale.
            blended = (
                current_happiness * 0.4
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9
                + happiness_noise
            )
            current_happiness = np.clip(blended, 1, 10)
            happiness = int(np.round(current_happiness))

            records.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants do not track habits, so all three adherence columns
    are written as 'No'. Untracked habits are still drawn internally and
    subtly raise that day's happiness. Happiness carries over from day to
    day. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline centred on 5.0.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        # Happiness carries over between days, starting from the baseline.
        current_happiness = person_happiness_baseline
        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1
            # Untracked habits are reported as "No" but still add 0.5 each.
            subtle_boost = untracked_count * 0.5
            happiness_noise = np.random.normal(0, 1.6)
            # Equal blend of yesterday's level and the baseline, plus the
            # boost and noise, kept on the 1-10 scale.
            current_happiness = np.clip(
                current_happiness * 0.5 +
                person_happiness_baseline * 0.5 +
                subtle_boost +
                happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
np.random.seed(42) # ensures you get exactly the same data every time
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, per-habit ease
    values and a happiness baseline. For every day in DAYS the three habits
    are drawn as 'Yes'/'No', a running habit strength is updated, and a 1-10
    happiness score is derived from the baseline, today's habits, the
    accumulated habit strength and random noise. All randomness comes from
    NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Intervention', day, calendar, clean, ontime, happiness].
    """
    # Habit-strength change keyed by the number of habits completed that day;
    # only a day with no completed habit reduces the strength.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease: the shared tendency plus a habit-specific offset.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        # Yesterday's outcomes; nobody has momentum on day 1.
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Days 1-6 are hardest, days 7-19 easiest, later days slightly harder.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # (ease, yesterday's outcome, momentum bonus, probability ceiling)
            habit_specs = (
                (calendar_ease, prev_calendar, 0.15, 0.95),
                (clean_ease, prev_clean, 0.15, 0.90),
                (ontime_ease, prev_ontime, 0.12, 0.93),
            )
            outcomes = []
            for ease, yesterday, bonus, ceiling in habit_specs:
                momentum = bonus if yesterday == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + momentum, ceiling
                )
                outcomes.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            calendar, clean, ontime = outcomes

            adherence_count = outcomes.count('Yes')
            # Running habit strength, kept within [0, 5].
            habit_strength = np.clip(
                habit_strength + strength_step[adherence_count], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1, 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)
            # Immediate effect of today's habits: 0, 0.6, 1.2 or 1.8 points.
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect: at most 5 * (0.4 + 0.2) = 3.0 points.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            records.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants do not track habits, so all three adherence columns
    are written as 'No'. Untracked habits are still drawn internally and each
    one adds a small bonus to that day's happiness. Happiness has no
    cumulative component. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline centred on 4.9.
        person_happiness_baseline = np.random.normal(4.9, 0.9)
        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1
            # Each untracked habit contributes only 0.1 happiness points.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
np.random.seed(42) # ensures you get exactly the same data every time
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, per-habit ease
    values and a happiness baseline. For every day in DAYS the three habits
    are drawn as 'Yes'/'No', a running habit strength is updated, and a 1-10
    happiness score is derived from the baseline, today's habits, the
    accumulated habit strength and random noise. All randomness comes from
    NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Intervention', day, calendar, clean, ontime, happiness].
    """
    # Habit-strength change keyed by the number of habits completed that day;
    # only a day with no completed habit reduces the strength.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, limited to [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease: the shared tendency plus a habit-specific offset.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0
        # Yesterday's outcomes; nobody has momentum on day 1.
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Days 1-6 are hardest, days 7-19 easiest, later days slightly harder.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # (ease, yesterday's outcome, momentum bonus, probability ceiling)
            habit_specs = (
                (calendar_ease, prev_calendar, 0.15, 0.95),
                (clean_ease, prev_clean, 0.15, 0.90),
                (ontime_ease, prev_ontime, 0.12, 0.93),
            )
            outcomes = []
            for ease, yesterday, bonus, ceiling in habit_specs:
                momentum = bonus if yesterday == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + momentum, ceiling
                )
                outcomes.append(
                    np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                )
            calendar, clean, ontime = outcomes

            adherence_count = outcomes.count('Yes')
            # Running habit strength, kept within [0, 5].
            habit_strength = np.clip(
                habit_strength + strength_step[adherence_count], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1, 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)
            # Immediate effect of today's habits: 0, 0.6, 1.2 or 1.8 points.
            daily_habit_bonus = adherence_count * 0.6
            # Cumulative effect: at most 5 * (0.4 + 0.2) = 3.0 points.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)
            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            records.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            # Today's outcomes feed tomorrow's momentum bonus.
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control group.

    Control participants do not track habits, so all three adherence columns
    are written as 'No'. Untracked habits are still drawn internally and each
    one adds a small bonus to that day's happiness. Happiness has no
    cumulative component. All randomness comes from NumPy's global generator.

    Args:
        start_participant_id: ID assigned to the first participant; the
            remaining participants are numbered consecutively after it.

    Returns:
        A list of rows shaped
        [participant_id, 'Control', day, 'No', 'No', 'No', happiness].
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation tendency, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        # Per-participant happiness baseline drawn from N(4.0, 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        for day in DAYS:
            # Days whose index modulo 7 is 0 or 6 are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak habituation: grows from 1.001 on day 1 to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute the probability once and reuse it for both outcomes.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1
            # Each untracked habit contributes only 0.1 happiness points.
            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            # Round to the nearest integer and keep the score on the 1-10 scale.
            happiness = int(np.clip(np.round(happiness_value), 1, 10))
            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])
    return rows
# Generate the intervention rows first, then the control rows, so the random
# draws happen in the same order on every run.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    columns=[
        'Participant_ID',
        'Group',
        'Day',
        'Calendar_Adherence',
        'Cleanliness_Adherence',
        'Punctuality_Adherence',
        'Happiness',
    ],
    data=data,
)
# Write the combined dataset to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,92 @@
import pandas as pd
import numpy as np
np.random.seed(42) # ensures you get exactly the same data every time
N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 31))
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Unclamped probability estimate; may fall outside [0, 1].
        ceiling: Largest value that may be returned.
        floor: Smallest value returned when ``prob`` is too low. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling`` is below ``floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention group.

    Each participant gets one organisation tendency drawn from
    Normal(0.7, 0.15). On every day in ``DAYS`` the three habits are drawn
    independently as 'Yes'/'No' from probabilities derived from that
    tendency, and happiness is drawn around a fresh daily baseline plus 1.1
    points per completed habit.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index
        tendency = np.random.normal(0.7, 0.15)  # persistent per-person tendency
        for day in DAYS:
            # Per-habit "Yes" probabilities, clamped to a habit-specific ceiling.
            p_calendar = clip_yes_prob(tendency + 0.1, 0.95)
            p_clean = clip_yes_prob(tendency, 0.90)
            p_ontime = clip_yes_prob(tendency + 0.05, 0.92)

            calendar = np.random.choice(['Yes', 'No'], p=[p_calendar, 1 - p_calendar])
            clean = np.random.choice(['Yes', 'No'], p=[p_clean, 1 - p_clean])
            ontime = np.random.choice(['Yes', 'No'], p=[p_ontime, 1 - p_ontime])
            completed = [calendar, clean, ontime].count('Yes')

            mood_centre = np.random.normal(5.5, 1.0)
            raw_score = np.random.normal(mood_centre + completed * 1.1, 1.2)
            # NOTE: int() truncates the clipped score toward zero (no rounding).
            happiness = int(np.clip(raw_score, 1, 10))

            records.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants record only happiness, so all three habit columns
    are the literal 'No'. Each day's score is the sum of two independent
    normal draws, Normal(5.5, 1.0) and Normal(0.0, 1.1), clipped to [1, 10].

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index
        for day in DAYS:
            mood = np.random.normal(5.5, 1.0)
            jitter = np.random.normal(0.0, 1.1)
            # NOTE: int() truncates the clipped score toward zero (no rounding).
            score = int(np.clip(mood + jitter, 1, 10))
            records.append([participant_id, 'Control', day, 'No', 'No', 'No', score])
    return records
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Study design constants.
N_PARTICIPANTS_PER_GROUP = 40  # participants in each group
DAYS = [*range(1, 31)]  # study days 1..30 inclusive

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(43)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Raw probability estimate; may fall outside [0, 1].
        ceiling: Upper bound on the returned value.
        floor: Lower bound on the returned value. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``prob`` limited to at least ``floor`` and at most ``ceiling``. The
        ceiling is applied last, so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate habit tracking and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline drawn from Normal(4.0, 1.0). Daily habit
    probabilities depend on the ease, a weekend penalty, a study-phase
    factor and whether the same habit was done the previous day. Happiness
    is the baseline plus an immediate bonus for today's habits, a bonus
    from accumulated habit strength, and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    # Change in habit strength for 0, 1, 2 or 3 habits completed in a day.
    strength_delta = {0: -0.2, 1: 0.15, 2: 0.35, 3: 0.6}

    rows = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, then a personal ease per habit.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0  # cumulative, kept within [0, 5]
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index mod 7 is 0 or 6 are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Study phase: harder start, easier middle, slight late fatigue.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # Momentum: a habit done yesterday is more likely today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0

            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )

            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            adherence_count = [calendar, clean, ontime].count('Yes')

            # Strength grows with completed habits and decays on a zero day.
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)
            daily_habit_bonus = adherence_count * 0.6  # 0 to 1.8
            # Multiplier reaches 0.6 on day 30, so this bonus is at most 5 * 0.6 = 3.0.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)

            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants do not track habits, so all three habit columns
    are reported as 'No'. Hidden "untracked" habits are still simulated
    from a per-person organisation level and add 0.1 happiness points each.
    Happiness is the personal baseline (Normal(5.1, 0.9)) plus that boost
    plus Normal(0, 1.0) noise, with no carry-over between days.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation level, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Mild weekend effect on days whose index mod 7 is 0 or 6.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak drift: 1.001 on day 1 up to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once and reuse it for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)

            # Untracked habits: simulated but never written to the output row.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.1

            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,180 @@
import pandas as pd
import numpy as np
# Study design constants.
N_PARTICIPANTS_PER_GROUP = 20  # participants in each group
DAYS = [*range(1, 31)]  # study days 1..30 inclusive

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Raw probability estimate; may fall outside [0, 1].
        ceiling: Upper bound on the returned value.
        floor: Lower bound on the returned value. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``prob`` limited to at least ``floor`` and at most ``ceiling``. The
        ceiling is applied last, so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate habit tracking and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline drawn from Normal(5.5, 1.2). Daily habit
    probabilities depend on the ease, a weekend penalty, a study-phase
    factor and whether the same habit was done the previous day. Happiness
    carries over between days: each day blends yesterday's value, the
    baseline, a boost for today's habits and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, then a personal ease per habit.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(5.5, 1.2)
        current_happiness = person_happiness_baseline
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index mod 7 is 0 or 6 are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Study phase: harder start, easier middle, slight late fatigue.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # Momentum: a habit done yesterday is more likely today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0

            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )

            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])

            # 1.2 points of boost per habit completed today (zero for none).
            adherence_count = [calendar, clean, ontime].count('Yes')
            habit_boost = adherence_count * 1.2

            happiness_noise = np.random.normal(0, 1.3)
            # Blend yesterday's happiness with the baseline, then add today's effects.
            blended = (
                current_happiness * 0.4
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9
                + happiness_noise
            )
            current_happiness = np.clip(blended, 1, 10)
            happiness = int(np.round(current_happiness))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants do not track habits, so all three habit columns
    are reported as 'No'. Hidden "untracked" habits are still simulated
    from a per-person organisation level and add 0.5 happiness points each.
    Happiness carries over between days: each day is half yesterday's
    value plus half the personal baseline (Normal(5.0, 1.3)), plus the
    boost and Normal(0, 1.6) noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation level, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Mild weekend effect on days whose index mod 7 is 0 or 6.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak drift: 1.001 on day 1 up to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once and reuse it for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)

            # Untracked habits: reported as "No" but they still nudge
            # happiness subliminally.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.5

            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10,
            )
            happiness = int(np.round(current_happiness))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Study design constants.
N_PARTICIPANTS_PER_GROUP = 40  # participants in each group
DAYS = [*range(1, 31)]  # study days 1..30 inclusive

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Raw probability estimate; may fall outside [0, 1].
        ceiling: Upper bound on the returned value.
        floor: Lower bound on the returned value. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``prob`` limited to at least ``floor`` and at most ``ceiling``. The
        ceiling is applied last, so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate habit tracking and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline drawn from Normal(4.0, 1.0). Daily habit
    probabilities depend on the ease, a weekend penalty, a study-phase
    factor and whether the same habit was done the previous day. Happiness
    is the baseline plus accumulated habit strength scaled by a multiplier
    that grows with study progress, plus noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    # Change in habit strength for 0, 1, 2 or 3 habits completed in a day.
    strength_delta = {0: -0.2, 1: 0.15, 2: 0.35, 3: 0.6}

    rows = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, then a personal ease per habit.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0  # cumulative, kept within [0, 5]
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index mod 7 is 0 or 6 are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Study phase: harder start, easier middle, slight late fatigue.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # Momentum: a habit done yesterday is more likely today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0

            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )

            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            adherence_count = [calendar, clean, ontime].count('Yes')

            # Strength grows with completed habits and decays on a zero day.
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.4)
            # Multiplier reaches 1.2 on day 30, so the habit term is at most 5 * 1.2 = 6.0.
            habit_term = habit_strength * (0.9 + study_progress * 0.3)

            happiness_value = person_happiness_baseline + habit_term + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants do not track habits, so all three habit columns
    are reported as 'No'. Hidden "untracked" habits are still simulated
    from a per-person organisation level and add 0.1 happiness points each.
    Happiness is the personal baseline (Normal(4.0, 1.0)) plus that boost
    plus Normal(0, 1.2) noise, with no carry-over between days.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation level, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Mild weekend effect on days whose index mod 7 is 0 or 6.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak drift: 1.001 on day 1 up to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once and reuse it for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)

            # Untracked habits: simulated but never written to the output row.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.1

            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Study design constants.
N_PARTICIPANTS_PER_GROUP = 20  # participants in each group
DAYS = [*range(1, 31)]  # study days 1..30 inclusive

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(64)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Raw probability estimate; may fall outside [0, 1].
        ceiling: Upper bound on the returned value.
        floor: Lower bound on the returned value. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``prob`` limited to at least ``floor`` and at most ``ceiling``. The
        ceiling is applied last, so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate habit tracking and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline drawn from Normal(4.8, 1.1). Daily habit
    probabilities depend on the ease, a weekend penalty, a study-phase
    factor and whether the same habit was done the previous day. Happiness
    is the baseline plus accumulated habit strength scaled by a multiplier
    that grows with study progress, plus noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    # Change in habit strength for 0, 1, 2 or 3 habits completed in a day.
    strength_delta = {0: -0.2, 1: 0.15, 2: 0.35, 3: 0.6}

    rows = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, then a personal ease per habit.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0  # cumulative, kept within [0, 4]
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index mod 7 is 0 or 6 are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Study phase: harder start, easier middle, slight late fatigue.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # Momentum: a habit done yesterday is more likely today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0

            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )

            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            adherence_count = [calendar, clean, ontime].count('Yes')

            # Strength grows with completed habits and decays on a zero day.
            habit_strength = np.clip(
                habit_strength + strength_delta[adherence_count], 0, 4
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.7)
            # Multiplier reaches 1.5 on day 30, so the habit term is at most 4 * 1.5 = 6.0.
            habit_term = habit_strength * (0.5 + study_progress)

            happiness_value = person_happiness_baseline + habit_term + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants do not track habits, so all three habit columns
    are reported as 'No'. Hidden "untracked" habits are still simulated
    from a per-person organisation level and add 0.5 happiness points each.
    Happiness carries over between days: each day is half yesterday's
    value plus half the personal baseline (Normal(5.0, 1.3)), plus the
    boost and Normal(0, 1.6) noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        # Natural organisation level, limited to [0.05, 0.7].
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Mild weekend effect on days whose index mod 7 is 0 or 6.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak drift: 1.001 on day 1 up to 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once and reuse it for both outcomes.
            calendar_prob = clip_yes_prob(natural_org * 0.8 * week_factor * time_factor, 0.4)
            clean_prob = clip_yes_prob(natural_org * 0.75 * week_factor * time_factor, 0.35)
            ontime_prob = clip_yes_prob(natural_org * 0.85 * week_factor * time_factor, 0.45)

            # Untracked habits: reported as "No" but they still nudge
            # happiness subliminally.
            calendar_untracked = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean_untracked = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime_untracked = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])
            untracked_count = sum(
                x == 'Yes' for x in [calendar_untracked, clean_untracked, ontime_untracked]
            )
            subtle_boost = untracked_count * 0.5

            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10,
            )
            happiness = int(np.round(current_happiness))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1,144 @@
import pandas as pd
import numpy as np
# Study design constants.
N_PARTICIPANTS_PER_GROUP = 20  # participants in each group
DAYS = [*range(1, 31)]  # study days 1..30 inclusive

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(42)
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a "Yes" probability into the range [floor, ceiling].

    Args:
        prob: Raw probability estimate; may fall outside [0, 1].
        ceiling: Upper bound on the returned value.
        floor: Lower bound on the returned value. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``prob`` limited to at least ``floor`` and at most ``ceiling``. The
        ceiling is applied last, so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
def generate_intervention_group(start_participant_id=1):
    """Simulate habit tracking and happiness for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline drawn from Normal(5.5, 1.2). Daily habit
    probabilities depend on the ease, a weekend penalty, a study-phase
    factor and whether the same habit was done the previous day. Happiness
    carries over between days: each day blends yesterday's value, the
    baseline, a boost for today's habits and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency, then a personal ease per habit.
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)

        person_happiness_baseline = np.random.normal(5.5, 1.2)
        current_happiness = person_happiness_baseline
        prev_calendar = prev_clean = prev_ontime = 'No'

        for day in DAYS:
            # Days whose index mod 7 is 0 or 6 are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0
            # Study phase: harder start, easier middle, slight late fatigue.
            time_factor = 0.85 if day < 7 else (1.1 if day < 20 else 0.98)

            # Momentum: a habit done yesterday is more likely today.
            calendar_momentum = 0.15 if prev_calendar == 'Yes' else 0
            clean_momentum = 0.15 if prev_clean == 'Yes' else 0
            ontime_momentum = 0.12 if prev_ontime == 'Yes' else 0

            calendar_prob = clip_yes_prob(
                calendar_ease * week_difficulty * time_factor + calendar_momentum, 0.95
            )
            clean_prob = clip_yes_prob(
                clean_ease * week_difficulty * time_factor + clean_momentum, 0.90
            )
            ontime_prob = clip_yes_prob(
                ontime_ease * week_difficulty * time_factor + ontime_momentum, 0.93
            )

            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])

            # 1.2 points of boost per habit completed today (zero for none).
            adherence_count = [calendar, clean, ontime].count('Yes')
            habit_boost = adherence_count * 1.2

            happiness_noise = np.random.normal(0, 1.3)
            # Blend yesterday's happiness with the baseline, then add today's effects.
            blended = (
                current_happiness * 0.4
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9
                + happiness_noise
            )
            current_happiness = np.clip(blended, 1, 10)
            happiness = int(np.round(current_happiness))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )
            prev_calendar, prev_clean, prev_ontime = calendar, clean, ontime
    return rows
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control group.

    Control participants record only happiness, so all three habit columns
    are the literal 'No'. Each day's score is the sum of two independent
    normal draws, Normal(5.5, 1.0) and Normal(0.0, 1.1), rounded to the
    nearest integer and clipped to [1, 10].

    Args:
        start_participant_id: ID given to the first participant; later
            participants count up from it.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]``, with happiness an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        for day in DAYS:
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # Round rather than truncate: the intervention generator in this
            # file rounds its scores, and truncating only here would push
            # control scores about half a point lower for no modelled reason.
            happiness = int(
                np.clip(np.round(baseline_happiness + control_noise), 1, 10)
            )
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Build the combined dataset: intervention rows first, then control rows.
# Control participant IDs continue immediately after the intervention IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the dataset to the current working directory, without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/Data%20Gen.py","entries":[{"id":"54EK.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345349390},{"id":"PpFf.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345378739},{"id":"cTNf.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347044805},{"id":"WSl3.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347057825},{"id":"9dqp.py","timestamp":1774347206509},{"id":"blt8.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347345483},{"id":"3jGE.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? 
at the moment, the intervention group is happier after a single day.'","timestamp":1774347365731},{"id":"sBVR.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347432858},{"id":"j9Wc.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347711480},{"id":"bLJN.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347763541},{"id":"MJ5p.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347783690},{"id":"EkUx.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347933805},{"id":"u91r.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347955983},{"id":"QTk6.py","timestamp":1774348022105},{"id":"o2Y7.py","timestamp":1774348397371},{"id":"46oA.py","timestamp":1774352345991}]}

View file

@ -0,0 +1,197 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run regenerates the same dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 30 + 1))
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*."""
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline.  Every day the three habits are drawn as
    'Yes'/'No', a clipped habit-strength accumulator is updated, and
    happiness is baseline + habit effect + noise, rounded and clipped to
    1-10.  Random draws happen in a fixed order so seeded runs reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, one per participant per day.
    """
    # (momentum bonus, probability ceiling) per habit, in draw order:
    # calendar, cleanliness, punctuality.
    habit_params = ((0.15, 0.95), (0.15, 0.90), (0.12, 0.93))
    # Change in habit strength, indexed by how many habits were done today.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + idx
        # Persistent per-person organisation tendency.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease, drawn in the order calendar, clean, on-time.
        ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )
        baseline = np.random.normal(4.0, 1.0)
        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as the weekend.
            week_difficulty = 0.75 if (day % 7) in [0, 6] else 1.0
            # First week is harder, the middle stretch easier, then mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for habit_ease, (momentum, ceiling), prev in zip(ease, habit_params, yesterday):
                # Doing the habit yesterday adds a momentum bonus today.
                bonus = momentum if prev == 'Yes' else 0
                p_yes = clip_yes_prob(habit_ease * week_difficulty * time_factor + bonus, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            done = today.count('Yes')
            # Accumulates with completed habits, decays on a zero day; kept in [0, 3].
            strength = np.clip(strength + strength_delta[done], 0, 3)

            progress = day / 30.0
            noise = np.random.normal(0, 0.5)
            # The multiplier on habit strength grows from ~0.42 to 0.9 over the study.
            raw_happiness = baseline + strength * (0.4 + progress * 0.5) + noise
            score = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([pid, 'Intervention', day, *today, score])
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control arm.

    Each participant gets a natural organisation level and a happiness
    baseline.  Every day three untracked habits are drawn; they are never
    reported (all habit columns are 'No') and only add 0.1 per habit to that
    day's happiness, which is baseline + boost + noise, rounded and clipped
    to 1-10.  Random draws happen in a fixed order so seeded runs reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, one per participant per day.
    """
    # (scale applied to natural_org, probability ceiling) per untracked
    # habit, in draw order: calendar, cleanliness, punctuality.
    untracked_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild adherence penalty.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_params:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are reported as 'No' because this arm does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble every row: the intervention arm takes IDs starting at 1 and the
# control arm continues with the next block of IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to the working directory (no index column),
# then confirm on stdout and preview the first ten rows.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run regenerates the same dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 30 + 1))
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*."""
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline.  Every day the three habits are drawn as
    'Yes'/'No', a clipped habit-strength accumulator is updated, and
    happiness is baseline + same-day habit bonus + cumulative bonus + noise,
    rounded and clipped to 1-10.  Random draws happen in a fixed order so
    seeded runs reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, one per participant per day.
    """
    # (momentum bonus, probability ceiling) per habit, in draw order:
    # calendar, cleanliness, punctuality.
    habit_params = ((0.15, 0.95), (0.15, 0.90), (0.12, 0.93))
    # Change in habit strength, indexed by how many habits were done today.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + idx
        # Persistent per-person organisation tendency.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease, drawn in the order calendar, clean, on-time.
        ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )
        baseline = np.random.normal(4.0, 1.0)
        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as the weekend.
            week_difficulty = 0.75 if (day % 7) in [0, 6] else 1.0
            # First week is harder, the middle stretch easier, then mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for habit_ease, (momentum, ceiling), prev in zip(ease, habit_params, yesterday):
                # Doing the habit yesterday adds a momentum bonus today.
                bonus = momentum if prev == 'Yes' else 0
                p_yes = clip_yes_prob(habit_ease * week_difficulty * time_factor + bonus, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            done = today.count('Yes')
            # Accumulates with completed habits, decays on a zero day; kept in [0, 5].
            strength = np.clip(strength + strength_delta[done], 0, 5)

            progress = day / 30.0
            noise = np.random.normal(0, 0.35)
            # Same-day effect: 0.6 per habit completed today (0 to 1.8).
            today_bonus = done * 0.6
            # Cumulative effect: at most 5 * 0.6 = 3.0 on the final day.
            built_up_bonus = strength * (0.4 + progress * 0.2)
            raw_happiness = baseline + today_bonus + built_up_bonus + noise
            score = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([pid, 'Intervention', day, *today, score])
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control arm.

    Each participant gets a natural organisation level and a happiness
    baseline centred on 5.1.  Every day three untracked habits are drawn;
    they are never reported (all habit columns are 'No') and only add 0.1 per
    habit to that day's happiness, which is baseline + boost + noise, rounded
    and clipped to 1-10.  Random draws happen in a fixed order so seeded runs
    reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, one per participant per day.
    """
    # (scale applied to natural_org, probability ceiling) per untracked
    # habit, in draw order: calendar, cleanliness, punctuality.
    untracked_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild adherence penalty.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_params:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are reported as 'No' because this arm does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble every row: the intervention arm takes IDs starting at 1 and the
# control arm continues with the next block of IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to the working directory (no index column),
# then confirm on stdout and preview the first ten rows.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))

View file

@ -0,0 +1,196 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run regenerates the same dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = list(range(1, 30 + 1))
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*."""
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline.  Every day the three habits are drawn as
    'Yes'/'No', a clipped habit-strength accumulator is updated, and
    happiness is baseline + habit effect + noise, rounded and clipped to
    1-10.  Random draws happen in a fixed order so seeded runs reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, one per participant per day.
    """
    # (momentum bonus, probability ceiling) per habit, in draw order:
    # calendar, cleanliness, punctuality.
    habit_params = ((0.15, 0.95), (0.15, 0.90), (0.12, 0.93))
    # Change in habit strength, indexed by how many habits were done today.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + idx
        # Persistent per-person organisation tendency.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease, drawn in the order calendar, clean, on-time.
        ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )
        baseline = np.random.normal(4.8, 1.1)
        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as the weekend.
            week_difficulty = 0.75 if (day % 7) in [0, 6] else 1.0
            # First week is harder, the middle stretch easier, then mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for habit_ease, (momentum, ceiling), prev in zip(ease, habit_params, yesterday):
                # Doing the habit yesterday adds a momentum bonus today.
                bonus = momentum if prev == 'Yes' else 0
                p_yes = clip_yes_prob(habit_ease * week_difficulty * time_factor + bonus, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            done = today.count('Yes')
            # Accumulates with completed habits, decays on a zero day; kept in [0, 4].
            strength = np.clip(strength + strength_delta[done], 0, 4)

            progress = day / 30.0
            noise = np.random.normal(0, 0.5)
            # The multiplier on habit strength grows from ~0.53 to 1.5 over the study.
            raw_happiness = baseline + strength * (0.5 + progress) + noise
            score = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([pid, 'Intervention', day, *today, score])
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control arm.

    Each participant gets a natural organisation level and a happiness
    baseline centred on 4.8.  Every day three untracked habits are drawn;
    they are never reported (all habit columns are 'No') and only add 0.1 per
    habit to that day's happiness, which is baseline + boost + noise, rounded
    and clipped to 1-10.  Random draws happen in a fixed order so seeded runs
    reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, one per participant per day.
    """
    # (scale applied to natural_org, probability ceiling) per untracked
    # habit, in draw order: calendar, cleanliness, punctuality.
    untracked_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(4.8, 1.3)

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild adherence penalty.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_params:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are reported as 'No' because this arm does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble every row: the intervention arm takes IDs starting at 1 and the
# control arm continues with the next block of IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to the working directory (no index column),
# then confirm on stdout and preview the first ten rows.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))

View file

@ -0,0 +1,200 @@
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so every run regenerates the same dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = list(range(1, 30 + 1))
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*."""
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    Each participant gets a persistent organisation tendency, a per-habit
    ease and a happiness baseline.  Every day the three habits are drawn as
    'Yes'/'No', a clipped habit-strength accumulator is updated, and
    happiness is baseline + same-day habit bonus + cumulative bonus + noise,
    rounded and clipped to 1-10.  Random draws happen in a fixed order so
    seeded runs reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, one per participant per day.
    """
    # (momentum bonus, probability ceiling) per habit, in draw order:
    # calendar, cleanliness, punctuality.
    habit_params = ((0.15, 0.95), (0.15, 0.90), (0.12, 0.93))
    # Change in habit strength, indexed by how many habits were done today.
    strength_delta = (-0.2, 0.15, 0.35, 0.6)

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        pid = start_participant_id + idx
        # Persistent per-person organisation tendency.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        # Per-habit ease, drawn in the order calendar, clean, on-time.
        ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )
        baseline = np.random.normal(4.0, 1.0)
        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 are treated as the weekend.
            week_difficulty = 0.75 if (day % 7) in [0, 6] else 1.0
            # First week is harder, the middle stretch easier, then mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for habit_ease, (momentum, ceiling), prev in zip(ease, habit_params, yesterday):
                # Doing the habit yesterday adds a momentum bonus today.
                bonus = momentum if prev == 'Yes' else 0
                p_yes = clip_yes_prob(habit_ease * week_difficulty * time_factor + bonus, ceiling)
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            done = today.count('Yes')
            # Accumulates with completed habits, decays on a zero day; kept in [0, 5].
            strength = np.clip(strength + strength_delta[done], 0, 5)

            progress = day / 30.0
            noise = np.random.normal(0, 0.35)
            # Same-day effect: 0.6 per habit completed today (0 to 1.8).
            today_bonus = done * 0.6
            # Cumulative effect: at most 5 * 0.6 = 3.0 on the final day.
            built_up_bonus = strength * (0.4 + progress * 0.2)
            raw_happiness = baseline + today_bonus + built_up_bonus + noise
            score = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([pid, 'Intervention', day, *today, score])
            yesterday = today
    return records
def generate_control_group(start_participant_id):
    """Simulate daily happiness for the control arm.

    Each participant gets a natural organisation level and a happiness
    baseline centred on 5.1.  Every day three untracked habits are drawn;
    they are never reported (all habit columns are 'No') and only add 0.1 per
    habit to that day's happiness, which is baseline + boost + noise, rounded
    and clipped to 1-10.  Random draws happen in a fixed order so seeded runs
    reproduce.

    Args:
        start_participant_id: ID given to the first participant; later
            participants get consecutive IDs.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, one per participant per day.
    """
    # (scale applied to natural_org, probability ceiling) per untracked
    # habit, in draw order: calendar, cleanliness, punctuality.
    untracked_params = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 equal to 0 or 6 get a mild adherence penalty.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very weak upward drift over the study (at most +3% on day 30).
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_params:
                # Compute each probability once and reuse it for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            # Habits are reported as 'No' because this arm does not track them.
            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])
    return rows
# Assemble every row: the intervention arm takes IDs starting at 1 and the
# control arm continues with the next block of IDs.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Write the combined dataset to the working directory (no index column),
# then confirm on stdout and preview the first ten rows.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))

View file

@ -0,0 +1,6 @@
pandas
numpy
matplotlib
seaborn
scipy
statsmodels

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/requirements.txt","entries":[{"id":"1nKM.txt","source":"Chat Edit: 'improve on this analysis script'","timestamp":1774345121245}]}

View file

@ -0,0 +1,284 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Root logger: show INFO and above, formatted as "LEVEL: message".
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log the row count.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study table for analysis.

    Works on a copy, so the caller's DataFrame is left untouched.

    Steps: check the required columns exist, default a missing ``Group``
    column to 'Intervention' and title-case it, map the three adherence
    columns from yes/no/true/false text (any case, surrounding whitespace
    ignored) to booleans, count completed habits per row, coerce
    ``Happiness`` to numeric, and drop rows whose ``Happiness`` is not
    numeric.

    Args:
        df: Raw table as loaded from the CSV.

    Returns:
        A new DataFrame with normalised columns plus ``Habits_Count``.

    Raises:
        KeyError: If any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before adding or overwriting columns so the caller's frame is not mutated.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False); anything else becomes NaN.
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Count habits per row. Comparing against True treats unrecognised (NaN)
    # entries as not completed without filling object-dtype columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Reads ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns; group breakdowns are printed only when ``Group`` is present.
    The per-habit point-biserial correlations use intervention rows only
    when a ``Group`` column exists. Nothing is returned.
    """
    habit_columns = ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence')
    summary_stats = ['mean', 'count', 'std']
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    happiness_by_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(happiness_by_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(happiness_by_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for column in habit_columns:
        observed = subset[column].notna()
        if not observed.any():
            print(f'{column:22} (no data)')
            continue
        flags = subset.loc[observed, column].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{column:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples using the pooled SD.

    Args:
        x, y: Array-likes of numbers (lists, NumPy arrays, pandas Series).
            NaN entries are dropped before any statistic is computed so the
            sample sizes match the values that are actually used.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float, or NaN when the
        effect size is undefined: either sample has fewer than two usable
        values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        # A sample variance (ddof=1) needs at least two observations.
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var == 0:
        # Both samples are constant; avoid dividing by zero.
        return float('nan')
    return float((x.mean() - y.mean()) / np.sqrt(pooled_var))
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When ``df`` has a ``Group`` column the group is added as a categorical
    predictor through the formula interface; otherwise a plain
    intercept-plus-slope model is fitted. Returns the fitted results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit and print a mixed model of Happiness on Habits_Count.

    Rows are grouped by ``Participant_ID`` (random intercept) and the model
    is fitted with ``reml=False``. This step is best-effort: any exception
    raised while building, fitting or summarising the model is logged as a
    warning and ``None`` is returned instead of the fitted result.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Save the study figures as numbered PNG files in ``outdir``.

    Figures written (``*`` = only when ``df`` has a ``Group`` column; the
    trend plot additionally needs ``Day``):
      01* mean happiness per group with 1.96 x standard-error bars
      02* violin + strip plot of happiness per group
      03* mean daily happiness per group
      04  happiness by ``Habits_Count`` (intervention rows only when
          ``Group`` exists)
      05  completion rate of each habit in those same rows
      06* per-participant mean happiness, sorted within group

    Args:
        df: Prepared data containing ``Happiness``, ``Habits_Count``,
            ``Participant_ID`` and the three ``*_Adherence`` columns.
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown before being closed.
    """
    # Imported explicitly rather than reached through ``plt.matplotlib.ticker``,
    # which only works because pyplot happens to expose those attributes.
    from matplotlib.ticker import PercentFormatter

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    group_order = ['Control', 'Intervention']
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    has_group = 'Group' in df.columns

    def finish_plot(filename):
        # Shared epilogue: lay out, save, optionally show, then free the figure.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    # 1) PRIMARY OUTCOME: Mean happiness by group with error bars and value labels
    if has_group:
        plt.figure(figsize=(8, 6))
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(group_order)
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        positions = np.arange(len(summary))
        bars = plt.bar(
            positions,
            summary['mean'].values,
            yerr=ci95.values,
            color=['#A9B2C3', '#4E79A7'],
            capsize=8,
            edgecolor='black',
            linewidth=1.2,
            alpha=0.9
        )
        plt.xticks(positions, ['Control Group\n(No habits tracked)', 'Intervention Group\n(Daily habits tracked)'])
        plt.title('Effect of Tracked Organization Habits on Happiness', pad=15, fontsize=14, fontweight='bold')
        plt.ylabel('Mean Daily Happiness Score (1-10)', fontsize=12)
        plt.ylim(1, 10)
        for bar in bars:
            yval = bar.get_height()
            # Value label sits inside the bar, just below its top edge.
            plt.text(bar.get_x() + bar.get_width()/2, yval - 0.8, f'{yval:.1f}', ha='center', va='center', color='white', fontweight='bold', fontsize=11)
        finish_plot('01_primary_outcome_group_comparison.png')

    # 2) DISTRIBUTIONS: Show overlap and variability in happiness scores
    if has_group:
        plt.figure(figsize=(9, 6))
        sns.violinplot(
            data=df, x='Group', y='Happiness', order=group_order,
            inner='quartile', palette={'Control': '#E0E0E0', 'Intervention': '#B3CDE3'}, cut=0
        )
        sns.stripplot(
            data=df, x='Group', y='Happiness', order=group_order,
            color='black', alpha=0.12, jitter=0.25, size=3
        )
        plt.title('Distribution of Happiness Reports Over 30 Days', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Study Group', fontsize=12)
        plt.ylabel('Happiness Score', fontsize=12)
        plt.ylim(1, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) LONGITUDINAL: Daily happiness trend across 30 days
    if has_group and 'Day' in df.columns:
        plt.figure(figsize=(10, 6))
        daily_mean = df.groupby(['Group', 'Day'])['Happiness'].mean().reset_index()
        sns.lineplot(
            data=daily_mean, x='Day', y='Happiness', hue='Group',
            hue_order=group_order,
            palette={'Control': '#7F7F7F', 'Intervention': '#D62728'},
            marker='o', linewidth=2.5, markersize=6
        )
        plt.title('Longitudinal Daily Happiness Throughout the Study', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Day of Study (1-30)', fontsize=12)
        plt.ylabel('Average Happiness', fontsize=12)
        plt.ylim(1, 10)
        plt.xticks(range(1, 31, 2))
        plt.legend(title='', frameon=True, facecolor='white', fontsize=10)
        finish_plot('03_longitudinal_trends.png')

    # 4) DOSE-RESPONSE: In intervention group, does MORE habits = MORE happiness?
    intervention_df = df[df['Group'] == 'Intervention'] if has_group else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(
        data=intervention_df, x='Habits_Count', y='Happiness',
        color='#9ECAE1', width=0.6, fliersize=0
    )
    sns.stripplot(
        data=intervention_df, x='Habits_Count', y='Happiness',
        color='#2B5B84', alpha=0.3, jitter=0.2, size=4
    )
    plt.title('Dose-Response: Happiness by Number of Habits Completed', pad=15, fontsize=14, fontweight='bold')
    plt.xlabel('Number of Requested Habits Completed That Day\n(Calendar + Clean Room + Punctual)', fontsize=11)
    plt.ylabel('Happiness Score', fontsize=12)
    plt.ylim(1, 10)
    finish_plot('04_habit_dose_response.png')

    # 5) HABIT COMPLETION RATES: Which habits were easiest to maintain?
    # Cast to float first: the adherence columns may be object dtype
    # (True/False/NaN), and a float mean skips NaN and yields a proportion.
    adherence_rates = (
        intervention_df[habit_cols].astype(float).mean().sort_values(ascending=False).reset_index()
    )
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(8, 6))
    bars = sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Which Habits Were Easiest to Keep?', pad=15, fontsize=14, fontweight='bold')
    plt.xlabel('', fontsize=12)
    plt.ylabel('Percentage of Days Completed', fontsize=12)
    plt.ylim(0, 1.05)
    plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0))
    for bar in bars.patches:
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                 f"{bar.get_height()*100:.0f}%", ha='center', va='bottom', fontweight='bold', fontsize=10)
    finish_plot('05_habit_completion_rates.png')

    # 6) INDIVIDUAL VARIATION: Participant-level averages show broad effect
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'])['Happiness'].mean().reset_index()
        participant_avg = participant_avg.sort_values(['Group', 'Happiness'])
        participant_avg['Order_Index'] = range(len(participant_avg))
        for group, color in zip(group_order, ['#BDBDBD', '#4E79A7']):
            group_data = participant_avg[participant_avg['Group'] == group]
            plt.bar(group_data['Order_Index'], group_data['Happiness'], color=color, label=group, alpha=0.85, width=0.8)
        plt.axhline(df[df['Group']=='Control']['Happiness'].mean(), color='#7F7F7F', linestyle='--', linewidth=2, label='Control Mean')
        plt.axhline(df[df['Group']=='Intervention']['Happiness'].mean(), color='#2B5B84', linestyle='--', linewidth=2, label='Intervention Mean')
        plt.title('Individual Average Happiness Across Study Participants', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Individual Participants (Sorted by Happiness Level)', fontsize=12)
        plt.ylabel('Average Happiness Score', fontsize=12)
        plt.xticks([])
        plt.ylim(1, 10)
        plt.legend(frameon=True, facecolor='white', fontsize=10, loc='upper left')
        finish_plot('06_individual_participant_avgs.png')

    logging.info('Saved study plots to %s', outdir)
def main(args):
    """Run the whole analysis for the parsed command-line ``args``.

    Loads and prepares ``args.data``, prints descriptive statistics and
    effect sizes, fits the OLS and mixed models, then writes the plots to
    ``args.outdir`` (shown interactively when ``args.show`` is set).
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    # Effect sizes
    happiness = data['Happiness']
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        d = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        intervention_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(intervention_scores)) > 1:
            d_group = cohen_d(intervention_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Plots
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: collect the data path, output directory and
    # the interactive-display flag, then hand them to main().
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data', type=str, default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument('--outdir', type=str, default='plots', help='Directory to save plots')
    cli.add_argument('--show', action='store_true', help='Show plots interactively')
    main(cli.parse_args())

View file

@ -0,0 +1,227 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Configure the root logger at import time: INFO and above, printed as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame and return it.

    The number of rows read is reported through ``logging`` at INFO level.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and clean the raw study data, returning a new DataFrame.

    Steps:
      * check that the required columns are present;
      * default a missing ``Group`` column to ``'Intervention'`` and normalise
        the labels (stripped, title-cased);
      * map the three adherence columns from yes/no/true/false text
        (case-insensitive, surrounding whitespace ignored) to booleans;
        any other value becomes NaN;
      * add ``Habits_Count``, the number of habits marked True in each row
        (NaN adherence counts as not done);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    The caller's DataFrame is not modified; all work happens on a copy.

    Raises:
        KeyError: if any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the column assignments below never leak into the
    # DataFrame object the caller still holds.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without going through fillna() on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Reads ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns; group breakdowns are printed only when ``Group`` is present.
    The per-habit point-biserial correlations use intervention rows only
    when a ``Group`` column exists. Nothing is returned.
    """
    habit_columns = ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence')
    summary_stats = ['mean', 'count', 'std']
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    happiness_by_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(happiness_by_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(happiness_by_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for column in habit_columns:
        observed = subset[column].notna()
        if not observed.any():
            print(f'{column:22} (no data)')
            continue
        flags = subset.loc[observed, column].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{column:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples using the pooled SD.

    Args:
        x, y: Array-likes of numbers (lists, NumPy arrays, pandas Series).
            NaN entries are dropped before any statistic is computed so the
            sample sizes match the values that are actually used.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float, or NaN when the
        effect size is undefined: either sample has fewer than two usable
        values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        # A sample variance (ddof=1) needs at least two observations.
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var == 0:
        # Both samples are constant; avoid dividing by zero.
        return float('nan')
    return float((x.mean() - y.mean()) / np.sqrt(pooled_var))
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When ``df`` has a ``Group`` column the group is added as a categorical
    predictor through the formula interface; otherwise a plain
    intercept-plus-slope model is fitted. Returns the fitted results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit and print a mixed model of Happiness on Habits_Count.

    Rows are grouped by ``Participant_ID`` (random intercept) and the model
    is fitted with ``reml=False``. This step is best-effort: any exception
    raised while building, fitting or summarising the model is logged as a
    warning and ``None`` is returned instead of the fitted result.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Write the exploratory happiness-vs-habits figures to ``outdir`` as PNGs.

    Files produced: ``happiness_by_habits_box.png``,
    ``happiness_by_habits_violin.png``, ``participant_avg_happiness.png``,
    ``happiness_by_group.png`` (only when ``df`` has a ``Group`` column) and
    ``happiness_vs_habits_regression.png``.

    Args:
        df: Prepared data containing ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` (``Group`` is optional).
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown before being closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def _finish(filename):
        # Shared epilogue: lay out, save, optionally show, then free the figure.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range labels previously read "(03)" / "(110)": the dash was missing.
    plt.xlabel('Number of habits followed (0-3)')
    plt.ylabel('Happiness (1-10)')
    _finish('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _finish('happiness_by_habits_violin.png')

    # Participant average bar, sorted ascending, with the overall mean as a reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    positions = range(len(participant_avg))
    plt.figure(figsize=(12, 5))
    sns.barplot(x=positions, y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    # Tick labels are passed as strings so non-string participant IDs render as text.
    plt.xticks(positions, participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _finish('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _finish('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # Overlay the pooled linear fit that the title and file name promise;
        # the group-coloured scatter alone draws no regression line.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black', ax=plt.gca())
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _finish('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis for the parsed command-line ``args``.

    Loads and prepares ``args.data``, prints descriptive statistics and
    effect sizes, fits the OLS and mixed models, then writes the plots to
    ``args.outdir`` (shown interactively when ``args.show`` is set).
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    # Effect sizes
    happiness = data['Happiness']
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        d = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        intervention_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(intervention_scores)) > 1:
            d_group = cohen_d(intervention_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Plots
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: collect the data path, output directory and
    # the interactive-display flag, then hand them to main().
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data', type=str, default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument('--outdir', type=str, default='plots', help='Directory to save plots')
    cli.add_argument('--show', action='store_true', help='Show plots interactively')
    main(cli.parse_args())

View file

@ -0,0 +1,227 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Configure the root logger at import time: INFO and above, printed as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame and return it.

    The number of rows read is reported through ``logging`` at INFO level.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and clean the raw study data, returning a new DataFrame.

    Steps:
      * check that the required columns are present;
      * default a missing ``Group`` column to ``'Intervention'`` and normalise
        the labels (stripped, title-cased);
      * map the three adherence columns from yes/no/true/false text
        (case-insensitive, surrounding whitespace ignored) to booleans;
        any other value becomes NaN;
      * add ``Habits_Count``, the number of habits marked True in each row
        (NaN adherence counts as not done);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    The caller's DataFrame is not modified; all work happens on a copy.

    Raises:
        KeyError: if any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the column assignments below never leak into the
    # DataFrame object the caller still holds.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without going through fillna() on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Reads ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns; group breakdowns are printed only when ``Group`` is present.
    The per-habit point-biserial correlations use intervention rows only
    when a ``Group`` column exists. Nothing is returned.
    """
    habit_columns = ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence')
    summary_stats = ['mean', 'count', 'std']
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    happiness_by_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(happiness_by_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(happiness_by_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for column in habit_columns:
        observed = subset[column].notna()
        if not observed.any():
            print(f'{column:22} (no data)')
            continue
        flags = subset.loc[observed, column].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{column:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples using the pooled SD.

    Args:
        x, y: Array-likes of numbers (lists, NumPy arrays, pandas Series).
            NaN entries are dropped before any statistic is computed so the
            sample sizes match the values that are actually used.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float, or NaN when the
        effect size is undefined: either sample has fewer than two usable
        values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        # A sample variance (ddof=1) needs at least two observations.
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var == 0:
        # Both samples are constant; avoid dividing by zero.
        return float('nan')
    return float((x.mean() - y.mean()) / np.sqrt(pooled_var))
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When ``df`` has a ``Group`` column the group is added as a categorical
    predictor through the formula interface; otherwise a plain
    intercept-plus-slope model is fitted. Returns the fitted results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit and print a mixed model of Happiness on Habits_Count.

    Rows are grouped by ``Participant_ID`` (random intercept) and the model
    is fitted with ``reml=False``. This step is best-effort: any exception
    raised while building, fitting or summarising the model is logged as a
    warning and ``None`` is returned instead of the fitted result.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Write the exploratory happiness-vs-habits figures to ``outdir`` as PNGs.

    Files produced: ``happiness_by_habits_box.png``,
    ``happiness_by_habits_violin.png``, ``participant_avg_happiness.png``,
    ``happiness_by_group.png`` (only when ``df`` has a ``Group`` column) and
    ``happiness_vs_habits_regression.png``.

    Args:
        df: Prepared data containing ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` (``Group`` is optional).
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown before being closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def _finish(filename):
        # Shared epilogue: lay out, save, optionally show, then free the figure.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range labels previously read "(03)" / "(110)": the dash was missing.
    plt.xlabel('Number of habits followed (0-3)')
    plt.ylabel('Happiness (1-10)')
    _finish('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _finish('happiness_by_habits_violin.png')

    # Participant average bar, sorted ascending, with the overall mean as a reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    positions = range(len(participant_avg))
    plt.figure(figsize=(12, 5))
    sns.barplot(x=positions, y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(positions, participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _finish('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _finish('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # Overlay the pooled linear fit that the title and file name promise;
        # the group-coloured scatter alone draws no regression line.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black', ax=plt.gca())
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _finish('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis for the parsed command-line ``args``.

    Loads and prepares ``args.data``, prints descriptive statistics and
    effect sizes, fits the OLS and mixed models, then writes the plots to
    ``args.outdir`` (shown interactively when ``args.show`` is set).
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    # Effect sizes
    happiness = data['Happiness']
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        d = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        intervention_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(intervention_scores)) > 1:
            d_group = cohen_d(intervention_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Plots
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: collect the data path, output directory and
    # the interactive-display flag, then hand them to main().
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data', type=str, default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument('--outdir', type=str, default='plots', help='Directory to save plots')
    cli.add_argument('--show', action='store_true', help='Show plots interactively')
    main(cli.parse_args())

View file

@ -0,0 +1,253 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Configure the root logger at import time: INFO and above, printed as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame and return it.

    The number of rows read is reported through ``logging`` at INFO level.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and clean the raw study data, returning a new DataFrame.

    Steps:
      * check that the required columns are present;
      * default a missing ``Group`` column to ``'Intervention'`` and normalise
        the labels (stripped, title-cased);
      * map the three adherence columns from yes/no/true/false text
        (case-insensitive, surrounding whitespace ignored) to booleans;
        any other value becomes NaN;
      * add ``Habits_Count``, the number of habits marked True in each row
        (NaN adherence counts as not done);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    The caller's DataFrame is not modified; all work happens on a copy.

    Raises:
        KeyError: if any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the column assignments below never leak into the
    # DataFrame object the caller still holds.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without going through fillna() on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Reads ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns; group breakdowns are printed only when ``Group`` is present.
    The per-habit point-biserial correlations use intervention rows only
    when a ``Group`` column exists. Nothing is returned.
    """
    habit_columns = ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence')
    summary_stats = ['mean', 'count', 'std']
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    happiness_by_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(happiness_by_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(happiness_by_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for column in habit_columns:
        observed = subset[column].notna()
        if not observed.any():
            print(f'{column:22} (no data)')
            continue
        flags = subset.loc[observed, column].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{column:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples using the pooled SD.

    Args:
        x, y: Array-likes of numbers (lists, NumPy arrays, pandas Series).
            NaN entries are dropped before any statistic is computed so the
            sample sizes match the values that are actually used.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float, or NaN when the
        effect size is undefined: either sample has fewer than two usable
        values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        # A sample variance (ddof=1) needs at least two observations.
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var == 0:
        # Both samples are constant; avoid dividing by zero.
        return float('nan')
    return float((x.mean() - y.mean()) / np.sqrt(pooled_var))
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When ``df`` has a ``Group`` column the group is added as a categorical
    predictor through the formula interface; otherwise a plain
    intercept-plus-slope model is fitted. Returns the fitted results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit and print a mixed model of Happiness on Habits_Count.

    Rows are grouped by ``Participant_ID`` (random intercept) and the model
    is fitted with ``reml=False``. This step is best-effort: any exception
    raised while building, fitting or summarising the model is logged as a
    warning and ``None`` is returned instead of the fitted result.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Save the study figures as numbered PNG files in ``outdir``.

    Figures written (``*`` = only when ``df`` has a ``Group`` column; the
    trend plot additionally needs ``Day``):
      01* mean happiness per group with 1.96 x standard-error bars
      02* box + strip plot of happiness per group
      03* mean daily happiness per group
      04  happiness by ``Habits_Count`` (intervention rows only when
          ``Group`` exists)
      05  mean happiness per ``Habits_Count`` in those same rows
      06  completion rate of each habit in those same rows
      07* distribution of per-participant mean happiness by group

    Args:
        df: Prepared data containing ``Happiness``, ``Habits_Count``,
            ``Participant_ID`` and the three ``*_Adherence`` columns.
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown before being closed.
    """
    # Imported explicitly rather than reached through ``plt.matplotlib.ticker``,
    # which only works because pyplot happens to expose those attributes.
    from matplotlib.ticker import PercentFormatter

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    group_order = ['Control', 'Intervention']
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    has_group = 'Group' in df.columns

    def finish_plot(filename):
        # Shared epilogue: lay out, save, optionally show, then free the figure.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    # 1) Mean happiness by group with error bars
    if has_group:
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(group_order)
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        plt.figure(figsize=(8, 6))
        plt.bar(summary.index, summary['mean'], color=['#7A7A7A', '#2A9D8F'], yerr=ci95, capsize=6)
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

    # 2) Distribution of happiness by group
    if has_group:
        plt.figure(figsize=(9, 6))
        sns.boxplot(data=df, x='Group', y='Happiness', order=group_order, color='#C9D1D9')
        sns.stripplot(data=df, x='Group', y='Happiness', order=group_order, color='black', alpha=0.18, jitter=0.22, size=2)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if has_group and 'Day' in df.columns:
        daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group', hue_order=group_order, marker='o')
        plt.title('Mean Daily Happiness Across the Study')
        plt.xlabel('Day of study')
        plt.ylabel('Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    intervention_df = df[df['Group'] == 'Intervention'] if has_group else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black', alpha=0.20, jitter=0.18, size=2)
    plt.title('Intervention Group: Happiness by Number of Habits Completed')
    plt.xlabel('Habits completed that day')
    plt.ylabel('Happiness score')
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    plt.title('Intervention Group: Mean Happiness vs Habits Completed')
    plt.xlabel('Number of habits completed')
    plt.ylabel('Mean happiness')
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    # Cast to float first: the adherence columns may be object dtype
    # (True/False/NaN), and a float mean skips NaN and yields a proportion.
    adherence_rates = (
        intervention_df[habit_cols].astype(float).mean().sort_values(ascending=False).reset_index()
    )
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Intervention Group: Habit Completion Rate')
    plt.xlabel('Habit')
    plt.ylabel('Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        sns.boxplot(data=participant_avg, x='Group', y='Happiness', order=group_order, color='#D6D6D6')
        sns.stripplot(data=participant_avg, x='Group', y='Happiness', order=group_order, color='black', alpha=0.45, jitter=0.12, size=5)
        plt.title('Average Happiness per Participant')
        plt.xlabel('Study group')
        plt.ylabel('Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the analysis pipeline end to end for one dataset.

    ``args`` must expose ``data`` (CSV path), ``outdir`` (figure directory)
    and ``show`` (display figures interactively). The function loads and
    prepares the data, prints descriptive statistics and up to two Cohen's d
    values, then calls ``run_ols``, ``run_mixedlm`` and ``make_plots``.
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    happiness = data['Happiness']

    # Effect size 1: rows with all three habits versus rows with none.
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        effect = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {effect:.3f}")

    # Effect size 2: intervention versus control, when groups are recorded.
    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        treated_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(treated_scores)) > 1:
            group_effect = cohen_d(treated_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {group_effect:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Figures
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: declare the three options, parse, and run.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    option_specs = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,272 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data; return a new DataFrame.

    The input frame is copied first, so the caller's object is not modified.

    Steps:
      * require ``Participant_ID``, ``Happiness`` and the three adherence
        columns;
      * add a ``Group`` column filled with ``'Intervention'`` when absent,
        then strip and title-case it;
      * map adherence text (yes/no/true/false, case-insensitive, surrounding
        whitespace ignored) to ``True``/``False``; any other value becomes
        missing;
      * add ``Habits_Count``, the number of adherence columns that are
        ``True`` (missing counts as not completed);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before assigning: the column writes below would otherwise leak
    # into the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Comparing with True treats missing values as "not completed" without
    # calling fillna() on object columns, which newer pandas warns about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1).astype(int)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Output, in order: frame shape, overall Happiness summary, per-group row
    counts and Happiness statistics (only when a ``Group`` column exists),
    Happiness statistics and medians per ``Habits_Count``, the Pearson
    correlation matrix of ``Habits_Count`` and ``Happiness``, and one
    point-biserial correlation per habit (intervention rows only when groups
    are present). Returns nothing.
    """
    grouped_study = 'Group' in df.columns
    summary_stats = ['mean', 'count', 'std']
    happiness = df['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(happiness.describe())

    if grouped_study:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    per_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(per_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(per_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if grouped_study else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        recorded = subset[habit].notna()
        if not recorded.any():
            print(f'{habit:22} (no data)')
            continue
        flags = subset.loc[recorded, habit].astype(int)
        scores = subset.loc[recorded, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    ``d = (mean(x) - mean(y)) / s_pooled``, where ``s_pooled`` combines the
    two sample variances (ddof=1) weighted by their degrees of freedom.

    Args:
        x, y: one-dimensional array-likes of numbers (pandas Series, NumPy
            arrays or plain sequences). NaN entries are discarded before
            counting, so the sample sizes match the values actually used.

    Returns:
        float: the effect size. ``nan`` when either sample has fewer than two
        usable values, or when both samples are constant and equal; signed
        infinity when both samples are constant but their means differ.
    """
    a = np.asarray(x, dtype=float).ravel()
    b = np.asarray(y, dtype=float).ravel()
    a = a[~np.isnan(a)]
    b = b[~np.isnan(b)]

    nx, ny = a.size, b.size
    if nx < 2 or ny < 2:
        # A sample variance needs at least two observations per group.
        return float('nan')

    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * a.var(ddof=1) + (ny - 1) * b.var(ddof=1)) / dof)
    diff = a.mean() - b.mean()
    if pooled_sd == 0:
        # Avoid a division-by-zero warning; keep the limiting value explicit.
        return float('nan') if diff == 0 else float(np.copysign(np.inf, diff))
    return float(diff / pooled_sd)
def run_ols(df):
    """Fit, print and return an OLS model of Happiness on Habits_Count.

    With a ``Group`` column the model is built through the formula interface
    and includes ``C(Group)``; without one, a single-predictor model with an
    explicit constant term is fitted instead.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        fitted = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit a mixed model of Happiness on Habits_Count grouped by participant.

    The fit is best-effort: on any exception a warning is logged and ``None``
    is returned; otherwise the summary is printed and the fitted result is
    returned.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        result = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return result
def make_plots(df, outdir, show_plots=False):
    """Draw the study figures and save each one as a PNG inside ``outdir``.

    Args:
        df: prepared data containing ``Happiness``, ``Habits_Count``,
            ``Participant_ID`` and the three ``*_Adherence`` columns.
            ``Group`` and ``Day`` are optional; the group and daily-trend
            figures are only drawn when they are present.
        outdir: output directory; created (with parents) if missing.
        show_plots: when true, each figure is also shown before it is closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    def finish_plot(filename):
        # Save the current figure, optionally display it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    # 1) Mean happiness by group with error bars
    if 'Group' in df.columns:
        plt.figure(figsize=(8, 6))
        order = ['Control', 'Intervention']
        sns.barplot(
            data=df,
            x='Group',
            y='Happiness',
            order=order,
            estimator='mean',
            errorbar=('ci', 95),
            palette=['#7A7A7A', '#2A9D8F'],
        )
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

    # 2) Distribution of happiness by group
    if 'Group' in df.columns:
        plt.figure(figsize=(9, 6))
        order = ['Control', 'Intervention']
        sns.boxplot(data=df, x='Group', y='Happiness', order=order, palette=['#B0B0B0', '#73C6B6'])
        sns.stripplot(data=df, x='Group', y='Happiness', order=order, color='black', alpha=0.18, jitter=0.22, size=2)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if 'Group' in df.columns and 'Day' in df.columns:
        daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group', hue_order=['Control', 'Intervention'], marker='o')
        plt.title('Mean Daily Happiness Across the Study')
        plt.xlabel('Day of study')
        plt.ylabel('Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    intervention_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black', alpha=0.20, jitter=0.18, size=2)
    plt.title('Intervention Group: Happiness by Number of Habits Completed')
    plt.xlabel('Habits completed that day')
    plt.ylabel('Happiness score')
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    plt.title('Intervention Group: Mean Happiness vs Habits Completed')
    plt.xlabel('Number of habits completed')
    plt.ylabel('Mean happiness')
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    # Name both columns while converting the Series to a frame, so the bar
    # plot can refer to the rate column by that name.
    adherence_rates = (
        intervention_df[habit_cols]
        .mean()
        .sort_values(ascending=False)
        .rename_axis('Habit')
        .reset_index(name='Rate')
    )
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    # y must be 'Rate': after the naming step no column is called 0 any more.
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Intervention Group: Habit Completion Rate')
    plt.xlabel('Habit')
    plt.ylabel('Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant averages, grouped by study group
    if 'Group' in df.columns:
        participant_avg = df.groupby(['Group', 'Participant_ID'])['Happiness'].mean().reset_index()
        plt.figure(figsize=(12, 6))
        sns.barplot(
            data=participant_avg,
            x='Participant_ID',
            y='Happiness',
            hue='Group',
            dodge=True,
            palette=['#7A7A7A', '#2A9D8F'],
        )
        plt.title('Average Happiness per Participant')
        plt.xlabel('Participant ID')
        plt.ylabel('Mean happiness')
        plt.ylim(0, 10)
        plt.xticks(rotation=45)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the analysis pipeline end to end for one dataset.

    ``args`` must expose ``data`` (CSV path), ``outdir`` (figure directory)
    and ``show`` (display figures interactively). The function loads and
    prepares the data, prints descriptive statistics and up to two Cohen's d
    values, then calls ``run_ols``, ``run_mixedlm`` and ``make_plots``.
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    happiness = data['Happiness']

    # Effect size 1: rows with all three habits versus rows with none.
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        effect = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {effect:.3f}")

    # Effect size 2: intervention versus control, when groups are recorded.
    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        treated_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(treated_scores)) > 1:
            group_effect = cohen_d(treated_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {group_effect:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Figures
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: declare the three options, parse, and run.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    option_specs = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,270 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data; return a new DataFrame.

    The input frame is copied first, so the caller's object is not modified.

    Steps:
      * require ``Participant_ID``, ``Happiness`` and the three adherence
        columns;
      * add a ``Group`` column filled with ``'Intervention'`` when absent,
        then strip and title-case it;
      * map adherence text (yes/no/true/false, case-insensitive, surrounding
        whitespace ignored) to ``True``/``False``; any other value becomes
        missing;
      * add ``Habits_Count``, the number of adherence columns that are
        ``True`` (missing counts as not completed);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before assigning: the column writes below would otherwise leak
    # into the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Comparing with True treats missing values as "not completed" without
    # calling fillna() on object columns, which newer pandas warns about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1).astype(int)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Output, in order: frame shape, overall Happiness summary, per-group row
    counts and Happiness statistics (only when a ``Group`` column exists),
    Happiness statistics and medians per ``Habits_Count``, the Pearson
    correlation matrix of ``Habits_Count`` and ``Happiness``, and one
    point-biserial correlation per habit (intervention rows only when groups
    are present). Returns nothing.
    """
    grouped_study = 'Group' in df.columns
    summary_stats = ['mean', 'count', 'std']
    happiness = df['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(happiness.describe())

    if grouped_study:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    per_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(per_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(per_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if grouped_study else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        recorded = subset[habit].notna()
        if not recorded.any():
            print(f'{habit:22} (no data)')
            continue
        flags = subset.loc[recorded, habit].astype(int)
        scores = subset.loc[recorded, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    ``d = (mean(x) - mean(y)) / s_pooled``, where ``s_pooled`` combines the
    two sample variances (ddof=1) weighted by their degrees of freedom.

    Args:
        x, y: one-dimensional array-likes of numbers (pandas Series, NumPy
            arrays or plain sequences). NaN entries are discarded before
            counting, so the sample sizes match the values actually used.

    Returns:
        float: the effect size. ``nan`` when either sample has fewer than two
        usable values, or when both samples are constant and equal; signed
        infinity when both samples are constant but their means differ.
    """
    a = np.asarray(x, dtype=float).ravel()
    b = np.asarray(y, dtype=float).ravel()
    a = a[~np.isnan(a)]
    b = b[~np.isnan(b)]

    nx, ny = a.size, b.size
    if nx < 2 or ny < 2:
        # A sample variance needs at least two observations per group.
        return float('nan')

    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * a.var(ddof=1) + (ny - 1) * b.var(ddof=1)) / dof)
    diff = a.mean() - b.mean()
    if pooled_sd == 0:
        # Avoid a division-by-zero warning; keep the limiting value explicit.
        return float('nan') if diff == 0 else float(np.copysign(np.inf, diff))
    return float(diff / pooled_sd)
def run_ols(df):
    """Fit, print and return an OLS model of Happiness on Habits_Count.

    With a ``Group`` column the model is built through the formula interface
    and includes ``C(Group)``; without one, a single-predictor model with an
    explicit constant term is fitted instead.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        fitted = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit a mixed model of Happiness on Habits_Count grouped by participant.

    The fit is best-effort: on any exception a warning is logged and ``None``
    is returned; otherwise the summary is printed and the fitted result is
    returned.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        result = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return result
def make_plots(df, outdir, show_plots=False):
    """Draw the study figures and save each one as a PNG inside ``outdir``.

    Args:
        df: prepared data containing ``Happiness``, ``Habits_Count``,
            ``Participant_ID`` and the three ``*_Adherence`` columns.
            ``Group`` and ``Day`` are optional; the group and daily-trend
            figures are only drawn when they are present.
        outdir: output directory; created (with parents) if missing.
        show_plots: when true, each figure is also shown before it is closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')
    has_group = 'Group' in df.columns
    group_order = ['Control', 'Intervention']
    # A locally seeded generator keeps the point jitter, and therefore the
    # saved images, identical from one run to the next.
    rng = np.random.default_rng(0)

    def finish_plot(filename):
        # Save the current figure, optionally display it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    def box_with_points(samples, names, face, point_alpha, point_size):
        # One box per sample with the raw values jittered on top of it.
        plt.boxplot(
            samples,
            patch_artist=True,
            boxprops=dict(facecolor=face, color='#4C4C4C'),
            medianprops=dict(color='#2A9D8F', linewidth=2),
            whiskerprops=dict(color='#4C4C4C'),
            capprops=dict(color='#4C4C4C'),
        )
        for position, values in enumerate(samples, start=1):
            xs = rng.normal(position, 0.06, size=len(values))
            plt.scatter(xs, values, color='black', alpha=point_alpha, s=point_size)
        # Name the boxes through the tick API instead of boxplot's `labels`
        # keyword, which newer Matplotlib releases deprecate.
        plt.xticks(range(1, len(names) + 1), names)

    # 1) Mean happiness by group with error bars
    if has_group:
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(group_order)
        # Normal-approximation 95% interval for each group mean.
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        plt.figure(figsize=(8, 6))
        xpos = np.arange(len(summary))
        plt.bar(xpos, summary['mean'].values, color=['#7A7A7A', '#2A9D8F'], yerr=ci95.values, capsize=6)
        plt.xticks(xpos, summary.index)
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

    # 2) Distribution of happiness by group
    if has_group:
        plt.figure(figsize=(9, 6))
        samples = [df.loc[df['Group'] == group, 'Happiness'].values for group in group_order]
        box_with_points(samples, group_order, face='#C9D1D9', point_alpha=0.15, point_size=10)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if has_group and 'Day' in df.columns:
        daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group', hue_order=group_order, marker='o')
        plt.title('Mean Daily Happiness Across the Study')
        plt.xlabel('Day of study')
        plt.ylabel('Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    intervention_df = df[df['Group'] == 'Intervention'] if has_group else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black', alpha=0.20, jitter=0.18, size=2)
    plt.title('Intervention Group: Happiness by Number of Habits Completed')
    plt.xlabel('Habits completed that day')
    plt.ylabel('Happiness score')
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    plt.title('Intervention Group: Mean Happiness vs Habits Completed')
    plt.xlabel('Number of habits completed')
    plt.ylabel('Mean happiness')
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    adherence_rates = intervention_df[habit_cols].mean().sort_values(ascending=False).reset_index()
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Intervention Group: Habit Completion Rate')
    plt.xlabel('Habit')
    plt.ylabel('Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        samples = [
            participant_avg.loc[participant_avg['Group'] == group, 'Happiness'].values
            for group in group_order
        ]
        box_with_points(samples, group_order, face='#D6D6D6', point_alpha=0.45, point_size=22)
        plt.title('Average Happiness per Participant')
        plt.xlabel('Study group')
        plt.ylabel('Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the analysis pipeline end to end for one dataset.

    ``args`` must expose ``data`` (CSV path), ``outdir`` (figure directory)
    and ``show`` (display figures interactively). The function loads and
    prepares the data, prints descriptive statistics and up to two Cohen's d
    values, then calls ``run_ols``, ``run_mixedlm`` and ``make_plots``.
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    happiness = data['Happiness']

    # Effect size 1: rows with all three habits versus rows with none.
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        effect = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {effect:.3f}")

    # Effect size 2: intervention versus control, when groups are recorded.
    if 'Group' in data.columns:
        control_scores = happiness[data['Group'] == 'Control']
        treated_scores = happiness[data['Group'] == 'Intervention']
        if min(len(control_scores), len(treated_scores)) > 1:
            group_effect = cohen_d(treated_scores, control_scores)
            print(f"Cohen's d (Intervention vs Control happiness) = {group_effect:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Figures
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: declare the three options, parse, and run.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    option_specs = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,189 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data; return a new DataFrame.

    The input frame is copied first, so the caller's object is not modified.

    Steps:
      * require ``Participant_ID``, ``Happiness`` and the three adherence
        columns;
      * map adherence text (yes/no/true/false, case-insensitive, surrounding
        whitespace ignored) to ``True``/``False``; any other value becomes
        missing;
      * add ``Habits_Count``, the number of adherence columns that are
        ``True`` (missing counts as not completed);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before assigning: the column writes below would otherwise leak
    # into the DataFrame owned by the caller.
    df = df.copy()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Comparing with True treats missing values as "not completed" without
    # calling fillna() on object columns, which newer pandas warns about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1).astype(int)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Output, in order: frame shape, overall Happiness summary, Happiness
    statistics and medians per ``Habits_Count``, the Pearson correlation
    matrix of ``Habits_Count`` and ``Happiness``, and one point-biserial
    correlation per habit. Returns nothing.
    """
    per_count = df.groupby('Habits_Count')['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    print('\nAverage happiness by number of habits completed:')
    print(per_count.agg(['mean', 'count', 'std']).round(3))
    print('\nMedian happiness by habits:')
    print(per_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness):')
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        recorded = df[habit].notna()
        if not recorded.any():
            print(f'{habit:22} (no data)')
            continue
        flags = df.loc[recorded, habit].astype(int)
        scores = df.loc[recorded, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    ``d = (mean(x) - mean(y)) / s_pooled``, where ``s_pooled`` combines the
    two sample variances (ddof=1) weighted by their degrees of freedom.

    Args:
        x, y: one-dimensional array-likes of numbers (pandas Series, NumPy
            arrays or plain sequences). NaN entries are discarded before
            counting, so the sample sizes match the values actually used.

    Returns:
        float: the effect size. ``nan`` when either sample has fewer than two
        usable values, or when both samples are constant and equal; signed
        infinity when both samples are constant but their means differ.
    """
    a = np.asarray(x, dtype=float).ravel()
    b = np.asarray(y, dtype=float).ravel()
    a = a[~np.isnan(a)]
    b = b[~np.isnan(b)]

    nx, ny = a.size, b.size
    if nx < 2 or ny < 2:
        # A sample variance needs at least two observations per group.
        return float('nan')

    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * a.var(ddof=1) + (ny - 1) * b.var(ddof=1)) / dof)
    diff = a.mean() - b.mean()
    if pooled_sd == 0:
        # Avoid a division-by-zero warning; keep the limiting value explicit.
        return float('nan') if diff == 0 else float(np.copysign(np.inf, diff))
    return float(diff / pooled_sd)
def run_ols(df):
    """Fit, print and return a simple OLS model of Happiness on Habits_Count.

    A constant column is added to the single predictor before fitting.
    """
    predictors = sm.add_constant(df['Habits_Count'])
    fitted = sm.OLS(df['Happiness'], predictors).fit()
    print('\nSimple OLS regression: Happiness ~ Habits_Count', fitted.summary(), sep='\n')
    return fitted
def run_mixedlm(df):
    """Fit a mixed model of Happiness on Habits_Count grouped by participant.

    The fit is best-effort: on any exception a warning is logged and ``None``
    is returned; otherwise the summary is printed and the fitted result is
    returned.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        result = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return result
def make_plots(df, outdir, show_plots=False):
    """Draw four summary figures and save each as a PNG inside ``outdir``.

    Args:
        df: prepared data with ``Habits_Count``, ``Happiness`` and
            ``Participant_ID`` columns.
        outdir: output directory; created (with parents) if missing.
        show_plots: when true, each figure is also shown before it is closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def finish_plot(filename):
        # Shared tail of every figure: lay out, save, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(x='Habits_Count', y='Happiness', data=df, palette='viridis')
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range separators were missing from both axis labels.
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    finish_plot('happiness_by_habits_box.png')

    # Violin with jittered raw points
    plt.figure(figsize=(9, 6))
    sns.violinplot(x='Habits_Count', y='Happiness', data=df, inner=None, palette='muted')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    finish_plot('happiness_by_habits_violin.png')

    # Participant average bar, with the overall mean as a dashed reference
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=participant_avg.index.astype(str), y=participant_avg.values, palette='coolwarm')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    finish_plot('participant_avg_happiness.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    finish_plot('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the analysis pipeline end to end for one dataset.

    ``args`` must expose ``data`` (CSV path), ``outdir`` (figure directory)
    and ``show`` (display figures interactively). The function loads and
    prepares the data, prints descriptive statistics and one Cohen's d value,
    then calls ``run_ols``, ``run_mixedlm`` and ``make_plots``.
    """
    data = prepare_data(load_data(args.data))
    descriptive_stats(data)

    # Effect size example: rows with all three habits versus rows with none.
    happiness = data['Happiness']
    no_habits = happiness[data['Habits_Count'] == 0]
    all_habits = happiness[data['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        effect = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {effect:.3f}")

    # Models
    run_ols(data)
    run_mixedlm(data)

    # Figures
    make_plots(data, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Command-line entry point: declare the three options, parse, and run.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    option_specs = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,231 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data; return a new DataFrame.

    The input frame is copied first, so the caller's object is not modified.

    Steps:
      * require ``Participant_ID``, ``Happiness`` and the three adherence
        columns;
      * add a ``Group`` column filled with ``'Intervention'`` when absent,
        then strip and title-case it;
      * map adherence text (yes/no/true/false, case-insensitive, surrounding
        whitespace ignored) to ``True``/``False``; any other value becomes
        missing;
      * add ``Habits_Count``, the number of adherence columns that are
        ``True`` (missing counts as not completed);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before assigning: the column writes below would otherwise leak
    # into the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Comparing with True treats missing values as "not completed" without
    # calling fillna() on object columns, which newer pandas warns about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1).astype(int)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive tables and correlations for the prepared data.

    Output, in order: frame shape, overall Happiness summary, per-group row
    counts and Happiness statistics (only when a ``Group`` column exists),
    Happiness statistics and medians per ``Habits_Count``, the Pearson
    correlation matrix of ``Habits_Count`` and ``Happiness``, and one
    point-biserial correlation per habit (intervention rows only when groups
    are present). Returns nothing.
    """
    grouped_study = 'Group' in df.columns
    summary_stats = ['mean', 'count', 'std']
    happiness = df['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(happiness.describe())

    if grouped_study:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_stats).round(3))

    per_count = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(per_count.agg(summary_stats).round(3))
    print('\nMedian happiness by habits:')
    print(per_count.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if grouped_study else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        recorded = subset[habit].notna()
        if not recorded.any():
            print(f'{habit:22} (no data)')
            continue
        flags = subset.loc[recorded, habit].astype(int)
        scores = subset.loc[recorded, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    ``d = (mean(x) - mean(y)) / s_pooled``, where ``s_pooled`` combines the
    two sample variances (ddof=1) weighted by their degrees of freedom.

    Args:
        x, y: one-dimensional array-likes of numbers (pandas Series, NumPy
            arrays or plain sequences). NaN entries are discarded before
            counting, so the sample sizes match the values actually used.

    Returns:
        float: the effect size. ``nan`` when either sample has fewer than two
        usable values, or when both samples are constant and equal; signed
        infinity when both samples are constant but their means differ.
    """
    a = np.asarray(x, dtype=float).ravel()
    b = np.asarray(y, dtype=float).ravel()
    a = a[~np.isnan(a)]
    b = b[~np.isnan(b)]

    nx, ny = a.size, b.size
    if nx < 2 or ny < 2:
        # A sample variance needs at least two observations per group.
        return float('nan')

    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * a.var(ddof=1) + (ny - 1) * b.var(ddof=1)) / dof)
    diff = a.mean() - b.mean()
    if pooled_sd == 0:
        # Avoid a division-by-zero warning; keep the limiting value explicit.
        return float('nan') if diff == 0 else float(np.copysign(np.inf, diff))
    return float(diff / pooled_sd)
def run_ols(df):
    """Fit, print and return an OLS model of Happiness on Habits_Count.

    With a ``Group`` column the model is built through the formula interface
    and includes ``C(Group)``; without one, a single-predictor model with an
    explicit constant term is fitted instead.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        fitted = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
def run_mixedlm(df):
    """Fit a mixed model of Happiness on Habits_Count grouped by participant.

    The fit is best-effort: on any exception a warning is logged and ``None``
    is returned; otherwise the summary is printed and the fitted result is
    returned.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        result = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return result
def make_plots(df, outdir, show_plots=False):
    """Draw the summary figures and save each as a PNG inside ``outdir``.

    Args:
        df: prepared data with ``Habits_Count``, ``Happiness`` and
            ``Participant_ID`` columns; a ``Group`` column is optional and
            adds the per-group bar chart and group colouring.
        outdir: output directory; created (with parents) if missing.
        show_plots: when true, each figure is also shown before it is closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')
    has_group = 'Group' in df.columns

    def finish_plot(filename):
        # Shared tail of every figure: lay out, save, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    def hide_legend():
        # hue only drives the colouring here, so the legend is redundant.
        plt.legend([], [], frameon=False)

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count', palette='viridis', dodge=False)
    hide_legend()
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range separators were missing from both axis labels.
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    finish_plot('happiness_by_habits_box.png')

    # Violin with jittered raw points
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count', inner=None, palette='muted', dodge=False)
    hide_legend()
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    finish_plot('happiness_by_habits_violin.png')

    # Participant average bar, with the overall mean as a dashed reference
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=range(len(participant_avg)), y=participant_avg.values, hue=range(len(participant_avg)), palette='coolwarm', dodge=False)
    hide_legend()
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(range(len(participant_avg)), participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    finish_plot('participant_avg_happiness.png')

    # Mean happiness per study group (error bars show one standard deviation)
    if has_group:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', hue='Group', estimator='mean', errorbar='sd', palette='Set2', dodge=False)
        hide_legend()
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        finish_plot('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if has_group:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # Overlay the pooled least-squares line so the figure matches its
        # title; a line needs at least two distinct x values.
        if df['Habits_Count'].nunique() > 1:
            sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    finish_plot('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis: load, prepare, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))
    descriptive_stats(df)

    happiness = df['Happiness']

    # Effect size between days with no habits and days with all three habits;
    # only reported when both samples hold more than one observation.
    group0 = happiness[df['Habits_Count'] == 0]
    group3 = happiness[df['Habits_Count'] == 3]
    if min(len(group0), len(group3)) > 1:
        d = cohen_d(group3, group0)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    # Effect size between the two study groups, under the same size condition.
    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Regression models, then figures.
    run_ols(df)
    run_mixedlm(df)
    make_plots(df, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Script entry point: declare the CLI options, parse them and run main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli_options = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,270 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame.

    The number of rows read is reported through ``logging.info``.

    Args:
        path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    The work is done on a copy, so the caller's DataFrame is left untouched.

    Steps:
        1. Check that the required columns are present.
        2. Add ``Group = 'Intervention'`` when the column is absent, then
           strip and title-case the group labels.
        3. Map the three adherence columns to booleans (yes/no/true/false,
           case-insensitive); any other value becomes NaN.
        4. Add ``Habits_Count``: how many adherence columns are True per row
           (NaN counts as not completed).
        5. Coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, prepared DataFrame.

    Raises:
        KeyError: If any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before editing: assigning columns below would otherwise modify
    # the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without calling fillna on object-dtype columns, which pandas deprecates.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive summaries of the prepared data to stdout.

    Output, in order: dataset shape, ``describe()`` of Happiness, row counts
    and mean/count/std of Happiness per group (when a 'Group' column exists),
    mean/count/std and median of Happiness per Habits_Count, the Pearson
    correlation between Habits_Count and Happiness, and a point-biserial
    correlation of each habit with Happiness on the Intervention rows (all
    rows when there is no 'Group' column).

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and the three
            ``*_Adherence`` columns.
    """
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))
    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        # Rows where this habit was not recorded are left out of its correlation.
        known = habit_df[habit].notna()
        if not known.any():
            print(f'{habit:22} (no data)')
            continue
        flags = habit_df.loc[known, habit].astype(int)
        scores = habit_df.loc[known, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples.

    Computed as ``(mean(x) - mean(y)) / pooled_sd`` where the pooled standard
    deviation combines the two sample variances (``ddof=1``) weighted by their
    degrees of freedom.

    Args:
        x: First sample; any 1-D array-like of numbers (Series, ndarray, list).
        y: Second sample, same kinds accepted.

    Returns:
        The effect size as a float, or NaN when it is undefined: fewer than
        two non-missing observations in either sample, or zero pooled variance.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values first so the sample sizes match the data that the
    # means and variances are actually computed from.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var <= 0:
        # Both samples are constant: the standardised difference is undefined.
        return float('nan')
    return (x.mean() - y.mean()) / np.sqrt(pooled_var)
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When a 'Group' column exists the formula also includes ``C(Group)``;
    otherwise a plain regression with an added constant is fitted.

    Args:
        df: Prepared DataFrame with 'Happiness' and 'Habits_Count'.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        model = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(model.summary())
    return model
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by 'Participant_ID' and
    fitted with ``reml=False``. This step is best-effort: any exception is
    logged as a warning instead of being raised.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and
            'Participant_ID'.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        fitted = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Render the study figures and save them as PNG files in ``outdir``.

    Figures written (numbered file names keep them in reading order):
        01  mean happiness by group (bars with 1.96 * SE error bars)
        02  happiness distribution by group (box plot + jittered points)
        03  mean daily happiness by group (only when a 'Day' column exists)
        04  intervention rows: happiness by habits completed (box + strip)
        05  intervention rows: mean happiness vs habits completed
        06  intervention rows: completion rate of each habit
        07  per-participant mean happiness by group

    Group figures only include the groups ('Control', 'Intervention') that
    occur in the data and are skipped when neither occurs. Figures 04-06 use
    all rows when there is no 'Group' column and are skipped when the
    selection is empty.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count',
            'Participant_ID' and the three ``*_Adherence`` columns; 'Group'
            and 'Day' are optional.
        outdir: Output directory; created (with parents) if needed.
        show_plots: When True, each figure is also shown with ``plt.show()``.
    """
    # Imported explicitly rather than reached through ``plt.matplotlib``.
    from matplotlib.ticker import PercentFormatter

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    group_colors = {'Control': '#7A7A7A', 'Intervention': '#2A9D8F'}
    has_group = 'Group' in df.columns
    present = set(df['Group'].unique()) if has_group else set()
    # Plot only groups that exist; a missing group would otherwise yield a
    # NaN bar and an empty box.
    group_order = [name for name in group_colors if name in present]
    # Seeded local generator: jitter is identical on every run and the global
    # NumPy random state is not touched.
    rng = np.random.default_rng(0)

    def finish_plot(filename):
        # Save the current figure, optionally display it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    def box_with_points(samples, facecolor, point_alpha, point_size):
        # One box per group with the raw values jittered on top.
        plt.boxplot(samples, tick_labels=group_order, patch_artist=True,
                    boxprops=dict(facecolor=facecolor, color='#4C4C4C'),
                    medianprops=dict(color='#2A9D8F', linewidth=2),
                    whiskerprops=dict(color='#4C4C4C'), capprops=dict(color='#4C4C4C'))
        for position, values in enumerate(samples, start=1):
            xs = rng.normal(position, 0.06, size=len(values))
            plt.scatter(xs, values, color='black', alpha=point_alpha, s=point_size)

    if group_order:
        # 1) Mean happiness by group with error bars
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(group_order)
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        plt.figure(figsize=(8, 6))
        xpos = np.arange(len(summary))
        plt.bar(xpos, summary['mean'].values, color=[group_colors[g] for g in group_order],
                yerr=ci95.values, capsize=6)
        plt.xticks(xpos, summary.index)
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

        # 2) Distribution of happiness by group
        plt.figure(figsize=(9, 6))
        box_with_points([df.loc[df['Group'] == g, 'Happiness'].values for g in group_order],
                        '#C9D1D9', 0.15, 10)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

        # 3) Daily happiness trend by group
        if 'Day' in df.columns:
            daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
            plt.figure(figsize=(10, 6))
            sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group',
                         hue_order=group_order, marker='o')
            plt.title('Mean Daily Happiness Across the Study')
            plt.xlabel('Day of study')
            plt.ylabel('Average happiness')
            plt.ylim(0, 10)
            plt.xticks(range(1, 31, 2))
            finish_plot('03_daily_happiness_trend.png')

    intervention_df = df[df['Group'] == 'Intervention'] if has_group else df
    if intervention_df.empty:
        logging.warning('No intervention rows; skipping habit plots')
    else:
        # 4) Happiness by number of habits in intervention group only
        plt.figure(figsize=(9, 6))
        sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
        sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black',
                      alpha=0.20, jitter=0.18, size=2)
        plt.title('Intervention Group: Happiness by Number of Habits Completed')
        plt.xlabel('Habits completed that day')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('04_happiness_by_habits_intervention.png')

        # 5) Mean happiness by habits count in intervention group
        habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
        plt.figure(figsize=(8, 6))
        sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
        plt.title('Intervention Group: Mean Happiness vs Habits Completed')
        plt.xlabel('Number of habits completed')
        plt.ylabel('Mean happiness')
        plt.xticks([0, 1, 2, 3])
        plt.ylim(0, 10)
        finish_plot('05_mean_happiness_by_habits.png')

        # 6) Habit adherence rates in the intervention group. The columns can
        # be object dtype (True/False/NaN), so cast to float before averaging.
        adherence_rates = (
            intervention_df[habit_cols].astype(float).mean()
            .sort_values(ascending=False).reset_index()
        )
        adherence_rates.columns = ['Habit', 'Rate']
        adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
        plt.figure(figsize=(9, 6))
        sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
        plt.title('Intervention Group: Habit Completion Rate')
        plt.xlabel('Habit')
        plt.ylabel('Proportion completed')
        plt.ylim(0, 1)
        plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0))
        finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if group_order:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        box_with_points(
            [participant_avg.loc[participant_avg['Group'] == g, 'Happiness'].values for g in group_order],
            '#D6D6D6', 0.45, 22)
        plt.title('Average Happiness per Participant')
        plt.xlabel('Study group')
        plt.ylabel('Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis: load, prepare, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))
    descriptive_stats(df)

    happiness = df['Happiness']

    # Effect size between days with no habits and days with all three habits;
    # only reported when both samples hold more than one observation.
    group0 = happiness[df['Habits_Count'] == 0]
    group3 = happiness[df['Habits_Count'] == 3]
    if min(len(group0), len(group3)) > 1:
        d = cohen_d(group3, group0)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    # Effect size between the two study groups, under the same size condition.
    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Regression models, then figures.
    run_ols(df)
    run_mixedlm(df)
    make_plots(df, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Script entry point: declare the CLI options, parse them and run main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli_options = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1 @@
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/Data%20Analysis.py","entries":[{"id":"SA9R.py","source":"Chat Edit: 'improve on this analysis script'","timestamp":1774345116327},{"id":"ycv3.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345356264},{"id":"bwYb.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345411358},{"id":"Gx76.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. 
the control group will not do any of these.'","timestamp":1774345436946},{"id":"FOyN.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345501736},{"id":"MtI5.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346145201},{"id":"Ldgu.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346200970},{"id":"NtsI.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346222014},{"id":"enQE.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346258056},{"id":"yfjL.py","timestamp":1774346751804},{"id":"9KVj.py","source":"Chat Edit: 'ensure the graphs being used are appropriate for the study'","timestamp":1774346803522}]}

View file

@ -0,0 +1,227 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame.

    The number of rows read is reported through ``logging.info``.

    Args:
        path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    The work is done on a copy, so the caller's DataFrame is left untouched.

    Steps:
        1. Check that the required columns are present.
        2. Add ``Group = 'Intervention'`` when the column is absent, then
           strip and title-case the group labels.
        3. Map the three adherence columns to booleans (yes/no/true/false,
           case-insensitive); any other value becomes NaN.
        4. Add ``Habits_Count``: how many adherence columns are True per row
           (NaN counts as not completed).
        5. Coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, prepared DataFrame.

    Raises:
        KeyError: If any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before editing: assigning columns below would otherwise modify
    # the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without calling fillna on object-dtype columns, which pandas deprecates.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive summaries of the prepared data to stdout.

    Output, in order: dataset shape, ``describe()`` of Happiness, row counts
    and mean/count/std of Happiness per group (when a 'Group' column exists),
    mean/count/std and median of Happiness per Habits_Count, the Pearson
    correlation between Habits_Count and Happiness, and a point-biserial
    correlation of each habit with Happiness on the Intervention rows (all
    rows when there is no 'Group' column).

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and the three
            ``*_Adherence`` columns.
    """
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))
    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        # Rows where this habit was not recorded are left out of its correlation.
        known = habit_df[habit].notna()
        if not known.any():
            print(f'{habit:22} (no data)')
            continue
        flags = habit_df.loc[known, habit].astype(int)
        scores = habit_df.loc[known, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples.

    Computed as ``(mean(x) - mean(y)) / pooled_sd`` where the pooled standard
    deviation combines the two sample variances (``ddof=1``) weighted by their
    degrees of freedom.

    Args:
        x: First sample; any 1-D array-like of numbers (Series, ndarray, list).
        y: Second sample, same kinds accepted.

    Returns:
        The effect size as a float, or NaN when it is undefined: fewer than
        two non-missing observations in either sample, or zero pooled variance.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values first so the sample sizes match the data that the
    # means and variances are actually computed from.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var <= 0:
        # Both samples are constant: the standardised difference is undefined.
        return float('nan')
    return (x.mean() - y.mean()) / np.sqrt(pooled_var)
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When a 'Group' column exists the formula also includes ``C(Group)``;
    otherwise a plain regression with an added constant is fitted.

    Args:
        df: Prepared DataFrame with 'Happiness' and 'Habits_Count'.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        model = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(model.summary())
    return model
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by 'Participant_ID' and
    fitted with ``reml=False``. This step is best-effort: any exception is
    logged as a warning instead of being raised.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and
            'Participant_ID'.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        fitted = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Render the study figures and save them as PNG files in ``outdir``.

    Files written: ``happiness_by_habits_box.png``,
    ``happiness_by_habits_violin.png``, ``participant_avg_happiness.png``,
    ``happiness_by_group.png`` (only with a 'Group' column) and
    ``happiness_vs_habits_regression.png``.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and
            'Participant_ID'; 'Group' is optional.
        outdir: Output directory; created (with parents) if needed.
        show_plots: When True, each figure is also shown with ``plt.show()``.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def save_current(filename):
        # Save the current figure, optionally display it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    def hide_legend():
        # ``hue`` is assigned only so that ``palette`` applies (seaborn
        # deprecates palette without hue); the legend it adds is redundant.
        plt.legend([], [], frameon=False)

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count',
                palette='viridis', dodge=False)
    hide_legend()
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range dashes were missing from these labels ("03", "110").
    plt.xlabel('Number of habits followed (0-3)')
    plt.ylabel('Happiness (1-10)')
    save_current('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count',
                   inner=None, palette='muted', dodge=False)
    hide_legend()
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    save_current('happiness_by_habits_violin.png')

    # Participant average bar, sorted ascending, with the overall mean as a line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    labels = participant_avg.index.astype(str)
    plt.figure(figsize=(12, 5))
    sns.barplot(x=labels, y=participant_avg.values, hue=labels, palette='coolwarm', dodge=False)
    hide_legend()
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    save_current('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', hue='Group', estimator='mean',
                    errorbar='sd', palette='Set2', dodge=False)
        hide_legend()
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        save_current('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so draw the fit line over the
        # group-coloured points as well.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    save_current('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis: load, prepare, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))
    descriptive_stats(df)

    happiness = df['Happiness']

    # Effect size between days with no habits and days with all three habits;
    # only reported when both samples hold more than one observation.
    group0 = happiness[df['Habits_Count'] == 0]
    group3 = happiness[df['Habits_Count'] == 3]
    if min(len(group0), len(group3)) > 1:
        d = cohen_d(group3, group0)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    # Effect size between the two study groups, under the same size condition.
    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Regression models, then figures.
    run_ols(df)
    run_mixedlm(df)
    make_plots(df, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Script entry point: declare the CLI options, parse them and run main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli_options = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1,227 @@
import argparse
import os
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_data(path):
    """Read the CSV file at ``path`` into a DataFrame.

    The number of rows read is reported through ``logging.info``.

    Args:
        path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    The work is done on a copy, so the caller's DataFrame is left untouched.

    Steps:
        1. Check that the required columns are present.
        2. Add ``Group = 'Intervention'`` when the column is absent, then
           strip and title-case the group labels.
        3. Map the three adherence columns to booleans (yes/no/true/false,
           case-insensitive); any other value becomes NaN.
        4. Add ``Habits_Count``: how many adherence columns are True per row
           (NaN counts as not completed).
        5. Coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, prepared DataFrame.

    Raises:
        KeyError: If any required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before editing: assigning columns below would otherwise modify
    # the DataFrame owned by the caller.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Count habits per row. Comparing with True treats NaN as "not done"
    # without calling fillna on object-dtype columns, which pandas deprecates.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))
    return df
def descriptive_stats(df):
    """Print descriptive summaries of the prepared data to stdout.

    Output, in order: dataset shape, ``describe()`` of Happiness, row counts
    and mean/count/std of Happiness per group (when a 'Group' column exists),
    mean/count/std and median of Happiness per Habits_Count, the Pearson
    correlation between Habits_Count and Happiness, and a point-biserial
    correlation of each habit with Happiness on the Intervention rows (all
    rows when there is no 'Group' column).

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and the three
            ``*_Adherence`` columns.
    """
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())
        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))
    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        # Rows where this habit was not recorded are left out of its correlation.
        known = habit_df[habit].notna()
        if not known.any():
            print(f'{habit:22} (no data)')
            continue
        flags = habit_df.loc[known, habit].astype(int)
        scores = habit_df.loc[known, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
def cohen_d(x, y):
    """Return Cohen's d for two independent samples.

    Computed as ``(mean(x) - mean(y)) / pooled_sd`` where the pooled standard
    deviation combines the two sample variances (``ddof=1``) weighted by their
    degrees of freedom.

    Args:
        x: First sample; any 1-D array-like of numbers (Series, ndarray, list).
        y: Second sample, same kinds accepted.

    Returns:
        The effect size as a float, or NaN when it is undefined: fewer than
        two non-missing observations in either sample, or zero pooled variance.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values first so the sample sizes match the data that the
    # means and variances are actually computed from.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    if pooled_var <= 0:
        # Both samples are constant: the standardised difference is undefined.
        return float('nan')
    return (x.mean() - y.mean()) / np.sqrt(pooled_var)
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    When a 'Group' column exists the formula also includes ``C(Group)``;
    otherwise a plain regression with an added constant is fitted.

    Args:
        df: Prepared DataFrame with 'Happiness' and 'Habits_Count'.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        model = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(model.summary())
    return model
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by 'Participant_ID' and
    fitted with ``reml=False``. This step is best-effort: any exception is
    logged as a warning instead of being raised.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and
            'Participant_ID'.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        fitted = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
def make_plots(df, outdir, show_plots=False):
    """Render the study figures and save them as PNG files in ``outdir``.

    Files written: ``happiness_by_habits_box.png``,
    ``happiness_by_habits_violin.png``, ``participant_avg_happiness.png``,
    ``happiness_by_group.png`` (only with a 'Group' column) and
    ``happiness_vs_habits_regression.png``.

    Args:
        df: Prepared DataFrame with 'Happiness', 'Habits_Count' and
            'Participant_ID'; 'Group' is optional.
        outdir: Output directory; created (with parents) if needed.
        show_plots: When True, each figure is also shown with ``plt.show()``.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def save_current(filename):
        # Save the current figure, optionally display it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    # The range dashes were missing from these labels ("03", "110").
    plt.xlabel('Number of habits followed (0-3)')
    plt.ylabel('Happiness (1-10)')
    save_current('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    save_current('happiness_by_habits_violin.png')

    # Participant average bar, sorted ascending, with the overall mean as a line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    positions = range(len(participant_avg))
    plt.figure(figsize=(12, 5))
    sns.barplot(x=positions, y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    # Bars are drawn at integer positions; label them with the participant IDs.
    plt.xticks(positions, participant_avg.index, rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    save_current('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        save_current('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so draw the fit line over the
        # group-coloured points as well.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    save_current('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
def main(args):
    """Run the whole analysis: load, prepare, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))
    descriptive_stats(df)

    happiness = df['Happiness']

    # Effect size between days with no habits and days with all three habits;
    # only reported when both samples hold more than one observation.
    group0 = happiness[df['Habits_Count'] == 0]
    group3 = happiness[df['Habits_Count'] == 3]
    if min(len(group0), len(group3)) > 1:
        d = cohen_d(group3, group0)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    # Effect size between the two study groups, under the same size condition.
    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Regression models, then figures.
    run_ols(df)
    run_mixedlm(df)
    make_plots(df, args.outdir, show_plots=args.show)
if __name__ == '__main__':
    # Script entry point: declare the CLI options, parse them and run main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli_options = (
        ('--data', {'type': str, 'default': 'organization_happiness_study_data.csv', 'help': 'CSV data path'}),
        ('--outdir', {'type': str, 'default': 'plots', 'help': 'Directory to save plots'}),
        ('--show', {'action': 'store_true', 'help': 'Show plots interactively'}),
    )
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)

View file

@ -0,0 +1 @@
[]

View file

@ -0,0 +1,3 @@
{"kind":0,"v":{"version":3,"creationDate":1771726602463,"initialLocation":"panel","responderUsername":"","sessionId":"15f13b33-3dc7-40aa-8a44-bc553620d024","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}
{"kind":1,"k":["responderUsername"],"v":"GitHub Copilot"}
{"kind":1,"k":["inputState","selectedModel"],"v":{"identifier":"copilot/gpt-5-mini","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5-mini","vendor":"copilot","name":"GPT-5 mini","family":"gpt-5-mini","tooltip":"Rate is counted at 0x.","version":"gpt-5-mini","multiplier":"0x","maxInputTokens":127805,"maxOutputTokens":64000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":true,"terminal":true,"notebook":true,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Standard Models","order":0},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774939316365,"customTitle":"Connection error diagnosis request","initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"5ac4e9c8-7d0c-4bfc-99ab-f2186f80b5f1","hasPendingEdits":false,"requests":[{"requestId":"request_c7830057-d8e9-42c9-aa1b-c368110ebef9","timestamp":1774939316703,"agent":{"extensionId":{"value":"ms-vscode-remote.remote-ssh","_lower":"ms-vscode-remote.remote-ssh"},"extensionVersion":"0.122.0","publisherDisplayName":"Microsoft","extensionPublisherId":"ms-vscode-remote","extensionDisplayName":"Remote - SSH","id":"remote-ssh","description":"Learn about Remote - SSH and diagnose connection issues","when":"config.remote.SSH.experimental.chat","metadata":{"isSticky":true,"themeIcon":{"id":"remote"},"hasFollowups":false},"name":"remote-ssh","fullName":"Remote - SSH","locations":["panel"],"modes":["agent","ask","edit"],"slashCommands":[],"disambiguation":[]},"modelId":"copilot/gpt-5.3-codex","responseId":"response_dcbb0c20-6bcb-41c7-adbd-63716ac3d0b6","modelState":{"value":0},"contentReferences":[],"codeCitations":[],"timeSpentWaiting":1774939316703,"response":[{"kind":"mcpServersStarting","didStartServerIds":[]}],"message":{"text":"@remote-ssh diagnose my last connection error","parts":[{"range":{"start":0,"endExclusive":11},"editorRange":{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":12},"agent":{"extensionId":{"value":"ms-vscode-remote.remote-ssh","_lower":"ms-vscode-remote.remote-ssh"},"extensionVersion":"0.122.0","publisherDisplayName":"Microsoft","extensionPublisherId":"ms-vscode-remote","extensionDisplayName":"Remote - SSH","id":"remote-ssh","description":"Learn about Remote - SSH and diagnose connection issues","when":"config.remote.SSH.experimental.chat","metadata":{"isSticky":true,"themeIcon":{"id":"remote"},"hasFollowups":false},"name":"remote-ssh","fullName":"Remote - 
SSH","locations":["panel"],"modes":["agent","ask","edit"],"slashCommands":[],"disambiguation":[]},"kind":"agent"},{"range":{"start":11,"endExclusive":45},"editorRange":{"startLineNumber":1,"startColumn":12,"endLineNumber":1,"endColumn":46},"text":" diagnose my last connection error","kind":"text"}]},"variableData":{"variables":[]}}],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"ask","kind":"ask"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"@remote-ssh ","selections":[{"startLineNumber":1,"startColumn":13,"endLineNumber":1,"endColumn":13,"selectionStartLineNumber":1,"selectionStartColumn":13,"positionLineNumber":1,"positionColumn":13}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1,3 @@
{"kind":0,"v":{"version":3,"creationDate":1775014837233,"initialLocation":"panel","responderUsername":"","sessionId":"93153c4c-abc7-42d0-a6c8-ddaf556cfb10","hasPendingEdits":false,"requests":[],"pendingRequests":[]}}
{"kind":1,"k":["responderUsername"],"v":"GitHub Copilot"}
{"kind":1,"k":["inputState"],"v":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"GPT-5.3-Codex is currently experiencing degraded performance due to issues with the model upstream provider. We are actively working to resolve the issue. In the meantime, you might want to use Auto or a different model. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"statusIcon":{"id":"warning"},"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,219 @@
#---------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
#---------------------------------------------------------------------------------------------
# Windows GitHub Copilot CLI bootstrapper
#
# Responsibilities:
# 1. Locate the real Copilot CLI binary (avoid recursion if this file shadows it).
# 2. Offer to install if missing (npm -g @github/copilot).
# 3. Enforce minimum version (>= REQUIRED_VERSION) with interactive update.
# 4. Execute the real binary with original arguments and exit with its status.
#
# NOTE: This file intentionally keeps logic self-contained (no external deps) so it can be dropped into PATH directly.
# Minimum required Copilot CLI version
# (compared against the installed version by Test-VersionCompatibility)
$RequiredVersion = "0.0.394"
# npm package name passed to Invoke-NpmGlobalCommand for global install/update
$PackageName = "@github/copilot"
# Runs `npm <Command> -g <Package>`.
#   -Command  'install' or 'update' (enforced by ValidateSet).
#   -Package  npm package name to install/update globally.
# The npm exit status is left in $LASTEXITCODE for the caller to inspect.
function Invoke-NpmGlobalCommand {
    param(
        [Parameter(Mandatory = $true)][ValidateSet('install', 'update')][string]$Command,
        [Parameter(Mandatory = $true)][string]$Package
    )

    # Use the npm.cmd shim when it can be resolved; otherwise fall back to plain `npm`.
    $npmExecutable = 'npm'
    if (Get-Command npm.cmd -ErrorAction SilentlyContinue) {
        $npmExecutable = 'npm.cmd'
    }

    $globalArgs = @($Command, '-g', $Package)
    & $npmExecutable @globalArgs
}
# Locates the real `copilot` executable, skipping this script if it shadows the
# command on PATH.
# Returns the path of the executable, or $null when none can be found.
function Find-RealCopilot {
    $CurrentScript = $MyInvocation.PSCommandPath
    if (-not $CurrentScript) { $CurrentScript = $PSCommandPath }
    $CopilotPath = (Get-Command copilot -ErrorAction SilentlyContinue).Source

    # Nothing named `copilot` resolves to a path. Bail out before any Split-Path /
    # Resolve-Path call, which would otherwise raise a null-argument binding error.
    if (-not $CopilotPath) {
        return $null
    }

    # Decide whether the `copilot` found on PATH is this script (same path, same
    # directory, or same resolved path). Without a known script path there is
    # nothing to compare against, so the command found is treated as the real one.
    $IsShadowed = $false
    $ScriptDir = $null
    if ($CurrentScript) {
        $ScriptDir = Split-Path $CurrentScript -Parent
        $CurrentScriptResolved = (Resolve-Path $CurrentScript -ErrorAction SilentlyContinue).Path
        $CopilotPathResolved = (Resolve-Path $CopilotPath -ErrorAction SilentlyContinue).Path
        $IsShadowed = ($CurrentScript -eq $CopilotPath) -or
            ($ScriptDir -eq (Split-Path $CopilotPath -Parent)) -or
            ($CurrentScriptResolved -and $CopilotPathResolved -and ($CurrentScriptResolved -eq $CopilotPathResolved))
    }

    if (-not $IsShadowed) {
        # The copilot in PATH is different from this script, use it
        if (Test-Path $CopilotPath) {
            return $CopilotPath
        }
        return $null
    }

    # The copilot in PATH is this script: look again with this script's directory
    # temporarily removed from PATH.
    $OldPath = $env:PATH
    # Use appropriate path delimiter based on OS
    $PathDelimiter = if ($IsWindows -or $env:OS -eq "Windows_NT") { ';' } else { ':' }
    try {
        $env:PATH = ($env:PATH -split $PathDelimiter | Where-Object { $_ -ne $ScriptDir }) -join $PathDelimiter
        $RealCopilot = (Get-Command copilot -ErrorAction SilentlyContinue).Source
    } finally {
        # Always restore PATH, even if the lookup above throws.
        $env:PATH = $OldPath
    }

    if ($RealCopilot -and (Test-Path $RealCopilot)) {
        return $RealCopilot
    }
    return $null
}
# Returns $true when -Version is greater than or equal to $RequiredVersion.
# A leading "v" is stripped from both strings before they are parsed as
# [version]; if either string cannot be parsed the result is $false.
function Test-VersionCompatibility {
    param([string]$Version)

    $installedText = $Version -replace '^v', ''
    $requiredText = $RequiredVersion -replace '^v', ''

    try {
        return ([version]$installedText -ge [version]$requiredText)
    } catch {
        # Unparseable version string: treat as incompatible.
        return $false
    }
}
# Private helper: asks the user whether to run `npm <Command> -g $PackageName`.
#   - Any answer other than y/Y exits the script with status 0.
#   - On npm success, re-runs Test-AndLaunchCopilot with the same arguments.
#   - On npm failure (non-zero exit code or exception), shows -FailureMessage via
#     Read-Host so the message stays on screen until the user presses Enter.
# The caller is expected to return immediately after invoking this helper.
function Invoke-CopilotNpmRecovery {
    param(
        [Parameter(Mandatory = $true)][string]$Prompt,
        [Parameter(Mandatory = $true)][ValidateSet('install', 'update')][string]$Command,
        [Parameter(Mandatory = $true)][string]$FailureMessage,
        [string]$ProgressMessage,
        [string[]]$Arguments
    )

    $answer = Read-Host $Prompt
    # String -ne/-eq are case-insensitive, so this accepts both "y" and "Y".
    if ($answer -ne 'y') {
        exit 0
    }

    if ($ProgressMessage) {
        Write-Host $ProgressMessage
    }

    try {
        Invoke-NpmGlobalCommand -Command $Command -Package $PackageName
        if ($LASTEXITCODE -eq 0) {
            Test-AndLaunchCopilot $Arguments
        } else {
            Read-Host $FailureMessage
        }
    } catch {
        Read-Host $FailureMessage
    }
}

# Verifies that the Copilot CLI is installed and recent enough, offering to
# install/reinstall/update it through npm when it is not, then runs the real
# binary with -Arguments.
function Test-AndLaunchCopilot {
    param([string[]]$Arguments)

    $installFailed = "Installation failed. Please check your npm configuration and try again (or run: npm install -g @github/copilot)."
    $reinstallFailed = "Reinstallation failed. Please check your npm configuration and try again (or run: npm install -g @github/copilot)."
    $updateFailed = "Update failed. Please check your npm configuration and try again (or run: npm update -g @github/copilot)."
    $reinstallPrompt = "Would you like to reinstall GitHub Copilot CLI? (y/N)"

    # Check if real copilot command exists
    $realCopilot = Find-RealCopilot
    if (-not $realCopilot) {
        Write-Host "Cannot find GitHub Copilot CLI (https://docs.github.com/en/copilot/how-tos/set-up/install-copilot-cli)"
        Invoke-CopilotNpmRecovery -Prompt "Install GitHub Copilot CLI? (y/N)" -Command 'install' -FailureMessage $installFailed -Arguments $Arguments
        return
    }

    # Look the binary up again before the version check
    $realCopilot = Find-RealCopilot
    if (-not $realCopilot) {
        Write-Host "Error: Unable to find copilot binary."
        Invoke-CopilotNpmRecovery -Prompt $reinstallPrompt -Command 'install' -FailureMessage $reinstallFailed -ProgressMessage "Reinstalling GitHub Copilot CLI..." -Arguments $Arguments
        return
    }

    # Ask the binary for its version; a failing command is treated as a broken install.
    try {
        $versionOutput = & $realCopilot --version 2>$null
        if ($LASTEXITCODE -ne 0) {
            throw "Command failed"
        }
    } catch {
        Invoke-CopilotNpmRecovery -Prompt $reinstallPrompt -Command 'install' -FailureMessage $reinstallFailed -Arguments $Arguments
        return
    }

    # Extract version number from output (search through all lines)
    $version = $null
    if ($versionOutput) {
        foreach ($line in ($versionOutput -split "`n")) {
            if ($line.Trim() -match '[0-9]+\.[0-9]+\.[0-9]+') {
                $version = $Matches[0]
                break
            }
        }
    }

    # Command succeeded - assume CLI is installed even if we can't parse the version
    # Only check version compatibility if we have a valid version
    if ($version -and -not (Test-VersionCompatibility $version)) {
        Write-Host "GitHub Copilot CLI version $version is not compatible."
        Write-Host "Version $RequiredVersion or later is required."
        Invoke-CopilotNpmRecovery -Prompt "Update GitHub Copilot CLI? (y/N)" -Command 'update' -FailureMessage $updateFailed -Arguments $Arguments
        return
    }

    # All checks passed, execute the real copilot binary
    $realCopilot = Find-RealCopilot
    if ($realCopilot -and (Test-Path $realCopilot)) {
        & $realCopilot @Arguments
    } else {
        Write-Host "Error: Could not find the real GitHub Copilot CLI binary"
        Read-Host "Please ensure it's properly installed with: npm install -g @github/copilot"
        return
    }
}
# Start the check and launch process
$finalArgs = $args
# Handle --clear argument: clear the screen and forward only the remaining arguments.
if ($args.Length -gt 0 -and $args[0] -eq '--clear') {
    Clear-Host
    # Do not slice with $args[1..($args.Length - 1)]: when '--clear' is the only
    # argument that becomes $args[1..0], a descending range that selects index 0
    # and would forward '--clear' itself to the real CLI.
    $finalArgs = @($args | Select-Object -Skip 1)
}
Test-AndLaunchCopilot $finalArgs

View file

@ -0,0 +1,3 @@
#!/bin/sh
# Launcher shim: runs copilotCLIShim.js with the VS Code executable and forwards
# every command-line argument unchanged ("$@").
# NODE_OPTIONS is removed from the environment first so it is not inherited by
# the launched process.
unset NODE_OPTIONS
# ELECTRON_RUN_AS_NODE=1 is set only for this one command.
ELECTRON_RUN_AS_NODE=1 "/usr/share/code/code" "/home/breadway/.config/Code/User/globalStorage/github.copilot-chat/copilotCli/copilotCLIShim.js" "$@"

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,3 @@
#!/bin/sh
# Launcher shim: runs copilotDebugCommand.js with the VS Code executable. The
# script receives the fixed arguments "vscode://github.copilot-chat" and an
# empty string, followed by the caller's own arguments ("$@").
# NODE_OPTIONS is removed from the environment first so it is not inherited by
# the launched process.
unset NODE_OPTIONS
# ELECTRON_RUN_AS_NODE=1 is set only for this one command.
ELECTRON_RUN_AS_NODE=1 "/usr/share/code/code" "/home/breadway/.config/Code/User/globalStorage/github.copilot-chat/debugCommand/copilotDebugCommand.js" "vscode://github.copilot-chat" "" "$@"

View file

@ -0,0 +1,108 @@
---
name: Plan
description: Researches and outlines multi-step plans
argument-hint: Outline the goal or problem to research
target: vscode
disable-model-invocation: true
tools: ['agent', 'search', 'read', 'execute/getTerminalOutput', 'execute/testFailure', 'web', 'github/issue_read', 'github.vscode-pull-request-github/issue_fetch', 'github.vscode-pull-request-github/activePullRequest', 'vscode/askQuestions']
agents: []
handoffs:
- label: Start Implementation
agent: agent
prompt: 'Start implementation'
send: true
- label: Open in Editor
agent: agent
prompt: '#createFile the plan as is into an untitled file (`untitled:plan-${camelCaseName}.prompt.md` without frontmatter) for further refinement.'
send: true
showContinueOn: false
---
You are a PLANNING AGENT, pairing with the user to create a detailed, actionable plan.
Your job: research the codebase → clarify with the user → produce a comprehensive plan. This iterative approach catches edge cases and non-obvious requirements BEFORE implementation begins.
Your SOLE responsibility is planning. NEVER start implementation.
<rules>
- STOP if you consider running file editing tools — plans are for others to execute
- Use #tool:vscode/askQuestions freely to clarify requirements — don't make large assumptions
- Present a well-researched plan with loose ends tied BEFORE implementation
</rules>
<workflow>
Cycle through these phases based on user input. This is iterative, not linear.
## 1. Discovery
Run #tool:agent/runSubagent to gather context and discover potential blockers or ambiguities.
MANDATORY: Instruct the subagent to work autonomously following <research_instructions>.
<research_instructions>
- Research the user's task comprehensively using read-only tools.
- Start with high-level code searches before reading specific files.
- Pay special attention to instructions and skills made available by the developers to understand best practices and intended usage.
- Identify missing information, conflicting requirements, or technical unknowns.
- DO NOT draft a full plan yet — focus on discovery and feasibility.
</research_instructions>
After the subagent returns, analyze the results.
## 2. Alignment
If research reveals major ambiguities or if you need to validate assumptions:
- Use #tool:vscode/askQuestions to clarify intent with the user.
- Surface discovered technical constraints or alternative approaches.
- If answers significantly change the scope, loop back to **Discovery**.
## 3. Design
Once context is clear, draft a comprehensive implementation plan per <plan_style_guide>.
The plan should reflect:
- Critical file paths discovered during research.
- Code patterns and conventions found.
- A step-by-step implementation approach.
Present the plan as a **DRAFT** for review.
## 4. Refinement
On user input after showing a draft:
- Changes requested → revise and present updated plan.
- Questions asked → clarify, or use #tool:vscode/askQuestions for follow-ups.
- Alternatives wanted → loop back to **Discovery** with new subagent.
- Approval given → acknowledge, the user can now use handoff buttons.
The final plan should:
- Be scannable yet detailed enough to execute.
- Include critical file paths and symbol references.
- Reference decisions from the discussion.
- Leave no ambiguity.
Keep iterating until explicit approval or handoff.
</workflow>
<plan_style_guide>
```markdown
## Plan: {Title (2-10 words)}
{TL;DR — what, how, why. Reference key decisions. (30-200 words, depending on complexity)}
**Steps**
1. {Action with [file](path) links and `symbol` refs}
2. {Next step}
3. {…}
**Verification**
{How to test: commands, tests, manual checks}
**Decisions** (if applicable)
- {Decision: chose X over Y}
```
Rules:
- NO code blocks — describe changes, link to files/symbols
- NO questions at the end — ask during workflow via #tool:vscode/askQuestions
- Keep scannable
</plan_style_guide>

View file

@ -0,0 +1,88 @@
import platform
import sys
if sys.platform != "win32":
import readline
# Visible prompt text; PS1.__str__ embeds it between the shell-integration escape sequences.
original_ps1 = ">>> "
# True when the platform release string contains "microsoft-standard-WSL".
is_wsl = "microsoft-standard-WSL" in platform.release()
class REPLHooks:
    """Track whether the last REPL statement raised, by wrapping the sys hooks.

    Creating an instance replaces ``sys.excepthook`` and ``sys.displayhook``
    with this object's methods; the hooks that were installed before are kept
    and still called, so normal output and tracebacks are unchanged.
    """

    def __init__(self):
        self.global_exit = None
        self.failure_flag = False
        # Keep the previously installed hooks so ours can delegate to them.
        self.original_excepthook, self.original_displayhook = (
            sys.excepthook,
            sys.displayhook,
        )
        sys.excepthook, sys.displayhook = self.my_excepthook, self.my_displayhook

    def my_displayhook(self, value):
        """Clear the failure flag when *value* is None, then delegate."""
        if value is None:
            self.failure_flag = False
        self.original_displayhook(value)

    def my_excepthook(self, type_, value, traceback):
        """Record the exception, set the failure flag, then delegate."""
        self.global_exit, self.failure_flag = value, True
        self.original_excepthook(type_, value, traceback)
def get_last_command():
    """Return the most recent readline history entry as a string.

    Returns an empty string on Windows (where readline is not imported) and
    when the history is empty, so callers can always embed the result in text.
    """
    if sys.platform == "win32":
        return ""

    last_command = readline.get_history_item(readline.get_current_history_length())
    # get_history_item() returns None when there is no entry at that index
    # (e.g. empty history); normalise that to "" so the prompt does not end up
    # reporting the literal text "None" as the last command.
    return "" if last_command is None else last_command
class PS1:
    """Prompt object whose ``str()`` is the prompt text wrapped in OSC 633 sequences.

    ``__str__`` is evaluated for every prompt; it reports the success/failure
    of the previous statement (taken from ``hooks.failure_flag``) and then
    resets that flag.
    """

    # Created once, when the class body runs; REPLHooks() installs the sys hooks.
    hooks = REPLHooks()
    sys.excepthook = hooks.my_excepthook
    sys.displayhook = hooks.my_displayhook

    def __str__(self):
        # Exit code is 1 if the previous statement raised, else 0; reading it
        # consumes the flag.
        failed = bool(self.hooks.failure_flag)
        self.hooks.failure_flag = False

        # Sequence follows the official VS Code shell integration doc.
        command_finished = "\x1b]633;D;" + str(int(failed)) + "\x07"
        prompt_started = "\x1b]633;A\x07"
        command_start = "\x1b]633;B\x07"
        command_executed = "\x1b]633;C\x07"

        if sys.platform == "win32":
            pieces = (
                command_finished,
                prompt_started,
                original_ps1,
                command_start,
                command_executed,
            )
            return "".join(pieces)

        # Non-Windows: also report the last command line taken from history.
        # NOTE(review): \001 / \002 presumably mark the escape sequences as
        # non-printing for readline - confirm.
        soh, stx = "\001", "\002"
        command_line = "\x1b]633;E;" + str(get_last_command()) + "\x07"
        pieces = (
            soh,
            command_executed,
            command_line,
            command_finished,
            prompt_started,
            stx,
            original_ps1,
            soh,
            command_start,
            stx,
        )
        return "".join(pieces)

    def __repr__(self):
        return "<Custom PS1 for VS Code Python Shell Integration>"
# Install the custom prompt everywhere except Windows and WSL.
if not (sys.platform == "win32" or is_wsl):
    sys.ps1 = PS1()

# Tell the user which modifier key opens the Native REPL link.
print(
    "Cmd click to launch VS Code Native REPL"
    if sys.platform == "darwin"
    else "Ctrl click to launch VS Code Native REPL"
)

View file

@ -0,0 +1,21 @@
{
"environment": {
"executable": "/usr/bin/python3",
"prefix": "/usr",
"version": "3.14.3.final.0",
"is64Bit": true,
"symlinks": [
"/usr/bin/python3"
]
},
"symlinks": [
[
"/usr/bin/python3",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
]
]
}

View file

@ -0,0 +1,39 @@
{
"environment": {
"executable": "/usr/bin/python",
"prefix": "/usr",
"version": "3.14.3.final.0",
"is64Bit": true,
"symlinks": [
"/usr/bin/python",
"/usr/bin/python3",
"/usr/bin/python3.14"
]
},
"symlinks": [
[
"/usr/bin/python",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
],
[
"/usr/bin/python3",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
],
[
"/usr/bin/python3.14",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
]
]
}

View file

@ -0,0 +1,21 @@
{
"environment": {
"executable": "/usr/bin/python3.14",
"prefix": "/usr",
"version": "3.14.3.final.0",
"is64Bit": true,
"symlinks": [
"/usr/bin/python3.14"
]
},
"symlinks": [
[
"/usr/bin/python3.14",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
]
]
}

View file

@ -0,0 +1,21 @@
{
"environment": {
"executable": "/usr/bin/python3.11",
"prefix": "/usr",
"version": "3.11.15.final.0",
"is64Bit": true,
"symlinks": [
"/usr/bin/python3.11"
]
},
"symlinks": [
[
"/usr/bin/python3.11",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
]
]
}

View file

@ -0,0 +1,21 @@
{
"environment": {
"executable": "/usr/bin/python3.12",
"prefix": "/usr",
"version": "3.12.13.final.0",
"is64Bit": true,
"symlinks": [
"/usr/bin/python3.12"
]
},
"symlinks": [
[
"/usr/bin/python3.12",
{
"secs_since_epoch": 1774483200,
"nanos_since_epoch": 0
},
null
]
]
}

Binary file not shown.

View file

@ -0,0 +1,93 @@
{
"telemetry.sqmId": "",
"telemetry.machineId": "e94c72ff0387061b6a6c65df3c28c9219c0abbe46893e7ee6b1a47da0ef61c91",
"telemetry.devDeviceId": "c4aa5a03-588b-44b5-88b4-1a481f4bf3ee",
"backupWorkspaces": {
"workspaces": [],
"folders": [],
"emptyWindows": [
{
"backupFolder": "1775815287792"
}
]
},
"windowControlHeight": 35,
"profileAssociations": {
"workspaces": {
"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology": "__default__profile__",
"file:///home/breadway/Documents/Hazard%20Pay": "__default__profile__",
"file:///home/breadway/Documents/Laser%20Tag": "__default__profile__",
"vscode-remote://ssh-remote%2Batlas.tail4806cb.ts.net/home/riley": "__default__profile__"
},
"emptyWindows": {
"1775815287792": "__default__profile__"
}
},
"theme": "vs-dark",
"themeBackground": "#1f1f1f",
"windowSplash": {
"zoomLevel": 0,
"baseTheme": "vs-dark",
"colorInfo": {
"foreground": "#cccccc",
"background": "#1f1f1f",
"editorBackground": "#1f1f1f",
"titleBarBackground": "#181818",
"titleBarBorder": "#2b2b2b",
"activityBarBackground": "#181818",
"activityBarBorder": "#2b2b2b",
"sideBarBackground": "#181818",
"sideBarBorder": "#2b2b2b",
"statusBarBackground": "#181818",
"statusBarBorder": "#2b2b2b",
"statusBarNoFolderBackground": "#1f1f1f"
},
"layoutInfo": {
"sideBarSide": "left",
"editorPartMinWidth": 220,
"titleBarHeight": 35,
"activityBarWidth": 48,
"sideBarWidth": 300,
"auxiliaryBarWidth": 0,
"statusBarHeight": 22,
"windowBorder": false
}
},
"windowsState": {
"lastActiveWindow": {
"backupPath": "/home/breadway/.config/Code/Backups/1775815287792",
"uiState": {
"mode": 0,
"x": 0,
"y": 0,
"width": 1200,
"height": 800
}
},
"openedWindows": []
},
"windowSplashWorkspaceOverride": {
"layoutInfo": {
"sideBarWidth": 300,
"auxiliaryBarWidth": 300,
"workspaces": {
"e387457d3a4ae35149094541b5b1bc98": {
"sideBarVisible": true,
"auxiliaryBarVisible": true
},
"d58507d32a163bf14f1020a325e9617c": {
"sideBarVisible": true,
"auxiliaryBarVisible": true
},
"9781d15abb972bcb41fa234c3afe65ff": {
"sideBarVisible": true,
"auxiliaryBarVisible": true
},
"c3bfe6a4e3ea522f0530724fa39111f1": {
"sideBarVisible": true,
"auxiliaryBarVisible": true
}
}
}
}
}

View file

@ -0,0 +1,9 @@
{
"hosts": {
"atlas.tail4806cb.ts.net": {
"persistToSSHConfig": true,
"differentUserFromSSHConfig": false,
"user": "riley"
}
}
}

View file

@ -0,0 +1,3 @@
{
"explorer.confirmDelete": false
}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774600123963,"initialLocation":"panel","responderUsername":"","sessionId":"8b91d7a6-ae77-42bc-b66f-8d95a086715a","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774365051893,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"c7465b2a-7199-40ce-9951-b6795d0a3a84","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774939152252,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"dc74fbcd-b41d-47af-90f7-d0a978ef802b","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1,2 @@
{"kind":0,"v":{"version":3,"creationDate":1774600218109,"initialLocation":"panel","responderUsername":"","sessionId":"eab52d46-dfac-4e20-a099-6b67cabfb460","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}
{"kind":1,"k":["responderUsername"],"v":"GitHub Copilot"}

View file

@ -0,0 +1,88 @@
import platform
import sys
if sys.platform != "win32":
import readline
# Visible prompt text; PS1.__str__ embeds it between the shell-integration escape sequences.
original_ps1 = ">>> "
# True when the platform release string contains "microsoft-standard-WSL".
is_wsl = "microsoft-standard-WSL" in platform.release()
class REPLHooks:
    """Track whether the last REPL statement raised, by wrapping the sys hooks.

    Creating an instance replaces ``sys.excepthook`` and ``sys.displayhook``
    with this object's methods; the hooks that were installed before are kept
    and still called, so normal output and tracebacks are unchanged.
    """

    def __init__(self):
        self.global_exit = None
        self.failure_flag = False
        # Keep the previously installed hooks so ours can delegate to them.
        self.original_excepthook, self.original_displayhook = (
            sys.excepthook,
            sys.displayhook,
        )
        sys.excepthook, sys.displayhook = self.my_excepthook, self.my_displayhook

    def my_displayhook(self, value):
        """Clear the failure flag when *value* is None, then delegate."""
        if value is None:
            self.failure_flag = False
        self.original_displayhook(value)

    def my_excepthook(self, type_, value, traceback):
        """Record the exception, set the failure flag, then delegate."""
        self.global_exit, self.failure_flag = value, True
        self.original_excepthook(type_, value, traceback)
def get_last_command():
    """Return the most recent readline history entry as a string.

    Returns an empty string on Windows (where readline is not imported) and
    when the history is empty, so callers can always embed the result in text.
    """
    if sys.platform == "win32":
        return ""

    last_command = readline.get_history_item(readline.get_current_history_length())
    # get_history_item() returns None when there is no entry at that index
    # (e.g. empty history); normalise that to "" so the prompt does not end up
    # reporting the literal text "None" as the last command.
    return "" if last_command is None else last_command
class PS1:
    """Prompt object whose ``str()`` is the prompt text wrapped in OSC 633 sequences.

    ``__str__`` is evaluated for every prompt; it reports the success/failure
    of the previous statement (taken from ``hooks.failure_flag``) and then
    resets that flag.
    """

    # Created once, when the class body runs; REPLHooks() installs the sys hooks.
    hooks = REPLHooks()
    sys.excepthook = hooks.my_excepthook
    sys.displayhook = hooks.my_displayhook

    def __str__(self):
        # Exit code is 1 if the previous statement raised, else 0; reading it
        # consumes the flag.
        failed = bool(self.hooks.failure_flag)
        self.hooks.failure_flag = False

        # Sequence follows the official VS Code shell integration doc.
        command_finished = "\x1b]633;D;" + str(int(failed)) + "\x07"
        prompt_started = "\x1b]633;A\x07"
        command_start = "\x1b]633;B\x07"
        command_executed = "\x1b]633;C\x07"

        if sys.platform == "win32":
            pieces = (
                command_finished,
                prompt_started,
                original_ps1,
                command_start,
                command_executed,
            )
            return "".join(pieces)

        # Non-Windows: also report the last command line taken from history.
        # NOTE(review): \001 / \002 presumably mark the escape sequences as
        # non-printing for readline - confirm.
        soh, stx = "\001", "\002"
        command_line = "\x1b]633;E;" + str(get_last_command()) + "\x07"
        pieces = (
            soh,
            command_executed,
            command_line,
            command_finished,
            prompt_started,
            stx,
            original_ps1,
            soh,
            command_start,
            stx,
        )
        return "".join(pieces)

    def __repr__(self):
        return "<Custom PS1 for VS Code Python Shell Integration>"
# Install the custom prompt everywhere except Windows and WSL.
if not (sys.platform == "win32" or is_wsl):
    sys.ps1 = PS1()

# Tell the user which modifier key opens the Native REPL link.
print(
    "Cmd click to launch VS Code Native REPL"
    if sys.platform == "darwin"
    else "Ctrl click to launch VS Code Native REPL"
)

View file

@ -0,0 +1,3 @@
{
"folder": "file:///home/breadway/Documents/Laser%20Tag"
}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774941409869,"initialLocation":"panel","responderUsername":"","sessionId":"2324c150-9e6f-47e2-8f4a-20af82ec2af1","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774941514520,"initialLocation":"panel","responderUsername":"","sessionId":"e8ce1288-ab4d-4fa7-83ee-d054c746a71b","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1,3 @@
{
"folder": "vscode-remote://ssh-remote%2Batlas.tail4806cb.ts.net/home/riley"
}

View file

@ -0,0 +1,88 @@
import platform
import sys
if sys.platform != "win32":
import readline
# Prompt text shown to the user; PS1.__str__ embeds it between the
# shell-integration escape sequences.
original_ps1 = ">>> "
# True when the platform release string contains "microsoft-standard-WSL";
# checked at the bottom of the file before installing the custom prompt.
is_wsl = "microsoft-standard-WSL" in platform.release()
class REPLHooks:
    """Record whether the latest REPL input raised, by wrapping the ``sys`` hooks.

    Creating an instance saves the current ``sys.excepthook`` and
    ``sys.displayhook`` and replaces them with this object's wrappers. The
    wrappers update ``failure_flag`` and then delegate to the saved hooks.
    """

    def __init__(self):
        # Save the hooks that are active right now so the wrappers can
        # forward to them.
        self.original_displayhook = sys.displayhook
        self.original_excepthook = sys.excepthook
        self.failure_flag = False
        self.global_exit = None
        # Route interpreter callbacks through this instance.
        sys.displayhook = self.my_displayhook
        sys.excepthook = self.my_excepthook

    def my_excepthook(self, type_, value, traceback):
        """Flag the failure, remember the exception, then call the saved excepthook."""
        self.failure_flag = True
        self.global_exit = value
        self.original_excepthook(type_, value, traceback)

    def my_displayhook(self, value):
        """Clear the failure flag when *value* is None, then call the saved displayhook."""
        if value is None:
            self.failure_flag = False
        self.original_displayhook(value)
def get_last_command():
    """Return the most recent readline history entry as a string.

    Returns:
        The newest history line, or ``""`` when the history is empty.
        On win32 readline is not consulted and ``""`` is returned.
    """
    if sys.platform == "win32":
        return ""
    # History indices are 1-based, so the current length is the index of
    # the newest entry.
    last_command = readline.get_history_item(readline.get_current_history_length())
    # readline yields None for an empty history; normalise it so callers
    # that stringify the result never emit the literal text "None".
    return last_command or ""
class PS1:
    """Prompt object for ``sys.ps1`` that emits VS Code shell-integration marks.

    ``__str__`` is evaluated each time a prompt is shown. It reports whether
    the previous input raised (read from ``hooks.failure_flag``) and wraps the
    prompt text in OSC 633 escape sequences.
    """

    hooks = REPLHooks()
    # REPLHooks.__init__ has already installed these wrappers; the explicit
    # assignments are kept so the module-level side effect is unchanged.
    sys.excepthook = hooks.my_excepthook
    sys.displayhook = hooks.my_displayhook

    @staticmethod
    def _escape_command_line(command):
        r"""Escape *command* for the payload of an ``OSC 633 ; E`` sequence.

        VS Code's shell-integration protocol requires a backslash to be sent
        as ``\\`` and requires ``;`` plus every character at or below 0x20 to
        be sent as ``\xNN``. Unescaped, a ``;`` would end the command-line
        field and a control character could terminate the sequence early.

        Args:
            command: Raw command text.

        Returns:
            The escaped text; all other characters pass through unchanged.
        """
        pieces = []
        for char in command:
            if char == "\\":
                pieces.append("\\\\")
            elif char == ";" or ord(char) <= 0x20:
                pieces.append("\\x{:02x}".format(ord(char)))
            else:
                pieces.append(char)
        return "".join(pieces)

    # str will get called for every prompt with exit code to show success/failure
    def __str__(self):
        """Return the prompt text decorated with OSC 633 marks."""
        # 1 when the excepthook wrapper has set the flag, otherwise 0.
        exit_code = int(bool(self.hooks.failure_flag))
        # Reset so the next prompt only reports what happens from here on.
        self.hooks.failure_flag = False
        # Guide following official VS Code doc for shell integration sequence:
        # For non-windows allow recent_command history.
        if sys.platform != "win32":
            # An empty readline history is reported as None; send an empty
            # command line rather than the text "None".
            last_command = get_last_command() or ""
            # NOTE(review): \001/\002 are presumably readline's markers for
            # non-printing prompt text -- confirm against the readline docs.
            return "{soh}{command_executed}{command_line}{command_finished}{prompt_started}{stx}{prompt}{soh}{command_start}{stx}".format(
                soh="\001",
                stx="\002",
                command_executed="\x1b]633;C\x07",
                command_line="\x1b]633;E;"
                + self._escape_command_line(str(last_command))
                + "\x07",
                command_finished="\x1b]633;D;" + str(exit_code) + "\x07",
                prompt_started="\x1b]633;A\x07",
                prompt=original_ps1,
                command_start="\x1b]633;B\x07",
            )
        return "{command_finished}{prompt_started}{prompt}{command_start}{command_executed}".format(
            command_finished="\x1b]633;D;" + str(exit_code) + "\x07",
            prompt_started="\x1b]633;A\x07",
            prompt=original_ps1,
            command_start="\x1b]633;B\x07",
            command_executed="\x1b]633;C\x07",
        )

    def __repr__(self):
        """Return a fixed description, avoiding the escape-laden ``__str__``."""
        return "<Custom PS1 for VS Code Python Shell Integration>"
# Install the custom prompt object unless running on win32 or `is_wsl` is true.
if sys.platform != "win32" and (not is_wsl):
sys.ps1 = PS1()
# Print which modifier key to use: "Cmd" on darwin, "Ctrl" everywhere else.
# NOTE(review): indentation was lost in this copy -- presumably this check is
# top-level rather than nested under the one above; confirm against the original file.
if sys.platform == "darwin":
print("Cmd click to launch VS Code Native REPL")
else:
print("Ctrl click to launch VS Code Native REPL")

View file

@ -0,0 +1,3 @@
{
"folder": "file:///home/breadway/Documents/Hazard%20Pay"
}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1775454338637,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"11f2de30-ec2b-4879-a78f-e06b4c2b0bad","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1,2 @@
{"kind":0,"v":{"version":3,"creationDate":1775303745152,"initialLocation":"panel","responderUsername":"","sessionId":"123907f6-8dde-436d-91a1-9b20eef75377","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}
{"kind":1,"k":["responderUsername"],"v":"GitHub Copilot"}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1775477131078,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"232d35f4-1b7a-4e62-aa92-a4adfa72f21c","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1775502675448,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"3c426d55-429e-4167-afb1-8c4ff89be4b7","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1,2 @@
{"kind":0,"v":{"version":3,"creationDate":1774363126866,"initialLocation":"panel","responderUsername":"","sessionId":"63779320-deba-4d03-8619-47e146708f20","hasPendingEdits":false,"requests":[],"pendingRequests":[]}}
{"kind":1,"k":["inputState"],"v":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1775540338319,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"782648e0-18b2-4e6c-a6c7-609c19b021df","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774367937028,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"809bdf57-f190-4e54-98f2-ea20e1c04007","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1774600185306,"initialLocation":"panel","responderUsername":"","sessionId":"c6f9ede1-602d-4967-a697-8f3f963b9e59","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

View file

@ -0,0 +1 @@
{"kind":0,"v":{"version":3,"creationDate":1775536498799,"initialLocation":"panel","responderUsername":"GitHub Copilot","sessionId":"cfad25bb-6655-45fb-84e8-21d567d83b3e","hasPendingEdits":false,"requests":[],"pendingRequests":[],"inputState":{"attachments":[],"mode":{"id":"agent","kind":"agent"},"selectedModel":{"identifier":"copilot/gpt-5.3-codex","metadata":{"extension":{"value":"GitHub.copilot-chat","_lower":"github.copilot-chat"},"id":"gpt-5.3-codex","vendor":"copilot","name":"GPT-5.3-Codex","family":"gpt-5.3-codex","tooltip":"OpenAI Codex model specialized for code generation, debugging, and software development tasks. Rate is counted at 1x.","version":"gpt-5.3-codex","multiplier":"1x","maxInputTokens":271805,"maxOutputTokens":128000,"auth":{"providerLabel":"GitHub Copilot Chat","accountLabel":"Breadway"},"isDefaultForLocation":{"panel":false,"terminal":false,"notebook":false,"editor":false},"isUserSelectable":true,"modelPickerCategory":{"label":"Premium Models","order":1},"capabilities":{"vision":true,"toolCalling":true,"agentMode":true}}},"inputText":"","selections":[{"startLineNumber":1,"startColumn":1,"endLineNumber":1,"endColumn":1,"selectionStartLineNumber":1,"selectionStartColumn":1,"positionLineNumber":1,"positionColumn":1}],"contrib":{"chatDynamicVariableModel":[]}}}}

Some files were not shown because too many files have changed in this diff Show more