Update Fedora state: 2026-04-29 11:50
This commit is contained in:
parent
42ca768584
commit
10f0d5de1d
338 changed files with 18983 additions and 32 deletions
2402
dot_config/private_Code/User/History/-1aabd2fd/0Ago.csv
Normal file
2402
dot_config/private_Code/User/History/-1aabd2fd/0Ago.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/organization_happiness_study_data.csv","entries":[{"id":"0Ago.csv","timestamp":1774348491393}]}
|
||||
78
dot_config/private_Code/User/History/-2e6bd5d9/DwLj.py
Normal file
78
dot_config/private_Code/User/History/-2e6bd5d9/DwLj.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Load the data and print a trend-verification report for the study.
#
# NOTE: weekly windows are selected by Day *label* with .loc (inclusive on
# both ends). Plain integer slicing such as weekly_avg[1:8] is positional on
# a Series, so on the Day-indexed groupby result it returned days 2-8 instead
# of the days 1-7 named in the printed label (and week 4 lost day 22).
DATA_FILE = 'organization_happiness_study_data.csv'

# Columns whose value 'Yes' marks a habit as completed on that row.
ADHERENCE_COLUMNS = (
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
)

# (printed label, first day, last day) -- both day bounds are inclusive.
WEEK_WINDOWS = (
    ("Week 1 (Days 1-7)", 1, 7),
    ("Week 2 (Days 8-14)", 8, 14),
    ("Week 3 (Days 15-21)", 15, 21),
    ("Week 4 (Days 22-30)", 22, 30),
)


def count_habits(frame):
    """Return, for each row of *frame*, how many adherence columns equal 'Yes'."""
    return sum((frame[column] == 'Yes').astype(int) for column in ADHERENCE_COLUMNS)


def weekly_means(daily_avg):
    """Average a Day-indexed Series over each window in WEEK_WINDOWS.

    Args:
        daily_avg: Series of per-day mean happiness whose index holds the
            day numbers (as produced by ``groupby('Day')[...].mean()``).

    Returns:
        A list of ``(label, mean)`` tuples, one per window, in window order.
    """
    return [
        (label, daily_avg.loc[first:last].mean())
        for label, first, last in WEEK_WINDOWS
    ]


def print_weekly_trend(group):
    """Print the weekly average happiness lines for one study group."""
    daily_avg = group.groupby('Day')['Happiness'].mean()
    for label, mean in weekly_means(daily_avg):
        print(f"{label}: Average Happiness = {mean:.2f}")


def main():
    """Load the study CSV and print every section of the trend report."""
    df = pd.read_csv(DATA_FILE)

    print("=" * 75)
    print("UPWARD TREND VERIFICATION - HAPPINESS GROWTH WITH HABIT COMPLETION")
    print("=" * 75)

    # Calculate habit completion count
    df['Habits_Count'] = count_habits(df)

    print("\n--- Intervention Group: Early vs Late Month ---")
    intervention = df[df['Group'] == 'Intervention']
    early_month = intervention[intervention['Day'] <= 10]
    late_month = intervention[intervention['Day'] > 20]

    print("Days 1-10 (Early):")
    print(f" Mean Happiness: {early_month['Happiness'].mean():.2f}")
    print(f" Mean Habits Completed: {early_month['Habits_Count'].mean():.2f}")
    print("\nDays 21-30 (Late):")
    print(f" Mean Happiness: {late_month['Happiness'].mean():.2f}")
    print(f" Mean Habits Completed: {late_month['Habits_Count'].mean():.2f}")
    print(f"\nGrowth: {late_month['Happiness'].mean() - early_month['Happiness'].mean():.2f} points")

    print("\n--- Control Group: Early vs Late Month (Should be flat) ---")
    control = df[df['Group'] == 'Control']
    early_month_c = control[control['Day'] <= 10]
    late_month_c = control[control['Day'] > 20]

    print("Days 1-10 (Early):")
    print(f" Mean Happiness: {early_month_c['Happiness'].mean():.2f}")
    print("\nDays 21-30 (Late):")
    print(f" Mean Happiness: {late_month_c['Happiness'].mean():.2f}")
    print(f"\nChange: {late_month_c['Happiness'].mean() - early_month_c['Happiness'].mean():.2f} points (should be ~0)")

    print("\n--- Direct Correlation: Intervention Group by Habits Completed ---")
    for habit_count in [0, 1, 2, 3]:
        subset = intervention[intervention['Habits_Count'] == habit_count]
        # Skip habit counts that never occur so no "nan" line is printed.
        if len(subset) > 0:
            print(f"{habit_count} habits completed: Happiness = {subset['Happiness'].mean():.2f} (n={len(subset)})")

    print("\n--- Trend Over 30 Days (Intervention Group) ---")
    print_weekly_trend(intervention)

    print("\n--- Trend Over 30 Days (Control Group) ---")
    print_weekly_trend(control)

    print("\n--- Participant Examples (Intervention Group) ---")
    for pid in [1, 5, 10]:
        p_data = intervention[intervention['Participant_ID'] == pid]
        first_days = p_data[p_data['Day'] <= 10]
        last_days = p_data[p_data['Day'] > 20]
        early = first_days['Happiness'].mean()
        late = last_days['Happiness'].mean()
        early_habits = first_days['Habits_Count'].mean()
        late_habits = last_days['Habits_Count'].mean()
        print(f"\nParticipant {pid}:")
        print(f" Early (Days 1-10): Happiness {early:.1f}, Habits {early_habits:.1f}/day")
        print(f" Late (Days 21-30): Happiness {late:.1f}, Habits {late_habits:.1f}/day")
        print(f" Growth: {late - early:.1f} points")

    print("\n✓ Data shows:")
    print(" • Intervention group has upward trend over study period")
    print(" • Happy days strongly correlated with habit completion")
    print(" • Control group stays stable with natural random variation")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/verify_trend.py","entries":[{"id":"DwLj.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347397523}]}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"vscode-userdata:/home/breadway/.config/Code/User/settings.json","entries":[{"id":"jeJQ.json","timestamp":1774363216206}]}
|
||||
3
dot_config/private_Code/User/History/-393f507a/jeJQ.json
Normal file
3
dot_config/private_Code/User/History/-393f507a/jeJQ.json
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"explorer.confirmDelete": false
|
||||
}
|
||||
198
dot_config/private_Code/User/History/-3a2e7e8b/UHTA.yml
Normal file
198
dot_config/private_Code/User/History/-3a2e7e8b/UHTA.yml
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
services:
|
||||
jellyfin:
|
||||
image: jellyfin/jellyfin:latest
|
||||
container_name: jellyfin
|
||||
restart: unless-stopped
|
||||
group_add:
|
||||
- "993" # render group for VAAPI hardware acceleration
|
||||
|
||||
ports:
|
||||
- "8096:8096" # HTTP web UI
|
||||
- "8920:8920" # HTTPS
|
||||
- "7359:7359/udp" # Network discovery
|
||||
- "1900:1900/udp" # DLNA
|
||||
|
||||
expose:
|
||||
- "8096"
|
||||
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- TZ=Australia/Perth
|
||||
|
||||
volumes:
|
||||
# Config on NVMe (fast)
|
||||
- ./config:/config
|
||||
- ./cache:/cache
|
||||
|
||||
# Media libraries (read-only for safety)
|
||||
- "/mnt/media/Movies:/media/movies:ro"
|
||||
- "/mnt/media/TV Shows:/media/tv-shows:ro"
|
||||
- "/mnt/media/Anime:/media/anime:ro"
|
||||
- "/mnt/media/Kids TV:/media/kids-tv:ro"
|
||||
- "/mnt/media/Kids Movies:/media/kids-movies:ro"
|
||||
- "/tank/home-videos:/media/home-videos:ro"
|
||||
- "/tank/videos:/media/videos:ro" # own target: /media/home-videos is already used by /tank/home-videos
|
||||
- "/tank/photos:/media/home-photos:ro"
|
||||
|
||||
devices:
|
||||
# Hardware transcoding (Vega graphics)
|
||||
- /dev/dri:/dev/dri
|
||||
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
qbittorrent:
|
||||
image: linuxserver/qbittorrent:latest
|
||||
container_name: qbittorrent
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
WEBUI_PORT: 8090
|
||||
volumes:
|
||||
- ~/.docker_volumes/qbittorrent/config:/config
|
||||
- /mnt/media/downloads:/downloads
|
||||
- /mnt/media/anime:/animeq
|
||||
ports:
|
||||
- "8090:8090"
|
||||
- "6881:6881"
|
||||
- "6881:6881/udp"
|
||||
expose:
|
||||
- "8090"
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
sonarr:
|
||||
image: linuxserver/sonarr:latest
|
||||
container_name: sonarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
DOCKER_MODS: "linuxserver/mods:universal-package-install"
|
||||
INSTALL_PACKAGES: "ffmpeg"
|
||||
volumes:
|
||||
- ~/.docker_volumes/sonarr/config:/config
|
||||
- /mnt/media/Anime:/tv
|
||||
- ~/media/downloads:/downloads
|
||||
ports:
|
||||
- "8989:8989"
|
||||
expose:
|
||||
- "8989"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
prowlarr:
|
||||
image: linuxserver/prowlarr:latest
|
||||
container_name: prowlarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/prowlarr/config:/config
|
||||
ports:
|
||||
- "9696:9696"
|
||||
expose:
|
||||
- "9696"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
jellyseerr:
|
||||
image: fallenbagel/jellyseerr:latest
|
||||
container_name: jellyseerr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/jellyseerr/config:/app/config
|
||||
ports:
|
||||
- "5055:5055"
|
||||
expose:
|
||||
- "5055"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- jellyfin
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
caddy:
|
||||
image: caddy:latest
|
||||
container_name: caddy
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "443:443"
|
||||
- "443:443/udp" # For HTTP/3 support
|
||||
volumes:
|
||||
- ./Caddyfile:/etc/caddy/Caddyfile
|
||||
- ./caddy_data:/data
|
||||
- ./caddy_config:/config
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
crowdsec:
|
||||
image: crowdsecurity/crowdsec:latest
|
||||
container_name: crowdsec
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
|
||||
volumes:
|
||||
- /var/log:/var/log:ro
|
||||
- ./crowdsec_data:/var/lib/crowdsec/data
|
||||
- ./crowdsec_config:/etc/crowdsec
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
networks:
|
||||
jellyfin-net:
|
||||
driver: bridge
|
||||
174
dot_config/private_Code/User/History/-3a2e7e8b/ebbC.yml
Normal file
174
dot_config/private_Code/User/History/-3a2e7e8b/ebbC.yml
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
services:
|
||||
jellyfin:
|
||||
image: jellyfin/jellyfin:latest
|
||||
container_name: jellyfin
|
||||
restart: unless-stopped
|
||||
group_add:
|
||||
- "993" # render group for VAAPI hardware acceleration
|
||||
|
||||
ports:
|
||||
- "8096:8096" # HTTP web UI
|
||||
- "8920:8920" # HTTPS
|
||||
- "7359:7359/udp" # Network discovery
|
||||
- "1900:1900/udp" # DLNA
|
||||
|
||||
expose:
|
||||
- "8096"
|
||||
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- TZ=Australia/Perth
|
||||
|
||||
volumes:
|
||||
# Config on NVMe (fast)
|
||||
- ./config:/config
|
||||
- ./cache:/cache
|
||||
|
||||
# Media libraries (read-only for safety)
|
||||
- "/mnt/media/Movies:/media/movies:ro"
|
||||
- "/mnt/media/TV Shows:/media/tv-shows:ro"
|
||||
- "/mnt/media/Anime:/media/anime:ro"
|
||||
- "/mnt/media/Kids TV:/media/kids-tv:ro"
|
||||
- "/mnt/media/Kids Movies:/media/kids-movies:ro"
|
||||
- "/tank/home-videos:/media/home-videos:ro"
|
||||
- "/tank/videos:/media/videos:ro" # own target: /media/home-videos is already used by /tank/home-videos
|
||||
- "/tank/photos:/media/home-photos:ro"
|
||||
|
||||
devices:
|
||||
# Hardware transcoding (Vega graphics)
|
||||
- /dev/dri:/dev/dri
|
||||
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
qbittorrent:
|
||||
image: linuxserver/qbittorrent:latest
|
||||
container_name: qbittorrent
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
WEBUI_PORT: 8090
|
||||
volumes:
|
||||
- ~/.docker_volumes/qbittorrent/config:/config
|
||||
- /mnt/media/downloads:/downloads
|
||||
- /mnt/media/anime:/animeq
|
||||
ports:
|
||||
- "8090:8090"
|
||||
- "6881:6881"
|
||||
- "6881:6881/udp"
|
||||
expose:
|
||||
- "8090"
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
sonarr:
|
||||
image: linuxserver/sonarr:latest
|
||||
container_name: sonarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
DOCKER_MODS: "linuxserver/mods:universal-package-install"
|
||||
INSTALL_PACKAGES: "ffmpeg"
|
||||
volumes:
|
||||
- ~/.docker_volumes/sonarr/config:/config
|
||||
- /mnt/media/Anime:/tv
|
||||
- ~/media/downloads:/downloads
|
||||
ports:
|
||||
- "8989:8989"
|
||||
expose:
|
||||
- "8989"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
prowlarr:
|
||||
image: linuxserver/prowlarr:latest
|
||||
container_name: prowlarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/prowlarr/config:/config
|
||||
ports:
|
||||
- "9696:9696"
|
||||
expose:
|
||||
- "9696"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
jellyseerr:
|
||||
image: fallenbagel/jellyseerr:latest
|
||||
container_name: jellyseerr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/jellyseerr/config:/app/config
|
||||
ports:
|
||||
- "5055:5055"
|
||||
expose:
|
||||
- "5055"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- jellyfin
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
|
||||
|
||||
networks:
|
||||
jellyfin-net:
|
||||
driver: bridge
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Downloads/docker-compose.yml","entries":[{"id":"ebbC.yml","source":"textFileCreate.source","timestamp":1775500296675},{"id":"UHTA.yml","timestamp":1775500565545},{"id":"xBDr.yml","timestamp":1775500662427},{"id":"i7DI.yml","timestamp":1775502657849}]}
|
||||
208
dot_config/private_Code/User/History/-3a2e7e8b/i7DI.yml
Normal file
208
dot_config/private_Code/User/History/-3a2e7e8b/i7DI.yml
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
services:
|
||||
jellyfin:
|
||||
image: jellyfin/jellyfin:latest
|
||||
container_name: jellyfin
|
||||
restart: unless-stopped
|
||||
group_add:
|
||||
- "993" # render group for VAAPI hardware acceleration
|
||||
|
||||
ports:
|
||||
- "8096:8096" # HTTP web UI
|
||||
- "8920:8920" # HTTPS
|
||||
- "7359:7359/udp" # Network discovery
|
||||
- "1900:1900/udp" # DLNA
|
||||
|
||||
expose:
|
||||
- "8096"
|
||||
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- TZ=Australia/Perth
|
||||
|
||||
volumes:
|
||||
# Config on NVMe (fast)
|
||||
- ./config:/config
|
||||
- ./cache:/cache
|
||||
|
||||
# Media libraries (read-only for safety)
|
||||
- "/mnt/media/Movies:/media/movies:ro"
|
||||
- "/mnt/media/TV Shows:/media/tv-shows:ro"
|
||||
- "/mnt/media/Anime:/media/anime:ro"
|
||||
- "/mnt/media/Kids TV:/media/kids-tv:ro"
|
||||
- "/mnt/media/Kids Movies:/media/kids-movies:ro"
|
||||
- "/tank/home-videos:/media/home-videos:ro"
|
||||
- "/tank/videos:/media/videos:ro" # own target: /media/home-videos is already used by /tank/home-videos
|
||||
- "/tank/photos:/media/home-photos:ro"
|
||||
|
||||
devices:
|
||||
# Hardware transcoding (Vega graphics)
|
||||
- /dev/dri:/dev/dri
|
||||
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
qbittorrent:
|
||||
image: linuxserver/qbittorrent:latest
|
||||
container_name: qbittorrent
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
WEBUI_PORT: 8090
|
||||
volumes:
|
||||
- ~/.docker_volumes/qbittorrent/config:/config
|
||||
- /mnt/media/downloads:/downloads
|
||||
- /mnt/media/anime:/animeq
|
||||
ports:
|
||||
- "8090:8090"
|
||||
- "6881:6881"
|
||||
- "6881:6881/udp"
|
||||
expose:
|
||||
- "8090"
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
sonarr:
|
||||
image: linuxserver/sonarr:latest
|
||||
container_name: sonarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
DOCKER_MODS: "linuxserver/mods:universal-package-install"
|
||||
INSTALL_PACKAGES: "ffmpeg"
|
||||
volumes:
|
||||
- ~/.docker_volumes/sonarr/config:/config
|
||||
- /mnt/media/Anime:/tv
|
||||
- ~/media/downloads:/downloads
|
||||
ports:
|
||||
- "8989:8989"
|
||||
expose:
|
||||
- "8989"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
prowlarr:
|
||||
image: linuxserver/prowlarr:latest
|
||||
container_name: prowlarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/prowlarr/config:/config
|
||||
ports:
|
||||
- "9696:9696"
|
||||
expose:
|
||||
- "9696"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
jellyseerr:
|
||||
image: fallenbagel/jellyseerr:latest
|
||||
container_name: jellyseerr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/jellyseerr/config:/app/config
|
||||
ports:
|
||||
- "5055:5055"
|
||||
expose:
|
||||
- "5055"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- jellyfin
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
caddy:
|
||||
build:
|
||||
context: .
|
||||
dockerfile_inline: |
|
||||
FROM caddy:builder AS builder
|
||||
RUN xcaddy build --with github.com/caddy-dns/namedotcom
|
||||
FROM caddy:latest
|
||||
COPY --from=builder /usr/bin/caddy /usr/bin/caddy
|
||||
container_name: caddy
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- NAMEDOTCOM_USERNAME=
|
||||
- NAMEDOTCOM_TOKEN=your_api_token
|
||||
ports:
|
||||
- "443:443"
|
||||
- "443:443/udp"
|
||||
volumes:
|
||||
- ./Caddyfile:/etc/caddy/Caddyfile
|
||||
- ./caddy_data:/data
|
||||
- ./caddy_config:/config
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
crowdsec:
|
||||
image: crowdsecurity/crowdsec:latest
|
||||
container_name: crowdsec
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
|
||||
volumes:
|
||||
- /var/log:/var/log:ro
|
||||
- ./crowdsec_data:/var/lib/crowdsec/data
|
||||
- ./crowdsec_config:/etc/crowdsec
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
networks:
|
||||
jellyfin-net:
|
||||
driver: bridge
|
||||
199
dot_config/private_Code/User/History/-3a2e7e8b/xBDr.yml
Normal file
199
dot_config/private_Code/User/History/-3a2e7e8b/xBDr.yml
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
services:
|
||||
jellyfin:
|
||||
image: jellyfin/jellyfin:latest
|
||||
container_name: jellyfin
|
||||
restart: unless-stopped
|
||||
group_add:
|
||||
- "993" # render group for VAAPI hardware acceleration
|
||||
|
||||
ports:
|
||||
- "8096:8096" # HTTP web UI
|
||||
- "8920:8920" # HTTPS
|
||||
- "7359:7359/udp" # Network discovery
|
||||
- "1900:1900/udp" # DLNA
|
||||
|
||||
expose:
|
||||
- "8096"
|
||||
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- TZ=Australia/Perth
|
||||
|
||||
volumes:
|
||||
# Config on NVMe (fast)
|
||||
- ./config:/config
|
||||
- ./cache:/cache
|
||||
|
||||
# Media libraries (read-only for safety)
|
||||
- "/mnt/media/Movies:/media/movies:ro"
|
||||
- "/mnt/media/TV Shows:/media/tv-shows:ro"
|
||||
- "/mnt/media/Anime:/media/anime:ro"
|
||||
- "/mnt/media/Kids TV:/media/kids-tv:ro"
|
||||
- "/mnt/media/Kids Movies:/media/kids-movies:ro"
|
||||
- "/tank/home-videos:/media/home-videos:ro"
|
||||
- "/tank/videos:/media/videos:ro" # own target: /media/home-videos is already used by /tank/home-videos
|
||||
- "/tank/photos:/media/home-photos:ro"
|
||||
|
||||
devices:
|
||||
# Hardware transcoding (Vega graphics)
|
||||
- /dev/dri:/dev/dri
|
||||
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
qbittorrent:
|
||||
image: linuxserver/qbittorrent:latest
|
||||
container_name: qbittorrent
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
WEBUI_PORT: 8090
|
||||
volumes:
|
||||
- ~/.docker_volumes/qbittorrent/config:/config
|
||||
- /mnt/media/downloads:/downloads
|
||||
- /mnt/media/anime:/animeq
|
||||
ports:
|
||||
- "8090:8090"
|
||||
- "6881:6881"
|
||||
- "6881:6881/udp"
|
||||
expose:
|
||||
- "8090"
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
sonarr:
|
||||
image: linuxserver/sonarr:latest
|
||||
container_name: sonarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
DOCKER_MODS: "linuxserver/mods:universal-package-install"
|
||||
INSTALL_PACKAGES: "ffmpeg"
|
||||
volumes:
|
||||
- ~/.docker_volumes/sonarr/config:/config
|
||||
- /mnt/media/Anime:/tv
|
||||
- ~/media/downloads:/downloads
|
||||
ports:
|
||||
- "8989:8989"
|
||||
expose:
|
||||
- "8989"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
prowlarr:
|
||||
image: linuxserver/prowlarr:latest
|
||||
container_name: prowlarr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/prowlarr/config:/config
|
||||
ports:
|
||||
- "9696:9696"
|
||||
expose:
|
||||
- "9696"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- qbittorrent
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9696/ping"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
jellyseerr:
|
||||
image: fallenbagel/jellyseerr:latest
|
||||
container_name: jellyseerr
|
||||
environment:
|
||||
PUID: 1000
|
||||
PGID: 1000
|
||||
TZ: "${TZ}"
|
||||
volumes:
|
||||
- ~/.docker_volumes/jellyseerr/config:/app/config
|
||||
ports:
|
||||
- "5055:5055"
|
||||
expose:
|
||||
- "5055"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- jellyfin
|
||||
- sonarr
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5055/api/v1/status"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
caddy:
|
||||
image: caddy:latest
|
||||
container_name: caddy
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "443:443"
|
||||
- "443:443/udp" # For HTTP/3 support
|
||||
volumes:
|
||||
- ./Caddyfile:/etc/caddy/Caddyfile
|
||||
- ./caddy_data:/data
|
||||
- ./caddy_config:/config
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
crowdsec:
|
||||
image: crowdsecurity/crowdsec:latest
|
||||
container_name: crowdsec
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- COLLECTIONS=crowdsecurity/linux crowdsecurity/caddy crowdsecurity/base-httping
|
||||
volumes:
|
||||
- /var/log:/var/log:ro
|
||||
- ./crowdsec_data:/var/lib/crowdsec/data
|
||||
- ./crowdsec_config:/etc/crowdsec
|
||||
networks:
|
||||
- jellyfin-net
|
||||
|
||||
networks:
|
||||
jellyfin-net:
|
||||
driver: bridge
|
||||
18
dot_config/private_Code/User/History/-6123ca19/Xb2Q.py
Normal file
18
dot_config/private_Code/User/History/-6123ca19/Xb2Q.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Quick test to verify Data Analysis.py works correctly."""
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import os


def tail(text, limit):
    """Return at most the last *limit* characters of *text*.

    *limit* is expected to be a positive integer; shorter text is returned
    unchanged because slicing never reads past the start of the string.
    """
    return text[-limit:]


def list_plot_files(directory='plots'):
    """Return the sorted ``.png`` file names found in *directory*.

    A missing directory yields an empty list instead of raising, so the
    summary can still be printed when the analysis produced no plots.
    """
    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        return []
    return sorted(name for name in names if name.endswith('.png'))


def main():
    """Run 'Data Analysis.py' and summarise its output and generated plots."""
    result = subprocess.run([sys.executable, 'Data Analysis.py'], capture_output=True, text=True, timeout=30)
    print("STDOUT:")
    print(tail(result.stdout, 2000))
    print("\nSTDERR:")
    print(tail(result.stderr, 1000))
    print(f"\nExit code: {result.returncode}")

    # Check for plot files
    plot_files = list_plot_files('plots')
    print(f"\nGenerated {len(plot_files)} plot files:")
    for f in plot_files:
        print(f" - {f}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/test_analysis.py","entries":[{"id":"Xb2Q.py","source":"Chat Edit: 'ensure the graphs being used are appropriate for the study'","timestamp":1774346851499}]}
|
||||
77
dot_config/private_Code/User/History/-7d2a273a/bWGM.py
Normal file
77
dot_config/private_Code/User/History/-7d2a273a/bWGM.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Load the data and print the data-generation verification report.
DATA_FILE = 'organization_happiness_study_data.csv'

# Columns whose value 'Yes' marks a habit as completed on that row.
ADHERENCE_COLUMNS = (
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
)


def count_habits(frame):
    """Return, for each row of *frame*, how many adherence columns equal 'Yes'."""
    return sum((frame[column] == 'Yes').astype(int) for column in ADHERENCE_COLUMNS)


def happiness_persistence(frame):
    """Return the correlation between each day's and the previous day's happiness.

    Rows are ordered by participant and day, and the previous value is taken
    within each participant, so the first row of every participant has no
    predecessor and is excluded. *frame* itself is not modified.
    """
    ordered = frame.sort_values(['Participant_ID', 'Day'])
    ordered['Happiness_prev'] = ordered.groupby('Participant_ID')['Happiness'].shift(1)
    valid = ordered[ordered['Happiness_prev'].notna()]
    return valid[['Happiness', 'Happiness_prev']].corr().iloc[0, 1]


def main():
    """Load the study CSV and print every section of the verification report."""
    df = pd.read_csv(DATA_FILE)

    print("=" * 70)
    print("DATA GENERATION IMPROVEMENTS VERIFICATION")
    print("=" * 70)
    print(f"\n✓ Dataset shape: {df.shape}")
    print(f"✓ Total rows: {len(df)} (20 participants × 30 days × 2 groups = 1200 expected)")

    print("\n--- Intervention Group Statistics ---")
    # Explicit copy: columns are added below, and assigning onto a filtered
    # slice of df triggers pandas' chained-assignment warning.
    intervention = df[df['Group'] == 'Intervention'].copy()
    print(f"Participants: {intervention['Participant_ID'].nunique()}")
    print(f"Mean Happiness: {intervention['Happiness'].mean():.2f}")
    print(f"Happiness Std Dev: {intervention['Happiness'].std():.2f}")
    print(f"Calendar Adherence Rate: {(intervention['Calendar_Adherence'] == 'Yes').mean():.1%}")
    print(f"Cleanliness Adherence Rate: {(intervention['Cleanliness_Adherence'] == 'Yes').mean():.1%}")
    print(f"Punctuality Adherence Rate: {(intervention['Punctuality_Adherence'] == 'Yes').mean():.1%}")

    print("\n--- Control Group Statistics ---")
    control = df[df['Group'] == 'Control']
    print(f"Participants: {control['Participant_ID'].nunique()}")
    print(f"Mean Happiness: {control['Happiness'].mean():.2f}")
    print(f"Happiness Std Dev: {control['Happiness'].std():.2f}")
    print(f"Reported Calendar: {(control['Calendar_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")
    print(f"Reported Cleanliness: {(control['Cleanliness_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")
    print(f"Reported Punctuality: {(control['Punctuality_Adherence'] == 'Yes').mean():.1%} (should be ~0%)")

    print("\n--- Natural Data Patterns ---")
    # Check for habit momentum (persistence)
    intervention['Habits_Count'] = count_habits(intervention)

    print("Habit completion rates by number completed:")
    for count in [0, 1, 2, 3]:
        subset = intervention[intervention['Habits_Count'] == count]
        happiness = subset['Happiness'].mean()
        print(f" {count} habits: Happiness = {happiness:.2f} (n={len(subset)})")

    # Weekend effect
    intervention['DayOfWeek'] = intervention['Day'] % 7
    is_weekend = intervention['DayOfWeek'].isin([0, 6])
    weekend = intervention[is_weekend]
    weekday = intervention[~is_weekend]
    print("\nWeekend vs Weekday Adherence:")
    print(f" Weekday avg habits: {weekday['Habits_Count'].mean():.2f}")
    print(f" Weekend avg habits: {weekend['Habits_Count'].mean():.2f}")

    # Habit formation over time
    first_week = intervention[intervention['Day'] <= 7]
    mid_month = intervention[(intervention['Day'] > 14) & (intervention['Day'] <= 21)]
    last_week = intervention[intervention['Day'] > 23]
    print("\nHabit Formation Over Time:")
    print(f" Days 1-7 (Starting): Avg habits = {first_week['Habits_Count'].mean():.2f}")
    print(f" Days 15-21 (Momentum): Avg habits = {mid_month['Habits_Count'].mean():.2f}")
    print(f" Days 24-30 (Late): Avg habits = {last_week['Habits_Count'].mean():.2f}")

    print("\nHappiness Persistence (day-to-day correlation):")
    corr = happiness_persistence(intervention)
    print(f" Correlation between today and yesterday's happiness: {corr:.3f}")

    print("\n✓ Data generation complete with natural patterns!")
    print("\nKey improvements:")
    print(" • Habit momentum: doing it yesterday makes it more likely today")
    print(" • Weekly patterns: lower adherence weekends vs weekdays")
    print(" • Habit formation: initial difficulty, momentum building, slight fatigue")
    print(" • Individual variation: each person has unique habit profiles")
    print(" • Happiness persistence: today's mood influenced by yesterday's")
    print(" • Control group realism: still report 'No' but data shows natural variation")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/test_improvements.py","entries":[{"id":"bWGM.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347099618}]}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/run_and_verify.py","entries":[{"id":"qMUb.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347459417}]}
|
||||
45
dot_config/private_Code/User/History/6b1c27e9/qMUb.py
Normal file
45
dot_config/private_Code/User/History/6b1c27e9/qMUb.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env python3
"""Generate new data and display sample showing upward trend"""
import subprocess
import sys

import pandas as pd

# Run the data generator with the interpreter that is running this script, so
# it sees the same installed packages (a bare 'python3' found on PATH may be a
# different installation without pandas/numpy).
result = subprocess.run([sys.executable, 'Data Gen.py'], capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("Errors:", result.stderr)
if result.returncode != 0:
    # The generator failed, so any CSV on disk is left over from an earlier
    # run; stop here rather than report trends from stale data.
    sys.exit(result.returncode)

# Load and display trend analysis
df = pd.read_csv('organization_happiness_study_data.csv')

# Number of the three tracked habits answered 'Yes' on each row (0-3).
df['Habits_Count'] = (
    (df['Calendar_Adherence'] == 'Yes').astype(int) +
    (df['Cleanliness_Adherence'] == 'Yes').astype(int) +
    (df['Punctuality_Adherence'] == 'Yes').astype(int)
)

intervention = df[df['Group'] == 'Intervention']
control = df[df['Group'] == 'Control']

print("\n" + "="*70)
print("UPWARD TREND ANALYSIS")
print("="*70)

print("\n[INTERVENTION GROUP] - Should show upward trend")
early_int = intervention[intervention['Day'] <= 7]
late_int = intervention[intervention['Day'] >= 24]
print(f"Days 1-7: Avg Happiness = {early_int['Happiness'].mean():.2f}")
print(f"Days 24-30: Avg Happiness = {late_int['Happiness'].mean():.2f}")
# Signed format: a hard-coded '+' prefix rendered a decline as '+-0.50'.
print(f"GROWTH: {late_int['Happiness'].mean() - early_int['Happiness'].mean():+.2f} points\n")

print("[CONTROL GROUP] - Should show flat/random pattern")
early_ctl = control[control['Day'] <= 7]
late_ctl = control[control['Day'] >= 24]
print(f"Days 1-7: Avg Happiness = {early_ctl['Happiness'].mean():.2f}")
print(f"Days 24-30: Avg Happiness = {late_ctl['Happiness'].mean():.2f}")
print(f"CHANGE: {late_ctl['Happiness'].mean() - early_ctl['Happiness'].mean():+.2f} points\n")

print("[HABIT CORRELATION] - More habits = Higher happiness")
for habits in range(4):
    subset = intervention[intervention['Habits_Count'] == habits]
    if len(subset) > 0:
        print(f"{habits} habits/day: Avg Happiness = {subset['Happiness'].mean():.2f} ({len(subset)} observations)")
|
||||
196
dot_config/private_Code/User/History/6c11eec7/3jGE.py
Normal file
196
dot_config/private_Code/User/History/6c11eec7/3jGE.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 20
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability, which may fall outside the allowed range.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned while ``floor <= ceiling``. Defaults to
            0.05, the value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness rows for the intervention group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` lists; the habit fields are 'Yes'/'No' and
        happiness is an int clipped to 1-10.
    """
    # Change applied to habit_strength for 0-3 habits completed on a day.
    strength_change = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / cleanliness / punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # (ease, momentum bonus if done yesterday, probability ceiling)
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )

        person_happiness_baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0  # cumulative, clipped to [0, 4] every day
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder first week, easier middle stretch, slight fatigue late.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, yesterday):
                carry_over = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            adherence_count = int(sum(answer == 'Yes' for answer in today))
            habit_strength = np.clip(
                habit_strength + strength_change[adherence_count], 0, 4
            )

            study_progress = day / 30.0
            daily_noise = np.random.normal(0, 0.7)

            # Baseline plus a habit-strength term whose weight grows with the
            # day number, plus daily noise.
            happiness_value = (
                person_happiness_baseline
                + habit_strength * (0.5 + study_progress)
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness rows for the control group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    All three habit columns are always reported as 'No'; habits are still
    drawn internally and add 0.1 happiness each.

    The random draws happen in the same order as before, so a fixed seed
    still yields the same rows.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` lists, with happiness an int clipped to 1-10.
    """
    # (multiplier on natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        person_happiness_baseline = np.random.normal(4.8, 1.3)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very slow linear rise: +0.1% per day.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute each probability once instead of once per outcome.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                if np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.2)

            # No cumulative term: only baseline, today's small boost and noise.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Intervention participants take IDs 1..N; control IDs continue from N + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; column order matches the row lists.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined dataset without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
200
dot_config/private_Code/User/History/6c11eec7/46oA.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/46oA.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(3) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 40
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability, which may fall outside the allowed range.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned while ``floor <= ceiling``. Defaults to
            0.05, the value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness rows for the intervention group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    Happiness combines a per-person baseline, an immediate bonus for the
    habits done that day, and a bonus from accumulated habit strength.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` lists; the habit fields are 'Yes'/'No' and
        happiness is an int clipped to 1-10.
    """
    # Change applied to habit_strength for 0-3 habits completed on a day.
    strength_change = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / cleanliness / punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # (ease, momentum bonus if done yesterday, probability ceiling)
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )

        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0  # cumulative, clipped to [0, 5] every day
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder first week, easier middle stretch, slight fatigue late.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, yesterday):
                carry_over = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            adherence_count = int(sum(answer == 'Yes' for answer in today))
            habit_strength = np.clip(
                habit_strength + strength_change[adherence_count], 0, 5
            )

            study_progress = day / 30.0
            daily_noise = np.random.normal(0, 0.35)

            # Immediate dose-response: 0.6 per habit done today (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6
            # Weight rises from ~0.4 to 0.6 over the study; with
            # habit_strength capped at 5 this term is at most 3.0.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)

            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness rows for the control group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    All three habit columns are always reported as 'No'; habits are still
    drawn internally and add 0.1 happiness each.

    The random draws happen in the same order as before, so a fixed seed
    still yields the same rows.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` lists, with happiness an int clipped to 1-10.
    """
    # (multiplier on natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Control baseline is centred on 5.1 with a standard deviation of 0.9.
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very slow linear rise: +0.1% per day.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute each probability once instead of once per outcome.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                if np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1
            daily_noise = np.random.normal(0, 1.0)

            # No cumulative term: only baseline, today's small boost and noise.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Intervention participants take IDs 1..N; control IDs continue from N + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; column order matches the row lists.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined dataset without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
93
dot_config/private_Code/User/History/6c11eec7/54EK.py
Normal file
93
dot_config/private_Code/User/History/6c11eec7/54EK.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# NOTE: a stray `df = pd.DataFrame(data, ...)` statement used to sit above
# these imports. It ran before `pd` and `data` existed (NameError on start-up)
# and listed only six columns; the DataFrame is built at the end of the script.
import pandas as pd
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 20
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability, which may fall outside the allowed range.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned while ``floor <= ceiling``. Defaults to
            0.05, the value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness rows for the intervention group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` lists; the habit fields are 'Yes'/'No' and
        happiness is an int clipped to 1-10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        org_bias = np.random.normal(0.7, 0.15)  # per-person organisation tendency

        # These depend only on the participant, so compute them once here
        # rather than twice per habit on every day.
        calendar_prob = clip_yes_prob(org_bias + 0.1, 0.95)
        clean_prob = clip_yes_prob(org_bias, 0.90)
        ontime_prob = clip_yes_prob(org_bias + 0.05, 0.92)

        for day in DAYS:
            calendar = np.random.choice(['Yes', 'No'], p=[calendar_prob, 1 - calendar_prob])
            clean = np.random.choice(['Yes', 'No'], p=[clean_prob, 1 - clean_prob])
            ontime = np.random.choice(['Yes', 'No'], p=[ontime_prob, 1 - ontime_prob])

            adherence_count = sum(x == 'Yes' for x in [calendar, clean, ontime])
            # The baseline is redrawn every day (not held per participant).
            baseline_happiness = np.random.normal(5.5, 1.0)
            raw_happiness = np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2)
            # Round to the nearest score: a bare int() truncates toward zero,
            # which shifted every score down by about half a point on average.
            happiness = int(np.clip(np.round(raw_happiness), 1, 10))

            rows.append([
                participant_id,
                'Intervention',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness rows for the control group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    The control group only records happiness; all habit columns are 'No'.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` lists, with happiness an int clipped to 1-10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        for day in DAYS:
            # Control group only records happiness; all habit columns are No.
            calendar = 'No'
            clean = 'No'
            ontime = 'No'

            # The baseline is redrawn every day (not held per participant).
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)
            # Round to the nearest score: a bare int() truncates toward zero,
            # which shifted every score down by about half a point on average.
            happiness = int(np.clip(np.round(baseline_happiness + control_noise), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                calendar,
                clean,
                ontime,
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Intervention participants take IDs 1..N; control IDs continue from N + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; column order matches the row lists.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined dataset without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
180
dot_config/private_Code/User/History/6c11eec7/9dqp.py
Normal file
180
dot_config/private_Code/User/History/6c11eec7/9dqp.py
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(64) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 20
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability, which may fall outside the allowed range.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned while ``floor <= ceiling``. Defaults to
            0.05, the value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness rows for the intervention group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    Happiness is a running value blending yesterday's value, the personal
    baseline, a boost for today's habits and noise.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` lists; the habit fields are 'Yes'/'No' and
        happiness is an int from 1 to 10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept inside [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / cleanliness / punctuality order.
        calendar_ease = org_bias + np.random.normal(0.05, 0.08)
        clean_ease = org_bias + np.random.normal(-0.02, 0.08)
        ontime_ease = org_bias + np.random.normal(0.02, 0.08)
        # (ease, momentum bonus if done yesterday, probability ceiling)
        habits = (
            (calendar_ease, 0.15, 0.95),
            (clean_ease, 0.15, 0.90),
            (ontime_ease, 0.12, 0.93),
        )

        person_happiness_baseline = np.random.normal(5.5, 1.2)
        current_happiness = person_happiness_baseline
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder first week, easier middle stretch, slight fatigue late.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            today = []
            for (ease, momentum, ceiling), done_before in zip(habits, yesterday):
                carry_over = momentum if done_before == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            # 1.2 per habit completed today (zero when none were done).
            adherence_count = sum(answer == 'Yes' for answer in today)
            habit_boost = adherence_count * 1.2

            happiness_noise = np.random.normal(0, 1.3)
            # 40% carry-over from the previous day, 40% personal baseline,
            # plus the habit boost and noise; the running value stays in 1-10.
            blended = (
                current_happiness * 0.4
                + person_happiness_baseline * 0.4
                + habit_boost * 0.9
                + happiness_noise
            )
            current_happiness = np.clip(blended, 1, 10)
            happiness = int(np.round(current_happiness))

            rows.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness rows for the control group.

    Creates ``N_PARTICIPANTS_PER_GROUP`` participants with consecutive IDs
    starting at ``start_participant_id`` and one row per entry of ``DAYS``.
    All three habit columns are always reported as 'No'; habits are still
    drawn internally and subtly raise happiness by 0.5 each.

    The random draws happen in the same order as before, so a fixed seed
    still yields the same rows.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` lists, with happiness an int from 1 to 10.
    """
    # (multiplier on natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept inside [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9
            # Very slow linear rise: +0.1% per day.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for weight, ceiling in untracked_habits:
                # Compute each probability once instead of once per outcome.
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                if np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.5
            happiness_noise = np.random.normal(0, 1.6)

            # Equal blend of yesterday's value and the personal baseline, plus
            # the untracked-habit boost and noise; kept inside 1-10.
            current_happiness = np.clip(
                current_happiness * 0.5 +
                person_happiness_baseline * 0.5 +
                subtle_boost +
                happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Intervention participants take IDs 1..N; control IDs continue from N + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant per day; column order matches the row lists.
df = pd.DataFrame(
    data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined dataset without the DataFrame index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
200
dot_config/private_Code/User/History/6c11eec7/EkUx.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/EkUx.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 40
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a 'Yes' probability into the range [floor, ceiling].

    Args:
        prob: Raw probability, which may fall outside the allowed range.
        ceiling: Largest value that may be returned.
        floor: Smallest value returned while ``floor <= ceiling``. Defaults to
            0.05, the value that used to be hard-coded.

    Returns:
        ``prob`` limited to ``[floor, ceiling]``. The ceiling is applied last,
        so it wins if ``ceiling < floor``.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention group.

    Each of the N_PARTICIPANTS_PER_GROUP participants gets a persistent
    organisation tendency, a per-habit ease and a happiness baseline. Every
    day in DAYS then yields three Yes/No adherence draws and one happiness
    score built from the baseline, today's habits, an accumulated habit
    strength and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    # Per habit, in column order (calendar, cleanliness, punctuality):
    # mean offset of the personal ease, momentum bonus, probability ceiling.
    habit_specs = (
        (0.05, 0.15, 0.95),
        (-0.02, 0.15, 0.90),
        (0.02, 0.12, 0.93),
    )
    # Change in accumulated habit strength by number of habits done that day.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + idx

        # Persistent traits, drawn once per participant.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        eases = [tendency + np.random.normal(shift, 0.08) for shift, _, _ in habit_specs]
        baseline = np.random.normal(4.0, 1.0)

        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends (lower adherence).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence multiplier by study phase: hard start, momentum, mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # A habit done yesterday adds its momentum bonus to today's probability.
            today = []
            for ease, done_before, (_, momentum, ceiling) in zip(eases, yesterday, habit_specs):
                p_yes = clip_yes_prob(
                    ease * week_difficulty * time_factor
                    + (momentum if done_before == 'Yes' else 0),
                    ceiling,
                )
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            adherence_count = today.count('Yes')
            # Accumulated strength is kept within [0, 5].
            strength = np.clip(strength + strength_step[adherence_count], 0, 5)

            # Noise is drawn after the habit draws to keep the random stream order.
            noise = np.random.normal(0, 0.35)
            todays_bonus = adherence_count * 0.6  # 0 to 1.8
            # The weight on strength rises with the day up to 0.6, so this
            # bonus is at most 5 * 0.6 = 3.0.
            accumulated_bonus = strength * (0.4 + (day / 30.0) * 0.2)

            raw_score = baseline + todays_bonus + accumulated_bonus + noise
            happiness = int(np.clip(np.round(raw_score), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the untracked control group.

    Habit columns are always reported as 'No'. Simulated untracked habits only
    add a small boost (0.1 per habit) to that day's happiness, and there is no
    carry-over between days.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` with ``happiness`` an int in 1..10.
    """
    # (scale applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent traits, drawn once per participant.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(4.9, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9
            # Very weak linear drift: 1.001 on day 1, 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_habits:
                # Each probability is computed once and reused for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # 0 to 0.3
            daily_noise = np.random.normal(0, 1.0)

            # Participant baseline + small habit boost + day-to-day noise.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])

    return rows
|
||||
|
||||
|
||||
# Build the full dataset: intervention participants first (IDs from 1), then
# control participants numbered directly after them.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

df = pd.DataFrame(
    data=data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined table to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
200
dot_config/private_Code/User/History/6c11eec7/MJ5p.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/MJ5p.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Fix NumPy's global seed so every run reproduces exactly the same dataset.
np.random.seed(42)

# Study design: participants per group, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention group.

    Each of the N_PARTICIPANTS_PER_GROUP participants gets a persistent
    organisation tendency, a per-habit ease and a happiness baseline. Every
    day in DAYS then yields three Yes/No adherence draws and one happiness
    score built from the baseline, today's habits, an accumulated habit
    strength and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    # Per habit, in column order (calendar, cleanliness, punctuality):
    # mean offset of the personal ease, momentum bonus, probability ceiling.
    habit_specs = (
        (0.05, 0.15, 0.95),
        (-0.02, 0.15, 0.90),
        (0.02, 0.12, 0.93),
    )
    # Change in accumulated habit strength by number of habits done that day.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + idx

        # Persistent traits, drawn once per participant.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        eases = [tendency + np.random.normal(shift, 0.08) for shift, _, _ in habit_specs]
        baseline = np.random.normal(4.0, 1.0)

        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends (lower adherence).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence multiplier by study phase: hard start, momentum, mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # A habit done yesterday adds its momentum bonus to today's probability.
            today = []
            for ease, done_before, (_, momentum, ceiling) in zip(eases, yesterday, habit_specs):
                p_yes = clip_yes_prob(
                    ease * week_difficulty * time_factor
                    + (momentum if done_before == 'Yes' else 0),
                    ceiling,
                )
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            adherence_count = today.count('Yes')
            # Accumulated strength is kept within [0, 5].
            strength = np.clip(strength + strength_step[adherence_count], 0, 5)

            # Noise is drawn after the habit draws to keep the random stream order.
            noise = np.random.normal(0, 0.35)
            todays_bonus = adherence_count * 0.6  # 0 to 1.8
            # The weight on strength rises with the day up to 0.6, so this
            # bonus is at most 5 * 0.6 = 3.0.
            accumulated_bonus = strength * (0.4 + (day / 30.0) * 0.2)

            raw_score = baseline + todays_bonus + accumulated_bonus + noise
            happiness = int(np.clip(np.round(raw_score), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the untracked control group.

    Habit columns are always reported as 'No'. Simulated untracked habits only
    add a small boost (0.1 per habit) to that day's happiness, and there is no
    carry-over between days.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` with ``happiness`` an int in 1..10.
    """
    # (scale applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent traits, drawn once per participant.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9
            # Very weak linear drift: 1.001 on day 1, 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_habits:
                # Each probability is computed once and reused for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # 0 to 0.3
            daily_noise = np.random.normal(0, 1.2)

            # Participant baseline + small habit boost + day-to-day noise.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])

    return rows
|
||||
|
||||
|
||||
# Build the full dataset: intervention participants first (IDs from 1), then
# control participants numbered directly after them.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

df = pd.DataFrame(
    data=data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined table to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
92
dot_config/private_Code/User/History/6c11eec7/PpFf.py
Normal file
92
dot_config/private_Code/User/History/6c11eec7/PpFf.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Fix NumPy's global seed so every run reproduces exactly the same dataset.
np.random.seed(42)

# Study design: participants per group, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention group.

    Each participant gets one organisation tendency that fixes the Yes
    probability of the three habits for the whole study. Daily happiness is a
    normal draw centred on a fresh daily baseline plus 1.1 per completed habit.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset
        org_bias = np.random.normal(0.7, 0.15)  # personal organisation tendency

        # The probabilities depend only on org_bias, so compute them once per
        # participant instead of twice per habit on every day.
        yes_probs = (
            clip_yes_prob(org_bias + 0.1, 0.95),   # calendar
            clip_yes_prob(org_bias, 0.90),         # cleanliness
            clip_yes_prob(org_bias + 0.05, 0.92),  # punctuality
        )

        for day in DAYS:
            calendar, clean, ontime = [
                np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) for p_yes in yes_probs
            ]

            adherence_count = sum(x == 'Yes' for x in (calendar, clean, ontime))
            baseline_happiness = np.random.normal(5.5, 1.0)
            # NOTE: int() truncates toward zero after clipping; it does not round.
            happiness = int(
                np.clip(np.random.normal(baseline_happiness + adherence_count * 1.1, 1.2), 1, 10)
            )

            rows.append([participant_id, 'Intervention', day, calendar, clean, ontime, happiness])

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the control group.

    Control participants record happiness only, so all three habit columns
    are the constant 'No'.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` with ``happiness`` an int in 1..10.
    """
    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + idx

        for day in DAYS:
            # Two independent draws per day: a baseline and extra noise.
            base = np.random.normal(5.5, 1.0)
            jitter = np.random.normal(0.0, 1.1)
            # int() truncates the clipped value toward zero.
            happiness = int(np.clip(base + jitter, 1, 10))

            records.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])

    return records
|
||||
|
||||
|
||||
# Build the full dataset: intervention participants first (IDs from 1), then
# control participants numbered directly after them.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

df = pd.DataFrame(
    data=data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined table to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
200
dot_config/private_Code/User/History/6c11eec7/QTk6.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/QTk6.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Fix NumPy's global seed so every run reproduces exactly the same dataset.
np.random.seed(43)

# Study design: participants per group, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention group.

    Each of the N_PARTICIPANTS_PER_GROUP participants gets a persistent
    organisation tendency, a per-habit ease and a happiness baseline. Every
    day in DAYS then yields three Yes/No adherence draws and one happiness
    score built from the baseline, today's habits, an accumulated habit
    strength and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    # Per habit, in column order (calendar, cleanliness, punctuality):
    # mean offset of the personal ease, momentum bonus, probability ceiling.
    habit_specs = (
        (0.05, 0.15, 0.95),
        (-0.02, 0.15, 0.90),
        (0.02, 0.12, 0.93),
    )
    # Change in accumulated habit strength by number of habits done that day.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + idx

        # Persistent traits, drawn once per participant.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        eases = [tendency + np.random.normal(shift, 0.08) for shift, _, _ in habit_specs]
        baseline = np.random.normal(4.0, 1.0)

        strength = 0.0
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends (lower adherence).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence multiplier by study phase: hard start, momentum, mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # A habit done yesterday adds its momentum bonus to today's probability.
            today = []
            for ease, done_before, (_, momentum, ceiling) in zip(eases, yesterday, habit_specs):
                p_yes = clip_yes_prob(
                    ease * week_difficulty * time_factor
                    + (momentum if done_before == 'Yes' else 0),
                    ceiling,
                )
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            adherence_count = today.count('Yes')
            # Accumulated strength is kept within [0, 5].
            strength = np.clip(strength + strength_step[adherence_count], 0, 5)

            # Noise is drawn after the habit draws to keep the random stream order.
            noise = np.random.normal(0, 0.35)
            todays_bonus = adherence_count * 0.6  # 0 to 1.8
            # The weight on strength rises with the day up to 0.6, so this
            # bonus is at most 5 * 0.6 = 3.0.
            accumulated_bonus = strength * (0.4 + (day / 30.0) * 0.2)

            raw_score = baseline + todays_bonus + accumulated_bonus + noise
            happiness = int(np.clip(np.round(raw_score), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness records for the untracked control group.

    Habit columns are always reported as 'No'. Simulated untracked habits only
    add a small boost (0.1 per habit) to that day's happiness, and there is no
    carry-over between days.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` with ``happiness`` an int in 1..10.
    """
    # (scale applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in that order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent traits, drawn once per participant.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9
            # Very weak linear drift: 1.001 on day 1, 1.03 on day 30.
            time_factor = 1.0 + (day / 100) * 0.1

            untracked_count = 0
            for scale, ceiling in untracked_habits:
                # Each probability is computed once and reused for both outcomes.
                p_yes = clip_yes_prob(natural_org * scale * week_factor * time_factor, ceiling)
                if np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]) == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # 0 to 0.3
            daily_noise = np.random.normal(0, 1.0)

            # Participant baseline + small habit boost + day-to-day noise.
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([participant_id, 'Control', day, 'No', 'No', 'No', happiness])

    return rows
|
||||
|
||||
|
||||
# Build the full dataset: intervention participants first (IDs from 1), then
# control participants numbered directly after them.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

df = pd.DataFrame(
    data=data,
    columns=[
        'Participant_ID', 'Group', 'Day',
        'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
        'Happiness',
    ],
)

# Write the combined table to the working directory without the index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # preview of the first 10 rows
|
||||
180
dot_config/private_Code/User/History/6c11eec7/WSl3.py
Normal file
180
dot_config/private_Code/User/History/6c11eec7/WSl3.py
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Fix NumPy's global seed so every run reproduces exactly the same dataset.
np.random.seed(42)

# Study design: participants per group, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and then to at most *ceiling*.

    The ceiling is applied last, so a ceiling below 0.05 wins over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit and happiness records for the intervention group.

    Each participant gets a persistent organisation tendency, a per-habit ease
    and a happiness baseline. Daily happiness is autoregressive: it mixes the
    previous day's value, the baseline, a boost from today's habits and noise.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of rows ``[participant_id, 'Intervention', day, calendar,
        clean, ontime, happiness]`` with ``happiness`` an int in 1..10.
    """
    # Per habit, in column order (calendar, cleanliness, punctuality):
    # mean offset of the personal ease, momentum bonus, probability ceiling.
    habit_specs = (
        (0.05, 0.15, 0.95),
        (-0.02, 0.15, 0.90),
        (0.02, 0.12, 0.93),
    )

    records = []
    for idx in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + idx

        # Persistent traits, drawn once per participant.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)
        eases = [tendency + np.random.normal(shift, 0.08) for shift, _, _ in habit_specs]
        baseline = np.random.normal(5.5, 1.2)

        mood = baseline  # running happiness carried from day to day
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekends (lower adherence).
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Adherence multiplier by study phase: hard start, momentum, mild fatigue.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # A habit done yesterday adds its momentum bonus to today's probability.
            today = []
            for ease, done_before, (_, momentum, ceiling) in zip(eases, yesterday, habit_specs):
                p_yes = clip_yes_prob(
                    ease * week_difficulty * time_factor
                    + (momentum if done_before == 'Yes' else 0),
                    ceiling,
                )
                today.append(np.random.choice(['Yes', 'No'], p=[p_yes, 1 - p_yes]))

            adherence_count = today.count('Yes')
            habit_boost = adherence_count * 1.2  # zero when no habit was done

            # Noise is drawn after the habit draws to keep the random stream order.
            noise = np.random.normal(0, 1.3)
            # 40% yesterday's value, 40% baseline, plus 0.9 of the habit boost.
            mood = np.clip(mood * 0.4 + baseline * 0.4 + habit_boost * 0.9 + noise, 1, 10)
            happiness = int(np.round(mood))

            records.append([participant_id, 'Intervention', day, *today, happiness])
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate the control cohort and return one row per participant-day.

    Control participants do not track habits, so every adherence column is
    written as 'No'.  Habits still occur "behind the scenes" with a low
    probability and give a small happiness boost; happiness itself is a
    blend of yesterday's value and the participant's baseline plus noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` (defined elsewhere in the module) and NumPy's global
    random state.  The number and order of random draws is unchanged from
    the previous implementation, so seeded output is identical.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # (multiplier on natural_org, ceiling on the "Yes" probability) for the
    # untracked calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)

        # Per-person happiness baseline (mean 5.0, sd 1.3); day 0 starts there.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the weekend.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9

            # Very weak habituation: grows by 0.001 per day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits happen but are never reported; only their
            # count matters.  Each probability is computed once and reused.
            untracked_count = 0
            for org_scale, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * org_scale * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Untracked habits still affect happiness, but only subtly.
            subtle_boost = untracked_count * 0.5

            # Higher day-to-day variability than the tracked group.
            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Build the combined dataset: intervention participants take IDs starting at 1
# and control participants continue from N_PARTICIPANTS_PER_GROUP + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant-day; column order matches the row layout produced
# by the two generator functions.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Save the combined dataset (no index column) and show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows
|
||||
197
dot_config/private_Code/User/History/6c11eec7/bLJN.py
Normal file
197
dot_config/private_Code/User/History/6c11eec7/bLJN.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 40
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it is usable with ``np.random.choice``.

    Args:
        prob: Unclamped probability; may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned (default 0.05, the value
            that was previously hard-coded).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``.  If ``ceiling`` is below
        ``floor`` the ceiling wins, as before.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate the intervention cohort and return one row per participant-day.

    Every participant gets a persistent organisation tendency, a per-habit
    ease derived from it, and a happiness baseline.  Each day the three
    habits (calendar, cleanliness, punctuality) are drawn as 'Yes'/'No', a
    cumulative ``habit_strength`` is updated from how many were completed,
    and happiness is the baseline plus a habit term that grows with both
    ``habit_strength`` and the day number, plus small noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # Daily change in habit_strength, keyed by number of habits completed.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per habit (calendar, clean, on-time): bonus added to the probability
    # when the habit was done yesterday, and the probability ceiling.
    momentum_bonus = (0.15, 0.15, 0.12)
    prob_ceiling = (0.95, 0.90, 0.93)

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency for this person.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / clean / on-time order.
        habit_ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )

        # Happiness baseline (mean 4.0, sd 1.0) and accumulated habit strength.
        baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0

        # Nobody has a streak before day one.
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the (harder) weekend.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder start, easier middle, slight fatigue from day 20 on.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # Draw today's outcome for each habit, with a momentum bonus when
            # the same habit was completed yesterday.
            today = []
            for ease, bonus, ceiling, previous in zip(
                habit_ease, momentum_bonus, prob_ceiling, yesterday
            ):
                carry_over = bonus if previous == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            completed = sum(1 for outcome in today if outcome == 'Yes')

            # Strength accumulates with completion, decays on a fully missed
            # day, and is kept within 0..5.
            habit_strength = np.clip(habit_strength + strength_step[completed], 0, 5)

            # The habit multiplier rises from 0.9 towards 1.2 as day / 30
            # approaches 1, so the same strength is worth more later on.
            progress = day / 30.0
            noise = np.random.normal(0, 0.4)
            raw_happiness = baseline + habit_strength * (0.9 + progress * 0.3) + noise
            happiness = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])

            # Today's outcomes drive tomorrow's momentum bonus.
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate the control cohort and return one row per participant-day.

    Control participants do not track habits, so every adherence column is
    written as 'No'.  Habits still occur occasionally and add a tiny boost,
    but happiness has no cumulative component: each day is the participant's
    baseline plus that boost plus noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.  The number and order
    of random draws is unchanged from the previous implementation, so seeded
    output is identical.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # (multiplier on natural_org, ceiling on the "Yes" probability) for the
    # untracked calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)

        # Same baseline distribution as the intervention group (no advantage).
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the weekend.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9

            # Very weak habituation: grows by 0.001 per day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits happen but are never reported; only their
            # count matters.  Each probability is computed once and reused.
            untracked_count = 0
            for org_scale, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * org_scale * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Tiny effect since the habits are unaware/untracked.
            subtle_boost = untracked_count * 0.1

            # Day-to-day variability only; there is no systematic growth term.
            daily_noise = np.random.normal(0, 1.2)

            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Build the combined dataset: intervention participants take IDs starting at 1
# and control participants continue from N_PARTICIPANTS_PER_GROUP + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant-day; column order matches the row layout produced
# by the two generator functions.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Save the combined dataset (no index column) and show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows
|
||||
197
dot_config/private_Code/User/History/6c11eec7/blt8.py
Normal file
197
dot_config/private_Code/User/History/6c11eec7/blt8.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(64) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 20
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it is usable with ``np.random.choice``.

    Args:
        prob: Unclamped probability; may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned (default 0.05, the value
            that was previously hard-coded).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``.  If ``ceiling`` is below
        ``floor`` the ceiling wins, as before.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate the intervention cohort and return one row per participant-day.

    Every participant gets a persistent organisation tendency, a per-habit
    ease derived from it, and a happiness baseline.  Each day the three
    habits (calendar, cleanliness, punctuality) are drawn as 'Yes'/'No', a
    cumulative ``habit_strength`` is updated from how many were completed,
    and happiness is the baseline plus a habit term that grows with both
    ``habit_strength`` and the day number, plus noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # Daily change in habit_strength, keyed by number of habits completed.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per habit (calendar, clean, on-time): bonus added to the probability
    # when the habit was done yesterday, and the probability ceiling.
    momentum_bonus = (0.15, 0.15, 0.12)
    prob_ceiling = (0.95, 0.90, 0.93)

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency for this person.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / clean / on-time order.
        habit_ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )

        # Happiness baseline (mean 4.8, sd 1.1) and accumulated habit strength.
        baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0

        # Nobody has a streak before day one.
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the (harder) weekend.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder start, easier middle, slight fatigue from day 20 on.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # Draw today's outcome for each habit, with a momentum bonus when
            # the same habit was completed yesterday.
            today = []
            for ease, bonus, ceiling, previous in zip(
                habit_ease, momentum_bonus, prob_ceiling, yesterday
            ):
                carry_over = bonus if previous == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            completed = sum(1 for outcome in today if outcome == 'Yes')

            # Strength accumulates with completion, decays on a fully missed
            # day, and is kept within 0..4.
            habit_strength = np.clip(habit_strength + strength_step[completed], 0, 4)

            # The habit multiplier is 0.5 + day / 30, so the same strength is
            # worth more later in the study.
            progress = day / 30.0
            noise = np.random.normal(0, 0.7)
            raw_happiness = baseline + habit_strength * (0.5 + progress) + noise
            happiness = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])

            # Today's outcomes drive tomorrow's momentum bonus.
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate the control cohort and return one row per participant-day.

    Control participants do not track habits, so every adherence column is
    written as 'No'.  Habits still occur "behind the scenes" with a low
    probability and give a small happiness boost; happiness itself is a
    blend of yesterday's value and the participant's baseline plus noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.  The number and order
    of random draws is unchanged from the previous implementation, so seeded
    output is identical.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # (multiplier on natural_org, ceiling on the "Yes" probability) for the
    # untracked calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Even without tracking, some people are naturally more organized.
        natural_org = np.clip(np.random.normal(0.3, 0.15), 0.05, 0.7)

        # Per-person happiness baseline (mean 5.0, sd 1.3); day 0 starts there.
        person_happiness_baseline = np.random.normal(5.0, 1.3)
        current_happiness = person_happiness_baseline

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the weekend.
            week_factor = 1.0 if (day % 7) not in (0, 6) else 0.9

            # Very weak habituation: grows by 0.001 per day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Untracked habits happen but are never reported; only their
            # count matters.  Each probability is computed once and reused.
            untracked_count = 0
            for org_scale, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * org_scale * week_factor * time_factor, ceiling
                )
                outcome = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if outcome == 'Yes':
                    untracked_count += 1

            # Untracked habits still affect happiness, but only subtly.
            subtle_boost = untracked_count * 0.5

            # Higher day-to-day variability than the tracked group.
            happiness_noise = np.random.normal(0, 1.6)
            current_happiness = np.clip(
                current_happiness * 0.5
                + person_happiness_baseline * 0.5
                + subtle_boost
                + happiness_noise,
                1, 10
            )
            happiness = int(np.round(current_happiness))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Build the combined dataset: intervention participants take IDs starting at 1
# and control participants continue from N_PARTICIPANTS_PER_GROUP + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant-day; column order matches the row layout produced
# by the two generator functions.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Save the combined dataset (no index column) and show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows
|
||||
144
dot_config/private_Code/User/History/6c11eec7/cTNf.py
Normal file
144
dot_config/private_Code/User/History/6c11eec7/cTNf.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 20
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it is usable with ``np.random.choice``.

    Args:
        prob: Unclamped probability; may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned (default 0.05, the value
            that was previously hard-coded).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``.  If ``ceiling`` is below
        ``floor`` the ceiling wins, as before.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate the intervention cohort and return one row per participant-day.

    Every participant gets a persistent organisation tendency, a per-habit
    ease derived from it, and a happiness baseline.  Each day the three
    habits (calendar, cleanliness, punctuality) are drawn as 'Yes'/'No' and
    happiness is updated as 0.4 * yesterday's happiness + 0.4 * baseline +
    a boost proportional to the number of habits completed + noise, clipped
    to 1..10.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # Per habit (calendar, clean, on-time): bonus added to the probability
    # when the habit was done yesterday, and the probability ceiling.
    momentum_bonus = (0.15, 0.15, 0.12)
    prob_ceiling = (0.95, 0.90, 0.93)

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency for this person.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / clean / on-time order.
        habit_ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )

        # Happiness baseline (mean 5.5, sd 1.2); the running value starts there.
        baseline = np.random.normal(5.5, 1.2)
        running_happiness = baseline

        # Nobody has a streak before day one.
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the (harder) weekend.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder start, easier middle, slight fatigue from day 20 on.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # Draw today's outcome for each habit, with a momentum bonus when
            # the same habit was completed yesterday.
            today = []
            for ease, bonus, ceiling, previous in zip(
                habit_ease, momentum_bonus, prob_ceiling, yesterday
            ):
                carry_over = bonus if previous == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            # Each completed habit contributes 1.2 to the boost (0 when none).
            completed = sum(1 for outcome in today if outcome == 'Yes')
            habit_boost = completed * 1.2

            # Happiness persists from the previous day but is pulled by the
            # baseline and pushed up by today's habits.
            noise = np.random.normal(0, 1.3)
            running_happiness = np.clip(
                running_happiness * 0.4 + baseline * 0.4 + habit_boost * 0.9 + noise,
                1, 10
            )
            happiness = int(np.round(running_happiness))

            records.append([participant_id, 'Intervention', day, *today, happiness])

            # Today's outcomes drive tomorrow's momentum bonus.
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate the control cohort and return one row per participant-day.

    The control group only records happiness, so all three adherence columns
    are written as 'No'.  Happiness is drawn independently for every day as
    a normal(5.5, 1.0) value plus normal(0, 1.1) noise, clipped to 1..10.

    The score is rounded to the nearest integer, matching how the
    intervention group's score is converted.  Truncating with ``int()``
    alone always rounds down and would lower the control mean by roughly
    half a point relative to the intervention group.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP`` and ``DAYS`` and
    NumPy's global random state.

    Args:
        start_participant_id: ID given to the first control participant;
            later participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Control', day, 'No', 'No', 'No',
        happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        for day in DAYS:
            # NOTE(review): the "baseline" is re-drawn every day rather than
            # once per participant, so there is no per-person persistence.
            baseline_happiness = np.random.normal(5.5, 1.0)
            control_noise = np.random.normal(0.0, 1.1)

            # Clip to the 1..10 scale, then round (not truncate) to an int.
            happiness = int(np.round(np.clip(baseline_happiness + control_noise, 1, 10)))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # calendar: control group does not track habits
                'No',  # cleanliness
                'No',  # punctuality
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Build the combined dataset: intervention participants take IDs starting at 1
# and control participants continue from N_PARTICIPANTS_PER_GROUP + 1.
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# One row per participant-day; column order matches the row layout produced
# by the two generator functions.
df = pd.DataFrame(data, columns=[
    'Participant_ID', 'Group', 'Day',
    'Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence',
    'Happiness',
])

# Save the combined dataset (no index column) and show a short preview.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")
print(df.head(10))  # first 10 rows
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/Data%20Gen.py","entries":[{"id":"54EK.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345349390},{"id":"PpFf.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345378739},{"id":"cTNf.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347044805},{"id":"WSl3.py","source":"Chat Edit: 'improve data gen to create more natural data'","timestamp":1774347057825},{"id":"9dqp.py","timestamp":1774347206509},{"id":"blt8.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347345483},{"id":"3jGE.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? 
at the moment, the intervention group is happier after a single day.'","timestamp":1774347365731},{"id":"sBVR.py","source":"Chat Edit: 'can you ensure the data shows an upward trend in happiness as the study goes on, and in direct correlation with the habits completed by that participant? at the moment, the intervention group is happier after a single day.'","timestamp":1774347432858},{"id":"j9Wc.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347711480},{"id":"bLJN.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347763541},{"id":"MJ5p.py","source":"Chat Edit: 'the happiness results after 30 days seem a little too high, and this dose-response graph shows low happiness with all 3 completed. unusual. you could also increase participants to 40 control 40 intervention'","timestamp":1774347783690},{"id":"EkUx.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347933805},{"id":"u91r.py","source":"Chat Edit: 'control mean is too low. realistically the control group should be around an average of 5.'","timestamp":1774347955983},{"id":"QTk6.py","timestamp":1774348022105},{"id":"o2Y7.py","timestamp":1774348397371},{"id":"46oA.py","timestamp":1774352345991}]}
|
||||
197
dot_config/private_Code/User/History/6c11eec7/j9Wc.py
Normal file
197
dot_config/private_Code/User/History/6c11eec7/j9Wc.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
np.random.seed(42) # ensures you get exactly the same data every time
|
||||
|
||||
|
||||
N_PARTICIPANTS_PER_GROUP = 40
|
||||
DAYS = list(range(1, 31))
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling, floor=0.05):
    """Clamp a raw "Yes" probability so it is usable with ``np.random.choice``.

    Args:
        prob: Unclamped probability; may fall below 0 or above 1.
        ceiling: Largest value that may be returned.
        floor: Smallest value that may be returned (default 0.05, the value
            that was previously hard-coded).

    Returns:
        ``prob`` limited to ``[floor, ceiling]``.  If ``ceiling`` is below
        ``floor`` the ceiling wins, as before.
    """
    return min(ceiling, max(floor, prob))
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate the intervention cohort and return one row per participant-day.

    Every participant gets a persistent organisation tendency, a per-habit
    ease derived from it, and a happiness baseline.  Each day the three
    habits (calendar, cleanliness, punctuality) are drawn as 'Yes'/'No', a
    cumulative ``habit_strength`` is updated from how many were completed,
    and happiness is the baseline plus a habit term that grows with both
    ``habit_strength`` and the day number, plus noise.

    Uses the module-level ``N_PARTICIPANTS_PER_GROUP``, ``DAYS`` and
    ``clip_yes_prob`` and NumPy's global random state.

    Args:
        start_participant_id: ID given to the first participant; later
            participants are numbered consecutively.

    Returns:
        A list of ``[participant_id, 'Intervention', day, calendar, clean,
        ontime, happiness]`` rows, where ``happiness`` is an int in 1..10.
    """
    # Daily change in habit_strength, keyed by number of habits completed.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per habit (calendar, clean, on-time): bonus added to the probability
    # when the habit was done yesterday, and the probability ceiling.
    momentum_bonus = (0.15, 0.15, 0.12)
    prob_ceiling = (0.95, 0.90, 0.93)

    records = []
    for index in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + index

        # Persistent organisation tendency for this person.
        tendency = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Per-habit ease, drawn in calendar / clean / on-time order.
        habit_ease = (
            tendency + np.random.normal(0.05, 0.08),
            tendency + np.random.normal(-0.02, 0.08),
            tendency + np.random.normal(0.02, 0.08),
        )

        # Happiness baseline (mean 4.0, sd 1.0) and accumulated habit strength.
        baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0

        # Nobody has a streak before day one.
        yesterday = ['No', 'No', 'No']

        for day in DAYS:
            # Days with day % 7 in {6, 0} are treated as the (harder) weekend.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Harder start, easier middle, slight fatigue from day 20 on.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # Draw today's outcome for each habit, with a momentum bonus when
            # the same habit was completed yesterday.
            today = []
            for ease, bonus, ceiling, previous in zip(
                habit_ease, momentum_bonus, prob_ceiling, yesterday
            ):
                carry_over = bonus if previous == 'Yes' else 0
                yes_prob = clip_yes_prob(
                    ease * week_difficulty * time_factor + carry_over, ceiling
                )
                today.append(np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob]))

            completed = sum(1 for outcome in today if outcome == 'Yes')

            # Strength accumulates with completion, decays on a fully missed
            # day, and is kept within 0..3.
            habit_strength = np.clip(habit_strength + strength_step[completed], 0, 3)

            # The habit multiplier rises from 0.4 towards 0.9 as day / 30
            # approaches 1; with strength capped at 3 the habit term is at
            # most 3 * 0.9 = 2.7.
            progress = day / 30.0
            noise = np.random.normal(0, 0.5)
            raw_happiness = baseline + habit_strength * (0.4 + progress * 0.5) + noise
            happiness = int(np.clip(np.round(raw_happiness), 1, 10))

            records.append([participant_id, 'Intervention', day, *today, happiness])

            # Today's outcomes drive tomorrow's momentum bonus.
            yesterday = today

    return records
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, three untracked habits
    are drawn as Yes/No. They are never reported: every adherence column in
    the output is the literal ``'No'``. Each untracked "Yes" adds 0.1 to that
    day's happiness.

    Returns:
        A list of rows
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int clipped to 1-10.

    Draws come from NumPy's global RNG in a fixed order (per participant:
    organisation, baseline; per day: three habit draws, then noise), so the
    output depends on the RNG state at call time.
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept within [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Per-person happiness baseline (mean 4.0, sd 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9

            # Very weak upward drift: +0.1% per study day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once, then draw; count the "Yes" draws.
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                drawn = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if drawn == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # at most 0.3 per day

            # No cumulative term: only baseline, the small boost, and noise.
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Combined table: intervention rows first (IDs from 1), then control rows
# (IDs continuing from N_PARTICIPANTS_PER_GROUP + 1).
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# Column order matches the row layout produced by both generator functions.
column_names = [
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
]
df = pd.DataFrame(data, columns=column_names)

# Write the dataset (relative to the current working directory), no index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")

# Preview the first 10 rows.
print(df.head(10))
|
||||
200
dot_config/private_Code/User/History/6c11eec7/o2Y7.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/o2Y7.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Seed NumPy's global RNG so repeated runs regenerate an identical dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and at most *ceiling*.

    The 0.05 floor is applied first and the ceiling second, so a ceiling
    below 0.05 takes precedence over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, draws a Yes/No outcome
    for the calendar, cleanliness and punctuality habits, then derives an
    integer happiness score clipped to 1-10 from a per-person baseline, a
    same-day habit bonus, a cumulative habit-strength bonus and noise.

    Returns:
        A list of rows
        ``[participant_id, 'Intervention', day, calendar, clean, ontime, happiness]``.

    Draws come from NumPy's global RNG, so the output depends on its state.
    """
    answers = ['Yes', 'No']
    # Change in habit_strength keyed by how many of the three habits were done.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per-habit settings in calendar / clean / on-time order.
    ease_shift = (0.05, -0.02, 0.02)      # mean offset from org_bias
    momentum_bonus = (0.15, 0.15, 0.12)   # added if the habit was done yesterday
    prob_ceiling = (0.95, 0.90, 0.93)     # upper bound on the "Yes" probability

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept within [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Habit-specific ease values, drawn in calendar / clean / on-time order.
        ease = tuple(org_bias + np.random.normal(shift, 0.08) for shift in ease_shift)

        # Per-person happiness baseline (mean 4.0, sd 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0  # running total, clipped to [0, 5] each day

        # Yesterday's outcomes; nobody starts with momentum.
        previous = ('No', 'No', 'No')

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Days 1-6 are harder, days 7-19 easier, day 20 onward slightly harder.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # All three probabilities are computed before any draw is made.
            yes_probs = [
                clip_yes_prob(
                    habit_ease * week_difficulty * time_factor
                    + (bonus if last == 'Yes' else 0),
                    cap,
                )
                for habit_ease, bonus, cap, last in zip(
                    ease, momentum_bonus, prob_ceiling, previous
                )
            ]
            today = [
                np.random.choice(answers, p=[p_yes, 1 - p_yes]) for p_yes in yes_probs
            ]
            calendar, clean, ontime = today

            adherence_count = sum(answer == 'Yes' for answer in today)

            # Strength grows with completed habits and decays when none are done.
            habit_strength = np.clip(
                habit_strength + strength_step[int(adherence_count)], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)

            # Same-day effect: 0.6 per completed habit (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6

            # Cumulative effect: multiplier rises from ~0.41 to 0.6, so with
            # habit_strength capped at 5 this term is at most 3.0.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)

            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )

            # Today's outcomes feed tomorrow's momentum bonus.
            previous = (calendar, clean, ontime)

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, three untracked habits
    are drawn as Yes/No. They are never reported: every adherence column in
    the output is the literal ``'No'``. Each untracked "Yes" adds 0.1 to that
    day's happiness.

    Returns:
        A list of rows
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int clipped to 1-10.

    Draws come from NumPy's global RNG in a fixed order (per participant:
    organisation, baseline; per day: three habit draws, then noise), so the
    output depends on the RNG state at call time.
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept within [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Per-person happiness baseline, centred on 5.1 (sd 0.9).
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9

            # Very weak upward drift: +0.1% per study day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once, then draw; count the "Yes" draws.
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                drawn = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if drawn == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # at most 0.3 per day

            # No cumulative term: only baseline, the small boost, and noise.
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Combined table: intervention rows first (IDs from 1), then control rows
# (IDs continuing from N_PARTICIPANTS_PER_GROUP + 1).
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# Column order matches the row layout produced by both generator functions.
column_names = [
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
]
df = pd.DataFrame(data, columns=column_names)

# Write the dataset (relative to the current working directory), no index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")

# Preview the first 10 rows.
print(df.head(10))
|
||||
196
dot_config/private_Code/User/History/6c11eec7/sBVR.py
Normal file
196
dot_config/private_Code/User/History/6c11eec7/sBVR.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Seed NumPy's global RNG so repeated runs regenerate an identical dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 20
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and at most *ceiling*.

    The 0.05 floor is applied first and the ceiling second, so a ceiling
    below 0.05 takes precedence over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, draws a Yes/No outcome
    for the calendar, cleanliness and punctuality habits, then derives an
    integer happiness score clipped to 1-10 from a per-person baseline, a
    cumulative habit-strength term and noise.

    Returns:
        A list of rows
        ``[participant_id, 'Intervention', day, calendar, clean, ontime, happiness]``.

    Draws come from NumPy's global RNG, so the output depends on its state.
    """
    answers = ['Yes', 'No']
    # Change in habit_strength keyed by how many of the three habits were done.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per-habit settings in calendar / clean / on-time order.
    ease_shift = (0.05, -0.02, 0.02)      # mean offset from org_bias
    momentum_bonus = (0.15, 0.15, 0.12)   # added if the habit was done yesterday
    prob_ceiling = (0.95, 0.90, 0.93)     # upper bound on the "Yes" probability

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept within [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Habit-specific ease values, drawn in calendar / clean / on-time order.
        ease = tuple(org_bias + np.random.normal(shift, 0.08) for shift in ease_shift)

        # Per-person happiness baseline (mean 4.8, sd 1.1).
        person_happiness_baseline = np.random.normal(4.8, 1.1)
        habit_strength = 0.0  # running total, clipped to [0, 4] each day

        # Yesterday's outcomes; nobody starts with momentum.
        previous = ('No', 'No', 'No')

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Days 1-6 are harder, days 7-19 easier, day 20 onward slightly harder.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # All three probabilities are computed before any draw is made.
            yes_probs = [
                clip_yes_prob(
                    habit_ease * week_difficulty * time_factor
                    + (bonus if last == 'Yes' else 0),
                    cap,
                )
                for habit_ease, bonus, cap, last in zip(
                    ease, momentum_bonus, prob_ceiling, previous
                )
            ]
            today = [
                np.random.choice(answers, p=[p_yes, 1 - p_yes]) for p_yes in yes_probs
            ]
            calendar, clean, ontime = today

            adherence_count = sum(answer == 'Yes' for answer in today)

            # Strength grows with completed habits and decays when none are done.
            habit_strength = np.clip(
                habit_strength + strength_step[int(adherence_count)], 0, 4
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.5)

            # The habit-strength multiplier rises from ~0.53 to 1.5 over the
            # study, so the same strength is worth more on later days.
            happiness_value = (
                person_happiness_baseline
                + habit_strength * (0.5 + study_progress)
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )

            # Today's outcomes feed tomorrow's momentum bonus.
            previous = (calendar, clean, ontime)

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, three untracked habits
    are drawn as Yes/No. They are never reported: every adherence column in
    the output is the literal ``'No'``. Each untracked "Yes" adds 0.1 to that
    day's happiness.

    Returns:
        A list of rows
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int clipped to 1-10.

    Draws come from NumPy's global RNG in a fixed order (per participant:
    organisation, baseline; per day: three habit draws, then noise), so the
    output depends on the RNG state at call time.
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept within [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Per-person happiness baseline (mean 4.8, sd 1.3).
        person_happiness_baseline = np.random.normal(4.8, 1.3)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9

            # Very weak upward drift: +0.1% per study day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once, then draw; count the "Yes" draws.
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                drawn = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if drawn == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # at most 0.3 per day

            # No cumulative term: only baseline, the small boost, and noise.
            daily_noise = np.random.normal(0, 1.2)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Combined table: intervention rows first (IDs from 1), then control rows
# (IDs continuing from N_PARTICIPANTS_PER_GROUP + 1).
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# Column order matches the row layout produced by both generator functions.
column_names = [
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
]
df = pd.DataFrame(data, columns=column_names)

# Write the dataset (relative to the current working directory), no index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")

# Preview the first 10 rows.
print(df.head(10))
|
||||
200
dot_config/private_Code/User/History/6c11eec7/u91r.py
Normal file
200
dot_config/private_Code/User/History/6c11eec7/u91r.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Seed NumPy's global RNG so repeated runs regenerate an identical dataset.
np.random.seed(42)

# Study design: participants per arm, and the 1-based day numbers 1..30.
N_PARTICIPANTS_PER_GROUP = 40
DAYS = [*range(1, 31)]
|
||||
|
||||
|
||||
def clip_yes_prob(prob, ceiling):
    """Clamp *prob* to at least 0.05 and at most *ceiling*.

    The 0.05 floor is applied first and the ceiling second, so a ceiling
    below 0.05 takes precedence over the floor.
    """
    floored = prob if prob > 0.05 else 0.05
    return floored if floored < ceiling else ceiling
|
||||
|
||||
|
||||
def generate_intervention_group(start_participant_id=1):
    """Simulate daily habit adherence and happiness for the intervention arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, draws a Yes/No outcome
    for the calendar, cleanliness and punctuality habits, then derives an
    integer happiness score clipped to 1-10 from a per-person baseline, a
    same-day habit bonus, a cumulative habit-strength bonus and noise.

    Returns:
        A list of rows
        ``[participant_id, 'Intervention', day, calendar, clean, ontime, happiness]``.

    Draws come from NumPy's global RNG, so the output depends on its state.
    """
    answers = ['Yes', 'No']
    # Change in habit_strength keyed by how many of the three habits were done.
    strength_step = {3: 0.6, 2: 0.35, 1: 0.15, 0: -0.2}
    # Per-habit settings in calendar / clean / on-time order.
    ease_shift = (0.05, -0.02, 0.02)      # mean offset from org_bias
    momentum_bonus = (0.15, 0.15, 0.12)   # added if the habit was done yesterday
    prob_ceiling = (0.95, 0.90, 0.93)     # upper bound on the "Yes" probability

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Persistent per-person organisation tendency, kept within [0.1, 0.95].
        org_bias = np.clip(np.random.normal(0.65, 0.18), 0.1, 0.95)

        # Habit-specific ease values, drawn in calendar / clean / on-time order.
        ease = tuple(org_bias + np.random.normal(shift, 0.08) for shift in ease_shift)

        # Per-person happiness baseline (mean 4.0, sd 1.0).
        person_happiness_baseline = np.random.normal(4.0, 1.0)
        habit_strength = 0.0  # running total, clipped to [0, 5] each day

        # Yesterday's outcomes; nobody starts with momentum.
        previous = ('No', 'No', 'No')

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as harder weekend days.
            week_difficulty = 0.75 if day % 7 in (0, 6) else 1.0

            # Days 1-6 are harder, days 7-19 easier, day 20 onward slightly harder.
            if day >= 20:
                time_factor = 0.98
            elif day >= 7:
                time_factor = 1.1
            else:
                time_factor = 0.85

            # All three probabilities are computed before any draw is made.
            yes_probs = [
                clip_yes_prob(
                    habit_ease * week_difficulty * time_factor
                    + (bonus if last == 'Yes' else 0),
                    cap,
                )
                for habit_ease, bonus, cap, last in zip(
                    ease, momentum_bonus, prob_ceiling, previous
                )
            ]
            today = [
                np.random.choice(answers, p=[p_yes, 1 - p_yes]) for p_yes in yes_probs
            ]
            calendar, clean, ontime = today

            adherence_count = sum(answer == 'Yes' for answer in today)

            # Strength grows with completed habits and decays when none are done.
            habit_strength = np.clip(
                habit_strength + strength_step[int(adherence_count)], 0, 5
            )

            study_progress = day / 30.0  # 1/30 on day 1 up to 1.0 on day 30
            daily_noise = np.random.normal(0, 0.35)

            # Same-day effect: 0.6 per completed habit (0 to 1.8).
            daily_habit_bonus = adherence_count * 0.6

            # Cumulative effect: multiplier rises from ~0.41 to 0.6, so with
            # habit_strength capped at 5 this term is at most 3.0.
            cumulative_bonus = habit_strength * (0.4 + study_progress * 0.2)

            happiness_value = (
                person_happiness_baseline
                + daily_habit_bonus
                + cumulative_bonus
                + daily_noise
            )
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append(
                [participant_id, 'Intervention', day, calendar, clean, ontime, happiness]
            )

            # Today's outcomes feed tomorrow's momentum bonus.
            previous = (calendar, clean, ontime)

    return rows
|
||||
|
||||
|
||||
def generate_control_group(start_participant_id):
    """Simulate daily happiness scores for the control arm.

    For each of ``N_PARTICIPANTS_PER_GROUP`` participants (IDs starting at
    *start_participant_id*) and each day in ``DAYS``, three untracked habits
    are drawn as Yes/No. They are never reported: every adherence column in
    the output is the literal ``'No'``. Each untracked "Yes" adds 0.1 to that
    day's happiness.

    Returns:
        A list of rows
        ``[participant_id, 'Control', day, 'No', 'No', 'No', happiness]``
        where ``happiness`` is an int clipped to 1-10.

    Draws come from NumPy's global RNG in a fixed order (per participant:
    organisation, baseline; per day: three habit draws, then noise), so the
    output depends on the RNG state at call time.
    """
    # (weight applied to natural_org, probability ceiling) for the untracked
    # calendar, cleanliness and punctuality habits, in draw order.
    untracked_habits = ((0.8, 0.4), (0.75, 0.35), (0.85, 0.45))

    rows = []
    for offset in range(N_PARTICIPANTS_PER_GROUP):
        participant_id = start_participant_id + offset

        # Per-person organisation tendency, kept within [0.05, 0.7].
        natural_org = np.random.normal(0.3, 0.15)
        natural_org = np.clip(natural_org, 0.05, 0.7)

        # Per-person happiness baseline, centred on 5.1 (sd 0.9).
        person_happiness_baseline = np.random.normal(5.1, 0.9)

        for day in DAYS:
            # Days with day % 7 in {0, 6} are treated as weekend days.
            week_factor = 1.0 if (day % 7) not in [0, 6] else 0.9

            # Very weak upward drift: +0.1% per study day.
            time_factor = 1.0 + (day / 100) * 0.1

            # Compute each probability once, then draw; count the "Yes" draws.
            untracked_count = 0
            for weight, ceiling in untracked_habits:
                yes_prob = clip_yes_prob(
                    natural_org * weight * week_factor * time_factor, ceiling
                )
                drawn = np.random.choice(['Yes', 'No'], p=[yes_prob, 1 - yes_prob])
                if drawn == 'Yes':
                    untracked_count += 1

            subtle_boost = untracked_count * 0.1  # at most 0.3 per day

            # No cumulative term: only baseline, the small boost, and noise.
            daily_noise = np.random.normal(0, 1.0)
            happiness_value = person_happiness_baseline + subtle_boost + daily_noise
            happiness = int(np.clip(np.round(happiness_value), 1, 10))

            rows.append([
                participant_id,
                'Control',
                day,
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                'No',  # Reported as "No" - not tracking
                happiness,
            ])

    return rows
|
||||
|
||||
|
||||
# Combined table: intervention rows first (IDs from 1), then control rows
# (IDs continuing from N_PARTICIPANTS_PER_GROUP + 1).
data = [
    *generate_intervention_group(start_participant_id=1),
    *generate_control_group(start_participant_id=N_PARTICIPANTS_PER_GROUP + 1),
]

# Column order matches the row layout produced by both generator functions.
column_names = [
    'Participant_ID',
    'Group',
    'Day',
    'Calendar_Adherence',
    'Cleanliness_Adherence',
    'Punctuality_Adherence',
    'Happiness',
]
df = pd.DataFrame(data, columns=column_names)

# Write the dataset (relative to the current working directory), no index column.
df.to_csv('organization_happiness_study_data.csv', index=False)
print("✅ Full dataset saved as 'organization_happiness_study_data.csv' — open it in Excel!")

# Preview the first 10 rows.
print(df.head(10))
|
||||
6
dot_config/private_Code/User/History/785723cb/1nKM.txt
Normal file
6
dot_config/private_Code/User/History/785723cb/1nKM.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
pandas
|
||||
numpy
|
||||
matplotlib
|
||||
seaborn
|
||||
scipy
|
||||
statsmodels
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/requirements.txt","entries":[{"id":"1nKM.txt","source":"Chat Edit: 'improve on this analysis script'","timestamp":1774345121245}]}
|
||||
284
dot_config/private_Code/User/History/7da6e0fb/9KVj.py
Normal file
284
dot_config/private_Code/User/History/7da6e0fb/9KVj.py
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_total = len(frame)
    logging.info("Loaded %d rows from %s", row_total, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Works on a copy, so the caller's DataFrame is left unmodified.

    Steps:
      * require the ID, happiness and three adherence columns;
      * default a missing ``Group`` column to ``'Intervention'`` and
        normalise its values (stripped, title-cased);
      * map the adherence columns to booleans (yes/true -> True,
        no/false -> False, case- and whitespace-insensitive; anything
        else becomes missing);
      * add ``Habits_Count``, the number of True adherence values per row
        (missing counts as not done);
      * coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the study CSV.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy before editing so the input frame is not mutated as a side effect.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_map = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_map)

    # Count habits per row. Comparing against True treats unmapped (missing)
    # values as "not done" without going through fillna on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive summaries of happiness and habit adherence.

    Expects ``Happiness``, ``Habits_Count`` and the three adherence columns;
    the ``Group`` column is optional. Prints the dataset shape, an overall
    happiness summary, per-group and per-habit-count tables, the Pearson
    correlation between ``Habits_Count`` and ``Happiness``, and a
    point-biserial correlation per habit (restricted to the intervention
    group when ``Group`` is present). Returns nothing.
    """
    has_group = 'Group' in df.columns
    summary_fields = ['mean', 'count', 'std']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(summary_fields).round(3))

    happiness_by_habits = df.groupby('Habits_Count')['Happiness']

    print('\nAverage happiness by number of habits completed:')
    print(happiness_by_habits.agg(summary_fields).round(3))

    print('\nMedian happiness by habits:')
    print(happiness_by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    if has_group:
        habit_df = df[df['Group'] == 'Intervention']
    else:
        habit_df = df

    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        # Only rows where this habit has a recorded value take part.
        present = habit_df[habit].notna()
        if not present.any():
            print(f'{habit:22} (no data)')
            continue
        habit_flags = habit_df.loc[present, habit].astype(int)
        scores = habit_df.loc[present, 'Happiness']
        r, p = stats.pointbiserialr(habit_flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples (pooled-SD version).

    Args:
        x: First sample; any numeric array-like (pandas Series, list, array).
        y: Second sample; any numeric array-like.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd``. NaN is returned when either
        sample has fewer than two non-missing values or the pooled standard
        deviation is zero, because the effect size is undefined there.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values so the sample sizes match the values that actually
    # enter the means and variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]
    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')
    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)
    if pooled_sd == 0:
        return float('nan')
    return (x.mean() - y.mean()) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    If the frame has a 'Group' column it is added as a categorical term via
    the formula interface; otherwise an intercept-plus-slope model is built
    by hand.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by Participant_ID and
    fitted with ``reml=False``. Any failure is logged as a warning.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    fitted = None
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Render the six study figures and save each one as a PNG.

    Figures 1-3 and 6 are drawn only when the frame has a 'Group' column
    (figure 3 additionally needs 'Day'); figures 4 and 5 use the rows whose
    Group is 'Intervention', or the whole frame when there is no Group.

    Args:
        df: Study data with 'Happiness', 'Habits_Count', 'Participant_ID'
            and the three '*_Adherence' columns.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When True, also display each figure before closing it.
    """
    # Imported by name instead of via ``plt.matplotlib.ticker``, which only
    # resolves as a side effect of pyplot's own internal imports.
    from matplotlib.ticker import PercentFormatter

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    def finish_plot(filename):
        # Shared tail for every figure: lay out, save, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    # 1) PRIMARY OUTCOME: Mean happiness by group with error bars and value labels
    if 'Group' in df.columns:
        plt.figure(figsize=(8, 6))
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(['Control', 'Intervention'])
        # Normal-approximation 95% interval half-width: 1.96 * standard error.
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        bars = plt.bar(
            np.arange(len(summary)),
            summary['mean'].values,
            yerr=ci95.values,
            color=['#A9B2C3', '#4E79A7'],
            capsize=8,
            edgecolor='black',
            linewidth=1.2,
            alpha=0.9
        )
        plt.xticks(np.arange(len(summary)), ['Control Group\n(No habits tracked)', 'Intervention Group\n(Daily habits tracked)'])
        plt.title('Effect of Tracked Organization Habits on Happiness', pad=15, fontsize=14, fontweight='bold')
        plt.ylabel('Mean Daily Happiness Score (1-10)', fontsize=12)
        plt.ylim(1, 10)
        for bar in bars:
            yval = bar.get_height()
            # Label sits inside the bar, just below its top edge.
            plt.text(bar.get_x() + bar.get_width()/2, yval - 0.8, f'{yval:.1f}', ha='center', va='center', color='white', fontweight='bold', fontsize=11)
        finish_plot('01_primary_outcome_group_comparison.png')

    # 2) DISTRIBUTIONS: Show overlap and variability in happiness scores
    if 'Group' in df.columns:
        plt.figure(figsize=(9, 6))
        order = ['Control', 'Intervention']
        sns.violinplot(
            data=df, x='Group', y='Happiness', order=order,
            inner='quartile', palette={'Control': '#E0E0E0', 'Intervention': '#B3CDE3'}, cut=0
        )
        sns.stripplot(
            data=df, x='Group', y='Happiness', order=order,
            color='black', alpha=0.12, jitter=0.25, size=3
        )
        plt.title('Distribution of Happiness Reports Over 30 Days', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Study Group', fontsize=12)
        plt.ylabel('Happiness Score', fontsize=12)
        plt.ylim(1, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) LONGITUDINAL: Daily happiness trend across 30 days
    if 'Group' in df.columns and 'Day' in df.columns:
        plt.figure(figsize=(10, 6))
        daily_mean = df.groupby(['Group', 'Day'])['Happiness'].mean().reset_index()
        sns.lineplot(
            data=daily_mean, x='Day', y='Happiness', hue='Group',
            hue_order=['Control', 'Intervention'],
            palette={'Control': '#7F7F7F', 'Intervention': '#D62728'},
            marker='o', linewidth=2.5, markersize=6
        )
        plt.title('Longitudinal Daily Happiness Throughout the Study', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Day of Study (1-30)', fontsize=12)
        plt.ylabel('Average Happiness', fontsize=12)
        plt.ylim(1, 10)
        plt.xticks(range(1, 31, 2))
        plt.legend(title='', frameon=True, facecolor='white', fontsize=10)
        finish_plot('03_longitudinal_trends.png')

    # 4) DOSE-RESPONSE: In intervention group, does MORE habits = MORE happiness?
    intervention_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(
        data=intervention_df, x='Habits_Count', y='Happiness',
        color='#9ECAE1', width=0.6, fliersize=0
    )
    sns.stripplot(
        data=intervention_df, x='Habits_Count', y='Happiness',
        color='#2B5B84', alpha=0.3, jitter=0.2, size=4
    )
    plt.title('Dose-Response: Happiness by Number of Habits Completed', pad=15, fontsize=14, fontweight='bold')
    plt.xlabel('Number of Requested Habits Completed That Day\n(Calendar + Clean Room + Punctual)', fontsize=11)
    plt.ylabel('Happiness Score', fontsize=12)
    plt.ylim(1, 10)
    finish_plot('04_habit_dose_response.png')

    # 5) HABIT COMPLETION RATES: Which habits were easiest to maintain?
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    # Cast to float first so the mean does not depend on how pandas reduces
    # object-dtype columns holding True/False/NaN; missing values are still
    # skipped by mean().
    adherence_rates = intervention_df[habit_cols].astype(float).mean().sort_values(ascending=False).reset_index()
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(8, 6))
    bars = sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Which Habits Were Easiest to Keep?', pad=15, fontsize=14, fontweight='bold')
    plt.xlabel('', fontsize=12)
    plt.ylabel('Percentage of Days Completed', fontsize=12)
    plt.ylim(0, 1.05)
    # Rates are fractions in [0, 1]; xmax=1.0 renders them as 0%-100%.
    plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0))
    for bar in bars.patches:
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                 f"{bar.get_height()*100:.0f}%", ha='center', va='bottom', fontweight='bold', fontsize=10)
    finish_plot('05_habit_completion_rates.png')

    # 6) INDIVIDUAL VARIATION: Participant-level averages show broad effect
    if 'Group' in df.columns:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'])['Happiness'].mean().reset_index()
        participant_avg = participant_avg.sort_values(['Group', 'Happiness'])
        # Running position so each group's bars form one contiguous run.
        participant_avg['Order_Index'] = range(len(participant_avg))

        for group, color in zip(['Control', 'Intervention'], ['#BDBDBD', '#4E79A7']):
            group_data = participant_avg[participant_avg['Group'] == group]
            plt.bar(group_data['Order_Index'], group_data['Happiness'], color=color, label=group, alpha=0.85, width=0.8)

        plt.axhline(df[df['Group']=='Control']['Happiness'].mean(), color='#7F7F7F', linestyle='--', linewidth=2, label='Control Mean')
        plt.axhline(df[df['Group']=='Intervention']['Happiness'].mean(), color='#2B5B84', linestyle='--', linewidth=2, label='Intervention Mean')
        plt.title('Individual Average Happiness Across Study Participants', pad=15, fontsize=14, fontweight='bold')
        plt.xlabel('Individual Participants (Sorted by Happiness Level)', fontsize=12)
        plt.ylabel('Average Happiness Score', fontsize=12)
        plt.xticks([])
        plt.ylim(1, 10)
        plt.legend(frameon=True, facecolor='white', fontsize=10, loc='upper left')
        finish_plot('06_individual_participant_avgs.png')

    logging.info('Saved study plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, clean, describe, model, plot.

    Args:
        args: Parsed command-line namespace providing ``data``, ``outdir``
            and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    # NOTE(review): rows from every group enter the 0-habit sample here --
    # confirm whether Control rows are meant to be included.
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if len(control) > 1 and len(intervention) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: declare the options, parse, hand off to main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = parser.parse_args()
    main(args)
|
||||
227
dot_config/private_Code/User/History/7da6e0fb/FOyN.py
Normal file
227
dot_config/private_Code/User/History/7da6e0fb/FOyN.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Steps: check the required columns, default/normalise 'Group', map the
    adherence answers to booleans, derive 'Habits_Count', coerce 'Happiness'
    to numeric and drop rows where it is missing.

    Args:
        df: Raw DataFrame as read from the CSV. It is not modified.

    Returns:
        A new DataFrame with the cleaned columns plus 'Habits_Count'.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is not altered as a side effect.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False); anything else
    # becomes missing.
    answers = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(answers)

    # Count habits per row. eq(True) counts only explicit completions and
    # treats missing answers as not completed, without the object-dtype
    # fillna() downcast that newer pandas versions warn about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and simple correlations for the study.

    Output covers the overall Happiness summary, per-group and
    per-Habits_Count aggregates, the Pearson correlation between
    Habits_Count and Happiness, and a point-biserial correlation of each
    adherence column against Happiness (restricted to rows whose Group is
    'Intervention' when a Group column exists).

    Args:
        df: DataFrame with 'Happiness', 'Habits_Count' and the three
            '*_Adherence' columns; 'Group' is optional.
    """
    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if 'Group' in df.columns:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(df.groupby('Habits_Count')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(df.groupby('Habits_Count')['Happiness'].median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    for habit in ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']:
        observed = habit_df.loc[habit_df[habit].notna(), [habit, 'Happiness']]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        flags = observed[habit].astype(int)
        # The correlation is undefined with fewer than two observations or
        # when the habit never varies; report that instead of raising or
        # printing NaN.
        if len(observed) < 2 or flags.nunique() < 2:
            print(f'{habit:22} (not enough variation)')
            continue
        r, p = stats.pointbiserialr(flags, observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples (pooled-SD version).

    Args:
        x: First sample; any numeric array-like (pandas Series, list, array).
        y: Second sample; any numeric array-like.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd``. NaN is returned when either
        sample has fewer than two non-missing values or the pooled standard
        deviation is zero, because the effect size is undefined there.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values so the sample sizes match the values that actually
    # enter the means and variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]
    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')
    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)
    if pooled_sd == 0:
        return float('nan')
    return (x.mean() - y.mean()) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    If the frame has a 'Group' column it is added as a categorical term via
    the formula interface; otherwise an intercept-plus-slope model is built
    by hand.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by Participant_ID and
    fitted with ``reml=False``. Any failure is logged as a warning.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    fitted = None
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Draw the summary figures and save each one as a PNG in *outdir*.

    Figures: box plot and violin plot of Happiness by Habits_Count, sorted
    per-participant mean bars, mean Happiness by Group (when present) and a
    scatter of Happiness against Habits_Count with a linear fit.

    Args:
        df: Study data with 'Happiness', 'Habits_Count' and
            'Participant_ID'; 'Group' is optional.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When True, also display each figure before closing it.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def _finish(filename):
        # Shared tail for every figure: lay out, save, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _finish('happiness_by_habits_box.png')

    # Violin + jittered points
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _finish('happiness_by_habits_violin.png')

    # Participant average bar
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=range(len(participant_avg)), y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    # Tick labels are passed as strings so IDs of any dtype render as text.
    plt.xticks(range(len(participant_avg)), participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _finish('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _finish('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so overlay the regression line on
        # the grouped scatter; skipped when Habits_Count does not vary, as
        # no slope can be estimated then.
        if df['Habits_Count'].nunique() > 1:
            sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _finish('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, clean, describe, model, plot.

    Args:
        args: Parsed command-line namespace providing ``data``, ``outdir``
            and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    # NOTE(review): rows from every group enter the 0-habit sample here --
    # confirm whether Control rows are meant to be included.
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if len(control) > 1 and len(intervention) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: declare the options, parse, hand off to main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = parser.parse_args()
    main(args)
|
||||
227
dot_config/private_Code/User/History/7da6e0fb/Gx76.py
Normal file
227
dot_config/private_Code/User/History/7da6e0fb/Gx76.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Steps: check the required columns, default/normalise 'Group', map the
    adherence answers to booleans, derive 'Habits_Count', coerce 'Happiness'
    to numeric and drop rows where it is missing.

    Args:
        df: Raw DataFrame as read from the CSV. It is not modified.

    Returns:
        A new DataFrame with the cleaned columns plus 'Habits_Count'.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is not altered as a side effect.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False); anything else
    # becomes missing.
    answers = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(answers)

    # Count habits per row. eq(True) counts only explicit completions and
    # treats missing answers as not completed, without the object-dtype
    # fillna() downcast that newer pandas versions warn about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and simple correlations for the study.

    Output covers the overall Happiness summary, per-group and
    per-Habits_Count aggregates, the Pearson correlation between
    Habits_Count and Happiness, and a point-biserial correlation of each
    adherence column against Happiness (restricted to rows whose Group is
    'Intervention' when a Group column exists).

    Args:
        df: DataFrame with 'Happiness', 'Habits_Count' and the three
            '*_Adherence' columns; 'Group' is optional.
    """
    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if 'Group' in df.columns:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(df.groupby('Habits_Count')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(df.groupby('Habits_Count')['Happiness'].median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    for habit in ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']:
        observed = habit_df.loc[habit_df[habit].notna(), [habit, 'Happiness']]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        flags = observed[habit].astype(int)
        # The correlation is undefined with fewer than two observations or
        # when the habit never varies; report that instead of raising or
        # printing NaN.
        if len(observed) < 2 or flags.nunique() < 2:
            print(f'{habit:22} (not enough variation)')
            continue
        r, p = stats.pointbiserialr(flags, observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples (pooled-SD version).

    Args:
        x: First sample; any numeric array-like (pandas Series, list, array).
        y: Second sample; any numeric array-like.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd``. NaN is returned when either
        sample has fewer than two non-missing values or the pooled standard
        deviation is zero, because the effect size is undefined there.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values so the sample sizes match the values that actually
    # enter the means and variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]
    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')
    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)
    if pooled_sd == 0:
        return float('nan')
    return (x.mean() - y.mean()) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    If the frame has a 'Group' column it is added as a categorical term via
    the formula interface; otherwise an intercept-plus-slope model is built
    by hand.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by Participant_ID and
    fitted with ``reml=False``. Any failure is logged as a warning.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    fitted = None
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Draw the summary figures and save each one as a PNG in *outdir*.

    Figures: box plot and violin plot of Happiness by Habits_Count, sorted
    per-participant mean bars, mean Happiness by Group (when present) and a
    scatter of Happiness against Habits_Count with a linear fit.

    Args:
        df: Study data with 'Happiness', 'Habits_Count' and
            'Participant_ID'; 'Group' is optional.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When True, also display each figure before closing it.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def _finish(filename):
        # Shared tail for every figure: lay out, save, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _finish('happiness_by_habits_box.png')

    # Violin + jittered points
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _finish('happiness_by_habits_violin.png')

    # Participant average bar
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=range(len(participant_avg)), y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    # Tick labels are passed as strings so IDs of any dtype render as text.
    plt.xticks(range(len(participant_avg)), participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _finish('participant_avg_happiness.png')

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _finish('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so overlay the regression line on
        # the grouped scatter; skipped when Habits_Count does not vary, as
        # no slope can be estimated then.
        if df['Habits_Count'].nunique() > 1:
            sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _finish('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, clean, describe, model, plot.

    Args:
        args: Parsed command-line namespace providing ``data``, ``outdir``
            and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    # NOTE(review): rows from every group enter the 0-habit sample here --
    # confirm whether Control rows are meant to be included.
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if len(control) > 1 and len(intervention) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: declare the options, parse, hand off to main().
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = parser.parse_args()
    main(args)
|
||||
253
dot_config/private_Code/User/History/7da6e0fb/Ldgu.py
Normal file
253
dot_config/private_Code/User/History/7da6e0fb/Ldgu.py
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Steps: check the required columns, default/normalise 'Group', map the
    adherence answers to booleans, derive 'Habits_Count', coerce 'Happiness'
    to numeric and drop rows where it is missing.

    Args:
        df: Raw DataFrame as read from the CSV. It is not modified.

    Returns:
        A new DataFrame with the cleaned columns plus 'Habits_Count'.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is not altered as a side effect.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False); anything else
    # becomes missing.
    answers = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(answers)

    # Count habits per row. eq(True) counts only explicit completions and
    # treats missing answers as not completed, without the object-dtype
    # fillna() downcast that newer pandas versions warn about.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and simple correlations for the study.

    Output covers the overall Happiness summary, per-group and
    per-Habits_Count aggregates, the Pearson correlation between
    Habits_Count and Happiness, and a point-biserial correlation of each
    adherence column against Happiness (restricted to rows whose Group is
    'Intervention' when a Group column exists).

    Args:
        df: DataFrame with 'Happiness', 'Habits_Count' and the three
            '*_Adherence' columns; 'Group' is optional.
    """
    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if 'Group' in df.columns:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(df.groupby('Habits_Count')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(df.groupby('Habits_Count')['Happiness'].median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    for habit in ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']:
        observed = habit_df.loc[habit_df[habit].notna(), [habit, 'Happiness']]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        flags = observed[habit].astype(int)
        # The correlation is undefined with fewer than two observations or
        # when the habit never varies; report that instead of raising or
        # printing NaN.
        if len(observed) < 2 or flags.nunique() < 2:
            print(f'{habit:22} (not enough variation)')
            continue
        r, p = stats.pointbiserialr(flags, observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples (pooled-SD version).

    Args:
        x: First sample; any numeric array-like (pandas Series, list, array).
        y: Second sample; any numeric array-like.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd``. NaN is returned when either
        sample has fewer than two non-missing values or the pooled standard
        deviation is zero, because the effect size is undefined there.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Drop missing values so the sample sizes match the values that actually
    # enter the means and variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]
    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')
    dof = nx + ny - 2
    pooled_sd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)
    if pooled_sd == 0:
        return float('nan')
    return (x.mean() - y.mean()) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an OLS regression of Happiness on Habits_Count and print it.

    If the frame has a 'Group' column it is added as a categorical term via
    the formula interface; otherwise an intercept-plus-slope model is built
    by hand.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'
    print(heading)
    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by Participant_ID and
    fitted with ``reml=False``. Any failure is logged as a warning.

    Returns:
        The fitted results object, or None when fitting or printing failed.
    """
    fitted = None
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Draw the study figures and save each one as a PNG in *outdir*.

    Figures 1, 2 and 7 are only drawn when ``df`` has a ``Group`` column and
    figure 3 additionally needs ``Day``. Figures 4-6 use the rows whose group
    is ``'Intervention'`` (or all rows when there is no ``Group`` column).

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and the three
            ``*_Adherence`` columns.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When true, each figure is also displayed with
            ``plt.show()`` before being closed.
    """
    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    has_group = 'Group' in df.columns
    group_order = ['Control', 'Intervention']

    def label_axes(title, x_label, y_label):
        # Title and axis labels for the current figure.
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)

    def finish_plot(filename):
        # Save the current figure, optionally show it, then release it.
        plt.tight_layout()
        plt.savefig(target_dir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    if has_group:
        # 1) Mean happiness by group with error bars
        per_group = (
            df.groupby('Group')['Happiness']
            .agg(['mean', 'std', 'count'])
            .reindex(group_order)
        )
        # Normal-approximation 95% interval: 1.96 standard errors.
        half_width = 1.96 * (per_group['std'] / np.sqrt(per_group['count']))
        plt.figure(figsize=(8, 6))
        plt.bar(
            per_group.index,
            per_group['mean'],
            color=['#7A7A7A', '#2A9D8F'],
            yerr=half_width,
            capsize=6,
        )
        label_axes('Average Happiness by Group', 'Study group', 'Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

        # 2) Distribution of happiness by group
        plt.figure(figsize=(9, 6))
        sns.boxplot(data=df, x='Group', y='Happiness', order=group_order, color='#C9D1D9')
        sns.stripplot(
            data=df,
            x='Group',
            y='Happiness',
            order=group_order,
            color='black',
            alpha=0.18,
            jitter=0.22,
            size=2,
        )
        label_axes('Happiness Distribution by Group', 'Study group', 'Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if has_group and 'Day' in df.columns:
        daily_means = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(
            data=daily_means,
            x='Day',
            y='Happiness',
            hue='Group',
            hue_order=['Control', 'Intervention'],
            marker='o',
        )
        label_axes('Mean Daily Happiness Across the Study', 'Day of study', 'Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    if has_group:
        intervention_df = df[df['Group'] == 'Intervention']
    else:
        intervention_df = df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(
        data=intervention_df,
        x='Habits_Count',
        y='Happiness',
        color='black',
        alpha=0.20,
        jitter=0.18,
        size=2,
    )
    label_axes(
        'Intervention Group: Happiness by Number of Habits Completed',
        'Habits completed that day',
        'Happiness score',
    )
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    mean_by_count = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=mean_by_count, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    label_axes(
        'Intervention Group: Mean Happiness vs Habits Completed',
        'Number of habits completed',
        'Mean happiness',
    )
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    rates = intervention_df[habit_cols].mean()
    adherence_rates = rates.sort_values(ascending=False).reset_index()
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    label_axes('Intervention Group: Habit Completion Rate', 'Habit', 'Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        sns.boxplot(
            data=participant_avg,
            x='Group',
            y='Happiness',
            order=['Control', 'Intervention'],
            color='#D6D6D6',
        )
        sns.stripplot(
            data=participant_avg,
            x='Group',
            y='Happiness',
            order=['Control', 'Intervention'],
            color='black',
            alpha=0.45,
            jitter=0.12,
            size=5,
        )
        label_axes('Average Happiness per Participant', 'Study group', 'Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', target_dir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, summarise, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    # Each sample needs at least two values for a sample standard deviation.
    if min(len(none_done), len(all_done)) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the analysis.
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    cli.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = cli.parse_args()
    main(args)
|
||||
272
dot_config/private_Code/User/History/7da6e0fb/MtI5.py
Normal file
272
dot_config/private_Code/User/History/7da6e0fb/MtI5.py
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV file at *path* and log how many rows it contains.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data, returning a cleaned copy.

    The input frame is never modified: all changes are made on a copy, so the
    caller's DataFrame and the returned one cannot drift apart.

    Steps:
        * check that the required columns are present;
        * add a ``Group`` column (all ``'Intervention'``) when it is missing
          and normalise its values to stripped title case;
        * map the adherence columns to booleans (``yes``/``true`` -> True,
          ``no``/``false`` -> False, case-insensitive); anything else becomes
          missing;
        * add ``Habits_Count``, the number of habits marked True per row
          (missing values count as not completed);
        * coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is left exactly as it was passed in.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row; comparing with True treats missing values as
    # "not completed" without going through fillna on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive summaries and correlations for the study data.

    Output covers the frame shape, an overall ``Happiness`` summary, per-group
    counts and means (when a ``Group`` column exists), mean and median
    happiness by ``Habits_Count``, the Pearson correlation between
    ``Habits_Count`` and ``Happiness``, and a point-biserial correlation of
    each habit with happiness on the intervention rows.

    Args:
        df: Prepared DataFrame with ``Happiness``, ``Habits_Count`` and the
            three ``*_Adherence`` columns.
    """
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        group_table = df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std'])
        print(group_table.round(3))

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        observed = subset[habit].notna()
        if not observed.any():
            print(f'{habit:22} (no data)')
            continue
        flags = subset.loc[observed, habit].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples.

    The mean difference ``x.mean() - y.mean()`` is divided by the pooled
    standard deviation, where each sample variance (``ddof=1``) is weighted
    by its own degrees of freedom.

    Args:
        x: First sample; must support ``len``, ``.mean()`` and ``.std(ddof=1)``.
        y: Second sample, with the same requirements as ``x``.

    Returns:
        The standardised mean difference of ``x`` relative to ``y``.
    """
    size_x = len(x)
    size_y = len(y)
    weighted_var_x = (size_x - 1) * x.std(ddof=1) ** 2
    weighted_var_y = (size_y - 1) * y.std(ddof=1) ** 2
    pooled_sd = np.sqrt((weighted_var_x + weighted_var_y) / (size_x + size_y - 2))
    mean_gap = x.mean() - y.mean()
    return mean_gap / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an ordinary least squares model of happiness and print its summary.

    When ``df`` has a ``Group`` column the model is
    ``Happiness ~ Habits_Count + C(Group)``; otherwise it is a simple
    regression of ``Happiness`` on ``Habits_Count`` with an added constant.

    Args:
        df: DataFrame with ``Happiness`` and ``Habits_Count`` columns.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        model = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'

    print(heading)
    print(model.summary())
    return model
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by ``Participant_ID``
    and is fitted with ``reml=False``. Any failure is logged as a warning
    instead of being raised.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` columns.

    Returns:
        The fitted results object, or ``None`` if fitting failed.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        fitted = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as exc:
        # Best effort: the rest of the analysis should still run.
        logging.warning('MixedLM failed: %s', exc)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Draw the study figures and save each one as a PNG in *outdir*.

    Figures 1, 2 and 7 are only drawn when ``df`` has a ``Group`` column and
    figure 3 additionally needs ``Day``. Figures 4-6 use the rows whose group
    is ``'Intervention'`` (or all rows when there is no ``Group`` column).

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count``, ``Participant_ID``
            and the three ``*_Adherence`` columns.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When true, each figure is also displayed with
            ``plt.show()`` before being closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    def finish_plot(filename):
        # Save the current figure, optionally show it, then release it.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    # 1) Mean happiness by group with error bars
    if 'Group' in df.columns:
        plt.figure(figsize=(8, 6))
        order = ['Control', 'Intervention']
        sns.barplot(
            data=df,
            x='Group',
            y='Happiness',
            order=order,
            estimator='mean',
            errorbar=('ci', 95),
            palette=['#7A7A7A', '#2A9D8F'],
        )
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

    # 2) Distribution of happiness by group
    if 'Group' in df.columns:
        plt.figure(figsize=(9, 6))
        order = ['Control', 'Intervention']
        sns.boxplot(data=df, x='Group', y='Happiness', order=order, palette=['#B0B0B0', '#73C6B6'])
        sns.stripplot(data=df, x='Group', y='Happiness', order=order, color='black', alpha=0.18, jitter=0.22, size=2)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if 'Group' in df.columns and 'Day' in df.columns:
        daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group', hue_order=['Control', 'Intervention'], marker='o')
        plt.title('Mean Daily Happiness Across the Study')
        plt.xlabel('Day of study')
        plt.ylabel('Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    intervention_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black', alpha=0.20, jitter=0.18, size=2)
    plt.title('Intervention Group: Happiness by Number of Habits Completed')
    plt.xlabel('Habits completed that day')
    plt.ylabel('Happiness score')
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    plt.title('Intervention Group: Mean Happiness vs Habits Completed')
    plt.xlabel('Number of habits completed')
    plt.ylabel('Mean happiness')
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    adherence_rates = (
        intervention_df[habit_cols]
        .mean()
        .sort_values(ascending=False)
        .reset_index()
        .rename(columns={'index': 'Habit', 0: 'Rate'})
    )
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    # The rate column was renamed from 0 to 'Rate' above, so it must be
    # addressed by its new name; y=0 no longer refers to any column.
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Intervention Group: Habit Completion Rate')
    plt.xlabel('Habit')
    plt.ylabel('Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant averages, grouped by study group
    if 'Group' in df.columns:
        participant_avg = df.groupby(['Group', 'Participant_ID'])['Happiness'].mean().reset_index()
        plt.figure(figsize=(12, 6))
        sns.barplot(
            data=participant_avg,
            x='Participant_ID',
            y='Happiness',
            hue='Group',
            dodge=True,
            palette=['#7A7A7A', '#2A9D8F'],
        )
        plt.title('Average Happiness per Participant')
        plt.xlabel('Participant ID')
        plt.ylabel('Mean happiness')
        plt.ylim(0, 10)
        plt.xticks(rotation=45)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, summarise, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    # Each sample needs at least two values for a sample standard deviation.
    if min(len(none_done), len(all_done)) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the analysis.
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    cli.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = cli.parse_args()
    main(args)
|
||||
270
dot_config/private_Code/User/History/7da6e0fb/NtsI.py
Normal file
270
dot_config/private_Code/User/History/7da6e0fb/NtsI.py
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV file at *path* and log how many rows it contains.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data, returning a cleaned copy.

    The input frame is never modified: all changes are made on a copy, so the
    caller's DataFrame and the returned one cannot drift apart.

    Steps:
        * check that the required columns are present;
        * add a ``Group`` column (all ``'Intervention'``) when it is missing
          and normalise its values to stripped title case;
        * map the adherence columns to booleans (``yes``/``true`` -> True,
          ``no``/``false`` -> False, case-insensitive); anything else becomes
          missing;
        * add ``Habits_Count``, the number of habits marked True per row
          (missing values count as not completed);
        * coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is left exactly as it was passed in.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row; comparing with True treats missing values as
    # "not completed" without going through fillna on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive summaries and correlations for the study data.

    Output covers the frame shape, an overall ``Happiness`` summary, per-group
    counts and means (when a ``Group`` column exists), mean and median
    happiness by ``Habits_Count``, the Pearson correlation between
    ``Habits_Count`` and ``Happiness``, and a point-biserial correlation of
    each habit with happiness on the intervention rows.

    Args:
        df: Prepared DataFrame with ``Happiness``, ``Habits_Count`` and the
            three ``*_Adherence`` columns.
    """
    has_group = 'Group' in df.columns

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        group_table = df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std'])
        print(group_table.round(3))

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    subset = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        observed = subset[habit].notna()
        if not observed.any():
            print(f'{habit:22} (no data)')
            continue
        flags = subset.loc[observed, habit].astype(int)
        scores = subset.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples.

    The mean difference ``x.mean() - y.mean()`` is divided by the pooled
    standard deviation, where each sample variance (``ddof=1``) is weighted
    by its own degrees of freedom.

    Args:
        x: First sample; must support ``len``, ``.mean()`` and ``.std(ddof=1)``.
        y: Second sample, with the same requirements as ``x``.

    Returns:
        The standardised mean difference of ``x`` relative to ``y``.
    """
    size_x = len(x)
    size_y = len(y)
    weighted_var_x = (size_x - 1) * x.std(ddof=1) ** 2
    weighted_var_y = (size_y - 1) * y.std(ddof=1) ** 2
    pooled_sd = np.sqrt((weighted_var_x + weighted_var_y) / (size_x + size_y - 2))
    mean_gap = x.mean() - y.mean()
    return mean_gap / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an ordinary least squares model of happiness and print its summary.

    When ``df`` has a ``Group`` column the model is
    ``Happiness ~ Habits_Count + C(Group)``; otherwise it is a simple
    regression of ``Happiness`` on ``Habits_Count`` with an added constant.

    Args:
        df: DataFrame with ``Happiness`` and ``Habits_Count`` columns.

    Returns:
        The fitted statsmodels results object.
    """
    if 'Group' not in df.columns:
        design = sm.add_constant(df['Habits_Count'])
        model = sm.OLS(df['Happiness'], design).fit()
        heading = '\nSimple OLS regression: Happiness ~ Habits_Count'
    else:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        heading = '\nOLS regression: Happiness ~ Habits_Count + Group'

    print(heading)
    print(model.summary())
    return model
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by ``Participant_ID``
    and is fitted with ``reml=False``. Any failure is logged as a warning
    instead of being raised.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` columns.

    Returns:
        The fitted results object, or ``None`` if fitting failed.
    """
    try:
        spec = smf.mixedlm('Happiness ~ Habits_Count', data=df, groups=df['Participant_ID'])
        fitted = spec.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as exc:
        # Best effort: the rest of the analysis should still run.
        logging.warning('MixedLM failed: %s', exc)
        return None
    return fitted
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Draw the study figures and save each one as a PNG in *outdir*.

    Figures 1, 2 and 7 are only drawn when ``df`` has a ``Group`` column and
    figure 3 additionally needs ``Day``. Figures 4-6 use the rows whose group
    is ``'Intervention'`` (or all rows when there is no ``Group`` column).
    The jittered points in figures 2 and 7 come from ``np.random.normal``.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count``, ``Participant_ID``
            and the three ``*_Adherence`` columns.
        outdir: Directory for the PNG files; created if it does not exist.
        show_plots: When true, each figure is also displayed with
            ``plt.show()`` before being closed.
    """
    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    has_group = 'Group' in df.columns
    group_order = ['Control', 'Intervention']

    def label_axes(title, x_label, y_label):
        # Title and axis labels for the current figure.
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)

    def finish_plot(filename):
        # Save the current figure, optionally show it, then release it.
        plt.tight_layout()
        plt.savefig(target_dir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    def box_with_jitter(samples, face_color, point_alpha, point_size):
        # Box per group, then the raw points scattered around each box position.
        plt.boxplot(
            samples,
            labels=group_order,
            patch_artist=True,
            boxprops=dict(facecolor=face_color, color='#4C4C4C'),
            medianprops=dict(color='#2A9D8F', linewidth=2),
            whiskerprops=dict(color='#4C4C4C'),
            capprops=dict(color='#4C4C4C'),
        )
        for position, values in enumerate(samples, start=1):
            jittered = np.random.normal(position, 0.06, size=len(values))
            plt.scatter(jittered, values, color='black', alpha=point_alpha, s=point_size)

    if has_group:
        # 1) Mean happiness by group with error bars
        per_group = (
            df.groupby('Group')['Happiness']
            .agg(['mean', 'std', 'count'])
            .reindex(group_order)
        )
        # Normal-approximation 95% interval: 1.96 standard errors.
        half_width = 1.96 * (per_group['std'] / np.sqrt(per_group['count']))
        plt.figure(figsize=(8, 6))
        positions = np.arange(len(per_group))
        plt.bar(
            positions,
            per_group['mean'].values,
            color=['#7A7A7A', '#2A9D8F'],
            yerr=half_width.values,
            capsize=6,
        )
        plt.xticks(positions, per_group.index)
        label_axes('Average Happiness by Group', 'Study group', 'Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

        # 2) Distribution of happiness by group
        plt.figure(figsize=(9, 6))
        scores_by_group = [
            df.loc[df['Group'] == name, 'Happiness'].values for name in group_order
        ]
        box_with_jitter(scores_by_group, '#C9D1D9', 0.15, 10)
        label_axes('Happiness Distribution by Group', 'Study group', 'Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if has_group and 'Day' in df.columns:
        daily_means = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(
            data=daily_means,
            x='Day',
            y='Happiness',
            hue='Group',
            hue_order=['Control', 'Intervention'],
            marker='o',
        )
        label_axes('Mean Daily Happiness Across the Study', 'Day of study', 'Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    if has_group:
        intervention_df = df[df['Group'] == 'Intervention']
    else:
        intervention_df = df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(
        data=intervention_df,
        x='Habits_Count',
        y='Happiness',
        color='black',
        alpha=0.20,
        jitter=0.18,
        size=2,
    )
    label_axes(
        'Intervention Group: Happiness by Number of Habits Completed',
        'Habits completed that day',
        'Happiness score',
    )
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    mean_by_count = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=mean_by_count, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    label_axes(
        'Intervention Group: Mean Happiness vs Habits Completed',
        'Number of habits completed',
        'Mean happiness',
    )
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    rates = intervention_df[habit_cols].mean()
    adherence_rates = rates.sort_values(ascending=False).reset_index()
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    label_axes('Intervention Group: Habit Completion Rate', 'Habit', 'Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        averages_by_group = [
            participant_avg.loc[participant_avg['Group'] == name, 'Happiness'].values
            for name in group_order
        ]
        box_with_jitter(averages_by_group, '#D6D6D6', 0.45, 22)
        label_axes('Average Happiness per Participant', 'Study group', 'Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', target_dir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole analysis: load, summarise, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    # Each sample needs at least two values for a sample standard deviation.
    if min(len(none_done), len(all_done)) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the analysis.
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    cli.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    args = cli.parse_args()
    main(args)
|
||||
189
dot_config/private_Code/User/History/7da6e0fb/SA9R.py
Normal file
189
dot_config/private_Code/User/History/7da6e0fb/SA9R.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV file at *path* and log how many rows it contains.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data, returning a cleaned copy.

    The input frame is never modified: all changes are made on a copy, so the
    caller's DataFrame and the returned one cannot drift apart.

    Steps:
        * check that the required columns are present;
        * map the adherence columns to booleans (``yes``/``true`` -> True,
          ``no``/``false`` -> False, case-insensitive); anything else becomes
          missing;
        * add ``Habits_Count``, the number of habits marked True per row
          (missing values count as not completed);
        * coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If any required column is absent.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy so the caller's frame is left exactly as it was passed in.
    df = df.copy()

    # Normalize adherence to boolean (Yes/No or True/False)
    to_bool = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(to_bool)

    # Count habits per row; comparing with True treats missing values as
    # "not completed" without going through fillna on object columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive summaries and correlations for the study data.

    Output covers the frame shape, an overall ``Happiness`` summary, mean and
    median happiness by ``Habits_Count``, the Pearson correlation between
    ``Habits_Count`` and ``Happiness``, and a point-biserial correlation of
    each habit with happiness.

    Args:
        df: Prepared DataFrame with ``Happiness``, ``Habits_Count`` and the
            three ``*_Adherence`` columns.
    """
    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    by_habits = df.groupby('Habits_Count')['Happiness']
    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness):')
    for habit in ('Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence'):
        observed = df[habit].notna()
        if not observed.any():
            print(f'{habit:22} (no data)')
            continue
        flags = df.loc[observed, habit].astype(int)
        scores = df.loc[observed, 'Happiness']
        r, p = stats.pointbiserialr(flags, scores)
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Accepts any 1-D array-like (pandas Series, NumPy array, list). NaN values
    are dropped first so the sample sizes match the values actually used.
    Returns NaN when either sample is empty or when there are no degrees of
    freedom (two or fewer observations in total).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    dof = nx + ny - 2
    if nx == 0 or ny == 0 or dof <= 0:
        return np.nan

    mean_x, mean_y = x.mean(), y.mean()
    # Pooled variance = within-group sum of squares / dof. This equals the
    # (n - 1) * var(ddof=1) form but stays defined for a one-element group.
    within_ss = ((x - mean_x) ** 2).sum() + ((y - mean_y) ** 2).sum()
    pooled_sd = np.sqrt(within_ss / dof)

    # A zero pooled SD yields +/-inf (means differ) or NaN (means equal);
    # silence NumPy's warning and hand that value to the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        return (mean_x - mean_y) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit and print a simple OLS regression of Happiness on Habits_Count.

    Returns the fitted statsmodels results object.
    """
    response = df['Happiness']
    design = sm.add_constant(df['Habits_Count'])
    fitted = sm.OLS(response, design).fit()
    print('\nSimple OLS regression: Happiness ~ Habits_Count')
    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    Returns the fitted results, or None (after logging a warning) when
    building, fitting or summarising the model raises.
    """
    formula = 'Happiness ~ Habits_Count'
    try:
        mixed_model = smf.mixedlm(formula, data=df, groups=df['Participant_ID'])
        result = mixed_model.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as exc:
        # Best-effort step: a failed mixed model must not abort the analysis.
        logging.warning('MixedLM failed: %s', exc)
        return None
    return result
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Write the four study plots as PNG files into *outdir*.

    The directory is created if needed. Each figure is saved, optionally
    shown interactively when ``show_plots`` is true, and then closed.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    def _save_current(filename):
        # Shared tail of every plot: layout, write, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(x='Habits_Count', y='Happiness', data=df, palette='viridis')
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _save_current('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(x='Habits_Count', y='Happiness', data=df, inner=None, palette='muted')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _save_current('happiness_by_habits_violin.png')

    # Participant average bar, with the overall mean as a dashed reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=participant_avg.index.astype(str), y=participant_avg.values, palette='coolwarm')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _save_current('participant_avg_happiness.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _save_current('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole pipeline: load, prepare, describe, model and plot.

    *args* must provide the attributes ``data``, ``outdir`` and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect size example: compare 0 vs 3
    happiness = df['Happiness']
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the pipeline.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    main(parser.parse_args())
|
||||
231
dot_config/private_Code/User/History/7da6e0fb/bwYb.py
Normal file
231
dot_config/private_Code/User/History/7da6e0fb/bwYb.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Works on a copy, so the caller's DataFrame is left untouched. The
    returned frame has:

    * ``Group``: stripped and title-cased; set to ``'Intervention'`` for
      every row when the column is absent;
    * the three ``*_Adherence`` columns mapped to True/False (yes/no or
      true/false, case-insensitive); any other value becomes missing;
    * ``Habits_Count``: number of habits marked True in the row (missing
      counts as not done);
    * ``Happiness`` coerced to numeric, with non-numeric rows dropped.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy first: every step below assigns columns, which would otherwise
    # leak into the caller's DataFrame.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truthy = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truthy)

    # Count habits per row. Comparing against True treats missing values as
    # "not done" without a fillna on object-dtype columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and habit/happiness correlations.

    Reads the ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns of *df*, plus ``Group`` when present. Per-habit correlations use
    only rows whose ``Group`` is ``'Intervention'`` when that column exists.
    Everything is written to stdout; nothing is returned.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    has_group = 'Group' in df.columns
    by_habits = df.groupby('Habits_Count')['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in habit_cols:
        observed = habit_df.loc[habit_df[habit].notna()]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        if len(observed) < 2:
            # scipy rejects samples with fewer than two observations.
            print(f'{habit:22} (insufficient data)')
            continue
        r, p = stats.pointbiserialr(observed[habit].astype(int), observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Accepts any 1-D array-like (pandas Series, NumPy array, list). NaN values
    are dropped first so the sample sizes match the values actually used.
    Returns NaN when either sample is empty or when there are no degrees of
    freedom (two or fewer observations in total).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    dof = nx + ny - 2
    if nx == 0 or ny == 0 or dof <= 0:
        return np.nan

    mean_x, mean_y = x.mean(), y.mean()
    # Pooled variance = within-group sum of squares / dof. This equals the
    # (n - 1) * var(ddof=1) form but stays defined for a one-element group.
    within_ss = ((x - mean_x) ** 2).sum() + ((y - mean_y) ** 2).sum()
    pooled_sd = np.sqrt(within_ss / dof)

    # A zero pooled SD yields +/-inf (means differ) or NaN (means equal);
    # silence NumPy's warning and hand that value to the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        return (mean_x - mean_y) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit and print an OLS regression of Happiness on Habits_Count.

    When a ``Group`` column with at least two distinct values is present, the
    group is added as a categorical covariate; otherwise a simple regression
    is fitted. Returns the fitted statsmodels results object.
    """
    # A single-level Group has nothing to compare, so only add the covariate
    # (and say so in the header) when there are at least two groups.
    has_groups = 'Group' in df.columns and df['Group'].nunique() > 1
    if has_groups:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        print('\nOLS regression: Happiness ~ Habits_Count + Group')
    else:
        X = sm.add_constant(df['Habits_Count'])
        y = df['Happiness']
        model = sm.OLS(y, X).fit()
        print('\nSimple OLS regression: Happiness ~ Habits_Count')
    print(model.summary())
    return model
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    Returns the fitted results, or None (after logging a warning) when
    building, fitting or summarising the model raises.
    """
    formula = 'Happiness ~ Habits_Count'
    try:
        mixed_model = smf.mixedlm(formula, data=df, groups=df['Participant_ID'])
        result = mixed_model.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as exc:
        # Best-effort step: a failed mixed model must not abort the analysis.
        logging.warning('MixedLM failed: %s', exc)
        return None
    return result
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Write the study plots as PNG files into *outdir*.

    The directory is created if needed. Each figure is saved, optionally
    shown interactively when ``show_plots`` is true, and then closed. The
    per-group bar chart is only produced when *df* has a ``Group`` column.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')
    has_group = 'Group' in df.columns

    def _save_current(filename):
        # Shared tail of every plot: layout, write, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename)
        if show_plots:
            plt.show()
        plt.close()

    # Boxplot by Habits_Count (hue duplicates x only to apply the palette,
    # so the legend is blanked)
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count', palette='viridis', dodge=False)
    plt.legend([], [], frameon=False)
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _save_current('happiness_by_habits_box.png')

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', hue='Habits_Count', inner=None, palette='muted', dodge=False)
    plt.legend([], [], frameon=False)
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _save_current('happiness_by_habits_violin.png')

    # Participant average bar, with the overall mean as a dashed reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    positions = range(len(participant_avg))
    plt.figure(figsize=(12, 5))
    sns.barplot(x=positions, y=participant_avg.values, hue=positions, palette='coolwarm', dodge=False)
    plt.legend([], [], frameon=False)
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(positions, participant_avg.index.astype(str), rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _save_current('participant_avg_happiness.png')

    if has_group:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', hue='Group', estimator='mean', errorbar='sd', palette='Set2', dodge=False)
        plt.legend([], [], frameon=False)
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _save_current('happiness_by_group.png')

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if has_group:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # Overlay the pooled linear fit so the figure matches its title and
        # file name; the scatter call alone draws no regression line.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _save_current('happiness_vs_habits_regression.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole pipeline: load, prepare, describe, model and plot.

    *args* must provide the attributes ``data``, ``outdir`` and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    # NOTE(review): the 0-habit sample is drawn from all rows, so it presumably
    # includes Control participants as well; confirm that is intended.
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if len(control) > 1 and len(intervention) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the pipeline.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    main(parser.parse_args())
|
||||
270
dot_config/private_Code/User/History/7da6e0fb/enQE.py
Normal file
270
dot_config/private_Code/User/History/7da6e0fb/enQE.py
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Works on a copy, so the caller's DataFrame is left untouched. The
    returned frame has:

    * ``Group``: stripped and title-cased; set to ``'Intervention'`` for
      every row when the column is absent;
    * the three ``*_Adherence`` columns mapped to True/False (yes/no or
      true/false, case-insensitive); any other value becomes missing;
    * ``Habits_Count``: number of habits marked True in the row (missing
      counts as not done);
    * ``Happiness`` coerced to numeric, with non-numeric rows dropped.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy first: every step below assigns columns, which would otherwise
    # leak into the caller's DataFrame.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truthy = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truthy)

    # Count habits per row. Comparing against True treats missing values as
    # "not done" without a fillna on object-dtype columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and habit/happiness correlations.

    Reads the ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns of *df*, plus ``Group`` when present. Per-habit correlations use
    only rows whose ``Group`` is ``'Intervention'`` when that column exists.
    Everything is written to stdout; nothing is returned.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    has_group = 'Group' in df.columns
    by_habits = df.groupby('Habits_Count')['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in habit_cols:
        observed = habit_df.loc[habit_df[habit].notna()]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        if len(observed) < 2:
            # scipy rejects samples with fewer than two observations.
            print(f'{habit:22} (insufficient data)')
            continue
        r, p = stats.pointbiserialr(observed[habit].astype(int), observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Accepts any 1-D array-like (pandas Series, NumPy array, list). NaN values
    are dropped first so the sample sizes match the values actually used.
    Returns NaN when either sample is empty or when there are no degrees of
    freedom (two or fewer observations in total).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    dof = nx + ny - 2
    if nx == 0 or ny == 0 or dof <= 0:
        return np.nan

    mean_x, mean_y = x.mean(), y.mean()
    # Pooled variance = within-group sum of squares / dof. This equals the
    # (n - 1) * var(ddof=1) form but stays defined for a one-element group.
    within_ss = ((x - mean_x) ** 2).sum() + ((y - mean_y) ** 2).sum()
    pooled_sd = np.sqrt(within_ss / dof)

    # A zero pooled SD yields +/-inf (means differ) or NaN (means equal);
    # silence NumPy's warning and hand that value to the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        return (mean_x - mean_y) / pooled_sd
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit and print an OLS regression of Happiness on Habits_Count.

    When a ``Group`` column with at least two distinct values is present, the
    group is added as a categorical covariate; otherwise a simple regression
    is fitted. Returns the fitted statsmodels results object.
    """
    # A single-level Group has nothing to compare, so only add the covariate
    # (and say so in the header) when there are at least two groups.
    has_groups = 'Group' in df.columns and df['Group'].nunique() > 1
    if has_groups:
        model = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        print('\nOLS regression: Happiness ~ Habits_Count + Group')
    else:
        X = sm.add_constant(df['Habits_Count'])
        y = df['Happiness']
        model = sm.OLS(y, X).fit()
        print('\nSimple OLS regression: Happiness ~ Habits_Count')
    print(model.summary())
    return model
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit and print a mixed-effects model with a random intercept per participant.

    Returns the fitted results, or None (after logging a warning) when
    building, fitting or summarising the model raises.
    """
    formula = 'Happiness ~ Habits_Count'
    try:
        mixed_model = smf.mixedlm(formula, data=df, groups=df['Participant_ID'])
        result = mixed_model.fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(result.summary())
    except Exception as exc:
        # Best-effort step: a failed mixed model must not abort the analysis.
        logging.warning('MixedLM failed: %s', exc)
        return None
    return result
|
||||
|
||||
|
||||
def make_plots(df, outdir, show_plots=False):
    """Write the numbered study figures (01-07) as PNG files into *outdir*.

    The directory is created if needed. Each figure is saved at 200 dpi,
    optionally shown interactively when ``show_plots`` is true, and then
    closed. Figures 1, 2 and 7 need a ``Group`` column and figure 3 also
    needs ``Day``; they are skipped when those columns are missing. Figures
    4-6 use only rows whose ``Group`` is ``'Intervention'`` when that column
    exists.
    """
    # Local import: the file imports matplotlib only through pyplot.
    from matplotlib.ticker import PercentFormatter

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_theme(style='whitegrid', context='talk')

    has_group = 'Group' in df.columns
    group_order = ['Control', 'Intervention']
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    # Fixed seed so the jittered points land in the same place on every run.
    rng = np.random.default_rng(0)

    def finish_plot(filename):
        # Shared tail of every plot: layout, write, optionally show, close.
        plt.tight_layout()
        plt.savefig(outdir / filename, dpi=200, bbox_inches='tight')
        if show_plots:
            plt.show()
        plt.close()

    def jittered_boxplot(frame, facecolor, point_alpha, point_size):
        # One box per study group with the raw values scattered on top.
        samples = [frame.loc[frame['Group'] == group, 'Happiness'].values for group in group_order]
        plt.boxplot(samples, tick_labels=group_order, patch_artist=True,
                    boxprops=dict(facecolor=facecolor, color='#4C4C4C'),
                    medianprops=dict(color='#2A9D8F', linewidth=2),
                    whiskerprops=dict(color='#4C4C4C'), capprops=dict(color='#4C4C4C'))
        for position, values in enumerate(samples, start=1):
            xs = rng.normal(position, 0.06, size=len(values))
            plt.scatter(xs, values, color='black', alpha=point_alpha, s=point_size)

    # 1) Mean happiness by group with error bars
    if has_group:
        summary = df.groupby('Group')['Happiness'].agg(['mean', 'std', 'count']).reindex(group_order)
        # Normal-approximation 95% interval: 1.96 standard errors.
        ci95 = 1.96 * (summary['std'] / np.sqrt(summary['count']))
        plt.figure(figsize=(8, 6))
        xpos = np.arange(len(summary))
        plt.bar(xpos, summary['mean'].values, color=['#7A7A7A', '#2A9D8F'], yerr=ci95.values, capsize=6)
        plt.xticks(xpos, summary.index)
        plt.title('Average Happiness by Group')
        plt.xlabel('Study group')
        plt.ylabel('Mean happiness score')
        plt.ylim(0, 10)
        finish_plot('01_mean_happiness_by_group.png')

    # 2) Distribution of happiness by group
    if has_group:
        plt.figure(figsize=(9, 6))
        jittered_boxplot(df, facecolor='#C9D1D9', point_alpha=0.15, point_size=10)
        plt.title('Happiness Distribution by Group')
        plt.xlabel('Study group')
        plt.ylabel('Happiness score')
        plt.ylim(0, 10)
        finish_plot('02_happiness_distribution_by_group.png')

    # 3) Daily happiness trend by group
    if has_group and 'Day' in df.columns:
        daily = df.groupby(['Group', 'Day'], as_index=False)['Happiness'].mean()
        plt.figure(figsize=(10, 6))
        sns.lineplot(data=daily, x='Day', y='Happiness', hue='Group', hue_order=group_order, marker='o')
        plt.title('Mean Daily Happiness Across the Study')
        plt.xlabel('Day of study')
        plt.ylabel('Average happiness')
        plt.ylim(0, 10)
        plt.xticks(range(1, 31, 2))
        finish_plot('03_daily_happiness_trend.png')

    # 4) Happiness by number of habits in intervention group only
    intervention_df = df[df['Group'] == 'Intervention'] if has_group else df
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=intervention_df, x='Habits_Count', y='Happiness', color='#4C72B0')
    sns.stripplot(data=intervention_df, x='Habits_Count', y='Happiness', color='black', alpha=0.20, jitter=0.18, size=2)
    plt.title('Intervention Group: Happiness by Number of Habits Completed')
    plt.xlabel('Habits completed that day')
    plt.ylabel('Happiness score')
    plt.ylim(0, 10)
    finish_plot('04_happiness_by_habits_intervention.png')

    # 5) Mean happiness by habits count in intervention group
    habits_mean = intervention_df.groupby('Habits_Count', as_index=False)['Happiness'].mean()
    plt.figure(figsize=(8, 6))
    sns.lineplot(data=habits_mean, x='Habits_Count', y='Happiness', marker='o', color='#1F77B4')
    plt.title('Intervention Group: Mean Happiness vs Habits Completed')
    plt.xlabel('Number of habits completed')
    plt.ylabel('Mean happiness')
    plt.xticks([0, 1, 2, 3])
    plt.ylim(0, 10)
    finish_plot('05_mean_happiness_by_habits.png')

    # 6) Habit adherence rates in the intervention group
    # Cast to float first: the adherence columns can be object dtype
    # (True/False/missing), on which a column mean is not reliable.
    adherence_rates = (
        intervention_df[habit_cols].astype(float).mean().sort_values(ascending=False).reset_index()
    )
    adherence_rates.columns = ['Habit', 'Rate']
    adherence_rates['Habit'] = adherence_rates['Habit'].str.replace('_Adherence', '', regex=False)
    plt.figure(figsize=(9, 6))
    sns.barplot(data=adherence_rates, x='Habit', y='Rate', color='#E76F51')
    plt.title('Intervention Group: Habit Completion Rate')
    plt.xlabel('Habit')
    plt.ylabel('Proportion completed')
    plt.ylim(0, 1)
    plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0))
    finish_plot('06_habit_completion_rate.png')

    # 7) Participant average happiness by group
    if has_group:
        plt.figure(figsize=(12, 6))
        participant_avg = df.groupby(['Group', 'Participant_ID'], as_index=False)['Happiness'].mean()
        jittered_boxplot(participant_avg, facecolor='#D6D6D6', point_alpha=0.45, point_size=22)
        plt.title('Average Happiness per Participant')
        plt.xlabel('Study group')
        plt.ylabel('Participant mean happiness')
        plt.ylim(0, 10)
        finish_plot('07_participant_average_happiness.png')

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the whole pipeline: load, prepare, describe, model and plot.

    *args* must provide the attributes ``data``, ``outdir`` and ``show``.
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    # NOTE(review): the 0-habit sample is drawn from all rows, so it presumably
    # includes Control participants as well; confirm that is intended.
    none_done = happiness[df['Habits_Count'] == 0]
    all_done = happiness[df['Habits_Count'] == 3]
    if len(none_done) > 1 and len(all_done) > 1:
        d = cohen_d(all_done, none_done)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if len(control) > 1 and len(intervention) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the pipeline.
    parser = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    parser.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    parser.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    parser.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    main(parser.parse_args())
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"version":1,"resource":"file:///home/breadway/Documents/Year%2010/Year%2010/Psychology/Data%20Analysis.py","entries":[{"id":"SA9R.py","source":"Chat Edit: 'improve on this analysis script'","timestamp":1774345116327},{"id":"ycv3.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345356264},{"id":"bwYb.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345411358},{"id":"Gx76.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. 
the control group will not do any of these.'","timestamp":1774345436946},{"id":"FOyN.py","source":"Chat Edit: 'improve data gen.py to add a second dataset as a control. for context, the study tracks the affects of being organised on how happy participants feel. there needs to be a control group that is only recording their happiness daily. the main group will try to record their happiness, will add all events to their calendar, be on time to every event, and clean their bedroom everyday. they report if they do any of these in the study data as a yes or no. the control group will not do any of these.'","timestamp":1774345501736},{"id":"MtI5.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346145201},{"id":"Ldgu.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346200970},{"id":"NtsI.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346222014},{"id":"enQE.py","source":"Chat Edit: 'make the graphs better suited to the study, easier to read, and more graphs.'","timestamp":1774346258056},{"id":"yfjL.py","timestamp":1774346751804},{"id":"9KVj.py","source":"Chat Edit: 'ensure the graphs being used are appropriate for the study'","timestamp":1774346803522}]}
|
||||
227
dot_config/private_Code/User/History/7da6e0fb/ycv3.py
Normal file
227
dot_config/private_Code/User/History/7da6e0fb/ycv3.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has."""
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    Works on a copy, so the caller's DataFrame is left untouched. The
    returned frame has:

    * ``Group``: stripped and title-cased; set to ``'Intervention'`` for
      every row when the column is absent;
    * the three ``*_Adherence`` columns mapped to True/False (yes/no or
      true/false, case-insensitive); any other value becomes missing;
    * ``Habits_Count``: number of habits marked True in the row (missing
      counts as not done);
    * ``Happiness`` coerced to numeric, with non-numeric rows dropped.

    Raises:
        KeyError: if a required column is missing.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_cols}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy first: every step below assigns columns, which would otherwise
    # leak into the caller's DataFrame.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truthy = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_cols:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truthy)

    # Count habits per row. Comparing against True treats missing values as
    # "not done" without a fillna on object-dtype columns.
    df['Habits_Count'] = df[habit_cols].eq(True).sum(axis=1)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and habit/happiness correlations.

    Reads the ``Happiness``, ``Habits_Count`` and the three ``*_Adherence``
    columns of *df*, plus ``Group`` when present. Per-habit correlations use
    only rows whose ``Group`` is ``'Intervention'`` when that column exists.
    Everything is written to stdout; nothing is returned.
    """
    habit_cols = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']
    has_group = 'Group' in df.columns
    by_habits = df.groupby('Habits_Count')['Happiness']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if has_group:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(by_habits.agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(by_habits.median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if has_group else df
    for habit in habit_cols:
        observed = habit_df.loc[habit_df[habit].notna()]
        if observed.empty:
            print(f'{habit:22} (no data)')
            continue
        if len(observed) < 2:
            # scipy rejects samples with fewer than two observations.
            print(f'{habit:22} (insufficient data)')
            continue
        r, p = stats.pointbiserialr(observed[habit].astype(int), observed['Happiness'])
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Args:
        x: First sample; any 1-D sequence of numbers (list, ndarray, Series).
        y: Second sample, same kinds accepted.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float. ``nan`` is returned
        when the effect size is undefined: either sample has fewer than two
        non-missing values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    # Drop missing values first so the sample sizes used as pooling weights
    # match the observations that actually enter the variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    pooled_sd = float(np.sqrt(pooled_var))
    if pooled_sd == 0.0:
        # Both samples are constant; the standardised difference is undefined.
        return float('nan')
    return float((x.mean() - y.mean()) / pooled_sd)
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an ordinary least squares model of Happiness and print its summary.

    With a ``Group`` column the model is ``Happiness ~ Habits_Count + C(Group)``;
    without one it is a simple regression on ``Habits_Count`` with an intercept.

    Args:
        df: DataFrame holding ``Happiness``, ``Habits_Count`` and optionally
            ``Group``.

    Returns:
        The fitted statsmodels results object.
    """
    grouped = 'Group' in df.columns
    if not grouped:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        print('\nSimple OLS regression: Happiness ~ Habits_Count')
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        print('\nOLS regression: Happiness ~ Habits_Count + Group')

    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by ``Participant_ID``
    and fitted by maximum likelihood (``reml=False``). The step is
    best-effort: any failure is logged as a warning instead of propagating.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` columns.

    Returns:
        The fitted results object, or ``None`` if fitting or printing failed.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def _save_current_figure(path, show_plots):
    """Tighten, save to *path*, optionally display, then close the current figure."""
    plt.tight_layout()
    plt.savefig(path)
    if show_plots:
        plt.show()
    plt.close()


def make_plots(df, outdir, show_plots=False):
    """Render the study figures as PNG files inside *outdir*.

    Figures written: a boxplot and a violin/strip plot of Happiness by
    Habits_Count, a sorted bar chart of mean Happiness per participant, a
    bar chart of mean Happiness by group (only when ``Group`` exists), and a
    scatter of Happiness against Habits_Count with a linear fit.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count``, ``Participant_ID``
            and optionally ``Group``.
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown interactively
            after it has been saved.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    # NOTE(review): recent seaborn releases deprecate `palette` without `hue`;
    # consider switching these calls to a single `color=`.

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(x='Habits_Count', y='Happiness', data=df, palette='viridis')
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _save_current_figure(outdir / 'happiness_by_habits_box.png', show_plots)

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(x='Habits_Count', y='Happiness', data=df, inner=None, palette='muted')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _save_current_figure(outdir / 'happiness_by_habits_violin.png', show_plots)

    # Participant average bar, with the overall mean as a dashed reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    plt.figure(figsize=(12, 5))
    sns.barplot(x=participant_avg.index.astype(str), y=participant_avg.values, palette='coolwarm')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _save_current_figure(outdir / 'participant_avg_happiness.png', show_plots)

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', palette='Set2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _save_current_figure(outdir / 'happiness_by_group.png', show_plots)

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so overlay the fit line without
        # re-drawing the points that scatterplot already coloured by group.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _save_current_figure(outdir / 'happiness_vs_habits_regression.png', show_plots)

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the full analysis: load, clean, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    no_habits = happiness[df['Habits_Count'] == 0]
    all_habits = happiness[df['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        d = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: collect the three options and run the analysis.
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    cli.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    main(cli.parse_args())
|
||||
227
dot_config/private_Code/User/History/7da6e0fb/yfjL.py
Normal file
227
dot_config/private_Code/User/History/7da6e0fb/yfjL.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.formula.api as smf
|
||||
|
||||
|
||||
# Configure the root logger once at import time: INFO and above, printed as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_data(path):
    """Read the CSV at *path* into a DataFrame and log how many rows it has.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    row_count = len(frame)
    logging.info("Loaded %d rows from %s", row_count, path)
    return frame
|
||||
|
||||
|
||||
def prepare_data(df):
    """Validate and normalise the raw study data for analysis.

    The input frame is copied first, so the caller's DataFrame is never
    modified.

    Steps:
        * require the participant, happiness and three adherence columns;
        * add ``Group`` (all ``'Intervention'``) when absent, then strip and
          title-case its labels;
        * map adherence text (yes/no/true/false, case-insensitive) to
          booleans, leaving anything else as missing;
        * add ``Habits_Count``, the number of adherence columns that are
          true on each row (missing counts as not completed);
        * coerce ``Happiness`` to numeric and drop rows where that fails.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If any required column is missing.
    """
    habit_columns = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    # Ensure required columns exist
    required = {'Participant_ID', 'Happiness', *habit_columns}
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a private copy so column assignments below do not leak back
    # into the frame the caller passed in.
    df = df.copy()

    if 'Group' not in df.columns:
        df['Group'] = 'Intervention'
    df['Group'] = df['Group'].astype(str).str.strip().str.title()

    # Normalize adherence to boolean (Yes/No or True/False)
    truth_table = {'yes': True, 'no': False, 'true': True, 'false': False}
    for col in habit_columns:
        df[col] = df[col].astype(str).str.strip().str.lower().map(truth_table)

    # Count habits per row. Comparing against True treats missing values as
    # "not completed" without a fillna on object-dtype columns.
    df['Habits_Count'] = df[habit_columns].eq(True).sum(axis=1).astype(int)

    # Coerce Happiness to numeric and drop rows without Happiness
    df['Happiness'] = pd.to_numeric(df['Happiness'], errors='coerce')
    before = len(df)
    df = df.dropna(subset=['Happiness'])
    logging.info('Dropped %d rows without numeric Happiness', before - len(df))

    return df
|
||||
|
||||
|
||||
def descriptive_stats(df):
    """Print descriptive statistics and simple correlations to stdout.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and the three
            ``*_Adherence`` columns. ``Group`` is optional; when present the
            per-habit correlations are restricted to rows whose group is
            ``'Intervention'``.

    Returns:
        None. All results are printed.
    """
    habit_columns = ['Calendar_Adherence', 'Cleanliness_Adherence', 'Punctuality_Adherence']

    print('Dataset shape:', df.shape)
    print('\nOverall summary:')
    print(df['Happiness'].describe())

    if 'Group' in df.columns:
        print('\nRows by group:')
        print(df['Group'].value_counts())

        print('\nAverage happiness by group:')
        print(df.groupby('Group')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nAverage happiness by number of habits completed:')
    print(df.groupby('Habits_Count')['Happiness'].agg(['mean', 'count', 'std']).round(3))

    print('\nMedian happiness by habits:')
    print(df.groupby('Habits_Count')['Happiness'].median())

    # Correlations
    print('\nPearson correlation between Habits_Count and Happiness:')
    print(df[['Habits_Count', 'Happiness']].corr().round(3))

    print('\nPoint-biserial correlation (each habit vs happiness, intervention group only):')
    habit_df = df[df['Group'] == 'Intervention'] if 'Group' in df.columns else df
    for habit in habit_columns:
        observed = habit_df[habit].notna()
        n_observed = int(observed.sum())
        if n_observed == 0:
            print(f'{habit:22} (no data)')
            continue
        if n_observed < 2:
            # A correlation needs at least two points; pointbiserialr raises
            # on shorter input, which would abort the whole report.
            print(f'{habit:22} (insufficient data)')
            continue
        r, p = stats.pointbiserialr(
            habit_df.loc[observed, habit].astype(int),
            habit_df.loc[observed, 'Happiness'],
        )
        print(f"{habit:22} r = {r:.3f} p = {p:.4f}")
|
||||
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Args:
        x: First sample; any 1-D sequence of numbers (list, ndarray, Series).
        y: Second sample, same kinds accepted.

    Returns:
        ``(mean(x) - mean(y)) / pooled_sd`` as a float. ``nan`` is returned
        when the effect size is undefined: either sample has fewer than two
        non-missing values, or the pooled standard deviation is zero.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    # Drop missing values first so the sample sizes used as pooling weights
    # match the observations that actually enter the variances.
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    nx, ny = x.size, y.size
    if nx < 2 or ny < 2:
        return float('nan')

    dof = nx + ny - 2
    pooled_var = ((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof
    pooled_sd = float(np.sqrt(pooled_var))
    if pooled_sd == 0.0:
        # Both samples are constant; the standardised difference is undefined.
        return float('nan')
    return float((x.mean() - y.mean()) / pooled_sd)
|
||||
|
||||
|
||||
def run_ols(df):
    """Fit an ordinary least squares model of Happiness and print its summary.

    With a ``Group`` column the model is ``Happiness ~ Habits_Count + C(Group)``;
    without one it is a simple regression on ``Habits_Count`` with an intercept.

    Args:
        df: DataFrame holding ``Happiness``, ``Habits_Count`` and optionally
            ``Group``.

    Returns:
        The fitted statsmodels results object.
    """
    grouped = 'Group' in df.columns
    if not grouped:
        design = sm.add_constant(df['Habits_Count'])
        response = df['Happiness']
        fitted = sm.OLS(response, design).fit()
        print('\nSimple OLS regression: Happiness ~ Habits_Count')
    else:
        fitted = smf.ols('Happiness ~ Habits_Count + C(Group)', data=df).fit()
        print('\nOLS regression: Happiness ~ Habits_Count + Group')

    print(fitted.summary())
    return fitted
|
||||
|
||||
|
||||
def run_mixedlm(df):
    """Fit a mixed-effects model with a random intercept per participant.

    The model is ``Happiness ~ Habits_Count`` grouped by ``Participant_ID``
    and fitted by maximum likelihood (``reml=False``). The step is
    best-effort: any failure is logged as a warning instead of propagating.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count`` and
            ``Participant_ID`` columns.

    Returns:
        The fitted results object, or ``None`` if fitting or printing failed.
    """
    try:
        fitted = smf.mixedlm(
            'Happiness ~ Habits_Count',
            data=df,
            groups=df['Participant_ID'],
        ).fit(reml=False)
        print('\nMixed-effects model (random intercept by Participant_ID):')
        print(fitted.summary())
    except Exception as err:
        logging.warning('MixedLM failed: %s', err)
        return None
    return fitted
|
||||
|
||||
|
||||
def _save_current_figure(path, show_plots):
    """Tighten, save to *path*, optionally display, then close the current figure."""
    plt.tight_layout()
    plt.savefig(path)
    if show_plots:
        plt.show()
    plt.close()


def make_plots(df, outdir, show_plots=False):
    """Render the study figures as PNG files inside *outdir*.

    Figures written: a boxplot and a violin/strip plot of Happiness by
    Habits_Count, a sorted bar chart of mean Happiness per participant, a
    bar chart of mean Happiness by group (only when ``Group`` exists), and a
    scatter of Happiness against Habits_Count with a linear fit.

    Args:
        df: DataFrame with ``Happiness``, ``Habits_Count``, ``Participant_ID``
            and optionally ``Group``.
        outdir: Output directory; created (with parents) if missing.
        show_plots: When true, each figure is also shown interactively
            after it has been saved.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    sns.set_style('whitegrid')

    # Boxplot by Habits_Count
    plt.figure(figsize=(9, 6))
    sns.boxplot(data=df, x='Habits_Count', y='Happiness', color='#4C72B0')
    plt.title('Daily Happiness by Number of Habits Completed')
    plt.xlabel('Number of habits followed (0–3)')
    plt.ylabel('Happiness (1–10)')
    _save_current_figure(outdir / 'happiness_by_habits_box.png', show_plots)

    # Violin with jittered raw points on top
    plt.figure(figsize=(9, 6))
    sns.violinplot(data=df, x='Habits_Count', y='Happiness', inner=None, color='#55A868')
    sns.stripplot(x='Habits_Count', y='Happiness', data=df, color='k', alpha=0.3, jitter=0.15)
    plt.title('Happiness distribution by Habits Completed')
    _save_current_figure(outdir / 'happiness_by_habits_violin.png', show_plots)

    # Participant average bar, with the overall mean as a dashed reference line
    participant_avg = df.groupby('Participant_ID')['Happiness'].mean().sort_values()
    positions = range(len(participant_avg))
    plt.figure(figsize=(12, 5))
    sns.barplot(x=positions, y=participant_avg.values, color='#C44E52')
    plt.axhline(df['Happiness'].mean(), color='black', linestyle='--', alpha=0.6)
    # Bars are drawn at integer positions; relabel them with participant IDs.
    plt.xticks(positions, participant_avg.index, rotation=45)
    plt.title('Average Happiness per Participant (sorted)')
    _save_current_figure(outdir / 'participant_avg_happiness.png', show_plots)

    if 'Group' in df.columns:
        plt.figure(figsize=(7, 5))
        sns.barplot(data=df, x='Group', y='Happiness', estimator='mean', errorbar='sd', color='#8172B2')
        plt.title('Mean Happiness by Group')
        plt.ylabel('Average happiness')
        _save_current_figure(outdir / 'happiness_by_group.png', show_plots)

    # Scatter with linear fit
    plt.figure(figsize=(9, 6))
    if 'Group' in df.columns:
        sns.scatterplot(data=df, x='Habits_Count', y='Happiness', hue='Group', alpha=0.35)
        # The title promises a linear fit, so overlay the fit line without
        # re-drawing the points that scatterplot already coloured by group.
        sns.regplot(data=df, x='Habits_Count', y='Happiness', scatter=False, color='black')
    else:
        sns.regplot(x='Habits_Count', y='Happiness', data=df, x_jitter=0.18, scatter_kws={'alpha': 0.4})
    plt.title('Happiness vs Number of Habits Completed (with linear fit)')
    _save_current_figure(outdir / 'happiness_vs_habits_regression.png', show_plots)

    logging.info('Saved plots to %s', outdir)
|
||||
|
||||
|
||||
def main(args):
    """Run the full analysis: load, clean, describe, model and plot.

    Args:
        args: Parsed command-line namespace providing ``data`` (CSV path),
            ``outdir`` (plot directory) and ``show`` (display plots flag).
    """
    df = prepare_data(load_data(args.data))

    descriptive_stats(df)

    # Effect sizes
    happiness = df['Happiness']
    no_habits = happiness[df['Habits_Count'] == 0]
    all_habits = happiness[df['Habits_Count'] == 3]
    if min(len(no_habits), len(all_habits)) > 1:
        d = cohen_d(all_habits, no_habits)
        print(f"\nCohen's d (3 habits vs 0 habits) = {d:.3f}")

    if 'Group' in df.columns:
        control = happiness[df['Group'] == 'Control']
        intervention = happiness[df['Group'] == 'Intervention']
        if min(len(control), len(intervention)) > 1:
            d_group = cohen_d(intervention, control)
            print(f"Cohen's d (Intervention vs Control happiness) = {d_group:.3f}")

    # Models
    run_ols(df)
    run_mixedlm(df)

    # Plots
    make_plots(df, args.outdir, show_plots=args.show)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: collect the three options and run the analysis.
    cli = argparse.ArgumentParser(
        description='Improved data analysis for organization_happiness_study_data.csv',
    )
    cli.add_argument(
        '--data',
        type=str,
        default='organization_happiness_study_data.csv',
        help='CSV data path',
    )
    cli.add_argument(
        '--outdir',
        type=str,
        default='plots',
        help='Directory to save plots',
    )
    cli.add_argument(
        '--show',
        action='store_true',
        help='Show plots interactively',
    )
    main(cli.parse_args())
|
||||
Loading…
Add table
Add a link
Reference in a new issue