meta-analysis-psych
>
Install
mkdir -p .claude/skills/meta-analysis-psych && curl -L -o skill.zip "https://agentskills.codes/api/skills/download/16056" && unzip -o skill.zip -d .claude/skills/meta-analysis-psych && rm skill.zipInstalls to .claude/skills/meta-analysis-psych
Activation
This is the description your AI agent reads to decide when to run this skill — the better it matches your request, the more reliably it fires.
Use this Skill for psychology meta-analysis: Cohen's d, Hedges' g, random-effects pooling, forest plot, publication bias (Egger, p-curve, PET-PEESE), and sensitivity analysis.About this skill
Meta-Analysis for Psychology
TL;DR — Complete meta-analysis pipeline in pure Python/NumPy: Cohen's d and Hedges' g from raw statistics, DerSimonian-Laird random-effects pooling, I² heterogeneity, forest plot with summary diamond, funnel plot, Egger test, trim-and-fill, PET-PEESE regression correction, and p-curve analysis.
When to Use
Use this Skill when you need to:
- Pool effect sizes from multiple independent studies on the same research question
- Quantify and visualize heterogeneity across studies (I², τ²)
- Detect and correct for publication bias (Egger test, trim-and-fill, PET-PEESE)
- Assess whether p-values are consistent with a true effect (p-curve analysis)
- Generate publication-quality forest and funnel plots
- Run sensitivity analyses (leave-one-out, cumulative meta-analysis)
Background
Effect Size Measures
Cohen's d (raw mean difference standardized by pooled SD):
d = (M1 - M2) / SD_pooled
Hedges' g (small-sample bias correction for d):
J = 1 - 3 / (4 * (n1 + n2 - 2) - 1)
g = J × d
Var(g) = (n1 + n2) / (n1 × n2) + g² / (2 × (n1 + n2))
Random-Effects Model (DerSimonian-Laird)
Under the random-effects model, true effect sizes θi vary:
θi = θ + ui + εi where ui ~ N(0, τ²), εi ~ N(0, vi)
Between-study variance τ² is estimated by the DerSimonian-Laird method:
Q = Σ wi(gi - g_FE)² (Cochran's Q statistic)
τ² = max(0, (Q - (k-1)) / (Σwi - Σwi²/Σwi))
wi* = 1 / (vi + τ²) (RE weights)
g_RE = Σ(wi* × gi) / Σwi*
I² = (Q - (k-1)) / Q × 100% (% of total variance due to heterogeneity)
Publication Bias Methods
| Method | Description |
|---|---|
| Funnel plot | Asymmetry suggests missing small-n null studies |
| Egger test | Regress std normal deviate on precision; intercept ≠ 0 = bias |
| Trim-and-fill | Impute mirror studies to restore funnel symmetry |
| PET-PEESE | Regress g on SE (PET) or SE² (PEESE); intercept = bias-corrected effect |
| p-curve | Distribution of p-values below .05; right-skew = real effect |
Environment Setup
conda create -n meta_env python=3.11 -y
conda activate meta_env
pip install numpy>=1.23 scipy>=1.9 pandas>=1.5 matplotlib>=3.6
# Verify
python -c "import numpy, scipy, pandas, matplotlib; print('All OK')"
Core Workflow
Step 1 — Effect Size Computation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy import stats
from typing import Optional, Dict, List, Tuple
# ── Cohen's d from various input formats ─────────────────────────────────────
def cohens_d_from_means(
m1: float, m2: float,
sd1: float, sd2: float,
n1: int, n2: int,
) -> Dict:
"""
Compute Cohen's d and Hedges' g from group means and SDs.
Args:
m1, m2: Group means.
sd1, sd2: Group standard deviations.
n1, n2: Sample sizes.
Returns:
Dict with d, g, var_g, se_g, 95% CI.
"""
# Pooled SD
sd_pooled = np.sqrt(((n1 - 1) * sd1**2 + (n2 - 1) * sd2**2) / (n1 + n2 - 2))
d = (m1 - m2) / sd_pooled
# Hedges' g correction
df_total = n1 + n2 - 2
J = 1 - 3 / (4 * df_total - 1)
g = J * d
# Variance of g
var_g = (n1 + n2) / (n1 * n2) + g**2 / (2 * (n1 + n2))
se_g = np.sqrt(var_g)
return {
"d": round(d, 4),
"g": round(g, 4),
"J": round(J, 4),
"var_g": round(var_g, 6),
"se_g": round(se_g, 4),
"ci_lo": round(g - 1.96 * se_g, 4),
"ci_hi": round(g + 1.96 * se_g, 4),
"n1": n1, "n2": n2,
}
def cohens_d_from_t(
t: float, n1: int, n2: int,
) -> Dict:
"""
Compute Cohen's d from an independent-samples t-statistic.
Args:
t: t-statistic (signed).
n1, n2: Group sample sizes.
Returns:
Dict with d, g, var_g, se_g, 95% CI.
"""
d = t * np.sqrt((n1 + n2) / (n1 * n2))
df_total = n1 + n2 - 2
J = 1 - 3 / (4 * df_total - 1)
g = J * d
var_g = (n1 + n2) / (n1 * n2) + g**2 / (2 * (n1 + n2))
se_g = np.sqrt(var_g)
return {
"d": round(d, 4), "g": round(g, 4), "J": round(J, 4),
"var_g": round(var_g, 6), "se_g": round(se_g, 4),
"ci_lo": round(g - 1.96 * se_g, 4), "ci_hi": round(g + 1.96 * se_g, 4),
"n1": n1, "n2": n2,
}
def cohens_d_from_F(
F: float, n1: int, n2: int,
) -> Dict:
"""
Compute Cohen's d from a one-df F-ratio (two-group comparison).
Args:
F: F-statistic (F = t²).
n1, n2: Group sample sizes.
Returns:
Dict with d, g, var_g, se_g, 95% CI.
"""
t = np.sqrt(F) # F = t² for one-df tests
return cohens_d_from_t(t, n1, n2)
Step 2 — Random-Effects Meta-Analysis
def run_meta_analysis(
df: pd.DataFrame,
g_col: str = "g",
var_col: str = "var_g",
study_col: str = "study",
) -> Dict:
"""
DerSimonian-Laird random-effects meta-analysis.
Args:
df: DataFrame with one row per study.
g_col: Column of Hedges' g effect sizes.
var_col: Column of effect size variances.
study_col: Column of study labels.
Returns:
Dict with pooled estimate, heterogeneity statistics, and weights.
"""
g = df[g_col].values
v = df[var_col].values
k = len(g)
# Fixed-effects weights and estimate
w_FE = 1 / v
g_FE = np.sum(w_FE * g) / np.sum(w_FE)
# Cochran's Q
Q = np.sum(w_FE * (g - g_FE) ** 2)
# DerSimonian-Laird tau²
c = np.sum(w_FE) - np.sum(w_FE ** 2) / np.sum(w_FE)
tau2 = max(0.0, (Q - (k - 1)) / c)
# I²
I2 = max(0.0, (Q - (k - 1)) / Q * 100) if Q > 0 else 0.0
# Random-effects weights and estimate
w_RE = 1 / (v + tau2)
g_RE = np.sum(w_RE * g) / np.sum(w_RE)
var_RE = 1 / np.sum(w_RE)
se_RE = np.sqrt(var_RE)
z_RE = g_RE / se_RE
p_RE = 2 * stats.norm.sf(abs(z_RE))
ci_lo = g_RE - 1.96 * se_RE
ci_hi = g_RE + 1.96 * se_RE
result = {
"k": k,
"g_RE": round(g_RE, 4),
"se_RE": round(se_RE, 4),
"ci_lo": round(ci_lo, 4),
"ci_hi": round(ci_hi, 4),
"z": round(z_RE, 3),
"p": round(p_RE, 4),
"Q": round(Q, 3),
"Q_df": k - 1,
"Q_p": round(float(stats.chi2.sf(Q, df=k - 1)), 4),
"I2": round(I2, 1),
"tau2": round(tau2, 6),
"tau": round(np.sqrt(tau2), 4),
"w_RE": w_RE,
"g_FE": round(g_FE, 4),
}
print(
f"Random-effects meta-analysis (k={k} studies):\n"
f" g = {g_RE:.3f} [{ci_lo:.3f}, {ci_hi:.3f}], p = {p_RE:.4f}\n"
f" Q({k-1}) = {Q:.2f}, p = {result['Q_p']:.4f}\n"
f" I² = {I2:.1f}%, τ = {result['tau']:.4f}\n"
f" Heterogeneity: "
+ ("low" if I2 < 25 else "moderate" if I2 < 75 else "high")
)
return result
Step 3 — Forest Plot
def forest_plot(
df: pd.DataFrame,
meta_result: Dict,
g_col: str = "g",
ci_lo_col: str = "ci_lo",
ci_hi_col: str = "ci_hi",
study_col: str = "study",
sort_by: str = "g",
output_path: Optional[str] = None,
) -> plt.Figure:
"""
Generate a forest plot with study CIs and summary diamond.
Args:
df: Study-level DataFrame (one row per study).
meta_result: Output from run_meta_analysis().
g_col: Column of effect sizes.
ci_lo_col: Column of lower 95% CIs.
ci_hi_col: Column of upper 95% CIs.
study_col: Column of study labels.
sort_by: Sort studies by 'g' (default) or 'year'.
output_path: Optional path to save figure.
Returns:
Matplotlib Figure.
"""
if sort_by == "g":
df = df.sort_values(g_col).reset_index(drop=True)
k = len(df)
g_RE = meta_result["g_RE"]
ci_lo_RE = meta_result["ci_lo"]
ci_hi_RE = meta_result["ci_hi"]
fig, ax = plt.subplots(figsize=(10, max(6, k * 0.45 + 2)))
y_positions = list(range(k, 0, -1))
# Study rows
for i, (_, row) in enumerate(df.iterrows()):
y = y_positions[i]
g = row[g_col]
lo = row[ci_lo_col]
hi = row[ci_hi_col]
ci_width = hi - lo
# Marker size proportional to inverse variance (weight)
w = 1 / (hi - lo) ** 2 if (hi - lo) > 0 else 1
ms = min(max(4, w * 0.5), 16)
ax.plot([lo, hi], [y, y], color="black", linewidth=1)
ax.plot(g, y, "s", color="steelblue", markersize=ms, zorder=5)
ax.text(-0.05 + min(df[ci_lo_col]) - 0.3, y,
row[study_col], ha="right", va="center", fontsize=8)
ax.text(max(df[ci_hi_col]) + 0.3, y,
f"{g:.2f} [{lo:.2f}, {hi:.2f}]",
ha="left", va="center", fontsize=7, color="dimgray")
# Summary diamond
y_summary = 0
diamond_x = [ci_lo_RE, g_RE, ci_hi_RE, g_RE]
diamond_y = [y_summary, y_summary + 0.4, y_summary, y_summary - 0.4]
ax.fill(diamond_x, diamond_y, color="crimson", alpha=0.85, zorder=6)
ax.text(max(df[ci_hi_col]) + 0.3, y_summary,
f"{g_RE:.2f} [{ci_lo_RE:.2f}, {ci_hi_RE:.2f}]",
ha="left", va="center", fontsize=8, fontweight="bold", color="crimson")
# Reference line at 0
ax.axvline(0, color="gray", linestyle="--", linewidth=1)
ax.set_yticks([])
ax.set_xlabel("Hedges' g")
ax.set_title(
f"Forest Plot — {k} Studies\n"
f"g = {g_RE:.3f}, I² = {meta_result['I2']:.1f}%, "
f"τ = {meta_result['tau']:.3f}"
)
ax.set_xlim(min(df[ci_lo_col]) - 1.0, max(df[ci_hi_col]) + 1.5)
fig.tight_layout()
if output_path:
fig.savefig(output_path, dpi=150)
plt.show()
return fig
Advanced Usage
Publication Bias: Egger Test, Trim-and-Fill, PET-PEESE
def egger_test(
df: pd.DataFrame,
g_col: str = "g",
var_col: str = "var_g",
) -> Dict:
"""
Egger's test for funnel p
---
*Content truncated.*