Implement a Two-Sample T-Test from scratch. Given two independent samples, compute the t-statistic, degrees of freedom, and p-value to test whether the two population means are significantly different. Support both equal and unequal variance (Welch's) variants.
import math
def t_test_two_sample(sample1: list, sample2: list,
equal_var: bool = False) -> dict:
n1 = len(sample1)
n2 = len(sample2)
mean1 = sum(sample1) / n1
mean2 = sum(sample2) / n2
var1 = sum((x - mean1) ** 2 for x in sample1) / (n1 - 1)
var2 = sum((x - mean2) ** 2 for x in sample2) / (n2 - 1)
if equal_var:
# Pooled variance
sp2 = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
se = math.sqrt(sp2 * (1 / n1 + 1 / n2))
df = n1 + n2 - 2
else:
# Welch's t-test
se = math.sqrt(var1 / n1 + var2 / n2)
num = (var1 / n1 + var2 / n2) ** 2
den = ((var1 / n1) ** 2 / (n1 - 1) +
(var2 / n2) ** 2 / (n2 - 1))
df = num / den if den > 0 else n1 + n2 - 2
t_stat = (mean1 - mean2) / se if se > 0 else 0.0
# Compute p-value using regularized incomplete beta function
p_value = _two_tailed_p(t_stat, df)
return {
"t_statistic": round(t_stat, 6),
"degrees_of_freedom": round(df, 4),
"p_value": round(p_value, 6),
"mean1": mean1,
"mean2": mean2,
}
def _two_tailed_p(t, df):
x = df / (df + t * t)
p = _regularized_beta(x, df / 2.0, 0.5)
return p
def _regularized_beta(x, a, b, n_iter=200):
"""Compute regularized incomplete beta function I_x(a, b)."""
if x <= 0:
return 0.0
if x >= 1:
return 1.0
# Use continued fraction (Lentz's method)
ln_prefactor = (a * math.log(x) + b * math.log(1 - x) -
math.log(a) - _log_beta(a, b))
prefactor = math.exp(ln_prefactor)
# Continued fraction for I_x(a, b)
f = 1.0
c = 1.0
d = 1.0 - (a + b) * x / (a + 1)
if abs(d) < 1e-30:
d = 1e-30
d = 1.0 / d
f = d
for m in range(1, n_iter + 1):
# Even step
num = m * (b - m) * x / ((a + 2 * m - 1) * (a + 2 * m))
d = 1.0 + num * d
if abs(d) < 1e-30:
d = 1e-30
c = 1.0 + num / c
if abs(c) < 1e-30:
c = 1e-30
d = 1.0 / d
f *= c * d
# Odd step
num = -(a + m) * (a + b + m) * x / ((a + 2 * m) * (a + 2 * m + 1))
d = 1.0 + num * d
if abs(d) < 1e-30:
d = 1e-30
c = 1.0 + num / c
if abs(c) < 1e-30:
c = 1e-30
d = 1.0 / d
delta = c * d
f *= delta
if abs(delta - 1.0) < 1e-10:
break
return prefactor * f * a
def _log_beta(a, b):
return math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)sp^2 and df = n1 + n2 - 2.t = (mean1 - mean2) / SE.