我正在尝试从零开始(from scratch)编写偏度(skewness)的计算,但得到的结果与 pandas 的 `Series.skew()` 方法以及 `scipy.stats.skew` 函数给出的值不一致。
我已经查看了 scipy.stats 的源代码(原帖此处附有链接),但仍然找不出我遗漏了什么。
from math import sqrt
from scipy import stats
import pandas as pd
def mean(values):
    """Return the arithmetic mean of ``values``.

    ``values`` must work with both ``sum()`` and ``len()``. An empty
    sequence raises ``ZeroDivisionError`` from the division.
    """
    total = sum(values)
    count = len(values)
    return total / count
def standard_dev(values):
    """Return the population standard deviation of ``values``.

    The squared deviations from the mean are summed and divided by
    ``len(values)`` (not ``len(values) - 1``) before taking the square root.
    """
    centre = mean(values)
    squared_devs = [(item - centre) ** 2 for item in values]
    # Accumulate left to right from 0 in an explicit loop so the
    # floating-point result does not depend on how sum() adds floats.
    sum_sq = 0
    for sq in squared_devs:
        sum_sq += sq
    return sqrt(sum_sq / len(values))
def skewness(values):
    """Attempt to compute the adjusted sample skewness of ``values``.

    NOTE: the cubed deviations are only summed; the sum is never divided
    by ``len(values)``. The numerator is therefore a raw sum rather than
    the third central moment, and the result is too large by a factor of
    ``len(values)`` compared with ``m3 / m2 ** 1.5``.
    """
    count = len(values)
    centre = mean(values)
    cubed_sum = 0
    for item in values:
        cubed_sum += (item - centre) ** 3
    # Sample-size adjustment factor sqrt(n * (n - 1)) / (n - 2).
    adjustment = sqrt(count * (count - 1)) / (count - 2)
    return adjustment * (cubed_sum / standard_dev(values) ** 3)
# Sample data used for every comparison in this snippet.
values = [1,1,1,2,2,3,3,3,4,4,5]
# Results from the hand-written functions; each comment records the printed output.
print(f'mean{mean(values)}')
# mean2.6363636363636362
print(f'standard_dev{standard_dev(values)}')
# standard_dev1.2984415324623364
print(f'skewness{skewness(values)}')
# skewness2.5341000098031734
# Reference values from pandas and scipy on the same data; each comment
# records the value of the expression on the line above it.
a = pd.Series(values)
a.std(ddof=0)
# 1.2984415324623364
a.skew()
# 0.23037272816392504
stats.skew(a, bias=False)
# 0.230372728163925
stats.skew(a, bias=True)
# 0.19768660009807223

发布于 2020-12-11 22:09:03
我发现了错误。我没有对三阶矩的值进行归一化,也就是说没有除以len(values)。
下面是完整的版本:
from math import sqrt
def mean(values):
    """Return the arithmetic mean of ``values`` (their sum divided by their count).

    Raises ``ZeroDivisionError`` when ``values`` is empty.
    """
    running_total = sum(values)
    n_items = len(values)
    return running_total / n_items
def moments(values, moment):
    """Return the central moment of order ``moment`` for ``values``.

    Each deviation from the mean is raised to the power ``moment``; the
    powers are summed and the sum is divided by ``len(values)``.
    """
    centre = mean(values)
    powered = [(item - centre) ** moment for item in values]
    # Accumulate left to right from 0 in an explicit loop so the
    # floating-point result does not depend on how sum() adds floats.
    acc = 0
    for term in powered:
        acc += term
    return acc / len(values)
def skewness(values):
    """Return the sample-size-adjusted skewness of ``values``.

    Computes ``m3 / m2 ** 1.5`` from the second and third central moments
    and scales it by ``sqrt(n * (n - 1)) / (n - 2)``, where ``n`` is
    ``len(values)``.

    Raises ``ZeroDivisionError`` when ``n`` is 2 (the ``n - 2`` divisor is
    zero) or when the second central moment is zero.
    """
    count = len(values)
    second = moments(values, 2)
    third = moments(values, 3)
    adjustment = sqrt(count * (count - 1)) / (count - 2)
    unadjusted = third / second ** 1.5
    return adjustment * unadjusted
# Sample data (eleven observations) for checking the corrected functions.
values = [1,1,1,2,2, 3,3,3, 4,4, 5]
print(f'mean: {mean(values)}')
print(f'skewness: {skewness(values)}')
# Printed output:
# mean: 2.6363636363636362
# skewness: 0.23037272816392482

https://stackoverflow.com/questions/65252126
复制相似问题