Skewness and Kurtosis#
Central Moments#
In modern data analysis we are sometimes interested in high-order moments. Here we consider two useful quantities: skewness and kurtosis.
Definition 122 (Central Moments)
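For a random variable $X$ with PDF $f_X(x)$, mean $\mu = \mathbb{E}[X]$ and variance $\sigma^2 = \mathbb{E}[(X-\mu)^2]$, define the following central moments:

- Skewness: $\gamma = \mathbb{E}\left[\left(\dfrac{X-\mu}{\sigma}\right)^{3}\right]$
- Kurtosis: $\kappa = \mathbb{E}\left[\left(\dfrac{X-\mu}{\sigma}\right)^{4}\right]$
- Excess kurtosis: $\kappa - 3$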
As you can see from the definitions above, skewness is the third (standardized) central moment, whereas kurtosis is the fourth (standardized) central moment. Both skewness and kurtosis can be regarded as “deviations” from a standard Gaussian - not in terms of mean and variance, but in terms of shape [Chan, 2021].
Skewness#
1import numpy as np
2import matplotlib.pyplot as plt
3from scipy import stats
4from typing import List
5
def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
    """Generate skewed data using a beta distribution.

    Only the *sign* of ``skewness`` is used; its magnitude is ignored.

    Args:
        size: Number of samples to draw.
        skewness: Positive selects Beta(2, 5) (right tail longer), negative
            selects Beta(5, 2) (left tail longer), anything else selects the
            symmetric Beta(5, 5).

    Returns:
        Array of ``size`` samples in the interval [0, 1], drawn from NumPy's
        global random state (so ``np.random.seed`` makes it reproducible).
    """
    if skewness > 0:
        a, b = 2, 5
    elif skewness < 0:
        a, b = 5, 2
    else:
        a = b = 5
    return np.random.beta(a, b, size)
15
def plot_distributions(data: List[np.ndarray], labels: List[str]) -> None:
    """Plot the distributions on the same graph with KDE.

    For every dataset a density-normalised histogram and a Gaussian KDE curve
    are drawn in the same colour; the legend entry shows the sample skewness.
    The figure is displayed immediately via ``plt.show()``.

    Args:
        data: Sample arrays to plot. The KDE is evaluated on [0, 1], which
            matches beta-distributed samples.
        labels: Legend label for each entry of ``data`` (paired by position;
            extra entries on either side are ignored by ``zip``).
    """
    plt.figure(figsize=(12, 6))
    colors = ['red', 'blue', 'green']
    # The evaluation grid does not depend on the dataset, so build it once.
    x_range = np.linspace(0, 1, 1000)

    for idx, (d, label) in enumerate(zip(data, labels)):
        # Cycle through the palette so a fourth (or later) dataset is still
        # drawn instead of being silently dropped.
        color = colors[idx % len(colors)]
        skewness = stats.skew(d)
        kde = stats.gaussian_kde(d)

        plt.hist(d, bins=30, density=True, alpha=0.3, color=color, edgecolor='black')
        plt.plot(x_range, kde(x_range), color=color, label=f'{label} (Skewness: {skewness:.2f})')

    plt.title('Comparison of Skewed Distributions')
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
36
def main() -> None:
    """Draw three beta samples (right-skewed, left-skewed, symmetric) and plot them."""
    np.random.seed(42)  # For reproducibility
    size = 10000

    positive_skew = generate_skewed_data(size, 1)
    negative_skew = generate_skewed_data(size, -1)
    symmetric = generate_skewed_data(size, 0)

    data = [positive_skew, negative_skew, symmetric]
    labels = ['Positive Skewness', 'Negative Skewness', 'Symmetric (No Skewness)']

    plot_distributions(data, labels)
49
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Kurtosis#
1import numpy as np
2import matplotlib.pyplot as plt
3from scipy import stats
4from typing import List, Tuple
5
def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
    """Generate skewed data using a beta distribution.

    Only the *sign* of ``skewness`` is used; its magnitude is ignored.

    Args:
        size: Number of samples to draw.
        skewness: Positive selects Beta(2, 5) (right tail longer), negative
            selects Beta(5, 2) (left tail longer), anything else selects the
            symmetric Beta(5, 5).

    Returns:
        Array of ``size`` samples in the interval [0, 1], drawn from NumPy's
        global random state (so ``np.random.seed`` makes it reproducible).
    """
    if skewness > 0:
        a, b = 2, 5
    elif skewness < 0:
        a, b = 5, 2
    else:
        a = b = 5
    return np.random.beta(a, b, size)
15
def generate_kurtosis_data(size: int, df: float) -> np.ndarray:
    """Generate data with different kurtosis using Student's t-distribution.

    Args:
        size: Number of samples to draw.
        df: Degrees of freedom of the t-distribution. Smaller values give
            heavier tails (higher kurtosis); large values approach a normal.

    Returns:
        Array of ``size`` samples from a standard Student's t-distribution.
    """
    return stats.t.rvs(df, size=size)
19
def plot_distributions(data: List[np.ndarray], labels: List[str], title: str, show_histogram: bool = True) -> None:
    """Plot the distributions on the same graph with KDE.

    A new figure is created and, for every dataset, a Gaussian KDE curve is
    drawn over that dataset's own [min, max] range. The legend entry reports
    the sample skewness and kurtosis. This function does not call
    ``plt.show()``; the caller decides when to display the figure(s).

    Args:
        data: Sample arrays to plot.
        labels: Legend label for each entry of ``data`` (paired by position;
            extra entries on either side are ignored by ``zip``).
        title: Figure title.
        show_histogram: When true, also draw a density-normalised histogram
            behind each KDE curve.
    """
    plt.figure(figsize=(12, 6))
    colors = ['red', 'blue', 'green']

    for idx, (d, label) in enumerate(zip(data, labels)):
        # Cycle through the palette so a fourth (or later) dataset is still
        # drawn instead of being silently dropped.
        color = colors[idx % len(colors)]
        skewness = stats.skew(d)
        # NOTE: scipy's default is Fisher's definition, i.e. *excess*
        # kurtosis (approximately 0 for normally distributed data).
        kurtosis = stats.kurtosis(d)
        kde = stats.gaussian_kde(d)
        x_range = np.linspace(d.min(), d.max(), 1000)

        if show_histogram:
            plt.hist(d, bins=30, density=True, alpha=0.3, color=color, edgecolor='black')
        plt.plot(x_range, kde(x_range), color=color, label=f'{label}\n(Skewness: {skewness:.2f}, Kurtosis: {kurtosis:.2f})')

    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
41
def main() -> None:
    """Build one skewness figure and one kurtosis figure, then display both."""
    np.random.seed(42)  # For reproducibility
    size = 10000

    # Skewness plot
    positive_skew = generate_skewed_data(size, 1)
    negative_skew = generate_skewed_data(size, -1)
    symmetric = generate_skewed_data(size, 0)

    skew_data = [positive_skew, negative_skew, symmetric]
    skew_labels = ['Positive Skewness', 'Negative Skewness', 'Symmetric (No Skewness)']

    plot_distributions(skew_data, skew_labels, 'Comparison of Skewed Distributions')

    # Kurtosis plot
    leptokurtic = generate_kurtosis_data(size, df=5)  # Heavy tails: high kurtosis
    mesokurtic = generate_kurtosis_data(size, df=30)  # Approximately normal kurtosis
    # Uniform on [-sqrt(3), sqrt(3)] has unit variance and light tails: low kurtosis
    platykurtic = np.random.uniform(-np.sqrt(3), np.sqrt(3), size)

    kurtosis_data = [leptokurtic, mesokurtic, platykurtic]
    kurtosis_labels = ['Leptokurtic', 'Mesokurtic', 'Platykurtic']

    plot_distributions(kurtosis_data, kurtosis_labels, 'Comparison of Kurtosis', show_histogram=False)

    # plot_distributions only builds the figures; show them all at once here.
    plt.show()
67
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
References and Further Readings#
Chan, Stanley H. “Chapter 4.6.3 Skewness and kurtosis.” In Introduction to Probability for Data Science, 216-220. Ann Arbor, Michigan: Michigan Publishing Services, 2021.