Skewness and Kurtosis#

Central Moments#

In modern data analysis we are sometimes interested in high-order moments. Here we consider two useful quantities: skewness and kurtosis.

Definition 122 (Central Moments)

For a random variable $X$ with PDF $f_X(x)$, define the following central moments as

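$$
\begin{aligned}
\text{mean} &= \mathbb{E}[X] \overset{\text{def}}{=} \mu, \\
\text{variance} &= \mathbb{E}\left[(X - \mu)^2\right] \overset{\text{def}}{=} \sigma^2, \\
\text{skewness} &= \mathbb{E}\left[\left(\frac{X - \mu}{\sigma}\right)^3\right] \overset{\text{def}}{=} \gamma, \\
\text{kurtosis} &= \mathbb{E}\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] \overset{\text{def}}{=} \kappa, \\
\text{excess kurtosis} &\overset{\text{def}}{=} \kappa - 3.
\end{aligned}
$$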

As you can see from the definitions above, skewness is the third standardized central moment, whereas kurtosis is the fourth standardized central moment; "standardized" means that the deviation $X - \mu$ is divided by $\sigma$ before the power is taken. Both skewness and kurtosis can be regarded as “deviations” from a standard Gaussian - not in terms of mean and variance but in terms of shape [Chan, 2021].

Skewness#

 1import numpy as np
 2import matplotlib.pyplot as plt
 3from scipy import stats
 4from typing import List
 5
 6def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
 7    """Generate skewed data using a beta distribution."""
 8    if skewness > 0:
 9        a, b = 2, 5
10    elif skewness < 0:
11        a, b = 5, 2
12    else:
13        a = b = 5
14    return np.random.beta(a, b, size)
15
def plot_distributions(data: List[np.ndarray], labels: List[str]) -> None:
    """Overlay a histogram and a Gaussian KDE curve per dataset, then show the figure.

    Datasets are paired positionally with ``labels`` and with a fixed
    red/blue/green palette, so at most three datasets are drawn. Each legend
    entry reports the sample skewness of its dataset. KDE curves are
    evaluated on the interval [0, 1].
    """
    palette = ['red', 'blue', 'green']
    # Every curve is evaluated on the same grid, so build it once.
    grid = np.linspace(0, 1, 1000)

    plt.figure(figsize=(12, 6))
    for samples, name, shade in zip(data, labels, palette):
        sample_skew = stats.skew(samples)
        density = stats.gaussian_kde(samples)
        legend_text = f'{name} (Skewness: {sample_skew:.2f})'

        plt.hist(samples, bins=30, density=True, alpha=0.3, color=shade, edgecolor='black')
        plt.plot(grid, density(grid), color=shade, label=legend_text)

    # Figure-level decorations.
    plt.title('Comparison of Skewed Distributions')
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
36
def main() -> None:
    """Sample three beta datasets with different skew directions and plot them together."""
    np.random.seed(42)  # fixed seed so the figure is reproducible
    n_samples = 10000

    # Each entry pairs a legend label with the skewness argument for the sampler.
    # Sampling follows this order: positive, negative, symmetric.
    cases = [
        ('Positive Skewness', 1),
        ('Negative Skewness', -1),
        ('Symmetric (No Skewness)', 0),
    ]
    data = [generate_skewed_data(n_samples, direction) for _, direction in cases]
    labels = [label for label, _ in cases]

    plot_distributions(data, labels)


if __name__ == "__main__":
    main()
../../_images/a824800a121859cd50e89bd026788671586ac0f43c244092026a0922c39a0ca9.svg

Kurtosis#

 1import numpy as np
 2import matplotlib.pyplot as plt
 3from scipy import stats
 4from typing import List, Tuple
 5
 6def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
 7    """Generate skewed data using a beta distribution."""
 8    if skewness > 0:
 9        a, b = 2, 5
10    elif skewness < 0:
11        a, b = 5, 2
12    else:
13        a = b = 5
14    return np.random.beta(a, b, size)
15
def generate_kurtosis_data(size: int, df: float) -> np.ndarray:
    """Draw ``size`` samples from a Student's t-distribution with ``df`` degrees of freedom.

    Used to produce datasets with different kurtosis by varying ``df``.
    """
    samples = stats.t.rvs(df=df, size=size)
    return samples
19
def plot_distributions(data: List[np.ndarray], labels: List[str], title: str, show_histogram: bool = True) -> None:
    """Draw a KDE curve (and optionally a histogram) for each dataset on a new figure.

    Args:
        data: Sample arrays, one per distribution.
        labels: Legend names, paired positionally with ``data``.
        title: Title placed on the figure.
        show_histogram: When true, also draw a 30-bin density histogram
            behind each KDE curve.

    Each legend entry reports the sample skewness and excess kurtosis of its
    dataset. This function creates and decorates the figure but does not
    call ``plt.show()``; displaying it is left to the caller.
    """
    plt.figure(figsize=(12, 6))
    colors = ['red', 'blue', 'green']

    for index, (d, label) in enumerate(zip(data, labels)):
        # Cycle through the palette so a fourth (or later) dataset is still
        # drawn rather than being silently dropped by zipping with `colors`.
        color = colors[index % len(colors)]

        skewness = stats.skew(d)
        # fisher=True (SciPy's default) returns excess kurtosis, i.e.
        # kurtosis - 3, so a normal distribution scores about 0. The legend
        # names it accordingly.
        excess_kurtosis = stats.kurtosis(d, fisher=True)
        kde = stats.gaussian_kde(d)
        # Evaluate the KDE over the observed range of this dataset.
        x_range = np.linspace(d.min(), d.max(), 1000)

        if show_histogram:
            plt.hist(d, bins=30, density=True, alpha=0.3, color=color, edgecolor='black')
        plt.plot(
            x_range,
            kde(x_range),
            color=color,
            label=f'{label}\n(Skewness: {skewness:.2f}, Excess Kurtosis: {excess_kurtosis:.2f})',
        )

    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
41
def main() -> None:
    """Build the skewness and kurtosis comparison figures, then display them."""
    np.random.seed(42)  # fixed seed so both figures are reproducible
    n_samples = 10000

    # Figure 1: beta samples drawn in the order positive, negative, symmetric.
    skew_labels = ['Positive Skewness', 'Negative Skewness', 'Symmetric (No Skewness)']
    skew_data = [generate_skewed_data(n_samples, direction) for direction in (1, -1, 0)]
    plot_distributions(skew_data, skew_labels, 'Comparison of Skewed Distributions')

    # Figure 2: two t-distributions and a uniform sample, drawn in list order.
    half_width = np.sqrt(3)
    kurtosis_labels = ['Leptokurtic', 'Mesokurtic', 'Platykurtic']
    kurtosis_data = [
        generate_kurtosis_data(n_samples, df=5),  # high kurtosis
        generate_kurtosis_data(n_samples, df=30),  # normal kurtosis
        np.random.uniform(-half_width, half_width, n_samples),  # low kurtosis
    ]
    plot_distributions(kurtosis_data, kurtosis_labels, 'Comparison of Kurtosis', show_histogram=False)

    # Show both figures at once.
    plt.show()


if __name__ == "__main__":
    main()
../../_images/5aa77d6ffa988371dfc7c653d26e1cbe63146715159c8fa44fdff1a11c614d82.svg ../../_images/7e19f88a7c4b021ded03605a0afcd85088c08e8d54090486342b6f89167f1430.svg

References and Further Readings#

  • Chan, Stanley H. “Chapter 4.6.3 Skewness and kurtosis.” In Introduction to Probability for Data Science, 216-220. Ann Arbor, Michigan: Michigan Publishing Services, 2021.