Skewness and Kurtosis#

Central Moments#

In modern data analysis we are sometimes interested in high-order moments. Here we consider two useful quantities: skewness and kurtosis.

Definition 119 (Central Moments)

For a random variable \(X\) with PDF \(f_X(x)\), define the following central moments as

\[\begin{split} \begin{aligned} \text { mean } & =\mathbb{E}[X] \stackrel{\text { def }}{=} \mu, \\ \text { variance } & =\mathbb{E}\left[(X-\mu)^2\right] \stackrel{\text { def }}{=} \sigma^2, \\ \text { skewness } & =\mathbb{E}\left[\left(\frac{X-\mu}{\sigma}\right)^3\right] \stackrel{\text { def }}{=} \gamma, \\ \text { kurtosis } & =\mathbb{E}\left[\left(\frac{X-\mu}{\sigma}\right)^4\right] \stackrel{\text { def }}{=} \kappa, \quad \text { excess kurtosis } \stackrel{\text { def }}{=} \kappa-3 . \end{aligned} \end{split}\]

As you can see from the definitions above, skewness is the third standardized central moment, whereas kurtosis is the fourth standardized central moment (the deviation \(X-\mu\) is divided by \(\sigma\) before being raised to the power). Both skewness and kurtosis can be regarded as “deviations” from a standard Gaussian - not in terms of mean and variance but in terms of shape [Chan, 2021].

Skewness#

 1import numpy as np
 2import matplotlib.pyplot as plt
 3from scipy import stats
 4from typing import List
 5
 6def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
 7    """Generate skewed data using a beta distribution."""
 8    if skewness > 0:
 9        a, b = 2, 5
10    elif skewness < 0:
11        a, b = 5, 2
12    else:
13        a = b = 5
14    return np.random.beta(a, b, size)
15
def plot_distributions(data: List[np.ndarray], labels: List[str]) -> None:
    """Plot every sample's histogram and Gaussian KDE on one figure and show it.

    Each series is annotated in the legend with its sample skewness.

    Args:
        data: One 1-D sample array per distribution.
        labels: Legend label for each entry of ``data`` (paired by position).
    """
    plt.figure(figsize=(12, 6))
    colors = ['red', 'blue', 'green']
    # The KDE is evaluated on a fixed [0, 1] grid, which matches the
    # beta-distributed samples this script generates; it is the same for
    # every series, so build it once.
    x_range = np.linspace(0, 1, 1000)

    for index, (d, label) in enumerate(zip(data, labels)):
        # Cycle through the palette: zipping against the three-entry list
        # would silently drop any series after the third.
        color = colors[index % len(colors)]
        skewness = stats.skew(d)
        kde = stats.gaussian_kde(d)

        plt.hist(d, bins=30, density=True, alpha=0.3, color=color, edgecolor='black')
        plt.plot(x_range, kde(x_range), color=color, label=f'{label} (Skewness: {skewness:.2f})')

    plt.title('Comparison of Skewed Distributions')
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
36
def main() -> None:
    """Generate three beta samples (right-skewed, left-skewed, symmetric) and plot them."""
    np.random.seed(42)  # For reproducibility
    n_samples = 10000

    # (legend label, sign passed to generate_skewed_data); samples are drawn
    # in this order from the seeded global random state.
    cases = [
        ('Positive Skewness', 1),
        ('Negative Skewness', -1),
        ('Symmetric (No Skewness)', 0),
    ]
    samples = [generate_skewed_data(n_samples, sign) for _, sign in cases]
    names = [name for name, _ in cases]

    plot_distributions(samples, names)


if __name__ == "__main__":
    main()
../../_images/7882b7c707f23b2c780335b5fe8b6ddfb8776872f9d48730e2568d3840fd7ee9.svg

Kurtosis#

 1import numpy as np
 2import matplotlib.pyplot as plt
 3from scipy import stats
 4from typing import List, Tuple
 5
 6def generate_skewed_data(size: int, skewness: float) -> np.ndarray:
 7    """Generate skewed data using a beta distribution."""
 8    if skewness > 0:
 9        a, b = 2, 5
10    elif skewness < 0:
11        a, b = 5, 2
12    else:
13        a = b = 5
14    return np.random.beta(a, b, size)
15
def generate_kurtosis_data(size: int, df: float) -> np.ndarray:
    """Draw ``size`` samples from a Student's t-distribution.

    Args:
        size: Number of samples to draw.
        df: Degrees of freedom of the t-distribution.

    Returns:
        Array of t-distributed samples.
    """
    samples = stats.t.rvs(df, size=size)
    return samples
19
def plot_distributions(data: List[np.ndarray], labels: List[str], title: str, show_histogram: bool = True) -> None:
    """Plot each sample's Gaussian KDE (and optionally its histogram) on a new figure.

    The legend entry of every series reports its sample skewness and excess
    kurtosis. The figure is created but not shown; the caller is expected to
    call ``plt.show()``.

    Args:
        data: One 1-D sample array per distribution.
        labels: Legend label for each entry of ``data`` (paired by position).
        title: Figure title.
        show_histogram: When true, draw a density histogram under each KDE.
    """
    plt.figure(figsize=(12, 6))
    colors = ['red', 'blue', 'green']

    for index, (d, label) in enumerate(zip(data, labels)):
        # Cycle through the palette: zipping against the three-entry list
        # would silently drop any series after the third.
        color = colors[index % len(colors)]
        skewness = stats.skew(d)
        # stats.kurtosis defaults to Fisher's definition, i.e. it returns
        # kappa - 3 (0 for a Gaussian), so the legend must say "excess".
        excess_kurtosis = stats.kurtosis(d)
        kde = stats.gaussian_kde(d)
        # Evaluate the KDE over this sample's own observed range.
        x_range = np.linspace(d.min(), d.max(), 1000)

        if show_histogram:
            plt.hist(d, bins=30, density=True, alpha=0.3, color=color, edgecolor='black')
        plt.plot(
            x_range,
            kde(x_range),
            color=color,
            label=f'{label}\n(Skewness: {skewness:.2f}, Excess Kurtosis: {excess_kurtosis:.2f})',
        )

    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
41
def main() -> None:
    """Build the skewness figure and the kurtosis figure, then display both."""
    np.random.seed(42)  # For reproducibility
    n_samples = 10000

    # Skewness plot: right-skewed, left-skewed and symmetric beta samples,
    # drawn in that order from the seeded global random state.
    skew_labels = ['Positive Skewness', 'Negative Skewness', 'Symmetric (No Skewness)']
    skew_data = [generate_skewed_data(n_samples, sign) for sign in (1, -1, 0)]
    plot_distributions(skew_data, skew_labels, 'Comparison of Skewed Distributions')

    # Kurtosis plot
    heavy_tailed = generate_kurtosis_data(n_samples, df=5)  # High kurtosis
    near_normal = generate_kurtosis_data(n_samples, df=30)  # Normal kurtosis
    # Uniform on [-sqrt(3), sqrt(3)] has unit variance.
    half_width = np.sqrt(3)
    flat = np.random.uniform(-half_width, half_width, n_samples)  # Low kurtosis

    plot_distributions(
        [heavy_tailed, near_normal, flat],
        ['Leptokurtic', 'Mesokurtic', 'Platykurtic'],
        'Comparison of Kurtosis',
        show_histogram=False,
    )

    # plot_distributions only builds the figures; show them together here.
    plt.show()


if __name__ == "__main__":
    main()
../../_images/2dd07f8be1e9006ff0eeba65c1806b9c2bb89a5521d557c661875aca98370d0d.svg ../../_images/d4a291264c1fd48c999d7cce0a8721637eb9536f0dd170c644a42fd815d19d2a.svg

References and Further Readings#

  • Chan, Stanley H. “Chapter 4.6.3 Skewness and kurtosis.” In Introduction to Probability for Data Science, 216-220. Ann Arbor, Michigan: Michigan Publishing Services, 2021.