Skip to content

Statmaker

StatMaker

Source code in may/stats/statmaker.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class StatMaker:
    """Helper that computes descriptive statistics for numeric samples."""

    def __init__(self):
        # Dict created empty here; none of the methods visible in this class
        # read or write it afterwards.
        self.stats = {}
        # Last id handed out by _generate_stat_id (0 means none issued yet).
        self._next_stat_id = 0

    def _generate_stat_id(self):
        """Advance the internal counter and return its new value.

        Returns:
          (int): The freshly issued id; the first call returns 1.
        """
        self._next_stat_id += 1
        return self._next_stat_id

    def collect_statistics(self, data):
        """Returns some common statistics from a distribution.

        NaN entries are dropped before anything is computed. If no values
        remain, a dict holding only an ``"error"`` message is returned
        instead of the statistics.

        Args:
          data (array-like): The data to analyse.

        Returns:
          (dict): Dict containing statistical properties. Keys: mean, median,
            mode, std, variance, range, iqr, mad, min, max, q1, q3, skewness,
            kurtosis, count, sum and cv. ``std`` and ``variance`` use
            ``ddof=1``; ``cv`` is ``np.inf`` when the mean is exactly zero.
        """
        arr = np.asarray(data)

        # Remove NaN values if present (boolean masking also flattens the input).
        arr = arr[~np.isnan(arr)]

        if arr.size == 0:
            return {"error": "Empty array or all NaN values"}

        # Computed once and reused: both values feed the coefficient of
        # variation as well as their own entries in the result.
        mean_value = np.mean(arr)
        sample_std = np.std(arr, ddof=1)

        return {
            # Central tendency
            "mean": mean_value,
            "median": np.median(arr),
            "mode": stats.mode(arr, keepdims=True).mode[0],

            # Spread/Dispersion
            "std": sample_std,  # sample std
            "variance": np.var(arr, ddof=1),  # sample variance
            "range": np.ptp(arr),  # peak-to-peak (max - min)
            "iqr": stats.iqr(arr),  # interquartile range
            "mad": stats.median_abs_deviation(arr),  # median absolute deviation

            # Position
            "min": np.min(arr),
            "max": np.max(arr),
            "q1": np.percentile(arr, 25),  # 1st quartile
            "q3": np.percentile(arr, 75),  # 3rd quartile

            # Shape
            "skewness": stats.skew(arr),
            "kurtosis": stats.kurtosis(arr),

            # Sample properties
            "count": len(arr),
            "sum": np.sum(arr),

            # Coefficient of variation (relative std); guarded against a zero mean
            "cv": sample_std / mean_value if mean_value != 0 else np.inf,
        }

collect_statistics(data)

Returns some common statistics from a distribution.

Parameters:

Name Type Description Default
data array-like

The data to analyse.

required

Returns:

Type Description
dict

Dict containing statistical properties.

Source code in may/stats/statmaker.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def collect_statistics(self, data):
    """Returns some common statistics from a distribution.

    NaN entries are dropped before anything is computed. If no values
    remain, a dict holding only an ``"error"`` message is returned instead
    of the statistics.

    Args:
      data (array-like): The data to analyse.

    Returns:
      (dict): Dict containing statistical properties. Keys: mean, median,
        mode, std, variance, range, iqr, mad, min, max, q1, q3, skewness,
        kurtosis, count, sum and cv. ``std`` and ``variance`` use
        ``ddof=1``; ``cv`` is ``np.inf`` when the mean is exactly zero.
    """
    arr = np.asarray(data)

    # Remove NaN values if present (boolean masking also flattens the input).
    arr = arr[~np.isnan(arr)]

    if arr.size == 0:
        return {"error": "Empty array or all NaN values"}

    # Computed once and reused: both values feed the coefficient of
    # variation as well as their own entries in the result.
    mean_value = np.mean(arr)
    sample_std = np.std(arr, ddof=1)

    return {
        # Central tendency
        "mean": mean_value,
        "median": np.median(arr),
        "mode": stats.mode(arr, keepdims=True).mode[0],

        # Spread/Dispersion
        "std": sample_std,  # sample std
        "variance": np.var(arr, ddof=1),  # sample variance
        "range": np.ptp(arr),  # peak-to-peak (max - min)
        "iqr": stats.iqr(arr),  # interquartile range
        "mad": stats.median_abs_deviation(arr),  # median absolute deviation

        # Position
        "min": np.min(arr),
        "max": np.max(arr),
        "q1": np.percentile(arr, 25),  # 1st quartile
        "q3": np.percentile(arr, 75),  # 3rd quartile

        # Shape
        "skewness": stats.skew(arr),
        "kurtosis": stats.kurtosis(arr),

        # Sample properties
        "count": len(arr),
        "sum": np.sum(arr),

        # Coefficient of variation (relative std); guarded against a zero mean
        "cv": sample_std / mean_value if mean_value != 0 else np.inf,
    }