eval_stats_clustering

class ClusterLabelsEvalStats(labels: Sequence[int], noise_label: int, default_metrics: List[sensai.evaluation.eval_stats.eval_stats_base.TMetric], additional_metrics: Optional[List[sensai.evaluation.eval_stats.eval_stats_base.TMetric]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_base.EvalStats[sensai.evaluation.eval_stats.eval_stats_base.TMetric], abc.ABC

NUM_CLUSTERS = 'numClusters'
AV_SIZE = 'averageClusterSize'
MEDIAN_SIZE = 'medianClusterSize'
STDDEV_SIZE = 'clusterSizeStd'
MIN_SIZE = 'minClusterSize'
MAX_SIZE = 'maxClusterSize'
NOISE_SIZE = 'noiseClusterSize'
__init__(labels: Sequence[int], noise_label: int, default_metrics: List[sensai.evaluation.eval_stats.eval_stats_base.TMetric], additional_metrics: Optional[List[sensai.evaluation.eval_stats.eval_stats_base.TMetric]] = None)
get_distribution_summary() → Dict[str, float]
metrics_dict() → Dict[str, float]

Computes all metrics

Returns

a dictionary mapping metric names to values

class ClusteringUnsupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_base.Metric[ClusteringUnsupervisedEvalStats], abc.ABC

name: str
class RemovedNoiseUnsupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric

worstValue = 0
compute_value_for_eval_stats(eval_stats: sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedEvalStats) → float
abstract static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])
name: str
class CalinskiHarabaszScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedNoiseUnsupervisedMetric

name: str = 'CalinskiHarabaszScore'
static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])
class DaviesBouldinScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedNoiseUnsupervisedMetric

name: str = 'DaviesBouldinScore'
worstValue = 1
static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])
class SilhouetteScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedNoiseUnsupervisedMetric

name: str = 'SilhouetteScore'
worstValue = -1
static compute_value(datapoints: numpy.ndarray, labels: Sequence[int])
class ClusteringUnsupervisedEvalStats(datapoints: numpy.ndarray, labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric]] = None, additional_metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.ClusterLabelsEvalStats[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric]

Class containing methods to compute evaluation statistics of a clustering result

__init__(datapoints: numpy.ndarray, labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric]] = None, additional_metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringUnsupervisedMetric]] = None)
Parameters
  • datapoints – datapoints that were clustered

  • labels – sequence of labels, usually the output of some clustering algorithm

  • metrics – the metrics to compute. If None, will compute default metrics

  • additional_metrics – the metrics to additionally compute

classmethod from_model(clustering_model: sensai.clustering.clustering_base.EuclideanClusterer)
clusterLabelsMask: numpy.ndarray
noiseLabelsMask: numpy.ndarray
class ClusteringSupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_base.Metric[ClusteringSupervisedEvalStats], abc.ABC

name: str
class RemovedCommonNoiseSupervisedMetric(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric, abc.ABC

worstValue = 0
compute_value_for_eval_stats(eval_stats: sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedEvalStats) → float
abstract static compute_value(labels: Sequence[int], true_labels: Sequence[int])
name: str
class VMeasureScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedCommonNoiseSupervisedMetric

name: str = 'VMeasureScore'
static compute_value(labels: Sequence[int], true_labels: Sequence[int])
class AdjustedRandScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedCommonNoiseSupervisedMetric

name: str = 'AdjustedRandScore'
worstValue = -1
static compute_value(labels: Sequence[int], true_labels: Sequence[int])
class FowlkesMallowsScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedCommonNoiseSupervisedMetric

name: str = 'FowlkesMallowsScore'
static compute_value(labels: Sequence[int], true_labels: Sequence[int])
class AdjustedMutualInfoScore(name: Optional[str] = None, bounds: Optional[Tuple[float, float]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.RemovedCommonNoiseSupervisedMetric

name: str = 'AdjustedMutualInfoScore'
static compute_value(labels: Sequence[int], true_labels: Sequence[int])
class ClusteringSupervisedEvalStats(labels: Sequence[int], true_labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric]] = None, additional_metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric]] = None)[source]

Bases: sensai.evaluation.eval_stats.eval_stats_clustering.ClusterLabelsEvalStats[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric]

Class containing methods to compute evaluation statistics of a clustering result based on ground truth clusters

__init__(labels: Sequence[int], true_labels: Sequence[int], noise_label=-1, metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric]] = None, additional_metrics: Optional[Sequence[sensai.evaluation.eval_stats.eval_stats_clustering.ClusteringSupervisedMetric]] = None)
Parameters
  • labels – sequence of labels, usually the output of some clustering algorithm

  • true_labels – sequence of labels that represent the ground truth clusters

  • metrics – the metrics to compute. If None, will compute default metrics

  • additional_metrics – the metrics to additionally compute

classmethod from_model(clustering_model: sensai.clustering.clustering_base.EuclideanClusterer, true_labels: Sequence[int])
labels_with_removed_common_noise() → Tuple[numpy.ndarray, numpy.ndarray]
Returns

tuple (labels, true_labels) where points classified as noise in true and predicted data were removed

clusterLabelsMask: numpy.ndarray
noiseLabelsMask: numpy.ndarray