distance_metric

class DistanceMetric[source]

Bases: abc.ABC

Abstract base class for (symmetric) distance metrics

abstract distance(named_tuple_a: sensai.util.typing.PandasNamedTuple, named_tuple_b: sensai.util.typing.PandasNamedTuple) → float
class SingleColumnDistanceMetric(column: str)[source]

Bases: sensai.distance_metric.DistanceMetric, abc.ABC

__init__(column: str)
distance(named_tuple_a: sensai.util.typing.PandasNamedTuple, named_tuple_b: sensai.util.typing.PandasNamedTuple)
class DistanceMatrixDFCache(pickle_path: str, save_on_update: bool = True, deferred_save_delay_secs: float = 1.0)[source]

Bases: sensai.util.cache.PersistentKeyValueCache[Tuple[Union[str, int], Union[str, int]], sensai.util.cache.TValue], Generic[sensai.util.cache.TValue]

A cache for distance matrices, which are stored as dataframes with identifiers as both index and columns

__init__(pickle_path: str, save_on_update: bool = True, deferred_save_delay_secs: float = 1.0)
shape()
set(key: Tuple[Union[str, int], Union[str, int]], value: sensai.util.cache.TValue)

Sets a cached value

Parameters
  • key – the key under which to store the value

  • value – the value to store; since None is used to indicate the absence of a value, None should not be used as a value

save()
get(key: Tuple[Union[str, int], Union[str, int]]) → sensai.util.cache.TValue

Retrieves a cached value

Parameters

key – the lookup key

Returns

the cached value or None if no value is found

num_unfilled_entries()
get_all_cached(identifier: Union[str, int])
class CachedDistanceMetric(distance_metric: sensai.distance_metric.DistanceMetric, key_value_cache: sensai.util.cache.KeyValueCache, persist_cache=False)[source]

Bases: sensai.distance_metric.DistanceMetric, sensai.util.cache.CachedValueProviderMixin

A decorator which provides caching for a distance metric, i.e. the metric is computed only if the value for the given pair of identifiers is not found within the persistent cache

__init__(distance_metric: sensai.distance_metric.DistanceMetric, key_value_cache: sensai.util.cache.KeyValueCache, persist_cache=False)
distance(named_tuple_a, named_tuple_b)
fill_cache(df_indexed_by_id: pandas.core.frame.DataFrame)

Fill cache for all identifiers in the provided dataframe

Parameters

df_indexed_by_id – Dataframe that is indexed by identifiers of the members

class LinearCombinationDistanceMetric(metrics: Sequence[Tuple[float, sensai.distance_metric.DistanceMetric]])[source]

Bases: sensai.distance_metric.DistanceMetric

__init__(metrics: Sequence[Tuple[float, sensai.distance_metric.DistanceMetric]])
Parameters

metrics – a sequence of tuples (weight, distance metric)

distance(named_tuple_a, named_tuple_b)
class HellingerDistanceMetric(column: str, check_input=False)[source]

Bases: sensai.distance_metric.SingleColumnDistanceMetric

__init__(column: str, check_input=False)
class EuclideanDistanceMetric(column: str)[source]

Bases: sensai.distance_metric.SingleColumnDistanceMetric

__init__(column: str)
class IdentityDistanceMetric(keys: Union[str, List[str]])[source]

Bases: sensai.distance_metric.DistanceMetric

__init__(keys: Union[str, List[str]])
distance(named_tuple_a, named_tuple_b)
class RelativeBitwiseEqualityDistanceMetric(column: str, check_input=False)[source]

Bases: sensai.distance_metric.SingleColumnDistanceMetric

__init__(column: str, check_input=False)
check_input_value(input_value)