data

class BaseInputOutputData(inputs: sensai.data.T, outputs: sensai.data.T)[source]

Bases: Generic[sensai.data.T], abc.ABC

__init__(inputs: sensai.data.T, outputs: sensai.data.T)
Parameters
  • inputs – expected to have shape and __len__

  • outputs – expected to have shape and __len__

abstract filter_indices(indices: Sequence[int]) sensai.data.BaseInputOutputData
class InputOutputArrays(inputs: numpy.ndarray, outputs: numpy.ndarray)[source]

Bases: sensai.data.BaseInputOutputData[numpy.ndarray]

__init__(inputs: numpy.ndarray, outputs: numpy.ndarray)
Parameters
  • inputs – expected to have shape and __len__

  • outputs – expected to have shape and __len__

filter_indices(indices: Sequence[int]) sensai.data.InputOutputArrays
to_torch_data_loader(batch_size=64, shuffle=True)
class InputOutputData(inputs: pandas.core.frame.DataFrame, outputs: pandas.core.frame.DataFrame)[source]

Bases: sensai.data.BaseInputOutputData[pandas.core.frame.DataFrame], sensai.util.string.ToStringMixin

Holds input and output data for learning problems

__init__(inputs: pandas.core.frame.DataFrame, outputs: pandas.core.frame.DataFrame)
Parameters
  • inputs – expected to have shape and __len__

  • outputs – expected to have shape and __len__

classmethod from_data_frame(df: pandas.core.frame.DataFrame, *output_columns: str) sensai.data.InputOutputData
Parameters
  • df – a data frame containing both input and output columns

  • output_columns – the output column name(s)

Returns

an InputOutputData instance with inputs and outputs separated

filter_indices(indices: Sequence[int]) sensai.data.InputOutputData
filter_index(index_elements: Sequence[any]) sensai.data.InputOutputData
property input_dim
property output_dim
compute_input_output_correlation()
class DataSplitter(*args, **kwds)[source]

Bases: abc.ABC, Generic[sensai.data.TInputOutputData]

abstract split(data: sensai.data.TInputOutputData) Tuple[sensai.data.TInputOutputData, sensai.data.TInputOutputData]
class DataSplitterFractional(fractional_size_of_first_set: float, shuffle=True, random_seed=42)[source]

Bases: sensai.data.DataSplitter

__init__(fractional_size_of_first_set: float, shuffle=True, random_seed=42)
split_with_indices(data) Tuple[Tuple[Sequence[int], Sequence[int]], Tuple[sensai.data.TInputOutputData, sensai.data.TInputOutputData]]
split(data: sensai.data.TInputOutputData) Tuple[sensai.data.TInputOutputData, sensai.data.TInputOutputData]
class DataSplitterFromDataFrameSplitter(data_frame_splitter: sensai.data.DataFrameSplitter, fractional_size_of_first_set: float, apply_to_input=True)[source]

Bases: sensai.data.DataSplitter[sensai.data.InputOutputData]

Creates a DataSplitter from a DataFrameSplitter, which can be applied either to the input or the output data. It supports only InputOutputData, not other subclasses of BaseInputOutputData.

__init__(data_frame_splitter: sensai.data.DataFrameSplitter, fractional_size_of_first_set: float, apply_to_input=True)
Parameters
  • data_frame_splitter – the splitter to apply

  • fractional_size_of_first_set – the desired fractional size of the first set when applying the splitter

  • apply_to_input – if True, apply the splitter to the input data frame; if False, apply it to the output data frame

split(data: sensai.data.InputOutputData) Tuple[sensai.data.InputOutputData, sensai.data.InputOutputData]
class DataSplitterFromSkLearnSplitter(sklearn_splitter)[source]

Bases: sensai.data.DataSplitter

__init__(sklearn_splitter)
Parameters

sklearn_splitter – an instance of one of the splitter classes from sklearn.model_selection, see https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection

split(data: sensai.data.TInputOutputData) Tuple[sensai.data.TInputOutputData, sensai.data.TInputOutputData]
class DataSplitterStratifiedShuffleSplit(fractional_size_of_first_set: float, random_seed=42)[source]

Bases: sensai.data.DataSplitterFromSkLearnSplitter

__init__(fractional_size_of_first_set: float, random_seed=42)
Parameters

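  • fractional_size_of_first_set – the desired fractional size of the first set

  • random_seed – the random seed used for the stratified shuffle split

Note: sklearn_splitter is a parameter of the base class DataSplitterFromSkLearnSplitter (an instance of one of the splitter classes from sklearn.model_selection, see https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection); it is not an argument of this constructor.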

static is_applicable(io_data: sensai.data.InputOutputData)
class DataFrameSplitter[source]

Bases: abc.ABC

abstract compute_split_indices(df: pandas.core.frame.DataFrame, fractional_size_of_first_set: float) Tuple[Sequence[int], Sequence[int]]
static split_with_indices(df: pandas.core.frame.DataFrame, indices_pair: Tuple[Sequence[int], Sequence[int]]) Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]
split(df: pandas.core.frame.DataFrame, fractional_size_of_first_set: float) Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]
class DataFrameSplitterFractional(shuffle=False, random_seed=42)[source]

Bases: sensai.data.DataFrameSplitter

__init__(shuffle=False, random_seed=42)
compute_split_indices(df: pandas.core.frame.DataFrame, fractional_size_of_first_set: float) Tuple[Sequence[int], Sequence[int]]
class DataFrameSplitterColumnEquivalenceClass(column: str, shuffle=True, random_seed=42)[source]

Bases: sensai.data.DataFrameSplitter

Performs a split that keeps together data points/rows that have the same value in a given column, i.e. with respect to that column, the items having the same values are viewed as a unit; they form an equivalence class, and all data points belonging to the same class are either in the first set or the second set.

The split is performed at the level of unique items in the column, i.e. the given fraction of equivalence classes will end up in the first set and the rest in the second set.

The list of unique items in the column can be shuffled before applying the split. If no shuffling is applied, the original order in the data frame is maintained, and if the items were grouped by equivalence class in the original data frame, the split will correspond to a fractional split without shuffling where the split boundary is adjusted to not separate an equivalence class.

__init__(column: str, shuffle=True, random_seed=42)
Parameters
  • column – the column which defines the equivalence classes (groups of data points/rows that must not be separated)

  • shuffle – whether to shuffle the list of unique values in the given column before applying the split

  • random_seed – the random seed to use when shuffling (relevant only if shuffle is True)

compute_split_indices(df: pandas.core.frame.DataFrame, fractional_size_of_first_set: float) Tuple[Sequence[int], Sequence[int]]