Source code for pasteur.extras.utils
from typing import cast
import pandas as pd
from ..hierarchy import rebalance_attributes
from .transformers import IdxTransformer, OrdinalTransformer
class ColumnResampler:
    """Round-trip a single column through a rebalanced value encoding.

    On construction a transformer is fitted on ``col``, the occurrences of
    each encoded value are counted, and both are handed to
    ``rebalance_attributes``.  The entry returned for this column is kept in
    ``self.val`` and later drives ``resample``.

    Args:
        col: Column to fit on; its ``name`` selects the column that
            ``resample`` reads from the frames it is given.
        height: Value forwarded unchanged to ``downsample`` and ``upsample``
            (presumably a hierarchy level -- confirm against
            ``rebalance_attributes``).
        ordinal: Use ``OrdinalTransformer`` when true, ``IdxTransformer``
            otherwise.
    """

    def __init__(self, col: pd.Series, height: int, ordinal: bool = False) -> None:
        import numpy as np

        self.height = height
        self.name = cast(str, col.name)

        # Pick the encoder class first, then build it with the same arguments
        # regardless of which one was chosen.
        transformer_cls = OrdinalTransformer if ordinal else IdxTransformer
        self.trn = transformer_cls("ukn", nullable=True)
        attribute = self.trn.fit(col)

        # Histogram of the encoded column, keyed by column name as expected by
        # ``rebalance_attributes``.
        encoded = self.trn.transform(col)[self.name]
        rebalanced = rebalance_attributes(
            {self.name: np.bincount(encoded)},
            {self.name: attribute},
            reshape_domain=False,
            warn=False,
        )
        # The result is indexed twice by the column name: once for the
        # attribute, once for the value inside it.
        self.val = rebalanced[self.name][self.name]  # type: ignore

    def resample(self, data: pd.DataFrame):
        """Encode, downsample, upsample and decode this column of ``data``.

        Args:
            data: Frame containing a column named ``self.name``.

        Returns:
            The output of the transformer's ``reverse`` with unused
            categories removed.

        NOTE(review): the intermediate Series is built from the upsampled
        values without passing ``data``'s index; whether ``reverse`` restores
        it is not visible here.
        """
        column = self.name
        encoded = self.trn.transform(data[column])[column]

        coarse = self.val.downsample(encoded.to_numpy(), self.height)
        restored = self.val.upsample(coarse, self.height, deterministic=True)

        frame = pd.DataFrame(pd.Series(restored, name=column))
        decoded = self.trn.reverse(frame)
        return decoded.cat.remove_unused_categories()