Source code for pasteur.extras.views.rfel
from functools import partial
import pandas as pd
from ....utils import LazyChunk, LazyFrame, get_relative_fn, to_chunked
from ....view import TabularView, View, filter_by_keys, filter_by_keys_merged
[docs]
class RfelView(View):
    """Base view for the ``rfel_*`` datasets that passes its source table through.

    The view name, the dataset name and the parameters file name are all
    derived from the ``short_name`` given at construction time. ``ingest``
    returns the single table it is handed without modifying it; subclasses
    override it when a table needs post-processing.
    """

    def __init__(self, short_name: str, deps: dict[str, list[str]], **kwargs) -> None:
        """Configure the view for the dataset identified by ``short_name``.

        Args:
            short_name: Suffix appended to ``rfel_`` to form ``self.name``
                (and ``self.dataset``); also selects
                ``parameters_{short_name}.yml``.
            deps: Stored as ``self.deps``. Presumably maps each view table
                to the source tables it is built from -- confirm against
                ``View``.
            **kwargs: Forwarded unchanged to ``View.__init__``.
        """
        self.name = f"rfel_{short_name}"
        self.dataset = self.name
        self.deps = deps
        # Current datasets do not need transformer inter-table references
        self.trn_deps = {}
        self.parameters = get_relative_fn(f"parameters_{short_name}.yml")
        super().__init__(**kwargs)

    @to_chunked
    def ingest(self, name, **tables: LazyChunk):
        """Load and return the only table supplied for ``name``, unmodified.

        Args:
            name: Name of the view table being ingested (used only in the
                error message).
            **tables: Exactly one lazily-loaded table, keyed by its name.

        Returns:
            Whatever calling the single ``LazyChunk`` yields.

        Raises:
            AssertionError: If the number of supplied tables is not one.
        """
        # Raise explicitly rather than via `assert`: assert statements are
        # removed under `python -O`, which would let a malformed call
        # silently return an arbitrary table.
        if len(tables) != 1:
            raise AssertionError(
                f"View {self.name} expects exactly one table for '{name}', "
                f"got {len(tables)}"
            )
        (chunk,) = tables.values()
        return chunk()
[docs]
class ConsumerExpendituresView(RfelView):
def __init__(self, **kwargs) -> None:
super().__init__(
short_name="ce",
deps={
"households": ["households"],
"expenditures": ["expenditures"],
"members": ["household_members"],
},
**kwargs,
)
[docs]
@to_chunked
def ingest(self, name, **tables: LazyChunk):
match name:
case "households":
new_df = tables["households"]()
new_df.index = new_df.index.astype(pd.Int64Dtype())
new_df.index.name = "household_id"
# sort by index
new_df = new_df.sort_index()
return new_df
case "expenditures":
new_df = tables["expenditures"]()
new_df['household_id'] = new_df['household_id'].astype(pd.Int64Dtype())
new_df = new_df.drop(columns=['year'])
new_df.index.name = "expenditure_id"
# sort by index and then by month
new_df = new_df.sort_values(by=['household_id', 'month'])
return new_df
case "members":
new_df = tables["household_members"]()
new_df['household_id'] = new_df['household_id'].astype(pd.Int64Dtype())
new_df = new_df.drop(columns=['year'])
new_df.index.name = "member_id"
return new_df
case other:
assert False, f"Table {other} not part of view {self.name}"