Source code for pasteur.extras.views.texas
from ....view import View
from ....utils import get_relative_fn, to_chunked, LazyChunk, LazyFrame
import pandas as pd
[docs]
class TexasChargesView(View):
name = "texas_charges"
dataset = "texas"
tabular = True
deps = {"table": ["charges"]}
parameters = get_relative_fn("./parameters_charges.yml")
[docs]
@to_chunked
def ingest(self, name: str, charges: LazyChunk):
return charges()
[docs]
class TexasBillionView(View):
name = "texas_billion"
dataset = "texas"
tabular = True
deps = {"table": ["charges"]}
parameters = get_relative_fn("./parameters_billion.yml")
[docs]
def ingest(self, name: str, charges: LazyFrame):
from ...utils import ColumnResampler
sample = charges.sample()
res_rev = ColumnResampler(sample["revenue_code"], height=300)
res_proc = ColumnResampler(sample["hcpcs_procedure_code"], height=2950)
return self._ingest(name, charges, res_rev, res_proc) # type: ignore
@to_chunked
def _ingest(self, name: str, charges: LazyChunk, res_rev, res_proc):
a = charges().drop(columns=["modifier_3", "modifier_4"])
# Limit domain of rev code and hcpcs
a = a.assign(
revenue_code=res_rev.resample(a), hcpcs_procedure_code=res_proc.resample(a)
)
return (
pd.concat([a, a, a])[:25_000_000]
.reset_index(drop=True)
.rename_axis("charge_id")
)
[docs]
class TexasBaseView(View):
name = "texas_base"
dataset = "texas"
tabular = True
pid_pattern = "" # "20(?:06|07|11|15)"
deps = {"table": ["base"]}
parameters = get_relative_fn("./parameters_base.yml")
[docs]
@to_chunked
def ingest(self, name: str, base: LazyChunk):
return base()
# parameters = get_relative_fn("parameters.yml")