# Source code for pasteur.extras.datasets.boston
from __future__ import annotations
from typing import TYPE_CHECKING
from ....dataset import TabularDataset
from ....utils import get_relative_fn, RawSource
import logging
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)
DSFN = "boston-housing-dataset"
[docs]
class BostonDataset(TabularDataset):
    """Tabular dataset definition for the Boston housing data.

    Declares the dataset identifiers, the catalog file location and the raw
    download source, and unpacks the downloaded archive in ``bootstrap``.
    """

    name = "boston"
    # NOTE(review): presumably maps the produced table to the raw tables it
    # depends on -- confirm against TabularDataset.
    deps = {"table": ["table"]}
    folder_name = "boston"
    # Resolved through get_relative_fn; presumably relative to this module's
    # directory -- confirm against the helper.
    catalog = get_relative_fn("catalog.yml")
    raw_sources = RawSource(
        ["https://www.kaggle.com/api/v1/datasets/download/altavish/boston-housing-dataset"]
    )

    def bootstrap(self, raw: str, dst: str):
        """Unpack the raw archive into the destination directory.

        Args:
            raw: Directory expected to contain the zip archive named ``DSFN``.
            dst: Directory the archive contents are extracted into; it is
                created if it does not exist yet.
        """
        import os
        import zipfile

        # Create the target directory before the archive is opened, so it
        # exists even if opening the archive fails.
        os.makedirs(dst, exist_ok=True)

        archive_path = os.path.join(raw, DSFN)
        with zipfile.ZipFile(archive_path, mode="r") as archive:
            logger.info(f"Extracting {DSFN}...")
            archive.extractall(path=dst)