Source code for pasteur.mare.privacy

from pasteur.attribute import SeqAttributes
import logging

logger = logging.getLogger(__name__)


def calc_table_complexity(ver, attrs, params=None):
    """Return a complexity score for one table.

    The score is ``c * (c + h) ** (1 / 3) * n ** (1 / 3)`` where

    * ``c`` is the number of entries stored under the ``None`` key of
      ``attrs`` (presumably the table's own attributes -- confirm against
      the callers),
    * ``h`` is the number of attributes contributed by every other key of
      ``attrs``,
    * ``n`` is ``ver.rows``.

    Args:
        ver: Object exposing a ``rows`` count.
        attrs: Mapping with a mandatory ``None`` key; the remaining keys map
            to either a ``SeqAttributes`` or a sized attribute collection.
        params: Optional mapping of flags. ``"no_hist"`` makes ``h`` zero,
            ``"no_seq"`` disables the special handling of ``SeqAttributes``
            and ``"rake"`` leaves the ``hist`` part of a ``SeqAttributes``
            out of ``h``. ``None`` means no flags are set.

    Returns:
        The complexity score as a float.
    """
    # A ``None`` default avoids sharing one dict object between calls.
    params = {} if params is None else params

    c = len(attrs[None])

    h = 0
    if not params.get("no_hist", False):
        handle_seq = not params.get("no_seq", False)
        count_hist = not params.get("rake", False)

        for table_sel, tattrs in attrs.items():
            if table_sel is None:
                # Already accounted for by ``c``.
                continue

            if isinstance(tattrs, SeqAttributes) and handle_seq:
                if tattrs.attrs:
                    h += len(tattrs.attrs)
                if tattrs.hist and count_hist:
                    h += sum(len(ha) for ha in tattrs.hist.values())
            else:
                # NOTE(review): with "no_seq" set, SeqAttributes entries also
                # take this branch and contribute ``len(tattrs)``.
                h += len(tattrs)

    n = ver.rows
    return c * ((c + h) ** (1 / 3)) * (n ** (1 / 3))
def get_table_sens(ver, skip_sens=False):
    """Return the sensitivity of a single table.

    Args:
        ver: Table version, or an object wrapping one in a ``table``
            attribute (a table partition).
        skip_sens: When true the table contributes a neutral factor of 1.

    Returns:
        ``1`` if ``skip_sens`` is set; otherwise ``ver.max_len`` when it is
        truthy, else ``ver.children`` when it is truthy, else ``1``.
    """
    # Table partitions carry the actual version in ``table``.
    ver = getattr(ver, "table", ver)

    if skip_sens:
        return 1

    # First truthy value wins; 1 is the neutral fallback.
    return ver.max_len or ver.children or 1
def calc_sens(ver, skip_sens=False, ctx=False):
    """Return the sensitivity of a table combined with that of its parents.

    The table's own sensitivity (from ``get_table_sens``) is multiplied by
    the largest sensitivity found, recursively, among ``ver.parents``.

    Args:
        ver: Table version, or an object wrapping one in a ``table``
            attribute (a table partition).
        skip_sens: When true the table's own sensitivity is skipped.
        ctx: When true the table's own sensitivity is skipped as well, and
            the parents' own sensitivity is skipped if ``ver.seq_repeat``
            is truthy.

    Returns:
        The combined sensitivity.
    """
    # Table partitions carry the actual version in ``table``.
    ver = getattr(ver, "table", ver)

    own_sens = get_table_sens(ver, skip_sens or ctx)

    # If we repeat the unroll, we only use 1 row per grandparent, so we
    # have to skip the sensitivity of the parent
    skip_parent = ctx and ver.seq_repeat

    if not ver.parents:
        return own_sens

    parent_sens = [calc_sens(parent, skip_sens=skip_parent) for parent in ver.parents]
    return own_sens * max(parent_sens)
def calc_privacy_budgets(total: float, mvers, params=None):
    """Split a privacy budget between tables and compute their sensitivities.

    Each table receives a share of ``total`` proportional to its complexity
    (see ``calc_table_complexity``), after unusually large complexities have
    been capped relative to the others.

    Args:
        total: The budget to distribute.
        mvers: Mapping whose keys expose ``ver`` and ``ctx`` and whose
            values are 2-tuples with the table's attributes first.
        params: Optional mapping of options. It is forwarded to
            ``calc_table_complexity``; a truthy ``"max_sens"`` additionally
            caps every sensitivity. ``None`` means no options.

    Returns:
        A ``(budgets, sensitivities)`` pair of dicts keyed like ``mvers``.
        The budgets add up to ``total`` (up to float rounding) whenever
        ``mvers`` is not empty.
    """
    # A ``None`` default avoids sharing one dict object between calls.
    params = {} if params is None else params

    complexities = {
        mver: calc_table_complexity(mver.ver, attrs, params)
        for mver, (attrs, _) in mvers.items()
    }

    MAX_COMPLEXITY = 3
    # FIXME: Dirty heuristic to avoid the medicine table having 80% of the privacy budget
    # The cap is relative to the *other* tables. With a single table it
    # would always be 0 and wipe out the only complexity, so it is skipped.
    if len(mvers) > 1:
        for _ in range(100):
            initial_sum = sum(complexities.values())
            for mver, compl in list(complexities.items()):
                max_complexity = MAX_COMPLEXITY * (initial_sum - compl) / len(mvers)
                if compl > max_complexity:
                    complexities[mver] = max_complexity

    total_complexity = sum(complexities.values())
    smax = params.get("max_sens", None)

    budgets = {}
    sensitivities = {}
    for mver in mvers:
        sens = calc_sens(mver.ver, ctx=mver.ctx)
        if smax:
            sens = min(sens, smax)

        if total_complexity:
            share = complexities[mver] / total_complexity
        else:
            # Every complexity is zero: fall back to an even split instead
            # of dividing by zero.
            share = 1 / len(mvers)

        budgets[mver] = total * share
        sensitivities[mver] = sens

    return budgets, sensitivities