From eaeed25b2397a2ef4415d05b0d0cd041d2fe97ee Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Thu, 31 Oct 2024 10:31:11 +1100 Subject: [PATCH 1/4] Add schema support for split_attribute --- src/feafea/config_schema.json | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/feafea/config_schema.json b/src/feafea/config_schema.json index 4383930..f8fb2ec 100644 --- a/src/feafea/config_schema.json +++ b/src/feafea/config_schema.json @@ -111,7 +111,12 @@ { "required": [ "variants" - ] + ], + "not": { + "required": [ + "split_attribute" + ] + } }, { "required": [ @@ -173,6 +178,10 @@ ] } }, + "split_attribute": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$" + }, "split_group": { "type": "string", "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$" From 52847c532196b11b43640f4873dde7e55ea2188a Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Fri, 1 Nov 2024 17:07:45 +1100 Subject: [PATCH 2/4] Disallow insplit function and splits rule at the same time --- src/feafea/__init__.py | 23 +++++++++++++++++------ tests/test_configs.py | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/feafea/__init__.py b/src/feafea/__init__.py index daf8fc9..0267e96 100644 --- a/src/feafea/__init__.py +++ b/src/feafea/__init__.py @@ -365,7 +365,7 @@ class _CompiledFilter: The compiled filter function. """ - __slots__ = ("eval", "flag_refs", "rule_refs") + __slots__ = ("eval", "flag_refs", "rule_refs", "uses_insplit") # The compiled filter function. eval: Callable[[TargetID, Attributes], bool] @@ -377,6 +377,9 @@ class _CompiledFilter: # The set of rule names/splits referenced in the filter. This is used to ensure # no circular references to rules occur. rule_refs: set[tuple[str, str | None]] + # Whether the filter uses the insplit function. This is used to disallow the + # simultaneous use of insplit and split rules in the same rule. + uses_insplit: bool # Filter references are not needed since filter references in a filter are # inlined during compilation and any circular references checked. @@ -528,7 +531,7 @@ def _inline( return (f0, f1, f2) - def _pythonize(self, f: _ParsedFilter) -> str: + def _pythonize(self, f: _ParsedFilter, visited_insplit: Callable[[], None]) -> str: """ Convert the parse tree into a python expression. @@ -540,17 +543,18 @@ def _pythonize(self, f: _ParsedFilter) -> str: case "AND" | "OR": # We know for certain that lhs and rhs are booleans. AND/ORing them # together will always yield a boolean and will never raise an exception. - return f" {self._py_op_map[op]} ".join(f"({self._pythonize(a)})" for a in arg1) + return f" {self._py_op_map[op]} ".join(f"({self._pythonize(a, visited_insplit)})" for a in arg1) case "NOT": # We know for certain that the argument is a boolean. Negating it # will always yield a boolean and will never raise an exception. - return f"(not ({self._pythonize(arg1)}))" + return f"(not ({self._pythonize(arg1, visited_insplit)}))" case "EQ" | "NE" | "LE" | "GE" | "GT" | "LT" | "IN" | "NOTIN" | "INTERSECTS": sym_type, sym_name, *sym_args = arg1 match sym_type: case "FUNC": func_name, func_args = sym_name, sym_args[0] if func_name == "insplit": + visited_insplit() # We have two types of attributes we need to handle, # sets and single values. All values and set values # must be handled. For single values, we convert @@ -626,13 +630,19 @@ def compile(self, name: str, flags: dict[str, CompiledFlag], rules: dict[str, _C """ Compile the filter into a callable form. """ + comp_filter = _CompiledFilter() + seen = set() sets: list[set] = [] flag_refs: dict[str, type] = {} rule_refs: set[tuple[str, str | None]] = set() inlined = self._inline(self._filters[name], seen, sets, flag_refs, rule_refs) optimized = self._optimize(inlined) - py_expr = self._pythonize(optimized) + + def visited_insplit(): + comp_filter.uses_insplit = True + + py_expr = self._pythonize(optimized, visited_insplit) code_str = f""" def a(target_id, attributes): return {py_expr} @@ -646,7 +656,6 @@ def a(target_id, attributes): "_hash_percent": _hash_percent, } exec(py_code, _globals, _locals) - comp_filter = _CompiledFilter() comp_filter.eval = _locals["a"] comp_filter.flag_refs = flag_refs comp_filter.rule_refs = rule_refs @@ -912,6 +921,8 @@ def from_dict(c: DictConfig) -> CompiledConfig: compiled_rule.split_names = set() if "splits" in r: + if hasattr(compiled_rule, "_compiled_filter") and compiled_rule._compiled_filter.uses_insplit: + raise ValueError("insplit filter and split rules cannot be used in the same rule") compiled_rule.split_names.update(s["name"] for s in r["splits"] if "name" in s) split_seed = r.get("split_group", rule_name) py_common += [f"split_target_percent = _hash_percent(target_id, seed={split_seed!r})"] diff --git a/tests/test_configs.py b/tests/test_configs.py index b1bb88d..6727ac8 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -477,6 +477,7 @@ def test_invalid_rules_def(self): ({"variants": {"a": 2}, "schedule": {"start": "2024-10-10 10:10:10", "end": "2024-10-10 20:20:20"}}, ValueError, "Timezone missing"), ({"variants": {"a": 2}, "schedule": {"start": "2024-10-10 20:10:10Z", "end": "2024-10-10 10:20:20Z"}}, ValueError, "start must be before end"), ({"variants": {"a": 2}, "schedule": {"start": "2024-10-10 10:10:10Z", "end": "2024-10-10 10:10:10Z"}}, ValueError, "start must be before end"), + ({"filter": "insplit(attr:abc, 0, 10)", "splits": [{"percentage": 10}]}, ValueError, "insplit filter and split rules cannot be used in the same rule"), ( {"variants": {"a": 2}, "schedule": {"start": "2024-10-10 10:10:00Z", "end": "2024-10-10 10:20:00Z", "ramp_up": "8m", "ramp_down": "8m"}}, ValueError, From e25b34d03f162e10cd7469ad49246ad4f4de6e48 Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Fri, 1 Nov 2024 22:16:01 +1100 Subject: [PATCH 3/4] Implement split_attribute --- src/feafea/__init__.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/feafea/__init__.py b/src/feafea/__init__.py index 0267e96..870a60a 100644 --- a/src/feafea/__init__.py +++ b/src/feafea/__init__.py @@ -32,7 +32,7 @@ _feafea_checksum = md5(f.read()).hexdigest() -def _hash_percent(s: str, seed: str = "") -> float: +def _hash_percent(s: Any, seed: str = "") -> float: """ Hashes the given string and seed to a float in the range [0, 100). @@ -631,6 +631,7 @@ def compile(self, name: str, flags: dict[str, CompiledFlag], rules: dict[str, _C Compile the filter into a callable form. """ comp_filter = _CompiledFilter() + comp_filter.uses_insplit = False seen = set() sets: list[set] = [] @@ -692,7 +693,7 @@ class _CompiledFlagRule: __slots__ = ("rule_name", "eval") rule_name: str - eval: Callable[[TargetID, Attributes], Variant | tuple[Variant, str] | None] + eval: Callable[[TargetID, Attributes], Variant | tuple[Variant, str, list[AttributeValue]] | None] class CompiledFlag: @@ -721,12 +722,13 @@ def eval(self, target_id: TargetID, attributes: Attributes) -> FlagEvaluation: e.flag = self.name e.target_id = target_id e.split = "" + e.split_attribute_values = [] for rule in self._rules: v = rule.eval(target_id, attributes) if v is not None: e.rule = rule.rule_name if isinstance(v, tuple): - e.variant, e.split = v + e.variant, e.split, e.split_attribute_values = v e.reason = "split_rule" else: e.variant = v @@ -925,18 +927,23 @@ def from_dict(c: DictConfig) -> CompiledConfig: raise ValueError("insplit filter and split rules cannot be used in the same rule") compiled_rule.split_names.update(s["name"] for s in r["splits"] if "name" in s) split_seed = r.get("split_group", rule_name) - py_common += [f"split_target_percent = _hash_percent(target_id, seed={split_seed!r})"] + split_attribute = r.get("split_attribute") + # Build a map of (attribute value -> percent) for each attribute value. + if split_attribute is None: + py_common += [f"split_percents = {'{'}target_id:_hash_percent(target_id, seed={split_seed!r}){'}'}"] + else: + py_common += [f"split_attribute = attributes[{split_attribute}] if isinstance(attributes.get({split_attribute}), set) else [attributes.get({split_attribute})]"] + py_common += [f"split_percents = {'{'}(v:_hash_percent(v, seed={split_seed!r}) for v in split_attribute)"] comulative_percentage_start = 0 comulative_percentage_end = 0 for split in r["splits"]: comulative_percentage_end += split["percentage"] - py_common += [ - f"#FIRULE if split_target_percent >= {comulative_percentage_start!r} and split_target_percent < {comulative_percentage_end!r}: return (True, {split.get("name", "")!r})" - ] + matched_attr_values = f"matched_attr_values = [v for v, p in split_percents.items() if {comulative_percentage_start!r} <= p < {comulative_percentage_end!r}]" + py_common += ["#FIRULE " + matched_attr_values] + py_common += [f"#FIRULE if matched_attr_values: return (True, {split.get("name", "")!r})"] for flag, variant in split.get("variants", {}).items(): - py_flag[flag].append( - f"if split_target_percent >= {comulative_percentage_start!r} and split_target_percent < {comulative_percentage_end!r}: return ({variant!r}, {split.get("name", "")!r})" - ) + py_flag[flag].append(matched_attr_values) + py_flag[flag].append(f"if matched_attr_values: return ({variant!r}, {split.get("name", "")!r}, matched_attr_values)") comulative_percentage_start += split["percentage"] # Compile the flag independent rule. @@ -964,9 +971,9 @@ def from_dict(c: DictConfig) -> CompiledConfig: for flag, py_lines in py_flag.items(): if not py_lines: assert False, "unreachable" # pragma: no cover - # Returns variant | (variant, split_name) | None + # Returns variant | (variant, split_name, split_attr_values) | None # - variant: The variant evaluated for non-split rule - # - (variant, split_name): The variant evaluated for split rule + # - (variant, split_name, split_attr_values): The variant evaluated for split rule # - None: The rule does not apply lines = ["def a(target_id, attributes):"] lines += list(f" {line}" for line in (py_common + py_lines)) @@ -1057,6 +1064,7 @@ class FlagEvaluation: "rule", "reason", "split", + "split_attribute_values", "timestamp", ) flag: str @@ -1066,6 +1074,7 @@ class FlagEvaluation: rule: str | Literal[""] reason: Literal["split_rule", "const_rule", "default"] split: str | Literal[""] + split_attribute_values: list[AttributeValue] timestamp: float From 745adade13db13ca68f857c9c2d631e722ceed14 Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Sat, 2 Nov 2024 00:23:05 +1100 Subject: [PATCH 4/4] wip --- src/feafea/__init__.py | 4 ++-- tests/test_configs.py | 49 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/feafea/__init__.py b/src/feafea/__init__.py index 870a60a..8360867 100644 --- a/src/feafea/__init__.py +++ b/src/feafea/__init__.py @@ -932,8 +932,8 @@ def from_dict(c: DictConfig) -> CompiledConfig: if split_attribute is None: py_common += [f"split_percents = {'{'}target_id:_hash_percent(target_id, seed={split_seed!r}){'}'}"] else: - py_common += [f"split_attribute = attributes[{split_attribute}] if isinstance(attributes.get({split_attribute}), set) else [attributes.get({split_attribute})]"] - py_common += [f"split_percents = {'{'}(v:_hash_percent(v, seed={split_seed!r}) for v in split_attribute)"] + py_common += [f"split_attribute = attributes[{split_attribute!r}] if isinstance(attributes.get({split_attribute!r}), set) else [attributes.get({split_attribute!r})]"] + py_common += [f"split_percents = {'{'}v:_hash_percent(v, seed={split_seed!r}) for v in split_attribute{'}'}"] comulative_percentage_start = 0 comulative_percentage_end = 0 for split in r["splits"]: diff --git a/tests/test_configs.py b/tests/test_configs.py index 6727ac8..80a4665 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -337,6 +337,55 @@ def test_valid_split_probability(self): self.assertAlmostEqual(variant_count[3] / 100000, 0.6, delta=0.002) self.assertAlmostEqual(variant_count[1] / 100000, 0.3, delta=0.002) + def test_valid_split_probability_on_single_value_attribute(self): + config = { + "flags": { + "a": {"variants": [1, 2, 3], "default": 1}, + }, + "rules": { + "r1": { + "split_attribute": "school_id", + "splits": [ + {"percentage": 10, "variants": {"a": 2}}, + {"percentage": 60, "variants": {"a": 3}}, + ], + }, + }, + } + variant_count = {1: 0, 2: 0, 3: 0} + cc = CompiledConfig.from_dict(config) + for id in range(100000): + v = cc.flags["a"].eval("", {"school_id": str(id)}).variant + assert isinstance(v, int) + variant_count[v] += 1 + self.assertAlmostEqual(variant_count[2] / 100000, 0.1, delta=0.002) + self.assertAlmostEqual(variant_count[3] / 100000, 0.6, delta=0.002) + self.assertAlmostEqual(variant_count[1] / 100000, 0.3, delta=0.002) + + def test_valid_split_probability_on_multi_value_attribute(self): + config = { + "flags": { + "a": {"variants": [1, 2, 3], "default": 1}, + }, + "rules": { + "r1": { + "split_attribute": "school_ids", + "splits": [ + {"percentage": 10, "variants": {"a": 2}}, + {"percentage": 60, "variants": {"a": 3}}, + ], + }, + }, + } + cc = CompiledConfig.from_dict(config) + for id in range(100000): + v = cc.flags["a"].eval("", {"school_ids": str(id)}).variant + assert isinstance(v, int) + variant_count[v] += 1 + self.assertAlmostEqual(variant_count[2] / 100000, 0.1, delta=0.002) + self.assertAlmostEqual(variant_count[3] / 100000, 0.6, delta=0.002) + self.assertAlmostEqual(variant_count[1] / 100000, 0.3, delta=0.002) + def test_valid_schedule(self): cases = [ # schedule, ts, expected