diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py
index ec75d3a3a4..a6eb7182e9 100644
--- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py
+++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py
@@ -42,3 +42,9 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
 @BINARY_OP_REGISTRATION.register(ops.ge_op)
 def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
     return sge.GTE(this=left.expr, expression=right.expr)
+
+
+@BINARY_OP_REGISTRATION.register(ops.JSONSet)
+def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    # Emits JSON_SET(json_expr, json_path, value); the path is read from the op.
+    return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)
diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
index 716917b455..9cca15f352 100644
--- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
+++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
@@ -70,3 +70,49 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression:
     )
 
     return sge.array(selected_elements)
+
+
+# JSON Ops
+@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
+def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONExtractArray)
+def _(op: ops.JSONExtractArray, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_EXTRACT_ARRAY", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONExtractStringArray)
+def _(op: ops.JSONExtractStringArray, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONQuery)
+def _(op: ops.JSONQuery, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_QUERY", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONQueryArray)
+def _(op: ops.JSONQueryArray, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_QUERY_ARRAY", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONValue)
+def _(op: ops.JSONValue, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_VALUE", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.JSONValueArray)
+def _(op: ops.JSONValueArray, expr: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_VALUE_ARRAY", expr.expr, sge.convert(op.json_path))
+
+
+@UNARY_OP_REGISTRATION.register(ops.ParseJSON)
+def _(op: ops.ParseJSON, expr: TypedExpr) -> sge.Expression:
+    return sge.func("PARSE_JSON", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.ToJSONString)
+def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
+    return sge.func("TO_JSON_STRING", expr.expr)
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_json_set/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_json_set/out.sql
new file mode 100644
index 0000000000..f501dd3b86
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_json_set/out.sql
@@ -0,0 +1,20 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `rowindex` AS `bfcol_0`,
+    `json_col` AS `bfcol_1`
+  FROM `bigframes-dev`.`sqlglot_test`.`json_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    JSON_SET(`bfcol_1`, '$.a', 100) AS `bfcol_4`
+  FROM `bfcte_0`
+), `bfcte_2` AS (
+  SELECT
+    *,
+    JSON_SET(`bfcol_4`, '$.b', 'hi') AS `bfcol_7`
+  FROM `bfcte_1`
+)
+SELECT
+  `bfcol_0` AS `rowindex`,
+  `bfcol_7` AS `json_col`
+FROM `bfcte_2`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract/out.sql
new file mode 100644
index 0000000000..2ffb0174a8
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract/out.sql
@@ -0,0 +1,15 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `rowindex` AS `bfcol_0`,
+    `json_col` AS `bfcol_1`
+  FROM `bigframes-dev`.`sqlglot_test`.`json_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    JSON_EXTRACT(`bfcol_1`, '$') AS `bfcol_4`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_0` AS `rowindex`,
+  `bfcol_4` AS `json_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_parse_json/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_parse_json/out.sql
new file mode 100644
index 0000000000..d965ea8f1b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_parse_json/out.sql
@@ -0,0 +1,15 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `rowindex` AS `bfcol_0`,
+    `string_col` AS `bfcol_1`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    PARSE_JSON(`bfcol_1`) AS `bfcol_4`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_0` AS `rowindex`,
+  `bfcol_4` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
index f3c96e9253..9daff51c9f 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
@@ -14,6 +14,7 @@
 
 import pytest
 
+import bigframes.bigquery as bbq
 import bigframes.pandas as bpd
 
 pytest.importorskip("pytest_snapshot")
@@ -41,3 +42,8 @@ def test_add_string(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df["string_col"] = bf_df["string_col"] + "a"
 
     snapshot.assert_match(bf_df.sql, "out.sql")
+
+
+def test_json_set(json_types_df: bpd.DataFrame, snapshot):
+    result = bbq.json_set(json_types_df["json_col"], [("$.a", 100), ("$.b", "hi")])
+    snapshot.assert_match(result.to_frame().sql, "out.sql")
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
index 317c2f891b..6d9101aff0 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
@@ -14,14 +14,14 @@
 
 import pytest
 
-from bigframes import bigquery
+import bigframes.bigquery as bbq
 import bigframes.pandas as bpd
 
 pytest.importorskip("pytest_snapshot")
 
 
 def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
-    result = bigquery.array_to_string(repeated_types_df["string_list_col"], ".")
+    result = bbq.array_to_string(repeated_types_df["string_list_col"], ".")
 
     snapshot.assert_match(result.to_frame().sql, "out.sql")
 
@@ -42,3 +42,47 @@ def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snaps
     result = repeated_types_df["string_list_col"].list[1:5]
 
     snapshot.assert_match(result.to_frame().sql, "out.sql")
+
+
+# JSON Ops
+def test_json_extract(json_types_df: bpd.DataFrame, snapshot):
+    result = bbq.json_extract(json_types_df["json_col"], "$")
+    expected_sql = "JSON_EXTRACT(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+    snapshot.assert_match(result.to_frame().sql, "out.sql")
+
+
+def test_json_extract_array(json_types_df: bpd.DataFrame):
+    result = bbq.json_extract_array(json_types_df["json_col"], "$")
+    expected_sql = "JSON_EXTRACT_ARRAY(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+
+
+def test_json_extract_string_array(json_types_df: bpd.DataFrame):
+    result = bbq.json_extract_string_array(json_types_df["json_col"], "$")
+    expected_sql = "JSON_EXTRACT_STRING_ARRAY(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+
+
+def test_json_query(json_types_df: bpd.DataFrame):
+    result = bbq.json_query(json_types_df["json_col"], "$")
+    expected_sql = "JSON_QUERY(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+
+
+def test_json_query_array(json_types_df: bpd.DataFrame):
+    result = bbq.json_query_array(json_types_df["json_col"], "$")
+    expected_sql = "JSON_QUERY_ARRAY(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+
+
+def test_json_value(json_types_df: bpd.DataFrame):
+    result = bbq.json_value(json_types_df["json_col"], "$")
+    expected_sql = "JSON_VALUE(`bfcol_1`, '$') AS `bfcol_4`"
+    assert expected_sql in result.to_frame().sql
+
+
+def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
+    """Snapshot the SQL compiled for the ParseJSON op applied to `string_col`."""
+    result = bbq.parse_json(scalar_types_df["string_col"])
+    snapshot.assert_match(result.to_frame().sql, "out.sql")