diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index f65509e5b7..c3529cfd19 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -479,9 +479,8 @@ def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue: for column_id in column_ids: assert bigframes.dtypes.is_array_like(self.get_column_type(column_id)) - return ArrayValue( - nodes.ExplodeNode(child=self.node, column_ids=tuple(column_ids)) - ) + offsets = tuple(self.get_offset_for_name(id) for id in column_ids) + return ArrayValue(nodes.ExplodeNode(child=self.node, column_ids=offsets)) def _uniform_sampling(self, fraction: float) -> ArrayValue: """Sampling the table on given fraction. @@ -490,3 +489,6 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue: The row numbers of result is non-deterministic, avoid to use. """ return ArrayValue(nodes.RandomSampleNode(self.node, fraction)) + + def get_offset_for_name(self, name: str): + return self.schema.names.index(name) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 9a9f598e89..fcd717992c 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -401,8 +401,9 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR: columns=columns, ) - def explode(self, column_ids: typing.Sequence[str]) -> UnorderedIR: + def explode(self, offsets: typing.Sequence[int]) -> UnorderedIR: table = self._to_ibis_expr() + column_ids = tuple(table.columns[offset] for offset in offsets) # The offset array ensures null represents empty arrays after unnesting. offset_array_id = bigframes.core.guid.generate_guid("offset_array_") @@ -712,8 +713,9 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR: ordering=self._ordering, ) - def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR: + def explode(self, offsets: typing.Sequence[int]) -> OrderedIR: table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) + column_ids = tuple(table.columns[offset] for offset in offsets) offset_array_id = bigframes.core.guid.generate_guid("offset_array_") offset_array = ( @@ -721,7 +723,10 @@ def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR: ibis.greatest( 0, ibis.least( - *[table[column_id].length() - 1 for column_id in column_ids] + *[ + table[table.columns[offset]].length() - 1 + for offset in offsets + ] ), ) ) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 27e76c7910..857a19663c 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -42,6 +42,9 @@ OVERHEAD_VARIABLES = 5 +COL_OFFSET = int + + @dataclass(frozen=True) class BigFrameNode: """ @@ -826,7 +829,7 @@ def variables_introduced(self) -> int: @dataclass(frozen=True) class ExplodeNode(UnaryNode): - column_ids: typing.Tuple[str, ...] + column_ids: typing.Tuple[COL_OFFSET, ...] @property def row_preserving(self) -> bool: @@ -844,9 +847,9 @@ def schema(self) -> schemata.ArraySchema: self.child.schema.get_type(name).pyarrow_dtype.value_type ), ) - if name in self.column_ids + if offset in self.column_ids else schemata.SchemaItem(name, self.child.schema.get_type(name)) - for name in self.child.schema.names + for offset, name in enumerate(self.child.schema.names) ) return schemata.ArraySchema(items)