Compositor: Rewrite and optimize Double Edge Mask node

This patch rewrites and optimizes the Double Edge Mask node to be orders of magnitude faster. For a 1k complex mask, it is 650x faster, while for a 1k simple mask, it is only 50x faster. This improvement is attributed to the use of a new Jump Flooding algorithm as well as multi-threading, matching the GPU implementation. The result of the new implementation differs in that the boundaries of the masks are now identified as the last pixels inside the mask. Pull Request: #117545
2024-01-30 19:49:32 +01:00 · 2024-01-30 19:49:32 +01:00 · 049b0e6539
commit 049b0e6539
parent 316f14b834
7 changed files with 272 additions and 1268 deletions
--- a/source/blender/compositor/CMakeLists.txt
+++ b/source/blender/compositor/CMakeLists.txt
@ -10,6 +10,7 @@ if(WITH_COMPOSITOR_CPU)
    intern
    nodes
    operations
+    algorithms
    realtime_compositor
    ../blenkernel
    ../blentranslation
@ -589,6 +590,9 @@ if(WITH_COMPOSITOR_CPU)

    operations/COM_MaskOperation.cc
    operations/COM_MaskOperation.h
+
+    algorithms/COM_JumpFloodingAlgorithm.cc
+    algorithms/COM_JumpFloodingAlgorithm.h
  )

  set(LIB
--- a/source/blender/compositor/algorithms/COM_JumpFloodingAlgorithm.cc
+++ b/source/blender/compositor/algorithms/COM_JumpFloodingAlgorithm.cc
@ -0,0 +1,106 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <limits>
+#include <utility>
+
+#include "BLI_array.hh"
+#include "BLI_math_base.h"
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+#include "BLI_span.hh"
+#include "BLI_task.hh"
+
+#include "COM_JumpFloodingAlgorithm.h"
+
+/* Exact copies of the functions in gpu_shader_compositor_jump_flooding_lib.glsl and
+ * jump_flooding.cc but adapted for CPU. See those files for more information. */
+
+namespace blender::compositor {
+
+int2 encode_jump_flooding_value(int2 closest_seed_texel, bool is_flooded)
+{
+  return is_flooded ? closest_seed_texel : JUMP_FLOODING_NON_FLOODED_VALUE;
+}
+
+int2 initialize_jump_flooding_value(int2 texel, bool is_seed)
+{
+  return encode_jump_flooding_value(texel, is_seed);
+}
+
+static int2 load_jump_flooding(Span<int2> input, int2 texel, int2 size, int2 fallback)
+{
+  if (texel.x < 0 || texel.x >= size.x || texel.y < 0 || texel.y >= size.y) {
+    return fallback;
+  }
+  return input[size_t(texel.y) * size.x + texel.x];
+}
+
+static void jump_flooding_pass(Span<int2> input,
+                               MutableSpan<int2> output,
+                               int2 size,
+                               int step_size)
+{
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+
+        int2 closest_seed_texel = int2(0);
+        float minimum_squared_distance = std::numeric_limits<float>::max();
+        for (int j = -1; j <= 1; j++) {
+          for (int i = -1; i <= 1; i++) {
+            int2 offset = int2(i, j) * step_size;
+
+            int2 fallback = JUMP_FLOODING_NON_FLOODED_VALUE;
+            int2 jump_flooding_value = load_jump_flooding(input, texel + offset, size, fallback);
+
+            if (jump_flooding_value == JUMP_FLOODING_NON_FLOODED_VALUE) {
+              continue;
+            }
+
+            int2 closest_seed_texel_to_neighbor = jump_flooding_value;
+
+            float squared_distance = math::distance_squared(float2(closest_seed_texel_to_neighbor),
+                                                            float2(texel));
+
+            if (squared_distance < minimum_squared_distance) {
+              minimum_squared_distance = squared_distance;
+              closest_seed_texel = closest_seed_texel_to_neighbor;
+            }
+          }
+        }
+
+        bool flooding_happened = minimum_squared_distance != std::numeric_limits<float>::max();
+        int2 jump_flooding_value = encode_jump_flooding_value(closest_seed_texel,
+                                                              flooding_happened);
+
+        output[size_t(texel.y) * size.x + texel.x] = jump_flooding_value;
+      }
+    }
+  });
+}
+
+Array<int2> jump_flooding(Span<int2> input, int2 size)
+{
+  Array<int2> initial_flooded_result(size_t(size.x) * size.y);
+  jump_flooding_pass(input, initial_flooded_result, size, 1);
+
+  Array<int2> *result_to_flood = &initial_flooded_result;
+  Array<int2> intermediate_result(size_t(size.x) * size.y);
+  Array<int2> *result_after_flooding = &intermediate_result;
+
+  const int max_size = math::max(size.x, size.y);
+  int step_size = power_of_2_max_i(max_size) / 2;
+
+  while (step_size != 0) {
+    jump_flooding_pass(*result_to_flood, *result_after_flooding, size, step_size);
+    std::swap(result_to_flood, result_after_flooding);
+    step_size /= 2;
+  }
+
+  return *result_to_flood;
+}
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/algorithms/COM_JumpFloodingAlgorithm.h
+++ b/source/blender/compositor/algorithms/COM_JumpFloodingAlgorithm.h
@ -0,0 +1,24 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_array.hh"
+#include "BLI_math_vector_types.hh"
+#include "BLI_span.hh"
+
+/* Exact copies of the functions in gpu_shader_compositor_jump_flooding_lib.glsl and
+ * COM_algorithm_jump_flooding.hh but adapted for CPU. See those files for more information. */
+
+#define JUMP_FLOODING_NON_FLOODED_VALUE int2(-1)
+
+namespace blender::compositor {
+
+int2 encode_jump_flooding_value(int2 closest_seed_texel, bool is_flooded);
+
+int2 initialize_jump_flooding_value(int2 texel, bool is_seed);
+
+Array<int2> jump_flooding(Span<int2> input, int2 size);
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/nodes/COM_DoubleEdgeMaskNode.cc
+++ b/source/blender/compositor/nodes/COM_DoubleEdgeMaskNode.cc
@ -19,8 +19,8 @@ void DoubleEdgeMaskNode::convert_to_operations(NodeConverter &converter,
  const bNode *bnode = this->get_bnode();

  operation = new DoubleEdgeMaskOperation();
-  operation->set_adjacent_only(bnode->custom1);
-  operation->set_keep_inside(bnode->custom2);
+  operation->set_include_all_inner_edges(!bool(bnode->custom1));
+  operation->set_include_edges_of_image(bool(bnode->custom2));
  converter.add_operation(operation);

  converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
--- a/source/blender/compositor/operations/COM_DoubleEdgeMaskOperation.cc
+++ b/source/blender/compositor/operations/COM_DoubleEdgeMaskOperation.cc
--- a/source/blender/compositor/operations/COM_DoubleEdgeMaskOperation.h
+++ b/source/blender/compositor/operations/COM_DoubleEdgeMaskOperation.h
@ -4,6 +4,8 @@

 #pragma once

+#include "BLI_span.hh"
+
 #include "COM_MultiThreadedOperation.h"

 namespace blender::compositor {
@ -15,8 +17,8 @@ class DoubleEdgeMaskOperation : public NodeOperation {
   */
  SocketReader *input_outer_mask_;
  SocketReader *input_inner_mask_;
-  bool adjacent_only_;
-  bool keep_inside_;
+  bool include_all_inner_edges_;
+  bool include_edges_of_image_;

  /* TODO(manzanilla): To be removed with tiled implementation. */
  float *cached_instance_;
@ -26,7 +28,21 @@ class DoubleEdgeMaskOperation : public NodeOperation {
 public:
  DoubleEdgeMaskOperation();

-  void do_double_edge_mask(float *imask, float *omask, float *res);
+  void compute_boundary(const float *inner_mask,
+                        const float *outer_mask,
+                        MutableSpan<int2> inner_boundary,
+                        MutableSpan<int2> outer_boundary);
+
+  void compute_gradient(const float *inner_mask_buffer,
+                        const float *outer_mask_buffer,
+                        MutableSpan<int2> flooded_inner_boundary,
+                        MutableSpan<int2> flooded_outer_boundary,
+                        float *output_mask);
+
+  void compute_double_edge_mask(const float *inner_mask,
+                                const float *outer_mask,
+                                float *output_mask);
+
  /**
   * The inner loop of this operation.
   */
@ -48,13 +64,13 @@ class DoubleEdgeMaskOperation : public NodeOperation {
                                            ReadBufferOperation *read_operation,
                                            rcti *output) override;

-  void set_adjacent_only(bool adjacent_only)
+  void set_include_all_inner_edges(bool include_all_inner_edges)
  {
-    adjacent_only_ = adjacent_only;
+    include_all_inner_edges_ = include_all_inner_edges;
  }
-  void set_keep_inside(bool keep_inside)
+  void set_include_edges_of_image(bool include_edges_of_image)
  {
-    keep_inside_ = keep_inside;
+    include_edges_of_image_ = include_edges_of_image;
  }

  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_double_edge_mask_compute_boundary.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_double_edge_mask_compute_boundary.glsl
@ -54,10 +54,11 @@ void main()

  /* The pixels at the boundary are those that are masked and have non masked neighbors. The inner
   * boundary has a specialization, if include_all_inner_edges is false, only inner boundaries that
-   * lie inside the outer mask will be considered a boundary. */
+   * lie inside the outer mask will be considered a boundary. The outer boundary is only considered
+   * if it is not inside the inner mask. */
  bool is_inner_boundary = is_inner_masked && has_inner_non_masked_neighbors &&
                           (is_outer_masked || include_all_inner_edges);
-  bool is_outer_boundary = is_outer_masked && has_outer_non_masked_neighbors;
+  bool is_outer_boundary = is_outer_masked && !is_inner_masked && has_outer_non_masked_neighbors;

  /* Encode the boundary information in the format expected by the jump flooding algorithm. */
  ivec2 inner_jump_flooding_value = initialize_jump_flooding_value(texel, is_inner_boundary);