Compositor: Rewrite and optimize Double Edge Mask node
All checks were successful
buildbot/vdev-code-daily-lint Build done.
buildbot/vdev-code-daily-linux-x86_64 Build done.
buildbot/vdev-code-daily-darwin-x86_64 Build done.
buildbot/vdev-code-daily-windows-amd64 Build done.
buildbot/vdev-code-daily-darwin-arm64 Build done.
buildbot/vdev-code-daily-coordinator Build done.
All checks were successful
buildbot/vdev-code-daily-lint Build done.
buildbot/vdev-code-daily-linux-x86_64 Build done.
buildbot/vdev-code-daily-darwin-x86_64 Build done.
buildbot/vdev-code-daily-windows-amd64 Build done.
buildbot/vdev-code-daily-darwin-arm64 Build done.
buildbot/vdev-code-daily-coordinator Build done.
This patch rewrites and optimizes the Double Edge Mask node to be orders of magnitude faster. For a 1k complex mask, it is 650x faster, while for a 1k simple mask, it is only 50x faster. This improvement is attributed to the use of a new Jump Flooding algorithm as well as multi-threading, matching the GPU implementation. The result of the new implementation differs in that the boundaries of the masks are now identified as the last pixels inside the mask. Pull Request: #117545
This commit is contained in:
parent
316f14b834
commit
049b0e6539
@ -10,6 +10,7 @@ if(WITH_COMPOSITOR_CPU)
|
||||
intern
|
||||
nodes
|
||||
operations
|
||||
algorithms
|
||||
realtime_compositor
|
||||
../blenkernel
|
||||
../blentranslation
|
||||
@ -589,6 +590,9 @@ if(WITH_COMPOSITOR_CPU)
|
||||
|
||||
operations/COM_MaskOperation.cc
|
||||
operations/COM_MaskOperation.h
|
||||
|
||||
algorithms/COM_JumpFloodingAlgorithm.cc
|
||||
algorithms/COM_JumpFloodingAlgorithm.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
@ -0,0 +1,106 @@
|
||||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "BLI_array.hh"
|
||||
#include "BLI_math_base.h"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_span.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
||||
#include "COM_JumpFloodingAlgorithm.h"
|
||||
|
||||
/* Exact copies of the functions in gpu_shader_compositor_jump_flooding_lib.glsl and
|
||||
* jump_flooding.cc but adapted for CPU. See those files for more information. */
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
int2 encode_jump_flooding_value(int2 closest_seed_texel, bool is_flooded)
|
||||
{
|
||||
return is_flooded ? closest_seed_texel : JUMP_FLOODING_NON_FLOODED_VALUE;
|
||||
}
|
||||
|
||||
int2 initialize_jump_flooding_value(int2 texel, bool is_seed)
|
||||
{
|
||||
return encode_jump_flooding_value(texel, is_seed);
|
||||
}
|
||||
|
||||
static int2 load_jump_flooding(Span<int2> input, int2 texel, int2 size, int2 fallback)
|
||||
{
|
||||
if (texel.x < 0 || texel.x >= size.x || texel.y < 0 || texel.y >= size.y) {
|
||||
return fallback;
|
||||
}
|
||||
return input[size_t(texel.y) * size.x + texel.x];
|
||||
}
|
||||
|
||||
static void jump_flooding_pass(Span<int2> input,
|
||||
MutableSpan<int2> output,
|
||||
int2 size,
|
||||
int step_size)
|
||||
{
|
||||
threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
|
||||
for (const int64_t y : sub_y_range) {
|
||||
for (const int64_t x : IndexRange(size.x)) {
|
||||
int2 texel = int2(x, y);
|
||||
|
||||
int2 closest_seed_texel = int2(0);
|
||||
float minimum_squared_distance = std::numeric_limits<float>::max();
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
int2 offset = int2(i, j) * step_size;
|
||||
|
||||
int2 fallback = JUMP_FLOODING_NON_FLOODED_VALUE;
|
||||
int2 jump_flooding_value = load_jump_flooding(input, texel + offset, size, fallback);
|
||||
|
||||
if (jump_flooding_value == JUMP_FLOODING_NON_FLOODED_VALUE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int2 closest_seed_texel_to_neighbor = jump_flooding_value;
|
||||
|
||||
float squared_distance = math::distance_squared(float2(closest_seed_texel_to_neighbor),
|
||||
float2(texel));
|
||||
|
||||
if (squared_distance < minimum_squared_distance) {
|
||||
minimum_squared_distance = squared_distance;
|
||||
closest_seed_texel = closest_seed_texel_to_neighbor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool flooding_happened = minimum_squared_distance != std::numeric_limits<float>::max();
|
||||
int2 jump_flooding_value = encode_jump_flooding_value(closest_seed_texel,
|
||||
flooding_happened);
|
||||
|
||||
output[size_t(texel.y) * size.x + texel.x] = jump_flooding_value;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Array<int2> jump_flooding(Span<int2> input, int2 size)
|
||||
{
|
||||
Array<int2> initial_flooded_result(size_t(size.x) * size.y);
|
||||
jump_flooding_pass(input, initial_flooded_result, size, 1);
|
||||
|
||||
Array<int2> *result_to_flood = &initial_flooded_result;
|
||||
Array<int2> intermediate_result(size_t(size.x) * size.y);
|
||||
Array<int2> *result_after_flooding = &intermediate_result;
|
||||
|
||||
const int max_size = math::max(size.x, size.y);
|
||||
int step_size = power_of_2_max_i(max_size) / 2;
|
||||
|
||||
while (step_size != 0) {
|
||||
jump_flooding_pass(*result_to_flood, *result_after_flooding, size, step_size);
|
||||
std::swap(result_to_flood, result_after_flooding);
|
||||
step_size /= 2;
|
||||
}
|
||||
|
||||
return *result_to_flood;
|
||||
}
|
||||
|
||||
} // namespace blender::compositor
|
@ -0,0 +1,24 @@
|
||||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_array.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_span.hh"
|
||||
|
||||
/* Exact copies of the functions in gpu_shader_compositor_jump_flooding_lib.glsl and
|
||||
* COM_algorithm_jump_flooding.hh but adapted for CPU. See those files for more information. */
|
||||
|
||||
#define JUMP_FLOODING_NON_FLOODED_VALUE int2(-1)
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
int2 encode_jump_flooding_value(int2 closest_seed_texel, bool is_flooded);
|
||||
|
||||
int2 initialize_jump_flooding_value(int2 texel, bool is_seed);
|
||||
|
||||
Array<int2> jump_flooding(Span<int2> input, int2 size);
|
||||
|
||||
} // namespace blender::compositor
|
@ -19,8 +19,8 @@ void DoubleEdgeMaskNode::convert_to_operations(NodeConverter &converter,
|
||||
const bNode *bnode = this->get_bnode();
|
||||
|
||||
operation = new DoubleEdgeMaskOperation();
|
||||
operation->set_adjacent_only(bnode->custom1);
|
||||
operation->set_keep_inside(bnode->custom2);
|
||||
operation->set_include_all_inner_edges(!bool(bnode->custom1));
|
||||
operation->set_include_edges_of_image(bool(bnode->custom2));
|
||||
converter.add_operation(operation);
|
||||
|
||||
converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_span.hh"
|
||||
|
||||
#include "COM_MultiThreadedOperation.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
@ -15,8 +17,8 @@ class DoubleEdgeMaskOperation : public NodeOperation {
|
||||
*/
|
||||
SocketReader *input_outer_mask_;
|
||||
SocketReader *input_inner_mask_;
|
||||
bool adjacent_only_;
|
||||
bool keep_inside_;
|
||||
bool include_all_inner_edges_;
|
||||
bool include_edges_of_image_;
|
||||
|
||||
/* TODO(manzanilla): To be removed with tiled implementation. */
|
||||
float *cached_instance_;
|
||||
@ -26,7 +28,21 @@ class DoubleEdgeMaskOperation : public NodeOperation {
|
||||
public:
|
||||
DoubleEdgeMaskOperation();
|
||||
|
||||
void do_double_edge_mask(float *imask, float *omask, float *res);
|
||||
void compute_boundary(const float *inner_mask,
|
||||
const float *outer_mask,
|
||||
MutableSpan<int2> inner_boundary,
|
||||
MutableSpan<int2> outer_boundary);
|
||||
|
||||
void compute_gradient(const float *inner_mask_buffer,
|
||||
const float *outer_mask_buffer,
|
||||
MutableSpan<int2> flooded_inner_boundary,
|
||||
MutableSpan<int2> flooded_outer_boundary,
|
||||
float *output_mask);
|
||||
|
||||
void compute_double_edge_mask(const float *inner_mask,
|
||||
const float *outer_mask,
|
||||
float *output_mask);
|
||||
|
||||
/**
|
||||
* The inner loop of this operation.
|
||||
*/
|
||||
@ -48,13 +64,13 @@ class DoubleEdgeMaskOperation : public NodeOperation {
|
||||
ReadBufferOperation *read_operation,
|
||||
rcti *output) override;
|
||||
|
||||
void set_adjacent_only(bool adjacent_only)
|
||||
void set_include_all_inner_edges(bool include_all_inner_edges)
|
||||
{
|
||||
adjacent_only_ = adjacent_only;
|
||||
include_all_inner_edges_ = include_all_inner_edges;
|
||||
}
|
||||
void set_keep_inside(bool keep_inside)
|
||||
void set_include_edges_of_image(bool include_edges_of_image)
|
||||
{
|
||||
keep_inside_ = keep_inside;
|
||||
include_edges_of_image_ = include_edges_of_image;
|
||||
}
|
||||
|
||||
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
|
||||
|
@ -54,10 +54,11 @@ void main()
|
||||
|
||||
/* The pixels at the boundary are those that are masked and have non masked neighbors. The inner
|
||||
* boundary has a specialization, if include_all_inner_edges is false, only inner boundaries that
|
||||
* lie inside the outer mask will be considered a boundary. */
|
||||
* lie inside the outer mask will be considered a boundary. The outer boundary is only considered
|
||||
* if it is not inside the inner mask. */
|
||||
bool is_inner_boundary = is_inner_masked && has_inner_non_masked_neighbors &&
|
||||
(is_outer_masked || include_all_inner_edges);
|
||||
bool is_outer_boundary = is_outer_masked && has_outer_non_masked_neighbors;
|
||||
bool is_outer_boundary = is_outer_masked && !is_inner_masked && has_outer_non_masked_neighbors;
|
||||
|
||||
/* Encode the boundary information in the format expected by the jump flooding algorithm. */
|
||||
ivec2 inner_jump_flooding_value = initialize_jump_flooding_value(texel, is_inner_boundary);
|
||||
|
Loading…
x
Reference in New Issue
Block a user