ggml-backend: backend-agnostic tensor parallelism by JohannesGaessler · Pull Request #13776 · ggml-org/llama.cpp · GitHub

ggml-backend: backend-agnostic tensor parallelism #13776

Closed
Changes from 1 commit

Commits (65 commits)
0c90859
WIP
JohannesGaessler May 17, 2025
838f577
WIP
JohannesGaessler May 17, 2025
aedc3f7
WIP
JohannesGaessler May 17, 2025
99bb015
try fix
JohannesGaessler May 17, 2025
06d7a88
WIP
JohannesGaessler May 17, 2025
0a69555
WIP
JohannesGaessler May 17, 2025
7563db8
WIP
JohannesGaessler May 17, 2025
363e237
WIP
JohannesGaessler May 19, 2025
47e6d24
fix
JohannesGaessler May 20, 2025
751e488
WIP
JohannesGaessler May 20, 2025
3f8f323
WIP
JohannesGaessler May 20, 2025
316ef4e
WIP
JohannesGaessler May 20, 2025
47b228f
try fix
JohannesGaessler May 20, 2025
cf4d0b6
try fix
JohannesGaessler May 20, 2025
bb48a90
try fix
JohannesGaessler May 20, 2025
016405b
WIP
JohannesGaessler May 21, 2025
7c17ff1
WIP
JohannesGaessler May 21, 2025
deda9c2
WIP
JohannesGaessler May 22, 2025
3c1291f
WIP
JohannesGaessler May 22, 2025
16d29fe
WIP
JohannesGaessler May 22, 2025
7468e9d
WIP
JohannesGaessler May 22, 2025
119657a
WIP
JohannesGaessler May 22, 2025
50d2c5e
WIP
JohannesGaessler May 22, 2025
fe2747e
try fix
JohannesGaessler May 22, 2025
6ddf206
try fix
JohannesGaessler May 22, 2025
996d263
WIP
JohannesGaessler May 22, 2025
3a432ab
WIP
JohannesGaessler May 22, 2025
9c6550e
WIP
JohannesGaessler May 23, 2025
2da2cc3
WIP
JohannesGaessler May 23, 2025
67f02bf
WIP
JohannesGaessler May 23, 2025
2e282d5
WIP
JohannesGaessler May 23, 2025
8860122
WIP
JohannesGaessler May 23, 2025
3d96528
WIP
JohannesGaessler May 23, 2025
2d2ef89
WIP
JohannesGaessler May 23, 2025
6b836c8
WIP
JohannesGaessler May 23, 2025
f5a5155
WIP
JohannesGaessler May 23, 2025
cc91ca1
WIP
JohannesGaessler May 23, 2025
6ee4d0e
WIP
JohannesGaessler May 23, 2025
935d652
WIP
JohannesGaessler May 23, 2025
4dacb2f
WIP
JohannesGaessler May 23, 2025
7b7f399
WIP
JohannesGaessler May 23, 2025
aeda7e0
WIP
JohannesGaessler May 23, 2025
95f1caf
WIP
JohannesGaessler May 23, 2025
1f648ba
WIP
JohannesGaessler May 23, 2025
e18d1ef
WIP
JohannesGaessler May 23, 2025
66c8eec
WIP
JohannesGaessler May 23, 2025
206ab58
WIP
JohannesGaessler May 23, 2025
ae1617c
WIP
JohannesGaessler May 24, 2025
f617bbb
WIP
JohannesGaessler May 24, 2025
528dd51
WIP
JohannesGaessler May 24, 2025
4006293
WIP
JohannesGaessler May 24, 2025
943456b
WIP
JohannesGaessler May 24, 2025
25c25ea
WIP
JohannesGaessler May 24, 2025
739d902
WIP
JohannesGaessler May 24, 2025
26807a9
WIP
JohannesGaessler May 24, 2025
1c9dcde
WIP
JohannesGaessler May 24, 2025
3c21fdd
WIP
JohannesGaessler May 24, 2025
9719003
WIP
JohannesGaessler May 24, 2025
1c37a20
WIP
JohannesGaessler May 24, 2025
f6dd08e
WIP
JohannesGaessler May 24, 2025
02e4af1
WIP
JohannesGaessler May 24, 2025
07ca4b8
WIP
JohannesGaessler May 24, 2025
c0358bd
WIP
JohannesGaessler May 24, 2025
ea3cab5
WIP
JohannesGaessler May 24, 2025
027d97e
WIP
JohannesGaessler May 25, 2025

WIP

JohannesGaessler committed May 22, 2025
commit 996d263325d5adb419eeef3e4918dc4e1fc9ba8b
22 changes: 10 additions & 12 deletions ggml/src/ggml-backend.cpp
@@ -1110,7 +1110,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
     }
     split.i_start = 0;
     split.n_inputs = 0;
-    int cur_backend_id = split.backend_id;
     for (; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
 
@@ -1124,7 +1123,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
         // check if we should start a new split based on the sources of the current node
         bool need_new_split = false;
-        if (node_backend_id == cur_backend_id && split.n_inputs > 0) {
+        if (node_backend_id == split.backend_id && split.n_inputs > 0) {
             for (int j = 0; j < GGML_MAX_SRC; j++) {
                 struct ggml_tensor * src = node->src[j];
                 if (src == nullptr) {
@@ -1135,7 +1134,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 // by starting a new split, the memory of the previously offloaded weights can be reused
                 if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
                     const int src_backend_id = tensor_backend_id(src);
-                    if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
+                    if (src_backend_id != split.backend_id && !ggml_backend_sched_buffer_supported(sched, src, split.backend_id)) {
                         need_new_split = true;
                         break;
                     }
@@ -1145,24 +1144,23 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 if (split.n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
                     const size_t id = hash_id(src);
                     const int src_backend_id = sched->hv_tensor_backend_ids[id];
-                    const bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
-                    if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == nullptr && !supported) {
+                    const bool supported = ggml_backend_sched_buffer_supported(sched, src, split.backend_id);
+                    if (src_backend_id != split.backend_id && tensor_id_copy(id, split.backend_id, 0) == nullptr && !supported) {
                         need_new_split = true;
                         break;
                     }
                 }
             }
         }
 
-        if (node_backend_id != cur_backend_id || need_new_split) {
+        if (node_backend_id != split.backend_id || need_new_split) {
             split.i_end = i;
             split.graph = ggml_graph_view(graph, split.i_start, split.i_end);
             splits_no_tp.push_back(split);
 
             split.backend_id = node_backend_id;
             split.i_start = i;
             split.n_inputs = 0;
-            cur_backend_id = node_backend_id;
         }
 
         // find inputs that are not on the same backend
@@ -1200,26 +1198,26 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 }
             }
 
-            if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
+            if (src_backend_id != split.backend_id && !ggml_backend_sched_buffer_supported(sched, src, split.backend_id)) {
                 // create a copy of the input in the split's backend
-                if (tensor_id_copy(src_id, cur_backend_id, 0) == nullptr) {
-                    ggml_backend_t backend = sched->backends[cur_backend_id];
+                if (tensor_id_copy(src_id, split.backend_id, 0) == nullptr) {
+                    ggml_backend_t backend = sched->backends[split.backend_id];
                     for (int c = 0; c < sched->n_copies; c++) {
                         struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
                         ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
                         if (sched->n_copies > 1) {
                             ggml_set_input(tensor_copy);
                             ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
                         }
-                        tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
+                        tensor_id_copy(src_id, split.backend_id, c) = tensor_copy;
                         SET_CAUSE(tensor_copy, "4.cpy");
                     }
                     const int n_inputs = split.n_inputs++;
                     GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
                     split.inputs[n_inputs] = src;
                 }
                 // fprintf(stderr, "%s: 200 replacing src%d=%s of %s\n", __func__, j, node->src[j]->name, node->name);
                 node->src[j] = tensor_id_copy(src_id, split.backend_id, sched->cur_copy);
             }
         }
     }
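The functional change in this commit is the removal of the shadow local cur_backend_id from ggml_backend_sched_split_graph: the backend id of the current split is now read directly from split.backend_id, presumably so there is a single source of truth when a split's backend assignment changes. As a rough illustration of the splitting pattern this loop implements (consecutive nodes assigned to the same backend are grouped into one split, and a new split starts when the node's backend differs from the current split's backend), here is a minimal stand-alone sketch; the struct and function names are simplified stand-ins for illustration only, not the real ggml scheduler types.

    // Minimal, self-contained sketch of the split-assignment pattern touched by this
    // commit. node_t, split_t and split_by_backend are hypothetical, simplified types;
    // the real scheduler additionally tracks split inputs, buffer support, and copies.
    #include <cstdio>
    #include <vector>

    struct node_t {
        int backend_id;   // backend the node was assigned to
    };

    struct split_t {
        int backend_id;   // backend that will execute this split
        int i_start;      // first node index in the split (inclusive)
        int i_end;        // one past the last node index in the split
    };

    static std::vector<split_t> split_by_backend(const std::vector<node_t> & nodes) {
        std::vector<split_t> splits;
        if (nodes.empty()) {
            return splits;
        }

        split_t split = { nodes[0].backend_id, 0, 0 };
        for (int i = 0; i < (int) nodes.size(); i++) {
            // read the backend id directly from the current split (no cur_backend_id shadow copy)
            if (nodes[i].backend_id != split.backend_id) {
                split.i_end = i;
                splits.push_back(split);

                split.backend_id = nodes[i].backend_id;
                split.i_start    = i;
            }
        }
        split.i_end = (int) nodes.size();
        splits.push_back(split);
        return splits;
    }

    int main() {
        const std::vector<node_t> nodes = { {0}, {0}, {1}, {1}, {0} };
        for (const split_t & s : split_by_backend(nodes)) {
            printf("split: backend %d, nodes [%d, %d)\n", s.backend_id, s.i_start, s.i_end);
        }
        return 0;
    }

Under these assumptions, the five example nodes produce three splits: backend 0 for nodes [0, 2), backend 1 for [2, 4), and backend 0 again for [4, 5).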