(draft) tts: Orpheus support by jamorphy · Pull Request #12487 · ggml-org/llama.cpp · GitHub

(draft) tts: Orpheus support #12487

Draft · wants to merge 10 commits into base: master

Showing changes from 1 commit.

Scaffolding for snake activation fn
SNAC uses the snake activation function. Added scaffolding to include
`GGML_OP_SNAKE` as a new op. Should this be a unary op?

The SNAC decoder also uses noise blocks to enhance outputs; they're optional,
so they are omitted for now until the model is integrated e2e.

Next steps: write the `llm_graph_context` for SNAC
jamorphy committed Mar 22, 2025
commit ad7d7ff05fe601545ee3c313f7af20dee897a707
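
For reference, the snake activation introduced by Ziyin et al. (2020) and used in SNAC-style vocoders is usually written with a learned per-channel frequency α:

    snake(x) = x + (1/α) · sin²(α · x)

The scaffold kernels below compute x + sin²(α·x), i.e. without the 1/α factor, so parity with SNAC's reference implementation is worth verifying once the decoder runs end to end.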
3 changes: 1 addition & 2 deletions convert_hf_to_gguf.py

@@ -2329,7 +2329,7 @@ def set_gguf_parameters(self):
 
 @Model.register("SNACDec")
 class SNACDecModel(Model):
-    model_arch = gguf.MODEL_ARCH.SNAC_DEC # Assumes this constant is defined in gguf
+    model_arch = gguf.MODEL_ARCH.SNAC_DEC
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[Tuple[str, Tensor]]:
        del bid # unused
@@ -2357,7 +2357,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_embedding_length(self.hparams["decoder_dim"])
         self.gguf_writer.add_decoder_upsample_rates(self.hparams["decoder_rates"])
         self.gguf_writer.add_decoder_channel_dims(self.hparams["decoder_channel_dims"])
-        self.gguf_writer.add_convnext_block_count(1)
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
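
Side note: the dropped `add_convnext_block_count(1)` call likely reflects that SNAC's decoder, unlike the WavTokenizer-style vocoder path that field belongs to, has no ConvNeXt blocks, though the commit does not say so explicitly.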
11 changes: 11 additions & 0 deletions ggml/include/ggml.h

@@ -492,6 +492,7 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
+        GGML_OP_SNAKE,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
@@ -1062,6 +1063,16 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    GGML_API struct ggml_tensor * ggml_snake(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * alpha);
+
+    GGML_API struct ggml_tensor * ggml_snake_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * alpha);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
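
For orientation, a sketch of how these declarations would be driven from a graph builder; `x` and `alpha` are illustrative names, assuming `x` holds decoder activations laid out as [samples, channels, ...] and `alpha` holds one learned frequency per channel from the SNAC weights:

    // hypothetical graph-building fragment (not part of this commit)
    struct ggml_tensor * cur = ggml_snake(ctx, x, alpha);
    // or, writing into x's buffer via a view:
    cur = ggml_snake_inplace(ctx, x, alpha);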
99 changes: 99 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c

@@ -1911,6 +1911,21 @@ inline static void ggml_vec_leaky_relu_f16 (const int n, ggml_fp16_t * y, const
         y[i] = GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
     }
 }
+inline static void ggml_vec_snake_f32(const int n, float * y, const float * x, const float a) {
+    for (int i = 0; i < n; ++i) {
+        float x_val = x[i];
+        float sin_val = sinf(a * x_val);
+        y[i] = x_val + sin_val * sin_val;
+    }
+}
+inline static void ggml_vec_snake_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t a) {
+    for (int i = 0; i < n; ++i) {
+        float x_val = GGML_FP16_TO_FP32(x[i]); // TODO: double check this conversion
+        float a_val = GGML_FP16_TO_FP32(a);
+        float sin_val = sinf(a_val * x_val);
+        y[i] = GGML_FP32_TO_FP16(x_val + sin_val * sin_val);
+    }
+}
 inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
 inline static void ggml_vec_sigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
@@ -7817,6 +7832,86 @@ static void ggml_compute_forward_leaky_relu(
     }
 }
 
+// ggml_compute_forward_snake
+
+static void ggml_compute_forward_snake_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    // Scaffold code, 1 thread for now
+    // TODO: add multithreading
+    if (params->ith != 0) {
+        return;
+    }
+
+    struct ggml_tensor * alpha = *(struct ggml_tensor **)(dst->op_params);
+    const float * x = (const float *)src0->data;
+    const float * a = (const float *)alpha->data;
+    float * y = (float *)dst->data;
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+    const int channels = src0->ne[1];
+
+    for (int i = 0; i < n; i++) {
+        int c = i % channels;
+        ggml_vec_snake_f32(nc,
+                (float *) ((char *) y + i * dst->nb[1]),
+                (const float *) ((const char *) x + i * src0->nb[1]),
+                a[c]); // alpha value for this channel
+    }
+}
+
+static void ggml_compute_forward_snake_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    struct ggml_tensor * alpha = *(struct ggml_tensor **)(dst->op_params);
+    const ggml_fp16_t * x = (const ggml_fp16_t *)src0->data;
+    const ggml_fp16_t * a = (const ggml_fp16_t *)alpha->data;
+    ggml_fp16_t * y = (ggml_fp16_t *)dst->data;
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+    const int channels = src0->ne[1];
+
+    for (int i = 0; i < n; i++) {
+        int c = i % channels;
+        ggml_vec_snake_f16(nc,
+                (ggml_fp16_t *) ((char *) y + i * dst->nb[1]),
+                (const ggml_fp16_t *) ((const char *) x + i * src0->nb[1]),
+                a[c]);
+    }
+}
+
+static void ggml_compute_forward_snake(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_snake_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_snake_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_silu_back
 
 static void ggml_compute_forward_silu_back_f32(
@@ -14555,6 +14650,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_leaky_relu(params, tensor);
             } break;
+        case GGML_OP_SNAKE:
+            {
+                ggml_compute_forward_snake(params, tensor);
+            } break;
        case GGML_OP_FLASH_ATTN_EXT:
            {
                ggml_compute_forward_flash_attn_ext(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor);
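
The forward kernels above deliberately bail out on every thread but the first. If the multithreading TODO is resolved the way most ggml CPU kernels handle it, the row loop would be split by thread index; a minimal sketch under that assumption (not code from this commit):

    // split the n rows evenly across nth threads; each row is independent
    const int ith = params->ith;           // this thread's index
    const int nth = params->nth;           // total number of threads
    const int dr  = (n + nth - 1)/nth;     // rows per thread, rounded up
    const int ir0 = dr*ith;                // first row for this thread
    const int ir1 = MIN(ir0 + dr, n);      // one past the last row
    for (int i = ir0; i < ir1; i++) {
        // per-row ggml_vec_snake_* call as above
    }

No synchronization is needed inside the op, since threads write disjoint rows.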
34 changes: 32 additions & 2 deletions ggml/src/ggml.c

@@ -967,6 +967,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "SNAKE",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -998,7 +999,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1097,7 +1098,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -2474,6 +2475,35 @@ struct ggml_tensor * ggml_leaky_relu(
     return result;
 }
 
+// ggml_snake
+
+struct ggml_tensor * ggml_snake(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * alpha) {
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    // store ptr to alpha tensor
+    ggml_set_op_params(result, &alpha, sizeof(alpha));
+    result->op = GGML_OP_SNAKE;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_snake_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * alpha) {
+    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+    ggml_set_op_params(result, &alpha, sizeof(alpha));
+    result->op = GGML_OP_SNAKE;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_sigmoid
 
 struct ggml_tensor * ggml_sigmoid(
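
A possible refinement, not part of this commit: op_params here stores a raw host pointer to alpha, which hides the dependency from the graph (the scheduler and allocator only see src[]). The conventional ggml pattern for an op with a tensor parameter is to record it as a second source:

    result->op     = GGML_OP_SNAKE;
    result->src[0] = a;
    result->src[1] = alpha;   // dependency now visible to the graph

    // ...and in the CPU kernel, instead of reading op_params:
    const struct ggml_tensor * alpha = dst->src[1];

That would also bear on the question in the commit message: ggml's unary ops take no tensor parameters, so snake fits a dedicated op with two sources better than a GGML_OP_UNARY variant.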