Gary/add llmobs svc functionalities traceid by gary-huang · Pull Request #8505 · DataDog/dd-trace-java · GitHub
[go: up one dir, main page]

Skip to content

Gary/add llmobs svc functionalities traceid #8505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ private HttpUrl getAgentlessUrl(Intake intake) {

public enum Intake {
API("api", "v2", Config::isCiVisibilityAgentlessEnabled, Config::getCiVisibilityAgentlessUrl),
LLMOBS_API("api", "v2", Config::isLlmObsAgentlessEnabled, Config::getLlMObsAgentlessUrl),
LOGS(
"http-intake.logs",
"v2",
Expand Down
WriterConstants.DD_INTAKE_WRITER_TYPE);
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import datadog.trace.api.config.GeneralConfig;
import datadog.trace.api.config.IastConfig;
import datadog.trace.api.config.JmxFetchConfig;
import datadog.trace.api.config.LlmObsConfig;
import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.api.config.RemoteConfigConfig;
import datadog.trace.api.config.TraceInstrumentationConfig;
Expand All @@ -38,6 +39,7 @@
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
import datadog.trace.bootstrap.instrumentation.api.AgentTracer.TracerAPI;
import datadog.trace.bootstrap.instrumentation.api.ProfilingContextIntegration;
import datadog.trace.bootstrap.instrumentation.api.WriterConstants;
import datadog.trace.bootstrap.instrumentation.jfr.InstrumentationBasedProfiling;
import datadog.trace.util.AgentTaskScheduler;
import datadog.trace.util.AgentThreadFactory.AgentThread;
Expand Down Expand Up @@ -105,7 +107,9 @@ private enum AgentFeature {
EXCEPTION_REPLAY(DebuggerConfig.EXCEPTION_REPLAY_ENABLED, false),
CODE_ORIGIN(TraceInstrumentationConfig.CODE_ORIGIN_FOR_SPANS_ENABLED, false),
DATA_JOBS(GeneralConfig.DATA_JOBS_ENABLED, false),
AGENTLESS_LOG_SUBMISSION(GeneralConfig.AGENTLESS_LOG_SUBMISSION_ENABLED, false);
AGENTLESS_LOG_SUBMISSION(GeneralConfig.AGENTLESS_LOG_SUBMISSION_ENABLED, false),
LLMOBS(LlmObsConfig.LLMOBS_ENABLED, false),
LLMOBS_AGENTLESS(LlmObsConfig.LLMOBS_AGENTLESS_ENABLED, false);

private final String configKey;
private final String systemProp;
Expand Down Expand Up @@ -152,6 +156,8 @@ public boolean isEnabledByDefault() {
private static boolean iastFullyDisabled;
private static boolean cwsEnabled = false;
private static boolean ciVisibilityEnabled = false;
private static boolean llmObsEnabled = false;
private static boolean llmObsAgentlessEnabled = false;
private static boolean usmEnabled = false;
private static boolean telemetryEnabled = true;
private static boolean dynamicInstrumentationEnabled = false;
Expand Down Expand Up @@ -277,6 +283,25 @@ public static void start(
exceptionReplayEnabled = isFeatureEnabled(AgentFeature.EXCEPTION_REPLAY);
codeOriginEnabled = isFeatureEnabled(AgentFeature.CODE_ORIGIN);
agentlessLogSubmissionEnabled = isFeatureEnabled(AgentFeature.AGENTLESS_LOG_SUBMISSION);
llmObsEnabled = isFeatureEnabled(AgentFeature.LLMOBS);

// setup writers when llmobs is enabled to accommodate apm and llmobs
if (llmObsEnabled) {
// for llm obs spans, use agent proxy by default, apm spans will use agent writer
setSystemPropertyDefault(
propertyNameToSystemPropertyName(TracerConfig.WRITER_TYPE),
WriterConstants.MULTI_WRITER_TYPE
+ ":"
+ WriterConstants.DD_INTAKE_WRITER_TYPE
+ ","
+ WriterConstants.DD_AGENT_WRITER_TYPE);
if (llmObsAgentlessEnabled) {
// use API writer only
setSystemPropertyDefault(
propertyNameToSystemPropertyName(TracerConfig.WRITER_TYPE),
}
}

if (profilingEnabled) {
if (!isOracleJDK8()) {
Expand Down Expand Up @@ -565,6 +590,7 @@ public void execute() {

maybeStartAppSec(scoClass, sco);
maybeStartCiVisibility(instrumentation, scoClass, sco);
maybeStartLLMObs(instrumentation, scoClass, sco);
// start debugger before remote config to subscribe to it before starting to poll
maybeStartDebugger(instrumentation, scoClass, sco);
maybeStartRemoteConfig(scoClass, sco);
Expand Down Expand Up @@ -920,6 +946,24 @@ private static void maybeStartCiVisibility(Instrumentation inst, Class<?> scoCla
}
}

/**
 * Starts the LLM Observability subsystem when the {@code llmObsEnabled} flag is set.
 *
 * <p>The subsystem class {@code datadog.trace.llmobs.LLMObsSystem} is loaded through {@code
 * AGENT_CLASSLOADER} and its static {@code start(Instrumentation, scoClass)} method is invoked
 * reflectively. Any {@link Throwable} raised while loading or invoking is logged at warn level and
 * not rethrown. The begin/end events are recorded around the attempt in both the success and the
 * failure case.
 *
 * @param inst instrumentation handle forwarded to the subsystem
 * @param scoClass class used as the second parameter type when looking up {@code start}
 * @param sco object passed as the second argument to {@code start}
 */
private static void maybeStartLLMObs(Instrumentation inst, Class<?> scoClass, Object sco) {
  if (!llmObsEnabled) {
    return;
  }

  StaticEventLogger.begin("LLM Observability");

  try {
    // static method: the receiver passed to invoke() is null
    AGENT_CLASSLOADER
        .loadClass("datadog.trace.llmobs.LLMObsSystem")
        .getMethod("start", Instrumentation.class, scoClass)
        .invoke(null, inst, sco);
  } catch (final Throwable e) {
    log.warn("Not starting LLM Observability subsystem", e);
  }

  StaticEventLogger.end("LLM Observability");
}

private static void maybeInstallLogsIntake(Class<?> scoClass, Object sco) {
if (agentlessLogSubmissionEnabled) {
StaticEventLogger.begin("Logs Intake");
Expand Down
2 changes: 1 addition & 1 deletion dd-java-agent/agent-jmxfetch/integrations-core
42 changes: 42 additions & 0 deletions dd-java-agent/agent-llmobs/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Build script for the agent-llmobs module.

// Puts the Kotlin Gradle plugin on the build script classpath; its version is read from
// libs.versions.kotlin.
buildscript {
repositories {
mavenCentral()
}

dependencies {
classpath group: 'org.jetbrains.kotlin', name: 'kotlin-gradle-plugin', version: libs.versions.kotlin.get()
}
}

// Shadow plugin (provides the shadowJar task configured below) and Java test fixtures
// (provides the testFixturesApi configuration used below).
plugins {
id 'com.github.johnrengelman.shadow'
id 'java-test-fixtures'
}

// Shared build logic from the root project's gradle/ directory.
apply from: "$rootDir/gradle/java.gradle"
apply from: "$rootDir/gradle/version.gradle"
apply from: "$rootDir/gradle/test-with-kotlin.gradle"

// Coverage minimums are set to zero for this module.
// NOTE(review): presumably consumed by coverage verification in the shared scripts - confirm.
minimumBranchCoverage = 0.0
minimumInstructionCoverage = 0.0

dependencies {
// slf4j is exposed on this module's API
api libs.slf4j

// project modules used by the implementation only
implementation project(':communication')
implementation project(':components:json')
implementation project(':internal-api')

testImplementation project(":utils:test-utils")

// dependencies exported to consumers of this module's test fixtures
testFixturesApi project(':dd-java-agent:testing')
testFixturesApi project(':utils:test-utils')
}

// Applies the dependency filter deps.excludeShared (defined outside this file) to the shadow jar.
shadowJar {
dependencies deps.excludeShared
}

// The plain jar is published with the 'unbundled' classifier.
jar {
archiveClassifier = 'unbundled'
}
<td class="blob-num blob-num-addition empty-cell">
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package datadog.trace.llmobs;

import datadog.communication.BackendApi;
import datadog.communication.BackendApiFactory;
import datadog.communication.ddagent.SharedCommunicationObjects;
import datadog.trace.api.Config;
import datadog.trace.api.DDTraceId;
import datadog.trace.bootstrap.instrumentation.api.AgentScope;
import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext;
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
import datadog.trace.llmobs.domain.SpanContextInfo;
import java.util.Deque;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import javax.annotation.Nonnull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Services shared by the LLM Observability subsystem: the tracer {@link Config}, the {@link
 * BackendApi} created for the {@code LLMOBS_API} intake, and a per-trace stack of active LLM
 * Observability span contexts.
 *
 * <p>The stacks live in a {@link ConcurrentHashMap} keyed by trace ID. Pushes and pops are done
 * inside the map's atomic per-key {@code compute} operations so that creating a stack, mutating it
 * and dropping it when empty cannot interleave with another thread working on the same trace ID.
 */
public class LLMObsServices {

  private static final Logger logger = LoggerFactory.getLogger(LLMObsServices.class);

  final Config config;
  final BackendApi backendApi;

  /** Active LLM Obs span contexts per trace ID; the head of each deque is the innermost one. */
  final Map<DDTraceId, Deque<SpanContextInfo>> activeSpanContextByTID = new ConcurrentHashMap<>();

  /**
   * @param config tracer configuration, also handed to the backend API factory
   * @param sco shared communication objects handed to the backend API factory
   */
  LLMObsServices(Config config, SharedCommunicationObjects sco) {
    this.config = config;
    this.backendApi =
        new BackendApiFactory(config, sco).createBackendApi(BackendApiFactory.Intake.LLMOBS_API);
  }

  /**
   * Returns the LLM Obs span context at the top of the stack for the trace of the currently active
   * scope.
   *
   * @return the top-of-stack context, or a freshly constructed {@link SpanContextInfo} when there
   *     is no active scope, the scope has no span, the span has no trace ID, or no context is
   *     registered for that trace ID; never {@code null}
   */
  @Nonnull
  public SpanContextInfo getActiveSpanContext() {
    // Valid case: possibly start root llm obs span/trace while there is NOT an active apm trace
    AgentScope activeScope = AgentTracer.activeScope();
    if (activeScope == null) {
      return new SpanContextInfo();
    }

    // Unexpected case: null active scope span, log to avoid crashes
    if (activeScope.span() == null) {
      logger.warn("active scope found but unexpectedly null span");
      return new SpanContextInfo();
    }

    // Unexpected case: null trace ID, log to avoid crashes
    DDTraceId traceId = activeScope.span().getTraceId();
    if (traceId == null) {
      logger.warn("active scope found but unexpectedly null trace ID");
      return new SpanContextInfo();
    }

    Deque<SpanContextInfo> activeSpanCtxForTID = activeSpanContextByTID.get(traceId);
    // peek() returns null on an empty deque, which also covers a stack that another thread
    // emptied after we looked it up; this keeps the @Nonnull contract without a separate
    // isEmpty() check that could go stale.
    SpanContextInfo top = activeSpanCtxForTID == null ? null : activeSpanCtxForTID.peek();
    if (top == null) {
      // Valid case: possibly start root llm obs span/trace while there's an active apm trace
      return new SpanContextInfo();
    }

    // Valid case: possibly start child llm obs span for a given trace ID
    return top;
  }

  /**
   * Pushes {@code spanContext} onto the stack of the trace its active context belongs to, creating
   * the stack if needed.
   *
   * <p>A {@code null} argument, a {@code null} active context or a {@code null} trace ID is logged
   * at warn level and otherwise ignored.
   *
   * @param spanContext context to register as the innermost active one for its trace
   */
  public void setActiveSpanContext(SpanContextInfo spanContext) {
    if (spanContext == null) {
      logger.warn("unexpected null span context");
      return;
    }

    AgentSpanContext activeCtx = spanContext.getActiveContext();
    if (activeCtx == null) {
      logger.warn("unexpected null active context");
      return;
    }

    DDTraceId traceId = activeCtx.getTraceId();
    if (traceId == null) {
      logger.warn("unexpected null trace ID");
      return;
    }

    // Create-if-missing and push happen under the map's per-key atomicity, so a concurrent
    // set/remove for the same trace cannot replace or discard the deque we push into.
    activeSpanContextByTID.compute(
        traceId,
        (id, existing) -> {
          Deque<SpanContextInfo> contexts = existing;
          if (contexts == null) {
            contexts = new ConcurrentLinkedDeque<>();
          }
          contexts.push(spanContext);
          return contexts;
        });
  }

  /**
   * Pops the innermost active span context for {@code traceId} and drops the trace's stack once it
   * is empty.
   *
   * <p>The pop uses {@code pollFirst()}, so an already empty stack is a no-op rather than a {@link
   * NoSuchElementException}. A {@code null} or unknown trace ID is logged at debug level and
   * ignored.
   *
   * @param traceId trace whose innermost context should be removed
   */
  public void removeActiveSpanContext(DDTraceId traceId) {
    // ConcurrentHashMap rejects null keys with a NullPointerException, so guard explicitly
    if (traceId == null) {
      logger.debug("cannot remove active span context for null trace ID");
      return;
    }
    if (!activeSpanContextByTID.containsKey(traceId)) {
      logger.debug("active span contexts not found for trace {}", traceId);
      return;
    }

    // Pop and remove-when-empty as one atomic step for this key. Returning null removes the
    // mapping: the trace MAY still be active, however, the next set should re-create the
    // hierarchy as needed.
    activeSpanContextByTID.computeIfPresent(
        traceId,
        (id, contexts) -> {
          contexts.pollFirst();
          return contexts.isEmpty() ? null : contexts;
        });
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package datadog.trace.llmobs;

import datadog.communication.ddagent.SharedCommunicationObjects;
import datadog.trace.api.Config;
import datadog.trace.api.llmobs.LLMObs;
import datadog.trace.api.llmobs.LLMObsSpan;
import datadog.trace.api.llmobs.LLMObsTags;
import datadog.trace.bootstrap.instrumentation.api.Tags;
import datadog.trace.llmobs.domain.DDLLMObsSpan;
import datadog.trace.llmobs.domain.LLMObsInternal;
import java.lang.instrument.Instrumentation;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bootstraps LLM Observability: when the feature is enabled in {@link Config}, it builds the
 * shared {@link LLMObsServices} and registers a span factory for manually created LLM Obs spans
 * through {@link LLMObsInternal#setLLMObsSpanFactory}.
 */
public class LLMObsSystem {

  private static final Logger LOGGER = LoggerFactory.getLogger(LLMObsSystem.class);

  private static final String CUSTOM_MODEL_VAL = "custom";

  /**
   * Installs the manual span factory if LLM Observability is enabled; otherwise only logs a debug
   * message.
   *
   * @param inst instrumentation handle (not used by this method)
   * @param sco shared communication objects; {@code createRemaining} is called on it before the
   *     services are built
   */
  public static void start(Instrumentation inst, SharedCommunicationObjects sco) {
    final Config config = Config.get();
    if (config.isLlmObsEnabled()) {
      sco.createRemaining(config);

      final LLMObsServices services = new LLMObsServices(config, sco);
      final LLMObsManualSpanFactory spanFactory =
          new LLMObsManualSpanFactory(config.getLlmObsMlApp(), config.getServiceName(), services);
      LLMObsInternal.setLLMObsSpanFactory(spanFactory);
    } else {
      LOGGER.debug("LLM Observability is disabled");
    }
  }

  /** Returns {@code fallback} when {@code value} is {@code null} or empty, else {@code value}. */
  private static String orDefault(String value, String fallback) {
    return (value != null && !value.isEmpty()) ? value : fallback;
  }

  /** Span factory backing the manual LLM Obs API; every span it creates is a {@link DDLLMObsSpan}. */
  private static class LLMObsManualSpanFactory implements LLMObs.LLMObsSpanFactory {

    private final LLMObsServices llmObsServices;
    private final String serviceName;
    private final String defaultMLApp;

    /**
     * @param defaultMLApp ML app name used when a caller passes a {@code null} or empty one
     * @param serviceName service name given to every created span
     * @param llmObsServices shared services given to every created span
     */
    public LLMObsManualSpanFactory(
        String defaultMLApp, String serviceName, LLMObsServices llmObsServices) {
      this.llmObsServices = llmObsServices;
      this.serviceName = serviceName;
      this.defaultMLApp = defaultMLApp;
    }

    /**
     * Creates an LLM-kind span and tags it with the model name and provider; a {@code null} or
     * empty name or provider is replaced by {@code "custom"}.
     */
    @Override
    public LLMObsSpan startLLMSpan(
        String spanName,
        String modelName,
        String modelProvider,
        @Nullable String mlApp,
        @Nullable String sessionID) {

      final DDLLMObsSpan llmSpan =
          new DDLLMObsSpan(
              Tags.LLMOBS_LLM_SPAN_KIND,
              spanName,
              resolveMLApp(mlApp),
              sessionID,
              serviceName,
              llmObsServices);

      llmSpan.setTag(LLMObsTags.MODEL_NAME, orDefault(modelName, CUSTOM_MODEL_VAL));
      llmSpan.setTag(LLMObsTags.MODEL_PROVIDER, orDefault(modelProvider, CUSTOM_MODEL_VAL));
      return llmSpan;
    }

    /** Creates an agent-kind span. */
    @Override
    public LLMObsSpan startAgentSpan(
        String spanName, @Nullable String mlApp, @Nullable String sessionID) {
      final String effectiveMLApp = resolveMLApp(mlApp);
      return new DDLLMObsSpan(
          Tags.LLMOBS_AGENT_SPAN_KIND,
          spanName,
          effectiveMLApp,
          sessionID,
          serviceName,
          llmObsServices);
    }

    /** Creates a tool-kind span. */
    @Override
    public LLMObsSpan startToolSpan(
        String spanName, @Nullable String mlApp, @Nullable String sessionID) {
      final String effectiveMLApp = resolveMLApp(mlApp);
      return new DDLLMObsSpan(
          Tags.LLMOBS_TOOL_SPAN_KIND,
          spanName,
          effectiveMLApp,
          sessionID,
          serviceName,
          llmObsServices);
    }

    /** Creates a task-kind span. */
    @Override
    public LLMObsSpan startTaskSpan(
        String spanName, @Nullable String mlApp, @Nullable String sessionID) {
      final String effectiveMLApp = resolveMLApp(mlApp);
      return new DDLLMObsSpan(
          Tags.LLMOBS_TASK_SPAN_KIND,
          spanName,
          effectiveMLApp,
          sessionID,
          serviceName,
          llmObsServices);
    }

    /** Creates a workflow-kind span. */
    @Override
    public LLMObsSpan startWorkflowSpan(
        String spanName, @Nullable String mlApp, @Nullable String sessionID) {
      final String effectiveMLApp = resolveMLApp(mlApp);
      return new DDLLMObsSpan(
          Tags.LLMOBS_WORKFLOW_SPAN_KIND,
          spanName,
          effectiveMLApp,
          sessionID,
          serviceName,
          llmObsServices);
    }

    /** Picks the caller-supplied ML app, falling back to the factory default when absent. */
    private String resolveMLApp(String mlApp) {
      return orDefault(mlApp, defaultMLApp);
    }
  }
}
Loading
0