8000 parameterize path, first stab at wrapping multiple routers · szechyjs/sentry-javascript@3f1dd4d · GitHub
[go: up one dir, main page]

8000
Skip to content

Commit 3f1dd4d

Browse files
committed
parameterize path, first stab at wrapping multiple routers
1 parent 57423ee commit 3f1dd4d

File tree

5 files changed

+199
-27
lines changed

5 files changed

+199
-27
lines changed

packages/node/src/handlers.ts

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export interface ExpressRequest {
3939
user?: {
4040
[key: string]: any;
4141
};
42+
_reconstructedPath?: string;
4243
}
4344

4445
/**
@@ -63,6 +64,7 @@ export function tracingHandler(): (
6364

6465
const transaction = startTransaction(
6566
{
67+
// TODO Do we need to do this at both the beginning AND end of the transaction?
6668
name: extractExpressTransactionName(req, { path: true, method: true }),
6769
op: 'http.server',
6870
...traceparentData,
@@ -101,7 +103,8 @@ export function tracingHandler(): (
101103
function addExpressReqToTransaction(transaction: Transaction | undefined, req: ExpressRequest): void {
102104
if (!transaction) return;
103105
transaction.name = extractExpressTransactionName(req, { path: true, method: true });
104-
transaction.setData('url', req.originalUrl);
106+
transaction.setData('url', req.originalUrl || req.url);
107+
// TODO if we have the full parameterized URL as the transaction name, do we need baseUrl?
105108
transaction.setData('baseUrl', req.baseUrl);
106109
transaction.setData('query', req.query);
107110
}
@@ -122,13 +125,28 @@ function extractExpressTransactionName(
122125
): string {
123126
const method = req.method?.toUpperCase();
124127

125-
let path = '';
126-
if (req.route) {
127-
// if the mountpoint is `/`, req.baseUrl is '' (not undefined), so it's safe to include it here
128-
// see https://github.com/expressjs/express/blob/508936853a6e311099c9985d4c11a4b1b8f6af07/test/req.baseUrl.js#L7
129-
path = `${req.baseUrl}${req.route.path}`;
130-
} else if (req.originalUrl || req.url) {
131-
path = stripUrlQueryAndFragment(req.originalUrl || req.url || '');
128+
// `req.originalUrl` comes from Express, but in practice sometimes is missing; `req.url` comes from Node's `http`
129+
// module, but can get rewritten by Express. Either way, strip any trailing slash so we can later compare it to
130+
// `reconstructedPath`, which we create, and which will never have a trailing slash.
131+
let origUrl = stripUrlQueryAndFragment(req.originalUrl || req.url || '');
132+
origUrl = origUrl.endsWith('/') ? origUrl.slice(0, -1) : origUrl;
133+
134+
let path = origUrl;
135+
136+
// Since Express doesn't keep track of which routers its visited, if the app has nested routers, the parameterized
137+
// version of the path will only include the final route's bit of the full path. We therefore track this ourselves as
138+
// part of wrapping middleware and routes.
139+
const reconstructedPath = req._reconstructedPath;
140+
141+
// we'd prefer our path be parameterized, so transactions will group better, so use that instead if possible
142+
if (reconstructedPath) {
143+
path = reconstructedPath;
144+
// if we're 404ing, but part of the path matched, include as much as did match, in case it's helpful to know where
145+
// people are going wrong (doing this is also better than the alternative, which is having a separate transaction
146+
// group for every wrong URL someone types)
147+
if (reconstructedPath.split('/').length !== origUrl.split('/').length) {
148+
path += '/<missing resource>';
149+
}
132150
}
133151

134152
let info = '';

packages/tracing/src/index.ts

Lines changed: 2 additions & 1 deletion
< F438 /tr>
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import { BrowserTracing } from './browser';
22
import { addExtensionMethods } from './hubextensions';
3-
import * as TracingIntegrations from './integrations';
3+
import { Integrations as TracingIntegrations } from './integrations';
44

55
const Integrations = { ...TracingIntegrations, BrowserTracing };
66

77
export { Integrations };
8+
export { instrumentMiddlewares } from './integrations';
89
export { Span } from './span';
910
export { Transaction } from './transaction';
1011
export {

packages/tracing/src/integrations/express.ts

Lines changed: 154 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
/* eslint-disable max-lines */
12
import { Integration, Transaction } from '@sentry/types';
23
import { logger } from '@sentry/utils';
4+
import * as http from 'http';
35

46
type Method =
57
| 'all'
@@ -30,7 +32,7 @@ type Method =
3032

3133
type Router = {
3234
[method in Method]: (...args: any) => any; // eslint-disable-line @typescript-eslint/no-explicit-any
33-
};
35+
} & { _router?: Router; lazyrouter: () => void; process_params: (...args: unknown[]) => void };
3436

3537
interface ExpressResponse {
3638
once(name: string, callback: () => void): void;
@@ -102,15 +104,21 @@ export class Express implements Integration {
102104
*/
103105
// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any
104106
function wrap(fn: Function, method: Method): (...args: any[]) => void {
107+
let wrappedFunction;
105108
const arity = fn.length;
106109

107110
switch (arity) {
108111
case 2: {
109-
return function(this: NodeJS.Global, req: unknown, res: ExpressResponse & SentryTracingResponse): void {
112+
wrappedFunction = function(
113+
this: NodeJS.Global,
114+
req: http.IncomingMessage,
115+
res: ExpressResponse & SentryTracingResponse,
116+
): void {
110117
const transaction = res.__sentry_transaction;
111118
if (transaction) {
112119
const span = transaction.startChild({
113-
description: fn.name,
120+
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
121+
description: (fn as any)._name || fn.name,
114122
op: `middleware.${method}`,
115123
});
116124
res.once('finish', () => {
@@ -119,48 +127,56 @@ function wrap(fn: Function, method: Method): (...args: any[]) => void {
119127
}
120128
return fn.call(this, req, res);
121129
};
130+
break;
122131
}
123132
case 3: {
124-
return function(
133+
wrappedFunction = function(
125134
this: NodeJS.Global,
126-
req: unknown,
135+
req: http.IncomingMessage,
127136
res: ExpressResponse & SentryTracingResponse,
128137
next: () => void,
129138
): void {
130139
const transaction = res.__sentry_transaction;
131140
const span = transaction?.startChild({
132-
description: fn.name,
141+
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
142+
description: (fn as any)._name || fn.name,
133143
op: `middleware.${method}`,
134144
});
135145
fn.call(this, req, res, function(this: NodeJS.Global, ...args: unknown[]): void {
136146
span?.finish();
137-
next.call(this, ...args);
147+
next.apply(this, args);
138148
});
139149
};
150+
break;
140151
}
141152
case 4: {
142-
return function(
153+
wrappedFunction = function(
143154
this: NodeJS.Global,
144155
err: Error,
145-
req: Request,
156+
req: http.IncomingMessage,
146157
res: Response & SentryTracingResponse,
147158
next: () => void,
148159
): void {
149160
const transaction = res.__sentry_transaction;
150161
const span = transaction?.startChild({
151-
description: fn.name,
162+
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
163+
description: (fn as any)._name || fn.name,
152164
op: `middleware.${method}`,
153165
});
154166
fn.call(this, err, req, res, function(this: NodeJS.Global, ...args: unknown[]): void {
155167
span?.finish();
156-
next.call(this, ...args);
168+
next.apply(this, args);
157169
});
158170
};
171+
break;
159172
}
160173
default: {
161174
throw new Error(`Express middleware takes 2-4 arguments. Got: ${arity}`);
162175
}
163176
}
177+
178+
Object.defineProperty(wrappedFunction, 'name', { value: fn.name });
179+
return wrappedFunction;
164180
}
165181

166182
/**
@@ -205,9 +221,133 @@ function patchMiddleware(router: Router, method: Method): Router {
205221
return router;
206222
}
207223

224+
interface ExpressRouterLayer {
225+
path: string;
226+
keys: Array<{
227+
name: string;
228+
optional: boolean;
229+
offset: number;
230+
}>;
231+
regexp: RegExp;
232+
}
233+
234+
/**
235+
*
236+
*/
237+
function wrapProcessParams(routerPrototype: Router): void {
238+
const oldProcessParams = routerPrototype.process_params;
239+
240+
/**
241+
*
242+
*/
243+
function newProcessParams(this: Router, ...args: unknown[]): void {
244+
const layer = args[0] as ExpressRouterLayer;
245+
const req = args[2] as { _reconstructedPath: string };
246+
req._reconstructedPath = req._reconstructedPath || '';
247+
248+
// Every router, top-level or nested, contains an array of Layer objects, one per path/method/handler combo within
249+
// that router. When Express finds a layer that matches the current part of the specific path being considered, it
250+
// temporarily adds that path to the layer (otherwise, layers only contain a regex to match to future paths).
251+
if (layer.path) {
252+
// This is a real hack, necessitated by the fact that Express neither keeps track of the full parameterized path
253+
// itself nor exports Layer, the constructor of which seems to be the only place the path as typed by the Express
254+
// user is accessible. Alas. So, we reconstruct it, making sure to differentiate between hard-coded path segments
255+
// and parameters, between parameters with identical names, and between parameters with identical values.
256+
257+
// The overall idea here is to modify the path's regex to insert capture groups around each hard-coded part of the
258+
// path. The parameters already have capture groups around them, so by capturing the hard-coded parts, too, we end
259+
// up being able to reconstruct the entire path, while at the same time being sure that we're only substituting
260+
// parameter names for values which actually are in the parameter spots. (Since each capture group for a
261+
// hard-coded part of the path fills the entire space between consecutive parameters, we can be sure that the
262+
// groups alternate between hard-coded segments and parameterized segments, making it easy to tell which is
263+
// which.) Parameter keys are stored on the layer in order, so we also can be assured that we're matching the
264+
// right parameter name to the right value, even if the values happen to be the same for this instance of the
265+
// path.
266+
const parameterRegexPattern = '(?:([^\\/]+?))';
267+
const pathPartsRegexSource = layer.regexp.source
268+
// get all the hard-coded in-between parts
269+
.split(parameterRegexPattern)
270+
// surround each one with parentheses
271+
.map(hardCodedSegment => `(${hardCodedSegment})`)
272+
// and then put all the parameter segments back in place
273+
.join(parameterRegexPattern);
274+
const pathParts = new RegExp(pathPartsRegexSource, layer.regexp.flags).exec(layer.path);
275+
// we wouldn't be here otherwise, but this keeps TS happy
276+
if (pathParts) {
277+
// The first (index 0) thing in the match array is the full match; the capture groups start at index 1. Even if
278+
// the first path segment is a parameter, there will always be at least a leading slash to take up the first
279+
// capture group slot. The first parameter capture group will therefore always be at index 2.
280+
for (let i = 2; i < pathParts.length; i += 2) {
281+
const keyIndex = i / 2 - 1;
282+
pathParts[i] = `:${layer.keys[keyIndex].name}`;
283+
}
284+
285+
// slice so we don't include the full match as part of the path, then glue everything back together and add it to
286+
// the parameterized path we're building
287+
req._reconstructedPath += pathParts.slice(1).join('');
288+
}
289+
// no else because on the other end `req.url` will just be used instead
290+
}
291+
292+
// now we can let Express do its thing
293+
oldProcessParams.apply(this, args);
294+
295+
// TODO kmclb We're currently not handling the case where an array of paths is passed.
296+
297+
// We'll need to strip off the initial (?: and ending ), detect the pattern
298+
// \/?(?=\/|$)|^\/
299+
// (which is the end of one option, the OR, and the beginning of another option), replace the | with some illegal
300+
// regex character (illegal so we know it won't appear anywhere else in the regex string), split on that character,
301+
// apply the above logic to each piece, for any that match (could there be more than one, or will it just always use
302+
// the first or last one?) do the substitution, wrap all options with quotes and surround the whole thing with [] to
303+
// make it an array, and finally deal with a) how to display these arrays when they're only part of a larger path
304+
// (concatenation alone probably won't cut it) and b) if there can be multiple matches, figure out if we need some
305+
// sort of cross-product... or maybe we just need to find one matching one, and as long as we're consistent about
306+
// which one we pick, we'll only be wrong some of the time? The offset might help here, even if it's off by a
307+
// little, though it's not at all guaranteed to... I think it's first necessary to know how express does it (since
308+
// the parameters get acted upon, it actually does matter if it matches /blah or /:someParam where the value of
309+
// someParam turns out to be "blah", so they must have some way of deciding). Anyway, this is an edgy edge case, so
310+
// not for solving now.
311+
312+
// TODO kmclb - we should pull the method and replace the .use in the span's "middleware.xxxx"
313+
314+
// TODO kmclb - also, what description are we going to use? the ultimate handler, I guess? Probably need to adjust
315+
// this, as we likely default to the first router
316+
}
317+
318+
routerPrototype.process_params = newProcessParams;
319+
}
320+
208321
/**
209-
* Patches original router methods
322+
* Patches original app methods (app.use, app.get, app.post, app.patch, etc) and router methods (someRouter.use,
323+
* someRouter.get, etc) as well as an internal router method which allows us to keep track of the full parameterized
324+
* path (since Express doesn't)
210325
*/
211-
function instrumentMiddlewares(router: Router, methods: Method[] = []): void {
212-
methods.forEach((method: Method) => patchMiddleware(router, method));
326+
export function instrumentMiddlewares(appOrRouter: Router, methods: Method[] = []): void {
327+
// TODO kmclb If we do export this, pull out the `wrapProcessParams` stuff, because otherwise we end up with stuff
328+
// double-wrapped or triple-wrapped and that breaks transaction naming
329+
330+
// Wrap .use, .get, etc. so that each invokation creates a span. (These methods live both on the app and on its
331+
// individual routers, so it doesn't matter which one we're working with.)
332+
methods.forEach((method: Method) => patchMiddlewareOnRouter(appOrRouter, method));
333+
334+
// Wrap an internal router method which gets called on each potential "layer" (router or middleware) which could be
335+
// applied given where we are in the path resolution process (i.e., how much of the path has already been matched with
336+
// routers). This lets us detect when there's a match and gives us access to the parameter keys and values and the
337+
// regex used for the match, so that we can substitute parameter names for values (in the right places, even if
338+
// there are multiple instances of the same value).
339+
340+
// Here it does matter whether we have an App instance or a Router instance
341+
const isApp = 'settings' in appOrRouter;
342+
343+
// Since we're doing this wrapping before any requests have been handled, the main router for the app likely hasn't
344+
// yet been initialized; force that to happen now.
345+
if (isApp && appOrRouter._router === undefined) {
346+
appOrRouter.lazyrouter();
347+
}
348+
349+
const routerPrototype = Object.getPrototypeOf(isApp ? appOrRouter._router : appOrRouter);
350+
wrapProcessParams(routerPrototype);
351+
// TODO kmclb there's a much easier way, now that I know we can get ahold of the Layer prototype
352+
// it's at appOrRouter._router.stack[0].__proto__.constructor
213353
}
Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
export { Express } from './express';
2-
export { Postgres } from './postgres';
3-
export { Mysql } from './mysql';
4-
export { Mongo } from './mongo';
1+
import { Express } from './express';
2+
import { Mongo } from './mongo';
3+
import { Mysql } from './mysql';
4+
import { Postgres } from './postgres';
5+
6+
export { instrumentMiddlewares } from './express';
7+
8+
export const Integrations = { Express, Postgres, Mysql, Mongo };

packages/tracing/src/transaction.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ export class Transaction extends SpanClass implements TransactionInterface {
102102
return undefined;
103103
}
104104

105+
this.spanRecorder?.spans?.forEach(span => {
106+
if (span.op?.startsWith('middleware')) {
107+
console.log(`force-finishing ${span.description || (span as any).name}`);
108+
span.finish();
109+
} else {
110+
console.log(`not force-finishing ${span.description || (span as any).name} with op ${span.op}`);
111+
}
112+
});
113+
105114
const finishedSpans = this.spanRecorder ? this.spanRecorder.spans.filter(s => s !== this && s.endTimestamp) : [];
106115

107116
if (this._trimEnd && finishedSpans.length > 0) {

0 commit comments

Comments
 (0)
0