Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(localenv): add performance metrics #2999

Merged
merged 36 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5de7ad9
refactor(backend): add NoopTelemetryServiceImpl to make tel optional
BlairCurrey Sep 19, 2024
751ffa9
refactor(backend): rm unecessary telemetry exists check
BlairCurrey Sep 19, 2024
a003c3e
fix(localenv): rm telemetry log noise
BlairCurrey Sep 19, 2024
10b4ea4
feat(backend): add startTimer tel method
BlairCurrey Sep 20, 2024
6c660ad
feat: update local grafana dash w/ performance metrics
BlairCurrey Sep 20, 2024
46e00aa
feat(backend): add telemetry service dep to services
BlairCurrey Sep 20, 2024
bbddc36
feat(backend): add accounting service timer metrics
BlairCurrey Sep 20, 2024
ff3fd70
feat(backend): add service timer metrics
BlairCurrey Sep 20, 2024
6357f79
feat(backend): add more performance timer metrics
BlairCurrey Sep 20, 2024
0b83806
chore: rename performance metrics, update dashbaord
BlairCurrey Sep 23, 2024
29143f4
chore: format
BlairCurrey Sep 23, 2024
522b339
fix(backend): rm problematic connector middleware timers
BlairCurrey Sep 24, 2024
3033c2a
fix(backend): fix deposit outgoing payment liquidity performance metrics
BlairCurrey Sep 24, 2024
0ede073
chore: format
BlairCurrey Sep 24, 2024
a3efc73
chore: rename metric for case
BlairCurrey Sep 24, 2024
84f825f
fix(backend): balance tests, missing tel dep
BlairCurrey Sep 24, 2024
dfd8b2b
feat(backend): switch tel servive impl in dep definition
BlairCurrey Sep 24, 2024
c8753fc
Merge branch 'main' into bc/2980/refactor-telemetry-optionality
BlairCurrey Sep 24, 2024
f70cc29
Merge branch 'bc/2980/refactor-telemetry-optionality' into bc/2983/pe…
BlairCurrey Sep 24, 2024
b1b1866
fix(backend): test deps setup
BlairCurrey Sep 25, 2024
87e73d3
chore(backend): cleanup test
BlairCurrey Sep 25, 2024
e050a40
chore: restore erroneously rm test, fix dep
BlairCurrey Sep 25, 2024
328ebbe
fix(localenv): dashboard visualization name
BlairCurrey Sep 25, 2024
5c06039
chore: format
BlairCurrey Oct 1, 2024
b5143b7
Merge branch 'main' into bc/2983/performance-metrics
BlairCurrey Oct 1, 2024
fb19aeb
fix(backend): rm dupe class from merge
BlairCurrey Oct 2, 2024
4ae9649
Update packages/backend/src/open_payments/quote/service.ts
BlairCurrey Oct 15, 2024
2ad883b
refactor(backend): use try/catch/finally to enforce stopTimer
BlairCurrey Oct 15, 2024
24a5eff
feat(backend): support additional attributes on stopTimer and add tests
BlairCurrey Oct 15, 2024
ba42a29
chore: format
BlairCurrey Oct 15, 2024
6803522
Merge branch 'main' into bc/2983/performance-metrics
BlairCurrey Oct 15, 2024
ef85d8b
chore: format
BlairCurrey Oct 15, 2024
658f85f
chore: generate auth gql
BlairCurrey Oct 15, 2024
03d929b
refactor(backend): timer callNames to include scope
BlairCurrey Oct 16, 2024
ed1b5d3
fix(localenv): rm deprecated otel collector log config
BlairCurrey Oct 16, 2024
7f07761
Merge branch 'main' into bc/2983/performance-metrics
BlairCurrey Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,527 changes: 1,298 additions & 229 deletions localenv/telemetry/grafana/provisioning/dashboards/example.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion packages/backend/src/accounting/psql/balance.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ describe('Balances', (): void => {
appContainer = await createTestApp(deps)
serviceDeps = {
logger: await deps.use('logger'),
knex: await deps.use('knex')
knex: await deps.use('knex'),
telemetry: await deps.use('telemetry')
}
knex = appContainer.knex
})
Expand Down
5 changes: 5 additions & 0 deletions packages/backend/src/accounting/psql/balance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ export async function getAccountBalances(
account: LedgerAccount,
trx?: TransactionOrKnex
): Promise<AccountBalance> {
const stopTimer = deps.telemetry.startTimer('psql_get_account_balances', {
callName: 'psql_get_account_balances'
})
try {
const queryResult = await (trx ?? deps.knex).raw(
`
Expand Down Expand Up @@ -54,5 +57,7 @@ export async function getAccountBalances(
)

throw err
} finally {
stopTimer()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ describe('Ledger Account', (): void => {
appContainer = await createTestApp(deps)
serviceDeps = {
logger: await deps.use('logger'),
knex: await deps.use('knex')
knex: await deps.use('knex'),
telemetry: await deps.use('telemetry')
}
knex = appContainer.knex
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ describe('Ledger Transfer', (): void => {
appContainer = await createTestApp(deps)
serviceDeps = {
logger: await deps.use('logger'),
knex: await deps.use('knex')
knex: await deps.use('knex'),
telemetry: await deps.use('telemetry')
}
knex = appContainer.knex
})
Expand Down
15 changes: 14 additions & 1 deletion packages/backend/src/accounting/psql/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ import {
getAccountTransfers
} from './ledger-transfer'
import { LedgerTransfer, LedgerTransferType } from './ledger-transfer/model'
import { TelemetryService } from '../../telemetry/service'

export interface ServiceDependencies extends BaseService {
knex: TransactionOrKnex
telemetry: TelemetryService
withdrawalThrottleDelay?: number
}

Expand Down Expand Up @@ -145,13 +147,24 @@ export async function getAccountTotalSent(
deps: ServiceDependencies,
accountRef: string
): Promise<bigint | undefined> {
const stopTimer = deps.telemetry.startTimer(
'psql_get_account_total_sent_ms',
{
callName: 'psql_getAccountTotalSent'
}
)
const account = await getLiquidityAccount(deps, accountRef)

if (!account) {
stopTimer()
return
}

return (await getAccountBalances(deps, account)).debitsPosted
const totalsSent = (await getAccountBalances(deps, account)).debitsPosted

stopTimer()

return totalsSent
}

export async function getAccountsTotalSent(
Expand Down
7 changes: 7 additions & 0 deletions packages/backend/src/accounting/tigerbeetle/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
getAccountTransfers
} from './transfers'
import { toTigerBeetleId } from './utils'
import { TelemetryService } from '../../telemetry/service'

export enum TigerBeetleAccountCode {
LIQUIDITY_WEB_MONETIZATION = 1,
Expand Down Expand Up @@ -68,6 +69,7 @@ export const convertToTigerBeetleTransferCode: {

export interface ServiceDependencies extends BaseService {
tigerBeetle: Client
telemetry: TelemetryService
withdrawalThrottleDelay?: number
}

Expand Down Expand Up @@ -218,10 +220,15 @@ export async function getAccountTotalSent(
deps: ServiceDependencies,
id: string
): Promise<bigint | undefined> {
const stopTimer = deps.telemetry.startTimer('tb_get_account_total_sent_ms', {
callName: 'tb_getAccountTotalSent'
})
const account = (await getAccounts(deps, [id]))[0]
if (account) {
stopTimer()
return account.debits_posted
}
stopTimer()
}

export async function getAccountsTotalSent(
Expand Down
20 changes: 20 additions & 0 deletions packages/backend/src/graphql/resolvers/liquidity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -440,13 +440,25 @@ export const depositOutgoingPaymentLiquidity: MutationResolvers<ApolloContext>['
args,
ctx
): Promise<ResolversTypes['LiquidityMutationResponse']> => {
const telemetry = await ctx.container.use('telemetry')
const stopTimer = telemetry.startTimer(
'deposit_outgoing_payment_liquidity_ms',
{
callName: 'depositOutgoingPaymentLiquidity'
}
)
const { outgoingPaymentId } = args.input
const webhookService = await ctx.container.use('webhookService')
const stopTimerWh = telemetry.startTimer('wh_get_latest_ms', {
callName: 'webhookService.getLatestByResourceId'
})
const event = await webhookService.getLatestByResourceId({
outgoingPaymentId,
types: [OutgoingPaymentDepositType.PaymentCreated]
})
stopTimerWh()
if (!event || !isOutgoingPaymentEvent(event)) {
stopTimer()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something we don't necessary have to tackle in this PR, but what can we do when it comes to stopping timers during a happy path vs an error? is there something we can add to make that distinction in the metrics?

Copy link
Contributor Author

@BlairCurrey BlairCurrey Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah we could have a success: boolean attribute on the underlying histogram. I was inspired by a pattern that was shown to me during the WW, and it had that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not used currently, but it should be possible now. I updated the closure returned from startTimer to accept additionalAttributes which will be merged on the ones initially provided to startTimer. Usage example added to tests (and tests added for startTimer in general - noticed we didnt have any).

throw new GraphQLError(errorToMessage[LiquidityError.InvalidId], {
extensions: {
code: errorToCode[LiquidityError.InvalidId]
Expand All @@ -455,23 +467,31 @@ export const depositOutgoingPaymentLiquidity: MutationResolvers<ApolloContext>['
}

if (!event.data.debitAmount) {
stopTimer()
throw new Error('No debit amount')
}
const outgoingPaymentService = await ctx.container.use(
'outgoingPaymentService'
)
const stopTimerFund = telemetry.startTimer('fund_payment_ms', {
callName: 'outgoingPaymentService.fund'
})
const paymentOrErr = await outgoingPaymentService.fund({
id: outgoingPaymentId,
amount: BigInt(event.data.debitAmount.value),
transferId: event.id
})
stopTimerFund()

if (isFundingError(paymentOrErr)) {
stopTimer()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe a finally would be nice here to avoid calling stopTimer() multiple times

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea. put everything in a new try and removed the stopTimers. catch just re-throws and finally calls stopTimer. This is a good pattern for this sort of thing... ensures we dont miss a place to stop the timer.

throw new GraphQLError(fundingErrorToMessage[paymentOrErr], {
extensions: {
code: fundingErrorToCode[paymentOrErr]
}
})
}
stopTimer()
return {
success: true
}
Expand Down
15 changes: 11 additions & 4 deletions packages/backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ export function initIocContainer(
const logger = await deps.use('logger')
const knex = await deps.use('knex')
const config = await deps.use('config')
const telemetry = await deps.use('telemetry')

if (config.useTigerBeetle) {
container.singleton('tigerBeetle', async (deps) => {
Expand All @@ -231,14 +232,16 @@ export function initIocContainer(
logger,
knex,
tigerBeetle,
withdrawalThrottleDelay: config.withdrawalThrottleDelay
withdrawalThrottleDelay: config.withdrawalThrottleDelay,
telemetry
})
}

return createPsqlAccountingService({
logger,
knex,
withdrawalThrottleDelay: config.withdrawalThrottleDelay
withdrawalThrottleDelay: config.withdrawalThrottleDelay,
telemetry
})
})
container.singleton('peerService', async (deps) => {
Expand Down Expand Up @@ -346,7 +349,8 @@ export function initIocContainer(
walletAddressService: await deps.use('walletAddressService'),
remoteIncomingPaymentService: await deps.use(
'remoteIncomingPaymentService'
)
),
telemetry: await deps.use('telemetry')
})
})

Expand Down Expand Up @@ -450,7 +454,10 @@ export function initIocContainer(
receiverService: await deps.use('receiverService'),
feeService: await deps.use('feeService'),
walletAddressService: await deps.use('walletAddressService'),
paymentMethodHandlerService: await deps.use('paymentMethodHandlerService')
paymentMethodHandlerService: await deps.use(
'paymentMethodHandlerService'
),
telemetry: await deps.use('telemetry')
})
})

Expand Down
24 changes: 18 additions & 6 deletions packages/backend/src/open_payments/payment/outgoing/lifecycle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,22 @@ export async function handleSending(
throw LifecycleError.BadState
}

const payStartTime = Date.now()
const stopTimer = deps.telemetry.startTimer('ilp_pay_time_ms', {
description: 'Time to complete an ILP payment',
callName: 'paymentMethodHandlerService.pay (ILP)'
})
await deps.paymentMethodHandlerService.pay('ILP', {
receiver,
outgoingPayment: payment,
finalDebitAmount: maxDebitAmount,
finalReceiveAmount: maxReceiveAmount
})
const payEndTime = Date.now()
stopTimer()

const payDuration = payEndTime - payStartTime
await Promise.all([
deps.telemetry.incrementCounter('transactions_total', 1, {
description: 'Count of funded transactions'
}),
deps.telemetry.recordHistogram('ilp_pay_time_ms', payDuration, {
description: 'Time to complete an ILP payment'
}),
deps.telemetry.incrementCounterWithTransactionAmountDifference(
'transaction_fee_amounts',
payment.sentAmount,
Expand Down Expand Up @@ -142,17 +141,24 @@ export async function handleFailed(
payment: OutgoingPayment,
error: string
): Promise<void> {
const stopTimer = deps.telemetry.startTimer('handle_failed_ms', {
callName: 'handleFailed'
})
await payment.$query(deps.knex).patch({
state: OutgoingPaymentState.Failed,
error
})
await sendWebhookEvent(deps, payment, OutgoingPaymentEventType.PaymentFailed)
stopTimer()
}

async function handleCompleted(
deps: ServiceDependencies,
payment: OutgoingPayment
): Promise<void> {
const stopTimer = deps.telemetry.startTimer('handle_completed_ms', {
callName: 'handleCompleted'
})
await payment.$query(deps.knex).patch({
state: OutgoingPaymentState.Completed
})
Expand All @@ -162,6 +168,7 @@ async function handleCompleted(
payment,
OutgoingPaymentEventType.PaymentCompleted
)
stopTimer()
}

export async function sendWebhookEvent(
Expand All @@ -170,6 +177,9 @@ export async function sendWebhookEvent(
type: OutgoingPaymentEventType,
trx?: TransactionOrKnex
): Promise<void> {
const stopTimer = deps.telemetry.startTimer('op_send_webhook_event_ms', {
callName: 'opLifecycle_sendWebhookEvent'
})
// TigerBeetle accounts are only created as the OutgoingPayment is funded.
// So default the amountSent and balance to 0 for outgoing payments still in the funding state
const amountSent =
Expand All @@ -182,6 +192,7 @@ export async function sendWebhookEvent(
: await deps.accountingService.getBalance(payment.id)

if (amountSent === undefined || balance === undefined) {
stopTimer()
throw LifecycleError.MissingBalance
}

Expand All @@ -199,6 +210,7 @@ export async function sendWebhookEvent(
data: payment.toData({ amountSent, balance }),
withdrawal
})
stopTimer()
}

function validateAssets(
Expand Down
Loading
Loading