-
Notifications
You must be signed in to change notification settings - Fork 59
/
pg_stat_monitor.h
546 lines (480 loc) · 17.1 KB
/
pg_stat_monitor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/*-------------------------------------------------------------------------
*
* pg_stat_monitor.h
* Track statement execution times across a whole database cluster.
*
* Portions Copyright © 2018-2024, Percona LLC and/or its affiliates
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
* Portions Copyright (c) 1994, The Regents of the University of California
*
* IDENTIFICATION
* contrib/pg_stat_monitor/pg_stat_monitor.h
*
*-------------------------------------------------------------------------
*/
#ifndef __PG_STAT_MONITOR_H__
#define __PG_STAT_MONITOR_H__
#include "postgres.h"
#include <arpa/inet.h>
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "lib/dshash.h"
#include "utils/dsa.h"
#include "access/hash.h"
#include "catalog/pg_authid.h"
#include "executor/instrument.h"
#include "common/ip.h"
#include "jit/jit.h"
#include "funcapi.h"
#include "access/twophase.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "optimizer/planner.h"
#include "postmaster/bgworker.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/scanner.h"
#include "parser/scansup.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "utils/lsyscache.h"
#include "utils/guc.h"
#include "utils/guc_tables.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
#define MAX_BACKEND_PROCESES (MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts)
#define IntArrayGetTextDatum(x,y) intarray_get_datum(x,y)
/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration) (1.0)
#define USAGE_INIT (1.0) /* including initial planning */
#define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
#define HISTOGRAM_MAX_TIME 50000000
#define MAX_RESPONSE_BUCKET 50
#define INVALID_BUCKET_ID -1
#define TEXT_LEN 255
#define ERROR_MESSAGE_LEN 100
#define REL_TYPENAME_LEN 64
#define REL_LST 10
#define REL_LEN 132 /* REL_TYPENAME_LEN * 2 (relname + schema) + 1
* (for view indication) + 1 and dot and
* string terminator */
#define CMD_LST 10
#define CMD_LEN 20
#define APPLICATIONNAME_LEN NAMEDATALEN
#define COMMENTS_LEN 256
#define PGSM_OVER_FLOW_MAX 10
#define PLAN_TEXT_LEN 1024
/* the assumption of query max nested level */
#define DEFAULT_MAX_NESTED_LEVEL 10
#define MAX_QUERY_BUF ((int64)pgsm_query_shared_buffer * 1024 * 1024)
#define MAX_BUCKETS_MEM ((int64)pgsm_max * 1024 * 1024)
#define BUCKETS_MEM_OVERFLOW() ((hash_get_num_entries(pgsm_hash) * sizeof(pgsmEntry)) >= MAX_BUCKETS_MEM)
#define MAX_BUCKET_ENTRIES (MAX_BUCKETS_MEM / sizeof(pgsmEntry))
#define QUERY_BUFFER_OVERFLOW(x,y) ((x + y + sizeof(uint64) + sizeof(uint64)) > MAX_QUERY_BUF)
#define QUERY_MARGIN 100
#define MIN_QUERY_LEN 10
#define SQLCODE_LEN 20
#define TOTAL_RELS_LENGTH (REL_LST * REL_LEN)
#if PG_VERSION_NUM >= 130000
#define MAX_SETTINGS 15
#else
#define MAX_SETTINGS 14
#endif
/* Update this if need a enum GUC with more options. */
#define MAX_ENUM_OPTIONS 6
/*
* API for disabling error capture ereport(ERROR,..) by PGSM's error capture hook
* pgsm_emit_log_hook()
*
* Use these macros as follows:
* PGSM_DISABLE_ERROR_CAPUTRE();
* {
* ... code that might throw ereport(ERROR) ...
* }PGSM_END_DISABLE_ERROR_CAPTURE();
*
* These macros can be used to error recursion if the error gets
* thrown from within the function called from pgsm_emit_log_hook()
*/
extern volatile bool __pgsm_do_not_capture_error;
#define PGSM_DISABLE_ERROR_CAPUTRE() \
do { \
__pgsm_do_not_capture_error = true
#define PGSM_END_DISABLE_ERROR_CAPTURE() \
__pgsm_do_not_capture_error = false; \
} while (0)
#define PGSM_ERROR_CAPTURE_ENABLED \
__pgsm_do_not_capture_error == false
/*
* pg_stat_monitor uses the hash structure to store all query statistics
* except the query text, which gets stored out of line in the raw DSA area.
* Enabling USE_DYNAMIC_HASH uses the dshash for storing the query statistics
* that get created in the DSA area and can grow to any size.
*
* The only issue with using the dshash is that the newly created hash entries
* are explicitly locked by dshash, and its caller is required to release the lock.
* That works well as long as we do not want to swallow the errors thrown from
* dshash function. Since the lightweight locks acquired internally by dshash
* automatically get released by error.
* But throwing an error from pg_stat_monitor would mean erroring out the user query,
* which is not acceptable for any stat collector extension.
*
* Moreover, some of the pg_stat_monitor functions perform the sequence scan on the
* hash table, while the sequence scan support for dshash table is only available
* for PG 15 and onwards.
* So until we figure out the way to release the locks acquired internally by dshash
* in case of an error while ignoring the error at the same time, we will keep using
* the classic shared memory hash table.
*/
#ifdef USE_DYNAMIC_HASH
#define PGSM_HASH_TABLE dshash_table
#define PGSM_HASH_TABLE_HANDLE dshash_table_handle
#define PGSM_HASH_SEQ_STATUS dshash_seq_status
#else
#define PGSM_HASH_TABLE HTAB
#define PGSM_HASH_TABLE_HANDLE HTAB*
#define PGSM_HASH_SEQ_STATUS HASH_SEQ_STATUS
#endif
#if PG_VERSION_NUM < 130000
typedef struct WalUsage
{
long wal_records; /* # of WAL records produced */
long wal_fpi; /* # of WAL full page images produced */
uint64 wal_bytes; /* size of WAL records produced */
} WalUsage;
#endif
typedef enum pgsmStoreKind
{
PGSM_INVALID = -1,
/*
* PGSM_PLAN and PGSM_EXEC must be respectively 0 and 1 as they're used to
* reference the underlying values in the arrays in the Counters struct,
* and this order is required in pg_stat_monitor_internal().
*/
PGSM_PARSE = 0,
PGSM_PLAN,
PGSM_EXEC,
PGSM_STORE,
PGSM_ERROR,
PGSM_NUMKIND /* Must be last value of this enum */
} pgsmStoreKind;
/* the assumption of query max nested level */
#define DEFAULT_MAX_NESTED_LEVEL 10
/*
* Type of aggregate keys
*/
typedef enum AGG_KEY
{
AGG_KEY_DATABASE = 0,
AGG_KEY_USER,
AGG_KEY_HOST
} AGG_KEY;
#define MAX_QUERY_LEN 1024
/* shared memory storage for the query */
typedef struct CallTime
{
double total_time; /* total execution time, in msec */
double min_time; /* minimum execution time in msec */
double max_time; /* maximum execution time in msec */
double mean_time; /* mean execution time in msec */
double sum_var_time; /* sum of variances in execution time in msec */
} CallTime;
typedef struct PlanInfo
{
uint64 planid; /* plan identifier */
char plan_text[PLAN_TEXT_LEN]; /* plan text */
size_t plan_len; /* strlen(plan_text) */
} PlanInfo;
typedef struct pgsmHashKey
{
uint64 bucket_id; /* bucket number */
uint64 queryid; /* query identifier */
uint64 planid; /* plan identifier */
uint64 appid; /* hash of application name */
Oid userid; /* user OID */
Oid dbid; /* database OID */
uint32 ip; /* client ip address */
bool toplevel; /* query executed at top level */
uint64 parentid; /* parent queryid of current query */
} pgsmHashKey;
typedef struct QueryInfo
{
dsa_pointer parent_query;
int64 type; /* type of query, options are query, info,
* warning, error, fatal */
char application_name[APPLICATIONNAME_LEN];
char comments[COMMENTS_LEN];
char relations[REL_LST][REL_LEN]; /* List of relation involved
* in the query */
int num_relations; /* Number of relation in the query */
CmdType cmd_type; /* query command type
* SELECT/UPDATE/DELETE/INSERT */
} QueryInfo;
typedef struct ErrorInfo
{
int64 elevel; /* error elevel */
char sqlcode[SQLCODE_LEN]; /* error sqlcode */
char message[ERROR_MESSAGE_LEN]; /* error message text */
} ErrorInfo;
typedef struct Calls
{
int64 calls; /* # of times executed */
int64 rows; /* total # of retrieved or affected rows */
double usage; /* usage factor */
} Calls;
typedef struct Blocks
{
int64 shared_blks_hit; /* # of shared buffer hits */
int64 shared_blks_read; /* # of shared disk blocks read */
int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
int64 shared_blks_written; /* # of shared disk blocks written */
int64 local_blks_hit; /* # of local buffer hits */
int64 local_blks_read; /* # of local disk blocks read */
int64 local_blks_dirtied; /* # of local disk blocks dirtied */
int64 local_blks_written; /* # of local disk blocks written */
int64 temp_blks_read; /* # of temp blocks read */
int64 temp_blks_written; /* # of temp blocks written */
double shared_blk_read_time; /* time spent reading shared blocks,
* in msec */
double shared_blk_write_time; /* time spent writing shared blocks,
* in msec */
double local_blk_read_time; /* time spent reading local blocks, in
* msec */
double local_blk_write_time; /* time spent writing local blocks, in
* msec */
double temp_blk_read_time; /* time spent reading temp blocks, in msec */
double temp_blk_write_time; /* time spent writing temp blocks, in
* msec */
/*
* Variables for local entry. The values to be passed to pgsm_update_entry
* from pgsm_store.
*/
instr_time instr_shared_blk_read_time; /* time spent reading shared
* blocks */
instr_time instr_shared_blk_write_time; /* time spent writing shared
* blocks */
instr_time instr_local_blk_read_time; /* time spent reading local blocks */
instr_time instr_local_blk_write_time; /* time spent writing local blocks */
instr_time instr_temp_blk_read_time; /* time spent reading temp blocks */
instr_time instr_temp_blk_write_time; /* time spent writing temp blocks */
} Blocks;
typedef struct JitInfo
{
int64 jit_functions; /* total number of JIT functions emitted */
double jit_generation_time; /* total time to generate jit code */
int64 jit_inlining_count; /* number of times inlining time has been
* > 0 */
double jit_deform_time; /* total time to deform tuples in jit code */
int64 jit_deform_count; /* number of times deform time has been >
* 0 */
double jit_inlining_time; /* total time to inline jit code */
int64 jit_optimization_count; /* number of times optimization time
* has been > 0 */
double jit_optimization_time; /* total time to optimize jit code */
int64 jit_emission_count; /* number of times emission time has been
* > 0 */
double jit_emission_time; /* total time to emit jit code */
/*
* Variables for local entry. The values to be passed to pgsm_update_entry
* from pgsm_store.
*/
instr_time instr_generation_counter; /* generation counter */
instr_time instr_inlining_counter; /* inlining counter */
instr_time instr_deform_counter; /* deform counter */
instr_time instr_optimization_counter; /* optimization counter */
instr_time instr_emission_counter; /* emission counter */
} JitInfo;
typedef struct SysInfo
{
double utime; /* user cpu time */
double stime; /* system cpu time */
} SysInfo;
typedef struct Wal_Usage
{
int64 wal_records; /* # of WAL records generated */
int64 wal_fpi; /* # of WAL full page images generated */
uint64 wal_bytes; /* total amount of WAL bytes generated */
} Wal_Usage;
typedef struct Counters
{
Calls calls;
QueryInfo info;
CallTime time;
Calls plancalls;
CallTime plantime;
PlanInfo planinfo;
Blocks blocks;
SysInfo sysinfo;
JitInfo jitinfo;
ErrorInfo error;
Wal_Usage walusage;
int resp_calls[MAX_RESPONSE_BUCKET]; /* execution time's in
* msec */
} Counters;
/* Some global structure to get the cpu usage, really don't like the idea of global variable */
/*
* Statistics per statement
*/
typedef struct pgsmEntry
{
pgsmHashKey key; /* hash key of entry - MUST BE FIRST */
uint64 pgsm_query_id; /* pgsm generate normalized query hash */
char datname[NAMEDATALEN]; /* database name */
char username[NAMEDATALEN]; /* user name */
Counters counters; /* the statistics for this query */
int encoding; /* query text encoding */
TimestampTz stats_since; /* timestamp of entry allocation */
TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
slock_t mutex; /* protects the counters only */
union
{
dsa_pointer query_pos; /* query location within query buffer */
char *query_pointer;
} query_text;
} pgsmEntry;
/*
* Global shared state
*/
typedef struct pgsmSharedState
{
LWLock *lock; /* protects hashtable search/modification */
slock_t mutex; /* protects following fields only: */
pg_atomic_uint64 current_wbucket;
pg_atomic_uint64 prev_bucket_sec;
int hash_tranche_id;
void *raw_dsa_area; /* DSA area pointer to store query texts.
* dshash also lives in this memory when
* USE_DYNAMIC_HASH is enabled */
PGSM_HASH_TABLE_HANDLE hash_handle;
/*
* hash table handle. can be either classic shared memory hash or dshash
* (if we are using USE_DYNAMIC_HASH)
*/
bool pgsm_oom;
TimestampTz bucket_start_time[]; /* start time of the bucket */
} pgsmSharedState;
typedef struct pgsmLocalState
{
pgsmSharedState *shared_pgsmState;
dsa_area *dsa; /* local dsa area for backend attached to the
* dsa area created by postmaster at startup. */
PGSM_HASH_TABLE *shared_hash;
MemoryContext pgsm_mem_cxt;
} pgsmLocalState;
#if PG_VERSION_NUM < 140000
/*
* Struct for tracking locations/lengths of constants during normalization
*/
typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
} LocationLen;
/*
* Working state for computing a query jumble and producing a normalized
* query string
*/
typedef struct JumbleState
{
/* Jumble of current query tree */
unsigned char *jumble;
/* Number of bytes used in jumble[] */
Size jumble_len;
/* Array of locations of constants that should be removed */
LocationLen *clocations;
/* Allocated length of clocations array */
int clocations_buf_size;
/* Current number of valid entries in clocations array */
int clocations_count;
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} JumbleState;
#endif
/* guc.c */
void init_guc(void);
/* hash_create.c */
dsa_area *get_dsa_area_for_query_text(void);
PGSM_HASH_TABLE *get_pgsmHash(void);
void pgsm_attach_shmem(void);
bool IsHashInitialize(void);
bool IsSystemOOM(void);
void pgsm_shmem_startup(void);
void pgsm_shmem_shutdown(int code, Datum arg);
int pgsm_get_bucket_size(void);
pgsmSharedState *pgsm_get_ss(void);
void hash_query_entries();
void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]);
void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer);
pgsmEntry *hash_entry_alloc(pgsmSharedState *pgsm, pgsmHashKey *key, int encoding);
Size pgsm_ShmemSize(void);
void pgsm_startup(void);
/* hash_query.c */
void pgsm_startup(void);
MemoryContext GetPgsmMemoryContext(void);
/* guc.c */
void init_guc(void);
/* GUC variables*/
/*---- GUC variables ----*/
typedef enum
{
PSGM_TRACK_NONE = 0, /* track no statements */
PGSM_TRACK_TOP, /* only top level statements */
PGSM_TRACK_ALL /* all statements, including nested ones */
} PGSMTrackLevel;
static const struct config_enum_entry track_options[] =
{
{"none", PSGM_TRACK_NONE, false},
{"top", PGSM_TRACK_TOP, false},
{"all", PGSM_TRACK_ALL, false},
{NULL, 0, false}
};
typedef enum
{
HISTOGRAM_START,
HISTOGRAM_END,
HISTOGRAM_COUNT
} HistogramTimingType;
extern int pgsm_max;
extern int pgsm_query_max_len;
extern int pgsm_bucket_time;
extern int pgsm_max_buckets;
extern int pgsm_histogram_buckets;
extern double pgsm_histogram_min;
extern double pgsm_histogram_max;
extern int pgsm_query_shared_buffer;
extern bool pgsm_track_planning;
extern bool pgsm_extract_comments;
extern bool pgsm_enable_query_plan;
extern bool pgsm_enable_overflow;
extern bool pgsm_normalized_query;
extern bool pgsm_track_utility;
extern bool pgsm_track_application_names;
extern bool pgsm_enable_pgsm_query_id;
extern int pgsm_track;
#define DECLARE_HOOK(hook, ...) \
static hook(__VA_ARGS__);
#define HOOK(name) name
#define HOOK_STATS_SIZE 0
#endif
void *pgsm_hash_find_or_insert(PGSM_HASH_TABLE * shared_hash, pgsmHashKey *key, bool *found);
void *pgsm_hash_find(PGSM_HASH_TABLE * shared_hash, pgsmHashKey *key, bool *found);
void pgsm_hash_seq_init(PGSM_HASH_SEQ_STATUS * hstat, PGSM_HASH_TABLE * shared_hash, bool lock);
void *pgsm_hash_seq_next(PGSM_HASH_SEQ_STATUS * hstat);
void pgsm_hash_seq_term(PGSM_HASH_SEQ_STATUS * hstat);
void pgsm_hash_delete_current(PGSM_HASH_SEQ_STATUS * hstat, PGSM_HASH_TABLE * shared_hash, void *key);