Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updated table size for Prometheus workbook. #2444

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@
"content": {
"version": "KqlItem/1.0",
"query": "// of all currently running endpoints\r\n// find the most recent correlation id\r\nlet upstatus = Prometheus_HaClusterExporter_CL\r\n| where TimeGenerated > ago(10min)\r\n| where name_s == \"sapmon\"\r\n| summarize arg_max(TimeGenerated, correlation_id_g, value_d) by sid_s, clusterName_s, hostname_s\r\n| project correlation_id_g;\r\n//identify the master (status = DC)\r\nlet dcstatus = materialize(Prometheus_HaClusterExporter_CL\r\n| where correlation_id_g in (upstatus)\r\n| where name_s == 'ha_cluster_pacemaker_nodes'\r\n| where value_d == 1\r\n| extend node_status=parse_json(labels_s)\r\n| where node_status['status']=='dc'\r\n| where tostring(node_status['node']) == hostname_s\r\n| summarize arg_max(TimeGenerated, correlation_id_g) by sid_s, clusterName_s, hostname_s\r\n| project correlation_id_g);\r\n//find all pacemaker resources and their status from dc metrics\r\n( Prometheus_HaClusterExporter_CL\r\n| where correlation_id_g in (dcstatus)\r\n| where name_s == \"ha_cluster_pacemaker_resources\" \r\n| where value_d == 1\r\n| extend resources = parse_json(labels_s)\r\n| summarize \r\n resources_failed = countif(resources['status'] == 'failed' or resources['status'] == 'failed_ignored'), \r\n resources_blocked = countif(resources['status'] == 'blocked' or resources['status'] == 'orphaned'), \r\n resources_active = countif(resources['role'] == 'started' and resources['managed'] == 'true')\r\n + countif(resources['role'] == 'master' and resources['managed'] == 'true')\r\n + countif(resources['role'] == 'slave' and resources['managed'] == 'true'),\r\n resources_unmanaged = countif(resources['managed'] == 'false'),\r\n status_red = countif(resources['status'] == 'failed' or resources['status'] == 'failed_ignored'), \r\n status_yellow = countif(resources['status'] == 'blocked' or resources['status'] == 'orphaned'), \r\n status_green = countif(resources['status'] == 'active' and resources['managed'] == 'true'),\r\n status_grey = countif(resources['managed'] == 
'false')\r\n by sid_s, clusterName_s)\r\n| union \r\n//find all pacemaker nodes and their status from dc metrics\r\n( Prometheus_HaClusterExporter_CL\r\n| where correlation_id_g in (dcstatus)\r\n| where name_s == \"ha_cluster_pacemaker_nodes\" \r\n| where value_d == 1\r\n| extend nodes = parse_json(labels_s)\r\n| summarize \r\n nodes_unclean = countif(nodes['status'] == 'unclean'), \r\n nodes_pending_shutdown = countif(nodes['status'] == 'pending' or nodes['status'] == 'shutdown' or nodes['status'] == 'standby_onfail'), \r\n nodes_online = countif(nodes['status'] == 'online'),\r\n nodes_maint_standby = countif(nodes['status'] == 'maintenance' or nodes['status'] == 'standby'),\r\n status_red = countif(nodes['status'] == 'unclean'), \r\n status_yellow = countif(nodes['status'] == 'pending' or nodes['status'] == 'shutdown' or nodes['status'] == 'standby_onfail'), \r\n status_green = countif(nodes['status'] == 'online' or nodes['status'] == 'dc'),\r\n status_grey = countif(nodes['status'] == 'maintenance' or nodes['status'] == 'standby')\r\n by sid_s, clusterName_s) \r\n| union \r\n//find all exporter up-status as additional metric\r\n(Prometheus_HaClusterExporter_CL\r\n| where correlation_id_g in (upstatus)\r\n| where name_s == 'up'\r\n| summarize status_grey = case(countif(value_d==1) == 0, 1, 0)//only count grey status if there is no (0) endpoints up\r\n by sid_s, clusterName_s)\r\n//summarize per cluster per sid\r\n| summarize sum(resources_failed),sum(resources_blocked),sum(resources_active),sum(resources_unmanaged),sum(nodes_unclean),sum(nodes_pending_shutdown),sum(nodes_online),sum(nodes_maint_standby),cluster_status = case(sum(status_red) > 0, 'red', sum(status_yellow) > 0, 'yellow', sum(status_grey) > 0, 'grey', sum(status_green) > 0, 'green', 'greyblue') by sid_s, clusterName_s\r\n| project cluster_status,sum_resources_failed,sum_resources_blocked, sum_resources_active, sum_resources_unmanaged,sum_nodes_unclean, 
sum_nodes_pending_shutdown,sum_nodes_online,sum_nodes_maint_standby,sid_s,clusterName_s",
- "size": 4,
+ "size": 3,
"exportedParameters": [
{
"fieldName": "sid_s",
Expand Down
Loading