diff --git a/cmd/terraformer/terraformer.go b/cmd/terraformer/terraformer.go index 4093d7b..86039e6 100644 --- a/cmd/terraformer/terraformer.go +++ b/cmd/terraformer/terraformer.go @@ -62,13 +62,13 @@ const ( tfCmdPlan = "plan" tfCmdRemote = "remote" - tplTFVars = ` -domain = "{{.Domain}}" + tplTFVars = `domain = "{{.Domain}}" key = { access = "{{.KeyAccess}}" } pg_password = "{{.PGPassword}}" -` +google_client_id = "{{.GoogleID}}" +google_client_secret = "{{.GoogleSecret}}"` varAccount = "account" varEnv = "env" @@ -77,9 +77,11 @@ pg_password = "{{.PGPassword}}" // vars bundles together all generated or given input that is custom to the env. type vars struct { - KeyAccess string - Domain string - PGPassword string + Domain string + GoogleID string + GoogleSecret string + KeyAccess string + PGPassword string } func main() { @@ -205,15 +207,36 @@ func main() { log.Fatal("Can't work without a domain.") } + // Google OAuth credentials guard the monitoring setup, so both values are mandatory. + fmt.Println("\nIn order to guard the monitoring setup we need Google OAuth credentials.\nWhat is your Google client ID?") + fmt.Print("|> ") + googleID := "" + fmt.Scanf("%s", &googleID) + + if googleID == "" { + log.Fatal("Can't work without a Google client ID.") + } + + fmt.Println("\nWhat is your Google client Secret?") + fmt.Print("|> ") + googleSecret := "" + fmt.Scanf("%s", &googleSecret) + + if googleSecret == "" { + log.Fatal("Can't work without a Google client secret.") + } + pubKey, err := generateKeyPair(filepath.Join(statePath, defaultKeyPath)) if err != nil { log.Fatal(err) } if err = generateVarFile(varFile, vars{ - Domain: domain, - KeyAccess: strings.Trim(string(pubKey), "\n"), - PGPassword: generate.RandomStringSafe(32), + Domain: domain, + GoogleID: googleID, + GoogleSecret: googleSecret, + KeyAccess: strings.Trim(string(pubKey), "\n"), + PGPassword: generate.RandomStringSafe(32), }); err != nil { log.Fatalf("var file create failed: %s", err) } diff --git a/infrastructure/terraform/template/files/dashboard-ops.json 
b/infrastructure/terraform/template/files/dashboard-ops.json new file mode 100644 index 0000000..684d52b --- /dev/null +++ b/infrastructure/terraform/template/files/dashboard-ops.json @@ -0,0 +1,1510 @@ +{ + "id": 1, + "title": "Operations", + "originalTitle": "Operations", + "tags": [], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": true, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "25px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "height": "", + "id": 12, + "interval": null, + "isNew": true, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "((sum(rate(handler_request_latency_seconds_bucket{le=\"0.25\"}[5m])) + sum(rate(handler_request_latency_seconds_bucket{le=\"0.5\"}[5m]))) / 2) / sum(rate(handler_request_latency_seconds_count[5m]))", + "intervalFactor": 2, + "metric": "job:handler_http_latency:apdex", + "refId": "A", + "step": 240 + } + ], + "thresholds": "0.95,0.99", + "title": "Gateway", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + 
"format": "none", + "id": 13, + "interval": null, + "isNew": true, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "((sum(rate(cache_op_latency_seconds_bucket{le=\"0.005\"}[5m])) + sum(rate(cache_op_latency_seconds_bucket{le=\"0.025\"}[5m]))) / 2) / sum(rate(cache_op_latency_seconds_count[5m]))", + "intervalFactor": 2, + "metric": "job:service_latency:apdex", + "refId": "A", + "step": 240 + } + ], + "thresholds": "0.95,0.99", + "title": "Cache", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "id": 21, + "interval": null, + "isNew": true, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "job:service_latency:apdex", + "intervalFactor": 2, + "metric": "job:service_latency:apdex", + "refId": "A", + "step": 240 + } + ], + "thresholds": "0.95,0.99", + "title": "Service", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "showTitle": false, + "title": "KPIs" + }, + { + "collapse": 
false, + "editable": true, + "height": "200px", + "panels": [ + { + "aliasColors": { + "50th": "#6ED0E0", + "95th": "#EF843C", + "99th": "#E24D42", + "apdex": "#7EB26D" + }, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": 1, + "rightMin": 0.75, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 4, + "isNew": true, + "leftYAxisLabel": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "rightYAxisLabel": "", + "seriesOverrides": [ + { + "alias": "apdex", + "fill": 2, + "legend": false, + "linewidth": 0, + "yaxis": 2 + } + ], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "((sum(rate(handler_request_latency_seconds_bucket{le=\"0.25\"}[5m])) + sum(rate(handler_request_latency_seconds_bucket{le=\"0.5\"}[5m]))) / 2) / sum(rate(handler_request_latency_seconds_count[5m]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "apdex", + "refId": "D", + "step": 40 + }, + { + "expr": "job:handler_http_latency:50", + "interval": "", + "intervalFactor": 2, + "legendFormat": "50th", + "refId": "A", + "step": 40 + }, + { + "expr": "job:handler_http_latency:95", + "intervalFactor": 2, + "legendFormat": "95th", + "refId": "B", + "step": 40 + }, + { + "expr": "job:handler_http_latency:99", + "intervalFactor": 2, + "legendFormat": "99th", + "metric": "job", 
+ "refId": "C", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Gateway", + "tooltip": { + "msResolution": true, + "shared": true, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "s", + "short" + ] + }, + { + "aliasColors": { + "50th": "#6ED0E0", + "95th": "#EF843C", + "99th": "#E24D42" + }, + "bars": false, + "datasource": null, + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": 1, + "rightMin": 0.75, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 17, + "isNew": true, + "leftYAxisLabel": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 50, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "rightYAxisLabel": "", + "seriesOverrides": [ + { + "alias": "apdex", + "fill": 2, + "legend": false, + "linewidth": 0, + "yaxis": 2 + } + ], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "((sum(rate(cache_op_latency_seconds_bucket{le=\"0.005\"}[5m])) + sum(rate(cache_op_latency_seconds_bucket{le=\"0.025\"}[5m]))) / 2) / sum(rate(cache_op_latency_seconds_count[5m]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "apdex", + "metric": "", + "refId": "D", + "step": 40 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(cache_op_latency_seconds_bucket [5m])) by (le))", + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"50th", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(cache_op_latency_seconds_bucket [5m])) by (le))", + "intervalFactor": 2, + "legendFormat": "95th", + "refId": "B", + "step": 40 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(cache_op_latency_seconds_bucket [5m])) by (le))", + "intervalFactor": 2, + "legendFormat": "99th", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cache", + "tooltip": { + "msResolution": true, + "shared": true, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "s", + "short" + ] + }, + { + "aliasColors": { + "50th": "#6ED0E0", + "95th": "#EF843C", + "99th": "#E24D42" + }, + "bars": false, + "datasource": null, + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": 1, + "rightMin": 0.75, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 2, + "isNew": true, + "leftYAxisLabel": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 10, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "rightYAxisLabel": "", + "seriesOverrides": [ + { + "alias": "apdex", + "fill": 2, + "legend": false, + "linewidth": 0, + "yaxis": 2 + } + ], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "job:service_latency:apdex", + "hide": false, + "intervalFactor": 2, + 
"legendFormat": "apdex", + "refId": "D", + "step": 40 + }, + { + "expr": "job:service_latency:50", + "interval": "", + "intervalFactor": 2, + "legendFormat": "50th", + "refId": "A", + "step": 40 + }, + { + "expr": "job:service_latency:95", + "intervalFactor": 2, + "legendFormat": "95th", + "refId": "B", + "step": 40 + }, + { + "expr": "job:service_latency:99", + "intervalFactor": 2, + "legendFormat": "99th", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Service", + "tooltip": { + "msResolution": true, + "shared": true, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "s", + "short" + ] + } + ], + "showTitle": true, + "title": "LATENCIES" + }, + { + "collapse": false, + "editable": true, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 6, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 5, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "job:handler_http_route:sum", + "intervalFactor": 2, + "legendFormat": "{{route}}", + "refId": "A", + "step": 40 + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Gateway", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "short", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 18, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 50, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cache_op_count [5m])) by (service, method)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}}.{{method}}", + "metric": "job:gateway_service_op:count", + "refId": "A", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cache", + "tooltip": { + "msResolution": true, + "shared": false, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "short", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + 
"rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "job:service_op:count", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}}.{{method}}", + "metric": "job:gateway_service_op:count", + "refId": "A", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Service", + "tooltip": { + "msResolution": true, + "shared": false, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "short", + "short" + ] + } + ], + "showTitle": true, + "title": "OPERATIONS" + }, + { + "collapse": false, + "editable": true, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 5, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + 
"total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(handler_request_count{status=~\"4.*|5.*\"} [5m])) by (status)", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "metric": "job:handler_http_status:sum", + "refId": "A", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Gateway", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "short", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 19, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cache_err_count [5m])) by (method, service)", + "intervalFactor": 2, + "legendFormat": "{{service}}.{{method}}", + "metric": "job:service", + "refId": "A", + "step": 40 
+ } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cache", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "none", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 11, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "job:service_err:count", + "intervalFactor": 2, + "legendFormat": "{{service}}.{{method}}", + "metric": "job:service", + "refId": "A", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Service", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "none", + "short" + ] + } + ], + "showTitle": true, + "title": "ERRORS" + }, + { + "collapse": false, + "editable": true, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + 
"leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "job:platform_process_cpu:max", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "none", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 0, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 10, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + 
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "process_resident_memory_bytes", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "bytes", + "short" + ] + } + ], + "showTitle": true, + "title": "PROCESS" + }, + { + "collapse": false, + "editable": true, + "height": "200px", + "panels": [ + { + "aliasColors": { + "50th": "#6ED0E0", + "95th": "#EF843C", + "99th": "#E24D42" + }, + "bars": false, + "datasource": null, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": 1, + "rightMin": 0.75, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 16, + "isNew": true, + "leftYAxisLabel": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "rightYAxisLabel": "", + "seriesOverrides": [ + { + "alias": "apdex", + "fill": 2, + "legend": false, + "linewidth": 0, + "yaxis": 2 + } + ], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"job:source_latency:apdex", + "hide": false, + "intervalFactor": 2, + "legendFormat": "apdex", + "refId": "D", + "step": 40 + }, + { + "expr": "job:source_latency:50", + "interval": "", + "intervalFactor": 2, + "legendFormat": "50th", + "refId": "A", + "step": 40 + }, + { + "expr": "job:source_latency:95", + "intervalFactor": 2, + "legendFormat": "95th", + "refId": "B", + "step": 40 + }, + { + "expr": "job:source_latency:99", + "intervalFactor": 2, + "legendFormat": "99th", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Latencies", + "tooltip": { + "msResolution": true, + "shared": true, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "s", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": 1, + "rightMin": 0.5, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 15, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "rightYAxisLabel": "", + "seriesOverrides": [ + { + "alias": "e/i ratio2", + "fill": 5, + "yaxis": 2 + } + ], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "job:source_op:count", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{source}}.{{method}}", 
+ "metric": "job:source_op:count", + "refId": "A", + "step": 40 + }, + { + "expr": "sum(rate(source_op_count{method=\"Consume\"} [5m])) / sum(rate(source_op_count{method=\"Propagate\"} [5m]))", + "intervalFactor": 2, + "legendFormat": "e/i ratio", + "refId": "B", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Operations", + "tooltip": { + "msResolution": false, + "shared": false, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "short", + "short" + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": 2, + "editable": true, + "error": false, + "fill": 8, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null, + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 20, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percent": false, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "job:source_err:count", + "intervalFactor": 2, + "legendFormat": "{{source}}.{{method}}", + "metric": "job:service", + "refId": "A", + "step": 40 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Errors", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "y-axis": true, + "y_formats": [ + "none", + "short" + ] + } + ], + "showTitle": 
true, + "title": "SOURCE" + } + ], + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": "30s", + "schemaVersion": 11, + "version": 105, + "links": [] +} \ No newline at end of file diff --git a/infrastructure/terraform/template/platform.tf b/infrastructure/terraform/template/platform.tf index 7c5fc03..995a96e 100644 --- a/infrastructure/terraform/template/platform.tf +++ b/infrastructure/terraform/template/platform.tf @@ -6,6 +6,31 @@ data "aws_acm_certificate" "perimeter" { ] } +data "template_file" "domain_canonical" { + template = "$${root}.$${tld}" + + vars { + root = "${element(split(".", var.domain), length(split(".", var.domain)) - 2)}" + tld = "${element(split(".", var.domain), length(split(".", var.domain)) - 1)}" + } +} + +data "template_file" "monitoring-user_data" { + template = "${file("${path.module}/scripts/setup_monitoring.sh")}" + + vars { + aws_id = "${aws_iam_access_key.monitoring.id}" + aws_secret = "${aws_iam_access_key.monitoring.secret}" + dashboard = "${file("${path.module}/files/dashboard-ops.json")}" + domain = "${replace(var.domain, "*.", "")}" + domain_canonical = "${data.template_file.domain_canonical.rendered}" + google_client_id = "${var.google_client_id}" + google_client_secret = "${var.google_client_secret}" + region = "${var.region}" + zone = "${var.env}-${var.region}" + } +} + resource "aws_instance" "monitoring" { ami = "${var.ami_minimal["${var.region}"]}" instance_type = "t2.medium" @@ -20,6 +45,36 @@ resource "aws_instance" "monitoring" { tags { Name = "monitoring" } + + provisioner "file" { + connection { + type = "ssh" + user = "admin" + private_key = "${file("${path.cwd}/access.pem")}" + bastion_user = "admin" 
+ bastion_private_key = "${file("${path.cwd}/access.pem")}" + bastion_host = "${aws_eip.bastion.public_ip}" + } + + destination = "/tmp/setup.sh" + content = "${data.template_file.monitoring-user_data.rendered}" + } + + provisioner "remote-exec" { + connection { + type = "ssh" + user = "admin" + private_key = "${file("${path.cwd}/access.pem")}" + bastion_user = "admin" + bastion_private_key = "${file("${path.cwd}/access.pem")}" + bastion_host = "${aws_eip.bastion.public_ip}" + } + + inline = [ + "chmod +x /tmp/setup.sh", + "sudo /tmp/setup.sh", + ] + } } resource "aws_elb" "monitoring" { diff --git a/infrastructure/terraform/template/scripts/setup_monitoring.sh b/infrastructure/terraform/template/scripts/setup_monitoring.sh new file mode 100644 index 0000000..12d8db8 --- /dev/null +++ b/infrastructure/terraform/template/scripts/setup_monitoring.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +set -ex + +# Base setup +sudo apt-get update +sudo apt-get install -y curl + +# Install Prometheus +echo -n 'deb http://deb.robustperception.io/ precise nightly' | sudo tee /etc/apt/sources.list.d/robustperception.io.list > /dev/null +curl https://s3-eu-west-1.amazonaws.com/deb.robustperception.io/41EFC99D.gpg | sudo apt-key add - +sudo apt-get update +sudo apt-get install -y alertmanager node-exporter pushgateway prometheus + +# Setup and start Prometheus +sudo service prometheus start +sudo update-rc.d prometheus defaults 95 10 + +# Install Grafana +echo -n 'deb https://packagecloud.io/grafana/stable/debian/ jessie main' | sudo tee /etc/apt/sources.list.d/packagecloud.io.list > /dev/null +curl https://packagecloud.io/gpg.key | sudo apt-key add - +sudo apt-get update +sudo apt-get install -y grafana + +# Setup and start Grafana +sudo /bin/systemctl daemon-reload +sudo /bin/systemctl enable grafana-server +sudo /bin/systemctl start grafana-server + +sleep 30 + +# Setup Grafana dashboard +sudo mkdir -p /var/lib/grafana/dashboards +echo '${dashboard}' | sudo tee 
/var/lib/grafana/dashboards/dashboard-ops.json > /dev/null + +# Add Prometheus data source +curl -vvv \ + -X POST \ + -u admin:admin \ + -H 'Content-Type: application/json;charset=UTF-8' \ + --data-binary '{"name":"prometheus", "type":"prometheus","url":"http://localhost:9090","access":"proxy","isDefault":true}' \ + 'http://0.0.0.0:3000/api/datasources' + +# Setup Grafana config +PASSWORD=$(date +%s | sha256sum | base64 | head -c 32 ; echo) + +echo " +[auth] +disable_login_form = true +[auth.basic] +enabled = false +[auth.google] +enabled = true +client_id = ${google_client_id} +client_secret = ${google_client_secret} +scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email +auth_url = https://accounts.google.com/o/oauth2/auth +token_url = https://accounts.google.com/o/oauth2/token +allowed_domains = ${domain} ${domain_canonical} +allow_sign_up = true +[dashboards.json] +enabled = true +path = /var/lib/grafana/dashboards +[security] +admin_user = admin +admin_password = $PASSWORD +[server] +root_url = https://monitoring-${zone}.${domain} +[users] +allow_sign_up = false +auto_assign_org = true +auto_assign_org_role = Editor +" | sudo tee /etc/grafana/grafana.ini > /dev/null +sudo chown grafana:grafana /etc/grafana/grafana.ini + +sudo /bin/systemctl restart grafana-server + +# Setup prometheus config +# /etc/prometheus/prometheus.yml +echo " +global: + evaluation_interval: '1m' + scrape_interval: '30s' +rule_files: + - /etc/prometheus/api.rules +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: + - 'localhost:9090' + - job_name: 'pushgateway' + honor_labels: true + static_configs: + - targets: + - 'localhost:9091' + - job_name: 'alertmanager' + static_configs: + - targets: + - 'localhost:9093' + - job_name: 'node-exporter' + ec2_sd_configs: + - region: '${region}' + access_key: ${aws_id} + secret_key: ${aws_secret} + port: 9100 + - job_name: 'gateway-http' + ec2_sd_configs: + - region: 
'${region}' + access_key: ${aws_id} + secret_key: ${aws_secret} + port: 9000 + - job_name: 'sims' + ec2_sd_configs: + - region: '${region}' + access_key: ${aws_id} + secret_key: ${aws_secret} + port: 9001 +" | sudo tee /etc/prometheus/prometheus.yml > /dev/null + +# /etc/prometheus/api.rules +echo ' +job:handler_http_status:sum = sum(rate(handler_request_count [5m])) by (status) +job:handler_http_route:sum = sum(rate(handler_request_count [5m])) by (route) +job:handler_http_latency:apdex = ((sum(rate(handler_request_latency_seconds_bucket{le="0.05"}[5m])) + sum(rate(handler_request_latency_seconds_bucket{le="0.25"}[5m]))) / 2) / sum(rate(handler_request_latency_seconds_count[5m])) +job:handler_http_latency:50 = histogram_quantile(0.5, sum(rate(handler_request_latency_seconds_bucket [5m])) by (le)) +job:handler_http_latency:95 = histogram_quantile(0.95, sum(rate(handler_request_latency_seconds_bucket [5m])) by (le)) +job:handler_http_latency:99 = histogram_quantile(0.99, sum(rate(handler_request_latency_seconds_bucket [5m])) by (le)) +job:service_latency:apdex = ((sum(rate(service_op_latency_seconds_bucket{le="0.005"}[5m])) + sum(rate(service_op_latency_seconds_bucket{le="0.025"}[5m]))) / 2) / sum(rate(service_op_latency_seconds_count[5m])) +job:service_latency:50 = histogram_quantile(0.5, sum(rate(service_op_latency_seconds_bucket [5m])) by (le)) +job:service_latency:95 = histogram_quantile(0.95, sum(rate(service_op_latency_seconds_bucket [5m])) by (le)) +job:service_latency:99 = histogram_quantile(0.99, sum(rate(service_op_latency_seconds_bucket [5m])) by (le)) +job:service_err:count = sum(rate(service_err_count [5m])) by (method, service) +job:service_op:count = sum(rate(service_op_count [5m])) by (method, service) +job:source_latency:apdex = ((sum(rate(source_op_latency_seconds_bucket{le="0.005"}[5m])) + sum(rate(source_op_latency_seconds_bucket{le="0.025"}[5m]))) / 2) / sum(rate(source_op_latency_seconds_count[5m])) +job:source_latency:50 = 
histogram_quantile(0.5, sum(rate(source_op_latency_seconds_bucket [5m])) by (le)) +job:source_latency:95 = histogram_quantile(0.95, sum(rate(source_op_latency_seconds_bucket [5m])) by (le)) +job:source_latency:99 = histogram_quantile(0.99, sum(rate(source_op_latency_seconds_bucket [5m])) by (le)) +job:source_err:count = sum(rate(source_err_count [5m])) by (method, source) +job:source_op:count = sum(rate(source_op_count [5m])) by (method, source) +job:source_queue_latency:50 = histogram_quantile(0.5, sum(rate(source_queue_latency_seconds_bucket [5m])) by (le)) +job:source_queue_latency:95 = histogram_quantile(0.95, sum(rate(source_queue_latency_seconds_bucket [5m])) by (le)) +job:source_queue_latency:99 = histogram_quantile(0.99, sum(rate(source_queue_latency_seconds_bucket [5m])) by (le)) +job:platform_process_res:sum = sum(process_resident_memory_bytes) by (instance, job) +job:platform_process_cpu:max = max(rate(process_cpu_seconds_total [5m])) by (instance, job) +' | sudo tee /etc/prometheus/api.rules > /dev/null + +sudo service prometheus restart \ No newline at end of file diff --git a/infrastructure/terraform/template/security.tf b/infrastructure/terraform/template/security.tf index 46d0551..e4c01bc 100644 --- a/infrastructure/terraform/template/security.tf +++ b/infrastructure/terraform/template/security.tf @@ -1,3 +1,77 @@ +resource "aws_key_pair" "access" { + key_name = "access" + public_key = "${var.key["access"]}" +} + +resource "aws_iam_user" "monitoring" { + name = "monitoring-${var.env}-${var.region}" + path = "/" +} + +resource "aws_iam_user_policy" "monitoring" { + name = "monitoring-${var.env}-${var.region}" + user = "${aws_iam_user.monitoring.name}" + + policy = <