-
Notifications
You must be signed in to change notification settings - Fork 211
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Alex Castilio dos Santos <[email protected]>
- Loading branch information
1 parent
8a62ddb
commit 7135f75
Showing
6 changed files
with
230 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
package scaletest | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"log" | ||
"sync" | ||
"time" | ||
|
||
"github.com/microsoft/retina/pkg/telemetry" | ||
"github.com/microsoft/retina/test/e2e/common" | ||
"github.com/pkg/errors" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/client-go/kubernetes" | ||
"k8s.io/client-go/tools/clientcmd" | ||
metrics "k8s.io/metrics/pkg/client/clientset/versioned" | ||
) | ||
|
||
type GetAndPublishMetrics struct { | ||
KubeConfigFilePath string | ||
AppInsightsKey string | ||
AdditionalTelemetryProperty map[string]string | ||
Labels map[string]string | ||
stop chan struct{} | ||
wg sync.WaitGroup | ||
telemetryClient *telemetry.TelemetryClient | ||
} | ||
|
||
func (g *GetAndPublishMetrics) Run() error { | ||
telemetry.InitAppInsights(g.AppInsightsKey, g.AdditionalTelemetryProperty["retinaVersion"]) | ||
|
||
telemetryClient, err := telemetry.NewAppInsightsTelemetryClient("retina-scale-test", g.AdditionalTelemetryProperty) | ||
if err != nil { | ||
return errors.Wrap(err, "error creating telemetry client") | ||
} | ||
|
||
g.telemetryClient = telemetryClient | ||
|
||
g.stop = make(chan struct{}) | ||
g.wg.Add(1) | ||
|
||
go func() { | ||
|
||
t := time.NewTicker(5 * time.Minute) | ||
|
||
for { | ||
select { | ||
|
||
case <-t.C: | ||
err = g.getAndPublishMetrics() | ||
if err != nil { | ||
log.Fatalf("error getting and publishing number of restarts: %v", err) | ||
return | ||
} | ||
|
||
case <-g.stop: | ||
g.wg.Done() | ||
return | ||
|
||
} | ||
} | ||
|
||
}() | ||
|
||
return nil | ||
} | ||
|
||
func (g *GetAndPublishMetrics) Stop() error { | ||
telemetry.ShutdownAppInsights() | ||
close(g.stop) | ||
g.wg.Wait() | ||
return nil | ||
} | ||
|
||
func (g *GetAndPublishMetrics) Prevalidate() error { | ||
if g.AppInsightsKey == "" { | ||
return fmt.Errorf("AppInsightsKey is required") | ||
} | ||
if _, ok := g.AdditionalTelemetryProperty["retinaVersion"]; !ok { | ||
return fmt.Errorf("retinaVersion is required in AdditionalTelemetryProperty") | ||
} | ||
return nil | ||
} | ||
|
||
func (g *GetAndPublishMetrics) getAndPublishMetrics() error { | ||
// Get the number of restarts | ||
config, err := clientcmd.BuildConfigFromFlags("", g.KubeConfigFilePath) | ||
if err != nil { | ||
return fmt.Errorf("error building kubeconfig: %w", err) | ||
} | ||
|
||
clientset, err := kubernetes.NewForConfig(config) | ||
if err != nil { | ||
return fmt.Errorf("error creating Kubernetes client: %w", err) | ||
} | ||
|
||
mc, err := metrics.NewForConfig(config) | ||
if err != nil { | ||
return fmt.Errorf("error creating metrics client: %w", err) | ||
} | ||
|
||
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second) | ||
defer cancel() | ||
|
||
metrics, err := g.getMetrics(ctx, clientset, mc) | ||
if err != nil { | ||
return fmt.Errorf("error getting metrics: %w", err) | ||
} | ||
|
||
// Publish the number of restarts | ||
for _, metric := range metrics { | ||
g.telemetryClient.TrackEvent("scale-test", metric) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
type metric map[string]string | ||
|
||
func (g *GetAndPublishMetrics) getMetrics(ctx context.Context, k8sClient *kubernetes.Clientset, metricsClient *metrics.Clientset) ([]metric, error) { | ||
|
||
labelSelector := labels.Set(g.Labels).String() | ||
|
||
pods, err := k8sClient.CoreV1().Pods(common.KubeSystemNamespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "error getting nodes") | ||
} | ||
|
||
nodesMetricsInt := metricsClient.MetricsV1beta1().NodeMetricses() | ||
podMetricsInt := metricsClient.MetricsV1beta1().PodMetricses(common.KubeSystemNamespace) | ||
|
||
var allPodsHealth []metric | ||
|
||
timestamp := time.Now().UTC().Format(time.RFC3339) | ||
|
||
for _, pod := range pods.Items { | ||
var podHealth metric = make(map[string]string) | ||
|
||
podMetrics, err := podMetricsInt.Get(ctx, pod.Name, metav1.GetOptions{}) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "error getting pod metrics") | ||
} | ||
|
||
podMem := resource.MustParse("0") | ||
podCpu := resource.MustParse("0") | ||
for _, cm := range podMetrics.Containers { | ||
podMem.Add(cm.Usage["memory"]) | ||
podCpu.Add(cm.Usage["cpu"]) | ||
} | ||
|
||
nodeMetrics, err := nodesMetricsInt.Get(ctx, pod.Spec.NodeName, metav1.GetOptions{}) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "error getting node metrics") | ||
} | ||
|
||
nodeMem := nodeMetrics.Usage["memory"] | ||
nodeCpu := nodeMetrics.Usage["cpu"] | ||
|
||
restarts := 0 | ||
|
||
for _, containerStatus := range pod.Status.ContainerStatuses { | ||
restarts = restarts + int(containerStatus.RestartCount) | ||
} | ||
|
||
podHealth["timestamp"] = timestamp | ||
podHealth["pod"] = pod.Name | ||
podHealth["podCpuInMilliCore"] = fmt.Sprintf("%d", podCpu.MilliValue()) | ||
podHealth["podMemoryInMB"] = fmt.Sprintf("%d", podMem.Value()/(1048576)) | ||
podHealth["podRestarts"] = fmt.Sprintf("%d", restarts) | ||
podHealth["node"] = pod.Spec.NodeName | ||
podHealth["nodeCpuInMilliCore"] = fmt.Sprintf("%d", nodeCpu.MilliValue()) | ||
podHealth["nodeMemoryInMB"] = fmt.Sprintf("%d", nodeMem.Value()/(1048576)) | ||
|
||
allPodsHealth = append(allPodsHealth, podHealth) | ||
|
||
} | ||
|
||
return allPodsHealth, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters