diff --git a/config.sample.toml b/config.sample.toml
index e5ac45d9..d9de8414 100644
--- a/config.sample.toml
+++ b/config.sample.toml
@@ -190,6 +190,9 @@ port = 4000
 # Time Interval (in seconds) in which metrics of all application containers
 # running in the current node are collected and stored in the central mongoDB database
 metrics_interval = 600
+# Time Interval (in seconds) at which the health of all application containers
+# is checked; containers found to be unhealthy are restarted
+health_interval = 300
 # Hard Limits the total number of app instances that can be deployed by an user
 # Set app_limit = -1 if no hard limit is to be imposed
 app_limit = 10
diff --git a/configs/types.go b/configs/types.go
index d5b2c6f1..4950c9c9 100644
--- a/configs/types.go
+++ b/configs/types.go
@@ -49,6 +49,7 @@ type GenericService struct {
 type AppMakerService struct {
 	GenericService
 	MetricsInterval time.Duration `toml:"metrics_interval"`
+	HealthInterval  time.Duration `toml:"health_interval"`
 	AppLimit        int           `toml:"app_limit"`
 }
 
diff --git a/lib/docker/container.go b/lib/docker/container.go
index 1ea2e660..d2f1ba61 100644
--- a/lib/docker/container.go
+++ b/lib/docker/container.go
@@ -159,3 +159,11 @@ func ContainerStats(containerID string) (*types.Stats, error) {
 	err = json.Unmarshal(body, containerStatsInterface)
 	return containerStatsInterface, err
 }
+
+// ContainerRestart restarts the container corresponding to the given containerID.
+// nil is passed as the timeout argument, which presumably leaves the wait before
+// the container is killed to the docker default (confirm against the vendored client)
+func ContainerRestart(containerID string) error {
+	ctx := context.Background()
+	return cli.ContainerRestart(ctx, containerID, nil)
+}
diff --git a/lib/docker/inspect.go b/lib/docker/inspect.go
index 217b8e7d..bc9668a5 100644
--- a/lib/docker/inspect.go
+++ b/lib/docker/inspect.go
@@ -5,6 +5,13 @@ import (
 	"golang.org/x/net/context"
 )
 
+const (
+	// Health status strings returned by InspectContainerHealth
+	Container_Healthy       = "healthy"
+	Container_Unhealthy     = "unhealthy"
+	Container_NoHealthcheck = "none"
+)
+
 // InspectContainerState returns the state of the container using the containerID
 func InspectContainerState(containerID string) (*dockerTypes.ContainerState, error) {
 	ctx := context.Background()
@@ -14,3 +21,21 @@ func InspectContainerState(containerID string) (*dockerTypes.ContainerState, err
 	}
 	return containerStatus.ContainerJSONBase.State, nil
 }
+
+// InspectContainerHealth returns the health status of the container using the containerID.
+// Container_NoHealthcheck is returned when the inspect result carries no health data,
+// which is the case for containers without a configured healthcheck
+func InspectContainerHealth(containerID string) (string, error) {
+	ctx := context.Background()
+	containerInfo, err := cli.ContainerInspect(ctx, containerID)
+	if err != nil {
+		return "", err
+	}
+	// State and State.Health are pointers; dereferencing them unguarded
+	// panics when docker reports no health data for the container
+	base := containerInfo.ContainerJSONBase
+	if base == nil || base.State == nil || base.State.Health == nil {
+		return Container_NoHealthcheck, nil
+	}
+	return base.State.Health.Status, nil
+}
diff --git a/main.go b/main.go
index b082c284..941b15c8 100644
--- a/main.go
+++ b/main.go
@@ -23,6 +23,7 @@ func initMaster() {
 func initAppMaker() {
 	if configs.ServiceConfig.AppMaker.Deploy {
 		go appmaker.ScheduleMetricsCollection()
+		go appmaker.ScheduleHealthCheck()
 	}
 }
 
diff --git a/services/appmaker/helper.go b/services/appmaker/helper.go
index 3ee5ff5d..bc11a44e 100644
--- a/services/appmaker/helper.go
+++ b/services/appmaker/helper.go
@@ -55,3 +55,22 @@ func stateCleanup(appName string) {
 		utils.LogError("AppMaker-Helper-4", err)
 	}
 }
+
+// fetchAllApplicationNames returns the names of all documents of type
+// AppInstance fetched from the instance collection
+func fetchAllApplicationNames() []string {
+	apps := mongo.FetchDocs(mongo.InstanceCollection, types.M{
+		mongo.InstanceTypeKey: mongo.AppInstance,
+	})
+	appNames := make([]string, 0, len(apps))
+	for _, app := range apps {
+		// Skip documents whose name is missing or not a string instead of
+		// panicking on the type assertion inside the scheduler goroutine
+		name, ok := app[mongo.NameKey].(string)
+		if !ok {
+			continue
+		}
+		appNames = append(appNames, name)
+	}
+	return appNames
+}
diff --git a/services/appmaker/monitor.go b/services/appmaker/monitor.go
index 30c2c1cc..f9d51ceb 100644
--- a/services/appmaker/monitor.go
+++ b/services/appmaker/monitor.go
@@ -80,3 +80,32 @@ func ScheduleMetricsCollection() {
 	scheduler := utils.NewScheduler(interval, registerMetrics)
 	scheduler.RunAsync()
 }
+
+// checkContainerHealth checks the health of all application containers
+// and restarts the unhealthy ones
+func checkContainerHealth() {
+	apps := fetchAllApplicationNames()
+	for _, app := range apps {
+		containerStatus, err := docker.InspectContainerHealth(app)
+		if err != nil {
+			utils.LogError("AppMaker-Monitor-9", err)
+			continue
+		}
+		// Only containers reported as unhealthy are restarted
+		if containerStatus != docker.Container_Unhealthy {
+			continue
+		}
+		utils.Log("AppMaker-Monitor-10", fmt.Sprintf("Container %s is unhealthy, restarting", app), utils.ErrorTAG)
+		if err := docker.ContainerRestart(app); err != nil {
+			utils.LogError("AppMaker-Monitor-11", err)
+		}
+	}
+}
+
+// ScheduleHealthCheck runs checkContainerHealth repeatedly at the interval
+// given by the AppMaker HealthInterval configuration (in seconds)
+func ScheduleHealthCheck() {
+	interval := configs.ServiceConfig.AppMaker.HealthInterval * time.Second
+	scheduler := utils.NewScheduler(interval, checkContainerHealth)
+	scheduler.RunAsync()
+}