Skip to content

Commit

Permalink
adds functionality to check and respawn application containers on fai…
Browse files Browse the repository at this point in the history
…lure

Signed-off-by: Darshan Kumar <[email protected]>
Co-authored-by: Ayush Patel <[email protected]>
Co-authored-by: Navin KUmar <[email protected]>
  • Loading branch information
3 people committed Feb 7, 2024
1 parent 870cc59 commit e4727ca
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 0 deletions.
2 changes: 2 additions & 0 deletions config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ port = 4000
# Time Interval (in seconds) in which metrics of all application containers
# running in the current node are collected and stored in the central mongoDB database
metrics_interval = 600
# Time Interval (in seconds) at which the health of all application containers is checked; unhealthy containers are restarted
health_interval = 300
# Hard Limits the total number of app instances that can be deployed by an user
# Set app_limit = -1 if no hard limit is to be imposed
app_limit = 10
Expand Down
1 change: 1 addition & 0 deletions configs/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type GenericService struct {
// AppMakerService holds the AppMaker settings decoded from the TOML
// configuration, on top of the fields of the embedded GenericService.
type AppMakerService struct {
GenericService
// MetricsInterval is read from the `metrics_interval` key. The sample
// configuration describes it as the interval, in seconds, at which metrics of
// the application containers are collected.
MetricsInterval time.Duration `toml:"metrics_interval"`
// HealthInterval is read from the `health_interval` key. It holds a plain
// number of seconds: ScheduleHealthCheck multiplies it by time.Second before
// handing it to the scheduler.
HealthInterval time.Duration `toml:"health_interval"`
// AppLimit is read from the `app_limit` key. The sample configuration
// describes it as the hard limit on app instances per user, with -1 meaning
// no limit.
AppLimit int `toml:"app_limit"`
}

Expand Down
8 changes: 8 additions & 0 deletions lib/docker/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,11 @@ func ContainerStats(containerID string) (*types.Stats, error) {
err = json.Unmarshal(body, containerStatsInterface)
return containerStatsInterface, err
}

// ContainerRestart asks the Docker client to restart the container identified
// by containerID. No explicit timeout is supplied (nil is passed), and any
// error reported by the client is returned unchanged.
func ContainerRestart(containerID string) error {
	return cli.ContainerRestart(context.Background(), containerID, nil)
}


15 changes: 15 additions & 0 deletions lib/docker/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ import (
"golang.org/x/net/context"
)

const (
// Health status strings, for comparison with the value returned by
// InspectContainerHealth.

// Container_Healthy is the status string for a healthy container.
Container_Healthy = "healthy"
// Container_Unhealthy is the status string for an unhealthy container.
Container_Unhealthy = "unhealthy"
)
// InspectContainerState returns the state of the container using the containerID
func InspectContainerState(containerID string) (*dockerTypes.ContainerState, error) {
ctx := context.Background()
Expand All @@ -14,3 +19,13 @@ func InspectContainerState(containerID string) (*dockerTypes.ContainerState, err
}
return containerStatus.ContainerJSONBase.State, nil
}

// InspectContainerHealth returns the health status that Docker reports for the
// container identified by containerID (for example Container_Healthy or
// Container_Unhealthy).
//
// An empty status with a nil error is returned when the inspect response
// carries no health information, which is the case for containers that have
// no health check configured. A non-nil error is returned only when the
// inspect call itself fails.
func InspectContainerHealth(containerID string) (string, error) {
	ctx := context.Background()
	info, err := cli.ContainerInspect(ctx, containerID)
	if err != nil {
		return "", err
	}
	// The base, State and Health fields are all pointers; Health in particular
	// is omitted by Docker when the container defines no health check, so
	// dereferencing it blindly would panic.
	if info.ContainerJSONBase == nil || info.State == nil || info.State.Health == nil {
		return "", nil
	}
	return info.State.Health.Status, nil
}
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ func initMaster() {
// initAppMaker launches the AppMaker background jobs (metrics collection and
// container health checks), each in its own goroutine. It does nothing when
// the AppMaker service is not configured to be deployed.
func initAppMaker() {
	if !configs.ServiceConfig.AppMaker.Deploy {
		return
	}
	go appmaker.ScheduleMetricsCollection()
	go appmaker.ScheduleHealthCheck()
}

Expand Down
11 changes: 11 additions & 0 deletions services/appmaker/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,14 @@ func stateCleanup(appName string) {
utils.LogError("AppMaker-Helper-4", err)
}
}

// fetchAllApplicationNames returns the names of all documents in
// mongo.InstanceCollection whose instance type is mongo.AppInstance.
//
// Documents whose name field is missing or is not a string are logged and
// skipped; an unchecked type assertion on such a document would panic and
// take down the goroutine running the caller.
func fetchAllApplicationNames() []string {
	apps := mongo.FetchDocs(mongo.InstanceCollection, types.M{
		mongo.InstanceTypeKey: mongo.AppInstance,
	})
	var appNames []string
	for _, app := range apps {
		name, ok := app[mongo.NameKey].(string)
		if !ok {
			utils.Log("AppMaker-Helper-5", "Skipping application document without a valid name", utils.ErrorTAG)
			continue
		}
		appNames = append(appNames, name)
	}
	return appNames
}
26 changes: 26 additions & 0 deletions services/appmaker/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,29 @@ func ScheduleMetricsCollection() {
scheduler := utils.NewScheduler(interval, registerMetrics)
scheduler.RunAsync()
}

// checkContainerHealth inspects the health of every application container and
// restarts the ones reported as unhealthy.
//
// The application names returned by fetchAllApplicationNames are used as the
// container identifiers. A failure to inspect or restart one container is
// logged together with the container name and does not stop the remaining
// containers from being processed.
func checkContainerHealth() {
	for _, app := range fetchAllApplicationNames() {
		status, err := docker.InspectContainerHealth(app)
		if err != nil {
			utils.LogError("AppMaker-Monitor-9", fmt.Errorf("inspecting health of container %s: %w", app, err))
			continue
		}
		// Only an explicit "unhealthy" status triggers a restart; any other
		// status is left alone.
		if status != docker.Container_Unhealthy {
			continue
		}
		utils.Log("AppMaker-Monitor-10", fmt.Sprintf("Container %s is unhealthy, restarting", app), utils.ErrorTAG)
		if err := docker.ContainerRestart(app); err != nil {
			utils.LogError("AppMaker-Monitor-11", fmt.Errorf("restarting container %s: %w", app, err))
		}
	}
}

// ScheduleHealthCheck builds a scheduler that invokes checkContainerHealth at
// the configured health interval (a number of seconds, converted here to a
// time.Duration) and starts it through RunAsync.
func ScheduleHealthCheck() {
	period := time.Second * configs.ServiceConfig.AppMaker.HealthInterval
	healthScheduler := utils.NewScheduler(period, checkContainerHealth)
	healthScheduler.RunAsync()
}

0 comments on commit e4727ca

Please sign in to comment.