Skip to content

Commit

Permalink
Allow reporting cpu physical core count.
Browse files Browse the repository at this point in the history
cpu.Info() reports one entry per logical cpu, so the list
will be double in length when hyperthreading
is enabled. This difference also scales the reported cpu utilization.

New config property: cpu.stats.physicalcore.enable
Default set to false to allow opt-in usage.
When set to true domainmgr will:
	Take the last cpu.InfoStat's CoreID
	Add 1 (since CoreID starts at 0)
	Set HostMemory.Ncpus to that value.

Signed-off-by: Andrew Durbin <[email protected]>
  • Loading branch information
andrewd-zededa committed Oct 25, 2024
1 parent 8e7e500 commit 82fa369
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/CONFIG-PROPERTIES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
| ---- | ---- | ------- | ----------- |
| app.allow.vnc | boolean | false (only local access) | allow access to EVE's VNC ports from external IPs |
| app.fml.resolution | string | notset | Set system-wide value of forced resolution for applications running in FML mode, it can be one of [predefined](/pkg/pillar/types/global.go) FmlResolution* values. |
| cpu.stats.physicalcore.enable | boolean | false | Report Ncpus as physical cores instead of HyperThread/SMT cores |
| timer.config.interval | integer in seconds | 60 | how frequently device gets config |
| timer.cert.interval | integer in seconds | 1 day (24*3600) | how frequently device checks for new controller certificates |
| timer.metric.interval | integer in seconds | 60 | how frequently device reports metrics |
Expand Down
4 changes: 3 additions & 1 deletion pkg/pillar/cmd/domainmgr/domainmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ type domainContext struct {
setInitialVgaAccess bool
consoleAccess bool
setInitialConsoleAccess bool
reportPhyCores bool

GCInitialized bool
domainBootRetryTime uint32 // In seconds
Expand Down Expand Up @@ -543,7 +544,7 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar
var resources types.HostMemory
for i := 0; true; i++ {
delay := 10
resources, err = hyper.GetHostCPUMem()
resources, err = hyper.GetHostCPUMem(domainCtx.reportPhyCores)
if err == nil {
break
}
Expand Down Expand Up @@ -2639,6 +2640,7 @@ func handleGlobalConfigImpl(ctxArg interface{}, key string,
ctx.metricInterval = metricInterval
}
ctx.processCloudInitMultiPart = gcp.GlobalValueBool(types.ProcessCloudInitMultiPart)
ctx.reportPhyCores = gcp.GlobalValueBool(types.CpuStatsPhysicalCoreEnable)
ctx.GCInitialized = true
}
log.Functionf("handleGlobalConfigImpl done for %s. "+
Expand Down
5 changes: 3 additions & 2 deletions pkg/pillar/cmd/domainmgr/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ package domainmgr

import (
"fmt"
"github.com/lf-edge/eve/pkg/pillar/hypervisor"
"time"

"github.com/lf-edge/eve/pkg/pillar/hypervisor"

"github.com/lf-edge/eve/pkg/pillar/flextimer"
"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/shirou/gopsutil/cpu"
Expand Down Expand Up @@ -93,7 +94,7 @@ func logWatermarks(ctx *domainContext, status *types.DomainStatus, dm *types.Dom

func getAndPublishMetrics(ctx *domainContext, hyper hypervisor.Hypervisor) {
dmList, _ := hyper.GetDomsCPUMem()
hm, err := hyper.GetHostCPUMem()
hm, err := hyper.GetHostCPUMem(ctx.reportPhyCores)
if err != nil {
log.Errorf("Cannot obtain HostCPUMem: %s", err)
return
Expand Down
4 changes: 2 additions & 2 deletions pkg/pillar/hypervisor/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ func (ctx ctrdContext) PCISameController(id1 string, id2 string) bool {
return types.PCISameController(id1, id2)
}

func (ctx ctrdContext) GetHostCPUMem() (types.HostMemory, error) {
return selfDomCPUMem()
func (ctx ctrdContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) {
return selfDomCPUMem(reportPhyCores)
}

const nanoSecToSec uint64 = 1000000000
Expand Down
19 changes: 17 additions & 2 deletions pkg/pillar/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"

"github.com/lf-edge/eve/pkg/pillar/base"
Expand All @@ -33,7 +34,7 @@ type Hypervisor interface {
PCIRelease(string) error
PCISameController(string, string) bool

GetHostCPUMem() (types.HostMemory, error)
GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error)
GetDomsCPUMem() (map[string]types.DomainMetric, error)

GetCapabilities() (*types.Capabilities, error)
Expand Down Expand Up @@ -111,7 +112,7 @@ func GetAvailableHypervisors() (all []string, enabled []string) {
return
}

func selfDomCPUMem() (types.HostMemory, error) {
func selfDomCPUMem(reportPhysCores bool) (types.HostMemory, error) {
hm := types.HostMemory{}
vm, err := mem.VirtualMemory()
if err != nil {
Expand Down Expand Up @@ -139,6 +140,20 @@ func selfDomCPUMem() (types.HostMemory, error) {
return hm, err
}
hm.Ncpus = uint32(len(info))
if reportPhysCores {
if len(info) < 1 {
return hm, nil
}
// The list should be ordered so that CoreIds for a 4 core / 8 thread
// CPU would be eg. 0,1,2,3,0,1,2,3. Pull the last entry:
lastInfoStat := info[len(info)-1]
val, err := strconv.ParseInt(lastInfoStat.CoreID, 10, 32)
if err != nil {
return hm, err
}
// Account for coreid 0
hm.Ncpus = uint32(val) + 1
}
return hm, nil
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/pillar/hypervisor/null.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ func (ctx nullContext) PCISameController(id1 string, id2 string) bool {
return types.PCISameController(id1, id2)
}

func (ctx nullContext) GetHostCPUMem() (types.HostMemory, error) {
return selfDomCPUMem()
func (ctx nullContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) {
return selfDomCPUMem(reportPhyCores)
}

func (ctx nullContext) GetDomsCPUMem() (map[string]types.DomainMetric, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/pillar/hypervisor/xen.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ func (ctx xenContext) PCISameController(id1 string, id2 string) bool {
return false
}

func (ctx xenContext) GetHostCPUMem() (types.HostMemory, error) {
func (ctx xenContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) {
hm := types.HostMemory{}
ctrdSystemCtx, done := ctx.ctrdClient.CtrNewSystemServicesCtx()
defer done()
Expand Down
3 changes: 3 additions & 0 deletions pkg/pillar/types/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ const (
EnableARPSnoop GlobalSettingKey = "network.switch.enable.arpsnoop"
// WwanQueryVisibleProviders : periodically query visible cellular service providers
WwanQueryVisibleProviders GlobalSettingKey = "wwan.query.visible.providers"
// CpuStatsPhysicalCoreEnable: report Ncpus as Physical Cores instead of Hyperthread/SMT
CpuStatsPhysicalCoreEnable GlobalSettingKey = "cpu.stats.physicalcore.enable"

Check failure on line 260 in pkg/pillar/types/global.go

View workflow job for this annotation

GitHub Actions / yetus

revive: const CpuStatsPhysicalCoreEnable should be CPUStatsPhysicalCoreEnable https://revive.run/r#var-naming

// TriState Items
// NetworkFallbackAnyEth global setting key
Expand Down Expand Up @@ -947,6 +949,7 @@ func NewConfigItemSpecMap() ConfigItemSpecMap {
configItemSpecMap.AddBoolItem(EnableARPSnoop, true)
configItemSpecMap.AddBoolItem(WwanQueryVisibleProviders, false)
configItemSpecMap.AddBoolItem(NetworkLocalLegacyMACAddress, false)
configItemSpecMap.AddBoolItem(CpuStatsPhysicalCoreEnable, false)

// Add TriState Items
configItemSpecMap.AddTriStateItem(NetworkFallbackAnyEth, TS_DISABLED)
Expand Down

0 comments on commit 82fa369

Please sign in to comment.