diff options
author | Franck Cuny <franck@fcuny.net> | 2022-06-19 14:37:37 -0700 |
---|---|---|
committer | Franck Cuny <franck@fcuny.net> | 2022-06-19 14:55:29 -0700 |
commit | 2b361e68a6cb0bacfcf8d3a41745689a005e9d7d (patch) | |
tree | 761bd5b0d68fb6302e3deb8e810045e5c651acf7 /tools | |
parent | feat(tools/scheddomain): add a tool to report on scheduler domains (diff) | |
download | world-2b361e68a6cb0bacfcf8d3a41745689a005e9d7d.tar.gz |
feat(tools/schedlatency): add a tool to report scheduler latency
Given a PID, the tool reports the scheduler latency for it. Change-Id: I52e1de81a91f53ac74734dfd808fd4d7ac00c685 Reviewed-on: https://cl.fcuny.net/c/world/+/452 Tested-by: CI Reviewed-by: Franck Cuny <franck@fcuny.net>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/schedlatency/go.mod | 3 | ||||
-rw-r--r-- | tools/schedlatency/main.go | 255 |
2 files changed, 258 insertions, 0 deletions
diff --git a/tools/schedlatency/go.mod b/tools/schedlatency/go.mod new file mode 100644 index 0000000..9a073ac --- /dev/null +++ b/tools/schedlatency/go.mod @@ -0,0 +1,3 @@ +module golang.fcuny.net/schedlatency + +go 1.17 diff --git a/tools/schedlatency/main.go b/tools/schedlatency/main.go new file mode 100644 index 0000000..0522026 --- /dev/null +++ b/tools/schedlatency/main.go @@ -0,0 +1,255 @@ +package main + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "strconv" + "strings" + "time" +) + +type SchedStat struct { + Pid int `json:"pid"` + RunTicks int `json:"run_ticks"` + WaitTicks int `json:"wait_ticks"` + SlicesRan int `json:"ran_slices"` + AverageRun float64 `json:"avg_run"` + AverageWait float64 `json:"avg_wait"` +} + +func usage() { + fmt.Fprintf(os.Stderr, "usage: %s <pid>\n", os.Args[0]) +} + +func main() { + if len(os.Args) == 1 { + usage() + os.Exit(1) + } + + input := os.Args[1] + pid, err := strconv.Atoi(input) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to convert %s to a PID: %v", input, err) + os.Exit(1) + } + + p := Proc{ + PID: pid, + } + oran := 0 + owait_ticks := 0 + orun_ticks := 0 + for { + stat, err := p.SchedStat() + if err != nil { + fmt.Fprintf(os.Stderr, "failed to get schedstat for %d: %v\n", p.PID, err) + os.Exit(1) + } + diff := stat.SlicesRan - oran + var avgrun, avgwait float64 + + if diff > 0 { + avgrun = float64((stat.RunTicks - orun_ticks) / diff) + avgwait = float64((stat.WaitTicks - owait_ticks) / diff) + } else { + avgrun = 0 + avgwait = 0 + } + + stat.AverageRun = avgrun + stat.AverageWait = avgwait + + out, err := json.Marshal(stat) + if err != nil { + fmt.Fprintln(err) + os.Exit(1) + } + fmt.Println(string(out)) + oran = stat.SlicesRan + orun_ticks = stat.RunTicks + owait_ticks = stat.WaitTicks + time.Sleep(5 * time.Second) + } +} + +// This the the path that contains the scheduler statistics. +// Note that they are not populated unless the value for +// /proc/sys/kernel/sched_schedstats is 1 +const procSchedStat = "/proc/schedstat" + +var idleness = []string{"idle", "busy", "newlyIdle"} + +type ProcSchedStat struct { + RunTicks int `json:"run_ticks"` + WaitTicks int `json:"wait_ticks"` + SlicesRan int `json:"ran_slices"` + AverageRun float64 `json:"avg_run"` + AverageWait float64 `json:"avg_wait"` +} + +// SchedCPUStat contains the load balancer statistics for a CPU. +type SchedCPUStat struct { + YieldCount uint64 `json:"yield_count"` + SchedulerCount uint64 `json:"sched_count"` + SchedulerGoIdle uint64 `json:"sched_go_idle"` + TryToWakeUp uint64 `json:"try_to_wake"` + TryToWakeUpLocal uint64 `json:"try_to_wake_local"` + Running uint64 `json:"running"` + Waiting uint64 `json:"waiting"` + Slices uint64 `json:"slices"` + Domains map[string]SchedDomain `json:"domains"` +} + +// SchedLoadBalance contains the load balancer statistics for a domain +// in a given domain. +type SchedLoadBalance struct { + LBCount uint64 `json:"lb_count"` + LBBalanced uint64 `json:"lb_balanced"` + LBFailed uint64 `json:"lb_failed"` + LBImbalanced uint64 `json:"lb_imbalanced"` + LBGained uint64 `json:"lb_gained"` + LBHotGain uint64 `json:"lb_hot_gain"` + LBNoBusyQueue uint64 `json:"lb_no_busy_queue"` + LBNoBusyGroup uint64 `json:"lb_no_busy_group"` +} + +// SchedDomain contains the statistics for a domain. +type SchedDomain struct { + LoadBalancers map[string]SchedLoadBalance `json:"lbs"` + ActiveLoadBalanceCount uint64 `json:"active_lb_count"` + ActiveLoadBalanceFailed uint64 `json:"active_lb_failed"` + ActiveLoadBalancePushed uint64 `json:"active_lb_pushed"` + TryToWakeUpRemote uint64 `json:"try_to_wake_up_remote"` + TryToWakeUpMoveAffine uint64 `json:"try_to_wake_up_move_affine"` + TryToWakeUpMoveBalance uint64 `json:"try_to_wake_up_move_balance"` +} + +// Proc provides information about a running process. +type Proc struct { + // The process ID. + PID int +} + +// SchedStat returns scheduler statistics for the process. +// The information available are: +// 1. time spent on the cpu +// 2. time spent waiting on a runqueue +// 3. # of timeslices run on this cpu +// +func (p Proc) SchedStat() (ProcSchedStat, error) { + path := fmt.Sprintf("/proc/%d/schedstat", p.PID) + b, err := ioutil.ReadFile(path) + if err != nil { + return ProcSchedStat{}, err + } + content := string(b) + stats := strings.Fields(content) + + run_ticks, err := strconv.Atoi(stats[0]) + if err != nil { + return ProcSchedStat{}, err + } + + wait_ticks, err := strconv.Atoi(stats[1]) + if err != nil { + return ProcSchedStat{}, err + } + + nran, err := strconv.Atoi(stats[2]) + if err != nil { + return ProcSchedStat{}, err + } + + stat := ProcSchedStat{ + RunTicks: run_ticks, + WaitTicks: wait_ticks, + SlicesRan: nran, + } + return stat, nil +} + +// ReadSchedstat returns statistics from the scheduler. +// Information about the statistics can be found at +// https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html. +func ReadSchedStat() (map[string]SchedCPUStat, error) { + b, err := ioutil.ReadFile(procSchedStat) + if err != nil { + return nil, fmt.Errorf("procfs: failed to open %s: %v", procSchedStat, err) + } + content := string(b) + + cpus := map[string]SchedCPUStat{} + + lines := strings.Split(content, "\n") + + var currentCpu string + + // The first line is the version of the stats + // TODO(fcuny): we should check which version is used, because the + // format changes. + for _, line := range lines[2:] { + // The format is as follow: + // cpu<N> 1 2 3 4 5 6 7 8 9 + // domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 + if strings.HasPrefix(line, "cpu") { + // meaning of the fields: https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html#cpu-statistics + fields := strings.Fields(line) + cpuStat := SchedCPUStat{ + YieldCount: convertField(fields[1]), + SchedulerCount: convertField(fields[3]), + SchedulerGoIdle: convertField(fields[4]), + TryToWakeUp: convertField(fields[5]), + TryToWakeUpLocal: convertField(fields[6]), + Running: convertField(fields[7]), + Waiting: convertField(fields[8]), + Slices: convertField(fields[9]), + Domains: map[string]SchedDomain{}, + } + currentCpu = fields[0] + cpus[currentCpu] = cpuStat + } else if strings.HasPrefix(line, "domain") { + // meaning of the fields: https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html#domain-statistics + fields := strings.Fields(line) + i := 2 + lbs := map[string]SchedLoadBalance{} + for _, idle := range idleness { + lb := SchedLoadBalance{ + LBCount: convertField(fields[i]), + LBBalanced: convertField(fields[i+1]), + LBFailed: convertField(fields[i+2]), + LBImbalanced: convertField(fields[i+3]), + LBGained: convertField(fields[i+4]), + LBHotGain: convertField(fields[i+5]), + LBNoBusyQueue: convertField(fields[i+6]), + LBNoBusyGroup: convertField(fields[i+7]), + } + i = i + 8 + lbs[idle] = lb + } + domain := SchedDomain{ + LoadBalancers: lbs, + ActiveLoadBalanceCount: convertField(fields[26]), + ActiveLoadBalanceFailed: convertField(fields[27]), + ActiveLoadBalancePushed: convertField(fields[28]), + TryToWakeUpRemote: convertField(fields[35]), + TryToWakeUpMoveAffine: convertField(fields[36]), + TryToWakeUpMoveBalance: convertField(fields[37]), + } + c := cpus[currentCpu] + c.Domains[fields[0]] = domain + cpus[currentCpu] = c + } + } + return cpus, nil +} + +func convertField(field string) uint64 { + val, err := strconv.ParseUint(field, 10, 64) + if err != nil { + return 0 + } + return val +} |