about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
author Franck Cuny <franck@fcuny.net> 2022-06-19 14:37:37 -0700
committer Franck Cuny <franck@fcuny.net> 2022-06-19 14:55:29 -0700
commit 2b361e68a6cb0bacfcf8d3a41745689a005e9d7d (patch)
tree 761bd5b0d68fb6302e3deb8e810045e5c651acf7 /tools
parent feat(tools/scheddomain): add a tool to report on scheduler domains (diff)
download world-2b361e68a6cb0bacfcf8d3a41745689a005e9d7d.tar.gz
feat(tools/schedlatency): add a tool to report scheduler latency
Given a PID, the tool reports the scheduler latency for it.

Change-Id: I52e1de81a91f53ac74734dfd808fd4d7ac00c685
Reviewed-on: https://cl.fcuny.net/c/world/+/452
Tested-by: CI
Reviewed-by: Franck Cuny <franck@fcuny.net>
Diffstat (limited to '')
-rw-r--r-- tools/schedlatency/go.mod 3
-rw-r--r-- tools/schedlatency/main.go 255
2 files changed, 258 insertions, 0 deletions
diff --git a/tools/schedlatency/go.mod b/tools/schedlatency/go.mod
new file mode 100644
index 0000000..9a073ac
--- /dev/null
+++ b/tools/schedlatency/go.mod
@@ -0,0 +1,3 @@
+module golang.fcuny.net/schedlatency
+
+go 1.17
diff --git a/tools/schedlatency/main.go b/tools/schedlatency/main.go
new file mode 100644
index 0000000..0522026
--- /dev/null
+++ b/tools/schedlatency/main.go
@@ -0,0 +1,255 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// SchedStat describes the scheduler statistics of a process together
+// with the PID they belong to. Nothing in the code visible here
+// constructs this type; Proc.SchedStat returns a ProcSchedStat instead.
+type SchedStat struct {
+	Pid         int     `json:"pid"`
+	RunTicks    int     `json:"run_ticks"`
+	WaitTicks   int     `json:"wait_ticks"`
+	SlicesRan   int     `json:"ran_slices"`
+	AverageRun  float64 `json:"avg_run"`
+	AverageWait float64 `json:"avg_wait"`
+}
+
+// usage writes a one-line description of the expected command-line
+// arguments to standard error, using the name the program was invoked as.
+func usage() {
+	prog := os.Args[0]
+	fmt.Fprintf(os.Stderr, "usage: %s <pid>\n", prog)
+}
+
+// main reads a PID from the first command-line argument and then, every
+// five seconds, prints one JSON object with the scheduler statistics of
+// that process. AverageRun and AverageWait are computed over the
+// timeslices that ran since the previous sample; the first sample is
+// averaged over everything the counters have accumulated so far. The
+// loop only ends, with exit status 1, when the statistics cannot be read
+// or encoded.
+func main() {
+	if len(os.Args) < 2 {
+		usage()
+		os.Exit(1)
+	}
+
+	input := os.Args[1]
+	pid, err := strconv.Atoi(input)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "failed to convert %s to a PID: %v\n", input, err)
+		os.Exit(1)
+	}
+
+	p := Proc{PID: pid}
+
+	// Counters of the previous sample, used to compute per-interval deltas.
+	var prevSlices, prevRunTicks, prevWaitTicks int
+	for {
+		stat, err := p.SchedStat()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "failed to get schedstat for %d: %v\n", p.PID, err)
+			os.Exit(1)
+		}
+
+		// The averages stay at zero when no new timeslice ran, which also
+		// avoids dividing by zero.
+		var avgRun, avgWait float64
+		if ran := stat.SlicesRan - prevSlices; ran > 0 {
+			// Convert to float64 before dividing: an integer division
+			// would drop the fractional part of the average.
+			avgRun = float64(stat.RunTicks-prevRunTicks) / float64(ran)
+			avgWait = float64(stat.WaitTicks-prevWaitTicks) / float64(ran)
+		}
+		stat.AverageRun = avgRun
+		stat.AverageWait = avgWait
+
+		out, err := json.Marshal(stat)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			os.Exit(1)
+		}
+		fmt.Println(string(out))
+
+		prevSlices = stat.SlicesRan
+		prevRunTicks = stat.RunTicks
+		prevWaitTicks = stat.WaitTicks
+		time.Sleep(5 * time.Second)
+	}
+}
+
+// procSchedStat is the path that contains the scheduler statistics.
+// Note that they are not populated unless the value for
+// /proc/sys/kernel/sched_schedstats is 1.
+const procSchedStat = "/proc/schedstat"
+
+// idleness lists the idleness states of the load balancer statistics, in
+// the order in which ReadSchedStat reads their groups of fields from a
+// domain<N> line. The names are used as keys of SchedDomain.LoadBalancers.
+var idleness = []string{"idle", "busy", "newlyIdle"}
+
+// ProcSchedStat holds the scheduler statistics of a single process, as
+// returned by Proc.SchedStat.
+type ProcSchedStat struct {
+	// RunTicks, WaitTicks and SlicesRan are the first three fields of
+	// /proc/<pid>/schedstat, in that order.
+	RunTicks    int     `json:"run_ticks"`
+	WaitTicks   int     `json:"wait_ticks"`
+	SlicesRan   int     `json:"ran_slices"`
+	// AverageRun and AverageWait are not set by Proc.SchedStat; main
+	// fills them in from the difference between two samples.
+	AverageRun  float64 `json:"avg_run"`
+	AverageWait float64 `json:"avg_wait"`
+}
+
+// SchedCPUStat contains the scheduler statistics parsed from one cpu<N>
+// line of /proc/schedstat, along with the statistics of the domain<N>
+// lines listed under that CPU (Domains, keyed by domain name).
+type SchedCPUStat struct {
+	YieldCount       uint64                 `json:"yield_count"`
+	SchedulerCount   uint64                 `json:"sched_count"`
+	SchedulerGoIdle  uint64                 `json:"sched_go_idle"`
+	TryToWakeUp      uint64                 `json:"try_to_wake"`
+	TryToWakeUpLocal uint64                 `json:"try_to_wake_local"`
+	Running          uint64                 `json:"running"`
+	Waiting          uint64                 `json:"waiting"`
+	Slices           uint64                 `json:"slices"`
+	Domains          map[string]SchedDomain `json:"domains"`
+}
+
+// SchedLoadBalance contains the load balancer statistics of a domain
+// for one idleness state (see idleness). ReadSchedStat fills it from 8
+// consecutive fields of a domain<N> line.
+type SchedLoadBalance struct {
+	LBCount       uint64 `json:"lb_count"`
+	LBBalanced    uint64 `json:"lb_balanced"`
+	LBFailed      uint64 `json:"lb_failed"`
+	LBImbalanced  uint64 `json:"lb_imbalanced"`
+	LBGained      uint64 `json:"lb_gained"`
+	LBHotGain     uint64 `json:"lb_hot_gain"`
+	LBNoBusyQueue uint64 `json:"lb_no_busy_queue"`
+	LBNoBusyGroup uint64 `json:"lb_no_busy_group"`
+}
+
+// SchedDomain contains the statistics parsed from one domain<N> line of
+// /proc/schedstat. LoadBalancers is keyed by idleness state ("idle",
+// "busy", "newlyIdle").
+type SchedDomain struct {
+	LoadBalancers           map[string]SchedLoadBalance `json:"lbs"`
+	ActiveLoadBalanceCount  uint64                      `json:"active_lb_count"`
+	ActiveLoadBalanceFailed uint64                      `json:"active_lb_failed"`
+	ActiveLoadBalancePushed uint64                      `json:"active_lb_pushed"`
+	TryToWakeUpRemote       uint64                      `json:"try_to_wake_up_remote"`
+	TryToWakeUpMoveAffine   uint64                      `json:"try_to_wake_up_move_affine"`
+	TryToWakeUpMoveBalance  uint64                      `json:"try_to_wake_up_move_balance"`
+}
+
+// Proc provides information about a running process, identified by its
+// PID. SchedStat reads /proc/<PID>/schedstat for it.
+type Proc struct {
+	// PID is the process ID.
+	PID int
+}
+
+// SchedStat returns scheduler statistics for the process.
+// The information available are:
+// 1. time spent on the cpu
+// 2. time spent waiting on a runqueue
+// 3. # of timeslices run on this cpu
+//
+func (p Proc) SchedStat() (ProcSchedStat, error) {
+	path := fmt.Sprintf("/proc/%d/schedstat", p.PID)
+	b, err := ioutil.ReadFile(path)
+	if err != nil {
+		return ProcSchedStat{}, err
+	}
+	content := string(b)
+	stats := strings.Fields(content)
+
+	run_ticks, err := strconv.Atoi(stats[0])
+	if err != nil {
+		return ProcSchedStat{}, err
+	}
+
+	wait_ticks, err := strconv.Atoi(stats[1])
+	if err != nil {
+		return ProcSchedStat{}, err
+	}
+
+	nran, err := strconv.Atoi(stats[2])
+	if err != nil {
+		return ProcSchedStat{}, err
+	}
+
+	stat := ProcSchedStat{
+		RunTicks:  run_ticks,
+		WaitTicks: wait_ticks,
+		SlicesRan: nran,
+	}
+	return stat, nil
+}
+
+// ReadSchedStat returns the statistics from /proc/schedstat, keyed by
+// CPU name ("cpu<N>"). Each entry carries the counters of its cpu<N>
+// line and, under Domains, one entry per domain<N> line that follows it.
+// Information about the statistics can be found at
+// https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html.
+//
+// An error is returned when the file cannot be read, when it is shorter
+// than its two header lines, when a domain line appears before any cpu
+// line, or when a cpu or domain line has fewer fields than expected.
+// Individual fields that are not valid unsigned integers are reported
+// as 0 (see convertField).
+func ReadSchedStat() (map[string]SchedCPUStat, error) {
+	b, err := ioutil.ReadFile(procSchedStat)
+	if err != nil {
+		return nil, fmt.Errorf("procfs: failed to open %s: %w", procSchedStat, err)
+	}
+
+	lines := strings.Split(string(b), "\n")
+	if len(lines) < 2 {
+		return nil, fmt.Errorf("procfs: %s: missing header lines", procSchedStat)
+	}
+
+	cpus := map[string]SchedCPUStat{}
+	var currentCPU string
+
+	// The first two lines are skipped; the kernel documentation describes
+	// them as the version and the timestamp.
+	// TODO(fcuny): we should check which version is used, because the
+	// format changes.
+	for _, line := range lines[2:] {
+		// The format is as follows:
+		// cpu<N> 1 2 3 4 5 6 7 8 9
+		// domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+		fields := strings.Fields(line)
+		switch {
+		case strings.HasPrefix(line, "cpu"):
+			stat, err := parseSchedCPUFields(fields)
+			if err != nil {
+				return nil, fmt.Errorf("procfs: %s: %w", procSchedStat, err)
+			}
+			currentCPU = fields[0]
+			cpus[currentCPU] = stat
+		case strings.HasPrefix(line, "domain"):
+			// A domain belongs to the most recent cpu line; without one
+			// there is no Domains map to store it in.
+			cpu, ok := cpus[currentCPU]
+			if !ok {
+				return nil, fmt.Errorf("procfs: %s: %s appears before any cpu line", procSchedStat, fields[0])
+			}
+			domain, err := parseSchedDomainFields(fields)
+			if err != nil {
+				return nil, fmt.Errorf("procfs: %s: %w", procSchedStat, err)
+			}
+			cpu.Domains[fields[0]] = domain
+		}
+	}
+	return cpus, nil
+}
+
+// parseSchedCPUFields converts the fields of a cpu<N> line into a
+// SchedCPUStat with an empty Domains map.
+// Meaning of the fields: https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html#cpu-statistics
+func parseSchedCPUFields(fields []string) (SchedCPUStat, error) {
+	// cpu<N> followed by 9 counters.
+	const want = 10
+	if len(fields) < want {
+		return SchedCPUStat{}, fmt.Errorf("cpu line has %d fields, want at least %d", len(fields), want)
+	}
+	// fields[2] is intentionally not read.
+	return SchedCPUStat{
+		YieldCount:       convertField(fields[1]),
+		SchedulerCount:   convertField(fields[3]),
+		SchedulerGoIdle:  convertField(fields[4]),
+		TryToWakeUp:      convertField(fields[5]),
+		TryToWakeUpLocal: convertField(fields[6]),
+		Running:          convertField(fields[7]),
+		Waiting:          convertField(fields[8]),
+		Slices:           convertField(fields[9]),
+		Domains:          map[string]SchedDomain{},
+	}, nil
+}
+
+// parseSchedDomainFields converts the fields of a domain<N> line into a
+// SchedDomain.
+// Meaning of the fields: https://www.kernel.org/doc/html/latest/scheduler/sched-stats.html#domain-statistics
+func parseSchedDomainFields(fields []string) (SchedDomain, error) {
+	// domain<N>, the cpumask, then 36 counters.
+	const want = 38
+	if len(fields) < want {
+		return SchedDomain{}, fmt.Errorf("domain line has %d fields, want at least %d", len(fields), want)
+	}
+
+	lbs := make(map[string]SchedLoadBalance, len(idleness))
+	for n, idle := range idleness {
+		// Each idleness state owns 8 consecutive counters; the first
+		// group starts right after the cpumask, at fields[2].
+		base := 2 + 8*n
+		lbs[idle] = SchedLoadBalance{
+			LBCount:       convertField(fields[base]),
+			LBBalanced:    convertField(fields[base+1]),
+			LBFailed:      convertField(fields[base+2]),
+			LBImbalanced:  convertField(fields[base+3]),
+			LBGained:      convertField(fields[base+4]),
+			LBHotGain:     convertField(fields[base+5]),
+			LBNoBusyQueue: convertField(fields[base+6]),
+			LBNoBusyGroup: convertField(fields[base+7]),
+		}
+	}
+
+	return SchedDomain{
+		LoadBalancers:           lbs,
+		ActiveLoadBalanceCount:  convertField(fields[26]),
+		ActiveLoadBalanceFailed: convertField(fields[27]),
+		ActiveLoadBalancePushed: convertField(fields[28]),
+		TryToWakeUpRemote:       convertField(fields[35]),
+		TryToWakeUpMoveAffine:   convertField(fields[36]),
+		TryToWakeUpMoveBalance:  convertField(fields[37]),
+	}, nil
+}
+
+// convertField parses field as a base-10 unsigned 64-bit integer.
+// A field that fails to parse (empty, non-numeric, negative or out of
+// range) yields 0; the parse error is discarded.
+func convertField(field string) uint64 {
+	if n, err := strconv.ParseUint(field, 10, 64); err == nil {
+		return n
+	}
+	return 0
+}