about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
authorFranck Cuny <franck@fcuny.net>2022-06-19 14:53:12 -0700
committerFranck Cuny <franck@fcuny.net>2022-06-19 14:55:34 -0700
commita0893edf184aa760236e30e08f0e40154bb405c6 (patch)
treebce090cdf915817199c3f675981075816e2e6aff /tools
parentfeat(tools/schedlatency): add a tool to report scheduler latency (diff)
downloadworld-a0893edf184aa760236e30e08f0e40154bb405c6.tar.gz
feat(tools/numap): add a tool to report NUMA topology of a host
The tool maps the various PCI devices to the NUMA node they are attached
to and prints the result to STDOUT as JSON.

Only ethernet, NVMe and GPU devices are accounted for at the moment.

Change-Id: If32c805e61211f0ef4838a82eabc70d7fc1985fe
Reviewed-on: https://cl.fcuny.net/c/world/+/453
Tested-by: CI
Reviewed-by: Franck Cuny <franck@fcuny.net>
Diffstat (limited to '')
-rw-r--r--tools/numap/README.org48
-rw-r--r--tools/numap/go.mod3
-rw-r--r--tools/numap/internal/hwids/hwids.go148
-rw-r--r--tools/numap/internal/sysfs/parse.go21
-rw-r--r--tools/numap/internal/sysfs/pci.go145
-rw-r--r--tools/numap/numa.go116
-rw-r--r--tools/numap/numap.go31
7 files changed, 512 insertions, 0 deletions
diff --git a/tools/numap/README.org b/tools/numap/README.org
new file mode 100644
index 0000000..5781030
--- /dev/null
+++ b/tools/numap/README.org
@@ -0,0 +1,48 @@
+#+TITLE: numap
+
+Print the NUMA topology of a host.
+
+* Usage
+#+BEGIN_SRC sh
+./numap |jq .
+{
+  "node0": {
+    "name": "node0",
+    "path": "/sys/devices/system/node/node0",
+    "cpulist": "0-19,40-59",
+    "pci_devices": [
+      {
+        "vendor": "Mellanox Technologies",
+        "name": "MT27710 Family [ConnectX-4 Lx]"
+      },
+      {
+        "vendor": "Mellanox Technologies",
+        "name": "MT27710 Family [ConnectX-4 Lx]"
+      }
+    ]
+  },
+  "node1": {
+    "name": "node1",
+    "path": "/sys/devices/system/node/node1",
+    "cpulist": "20-39,60-79",
+    "pci_devices": [
+      {
+        "vendor": "Intel Corporation",
+        "name": "NVMe Datacenter SSD [3DNAND, Beta Rock Controller]"
+      }
+    ]
+  }
+}
+#+END_SRC
+
+The command scans the host to find the NUMA nodes and all the PCI devices, then maps each PCI device back to the NUMA node it is attached to.
+
+It also provides a way to see the list of CPUs attached to the node.
+
+* Limitations
+** Device class
+For now, only the following classes of hardware are reported:
+- NVMe
+- network
+- GPU
+
diff --git a/tools/numap/go.mod b/tools/numap/go.mod
new file mode 100644
index 0000000..92b1885
--- /dev/null
+++ b/tools/numap/go.mod
@@ -0,0 +1,3 @@
+module golang.fcuny.net/numap
+
+go 1.17
diff --git a/tools/numap/internal/hwids/hwids.go b/tools/numap/internal/hwids/hwids.go
new file mode 100644
index 0000000..6aa9d8a
--- /dev/null
+++ b/tools/numap/internal/hwids/hwids.go
@@ -0,0 +1,148 @@
+package hwids
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strings"
+)
+
+// pciPath lists the default locations searched, in order, for the
+// pci.ids hardware database.
+var pciPath = []string{
+	"/usr/share/hwdata/pci.ids",
+	"/usr/share/misc/pci.ids",
+}
+
+// PCIType identifies which kind of pci.ids line a PciDevice entry was
+// built from.
+type PCIType int
+
+const (
+	// PCIVendor marks an entry built from a vendor line (no leading tab).
+	PCIVendor PCIType = iota
+	// PCIDevice marks an entry built from a device line (one leading tab).
+	PCIDevice
+	// PCISubsystem marks an entry built from a subsystem line (two
+	// leading tabs).
+	PCISubsystem
+)
+
+// PciDevices maps a vendor ID to every entry (vendor, device and
+// subsystem) recorded for that vendor.
+type PciDevices map[uint16][]PciDevice
+
+// PciDevice represents a PCI device
+type PciDevice struct {
+	// Type tells which of the fields below are meaningful for this entry.
+	Type                   PCIType
+	// Vendor and Device are the numeric IDs parsed from the vendor and
+	// device lines.
+	Vendor, Device         uint16
+	// SubVendor and SubDevice are the two IDs parsed from a subsystem line.
+	SubVendor, SubDevice   uint16
+	// VendorName and DeviceName are the names found on the vendor and
+	// device lines.
+	VendorName, DeviceName string
+	// SubName is the name found on a subsystem line.
+	SubName                string
+}
+
+// Load loads the hardware database for PCI devices and returns a map
+// of vendor ID -> list of entries for that vendor.
+//
+// The default locations are tried in order. When the environment
+// variable HWDATA is set, its value is tried last as an additional
+// candidate path. The first file that can be opened is parsed and its
+// result (including any parse error) is returned. An error is returned
+// when no candidate can be opened.
+func Load() (PciDevices, error) {
+	// Work on a copy: appending to the package-level slice would make
+	// the list of candidates grow on every call.
+	candidates := append([]string(nil), pciPath...)
+	if extraPath := os.Getenv("HWDATA"); extraPath != "" {
+		candidates = append(candidates, extraPath)
+	}
+
+	for _, f := range candidates {
+		fh, err := os.Open(f)
+		if err != nil {
+			// This candidate is missing or unreadable, try the next one.
+			continue
+		}
+		// The loop never goes past the first file that opens, so the
+		// deferred Close runs right after parse returns.
+		defer fh.Close()
+		return parse(fh)
+	}
+	return PciDevices{}, fmt.Errorf("hwids: could not find a pci.ids file")
+}
+
+// parse reads a pci.ids formatted file and returns every vendor,
+// device and subsystem entry found before the class section, grouped
+// by vendor ID.
+//
+// The format of the file is as follows:
+//
+//	vendor  vendor_name
+//		device  device_name                          <-- single tab
+//			subvendor subdevice  subsystem_name  <-- two tabs
+//
+// Each stored entry also carries the IDs and names of its parents: a
+// device entry has its vendor filled in, a subsystem entry has both
+// its vendor and its device. When a line cannot be parsed, the entries
+// collected so far are returned together with an error.
+func parse(f *os.File) (PciDevices, error) {
+	devices := make(PciDevices)
+
+	// cur keeps track of the current vendor / device while walking the
+	// file, so that nested lines inherit the context of their parents.
+	cur := PciDevice{}
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		l := s.Text()
+		// skip empty lines or lines that are a comment
+		if len(l) == 0 || l[0] == '#' {
+			continue
+		}
+		// lines starting with a C are the classes definitions, and
+		// they are at the end of the file, which means we're done
+		// parsing the devices
+		if l[0] == 'C' {
+			break
+		}
+
+		// IDs and name are separated by two spaces.
+		parts := strings.SplitN(l, "  ", 2)
+		if len(parts) != 2 {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line (missing ID separator): %s", l)
+		}
+
+		ids, name := parts[0], parts[1]
+		if len(ids) < 2 || len(name) == 0 {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line (empty ID or name): %s", l)
+		}
+
+		// The number of leading tabs gives the nesting level.
+		switch {
+		case ids[0] != '\t':
+			cur.Type = PCIVendor
+		case ids[1] == '\t':
+			cur.Type = PCISubsystem
+		default:
+			cur.Type = PCIDevice
+		}
+
+		// Record the IDs and name of the line, and reset whatever was
+		// inherited from a previous, deeper line when moving to a
+		// different vendor or device.
+		var err error
+		switch cur.Type {
+		case PCIVendor:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Vendor)
+			cur.VendorName = name
+			cur.Device, cur.DeviceName = 0, ""
+			cur.SubVendor, cur.SubDevice, cur.SubName = 0, 0, ""
+		case PCIDevice:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Device)
+			cur.DeviceName = name
+			cur.SubVendor, cur.SubDevice, cur.SubName = 0, 0, ""
+		case PCISubsystem:
+			_, err = fmt.Sscanf(ids, "%x %x", &cur.SubVendor, &cur.SubDevice)
+			cur.SubName = name
+		}
+		if err != nil {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line: %s: %w", l, err)
+		}
+
+		// append on a missing key starts from a nil slice, so no
+		// separate "first entry for this vendor" branch is needed.
+		devices[cur.Vendor] = append(devices[cur.Vendor], cur)
+	}
+
+	if err := s.Err(); err != nil {
+		return devices, fmt.Errorf("hwids: failed to read PCI ID line: %w", err)
+	}
+
+	return devices, nil
+}
diff --git a/tools/numap/internal/sysfs/parse.go b/tools/numap/internal/sysfs/parse.go
new file mode 100644
index 0000000..d518653
--- /dev/null
+++ b/tools/numap/internal/sysfs/parse.go
@@ -0,0 +1,21 @@
+package sysfs
+
+import (
+	"io/ioutil"
+	"strconv"
+	"strings"
+)
+
+// ContentUint64 reads the file at path, trims the surrounding
+// whitespace and parses what is left as an unsigned 64-bit integer.
+// The base is picked from the literal's prefix (base 0 for
+// strconv.ParseUint), so "0x15b3" is read as hexadecimal and "42" as
+// decimal. On any failure the returned value is 0.
+func ContentUint64(path string) (uint64, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+
+	text := strings.TrimSpace(string(raw))
+	value, err := strconv.ParseUint(text, 0, 64)
+	if err != nil {
+		// ParseUint can hand back a non-zero value with a range
+		// error, always report 0 alongside an error.
+		return 0, err
+	}
+	return value, nil
+}
diff --git a/tools/numap/internal/sysfs/pci.go b/tools/numap/internal/sysfs/pci.go
new file mode 100644
index 0000000..9e714b1
--- /dev/null
+++ b/tools/numap/internal/sysfs/pci.go
@@ -0,0 +1,145 @@
+package sysfs
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+const (
+	// sysFsPCIDevicesPath is the sysfs directory scanned for PCI
+	// devices; each entry in it is handed to NewPCIDevice.
+	sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
+)
+
+// PCIDevice holds the attributes read from the sysfs directory of one
+// PCI device.
+type PCIDevice struct {
+	// NumaNode is the integer read from the "numa_node" file.
+	NumaNode             int
+	// ID is the name of the device's entry under sysFsPCIDevicesPath.
+	ID                   string
+	// Device and Vendor are read from the "device" and "vendor" files.
+	Device, Vendor       uint64
+	// SubVendor and SubDevice are read from the "subsystem_vendor" and
+	// "subsystem_device" files.
+	SubVendor, SubDevice uint64
+	// Class is read from the "class" file.
+	Class                uint64
+	// MSIs lists the entries of "msi_irqs" whose content is "msix".
+	MSIs                 []int
+}
+
+// ScanPCIDevices builds a PCIDevice for every entry found under
+// sysFsPCIDevicesPath and returns them in directory order.
+//
+// It panics when the directory cannot be listed or when any device
+// cannot be read.
+func ScanPCIDevices() []PCIDevice {
+	entries, err := ioutil.ReadDir(sysFsPCIDevicesPath)
+	if err != nil {
+		panic(err)
+	}
+
+	found := []PCIDevice{}
+	for _, entry := range entries {
+		name := entry.Name()
+		dev, err := NewPCIDevice(filepath.Join(sysFsPCIDevicesPath, name), name)
+		if err != nil {
+			panic(err)
+		}
+		found = append(found, dev)
+	}
+	return found
+}
+
+// getPCIDeviceClass returns the number parsed from the "class" file
+// found under path.
+func getPCIDeviceClass(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "class"))
+}
+
+// getPCIDeviceVendor returns the number parsed from the "vendor" file
+// found under path.
+func getPCIDeviceVendor(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "vendor"))
+}
+
+// getPCIDeviceId returns the number parsed from the "device" file
+// found under path.
+func getPCIDeviceId(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "device"))
+}
+
+// getPCIDeviceSubsystemDevice returns the number parsed from the
+// "subsystem_device" file found under path.
+func getPCIDeviceSubsystemDevice(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "subsystem_device"))
+}
+
+// getPCIDeviceSubsystemVendor returns the number parsed from the
+// "subsystem_vendor" file found under path.
+func getPCIDeviceSubsystemVendor(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "subsystem_vendor"))
+}
+
+// getPCIDeviceNumaNode returns the integer stored in the "numa_node"
+// file found under path, ignoring surrounding whitespace.
+//
+// It panics when the file cannot be read or does not hold an integer.
+func getPCIDeviceNumaNode(path string) int {
+	raw, readErr := ioutil.ReadFile(filepath.Join(path, "numa_node"))
+	if readErr != nil {
+		panic(readErr)
+	}
+
+	node, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
+	if convErr != nil {
+		panic(convErr)
+	}
+	return node
+}
+
+// getPCIDeviceMSIx returns the numeric names of the files under
+// p/msi_irqs whose content, once trimmed, is exactly "msix". The
+// result is an empty (non-nil) slice when there is no such file.
+//
+// It panics when the glob pattern is invalid, when a matched file
+// cannot be read, or when the name of an "msix" file is not an integer.
+func getPCIDeviceMSIx(p string) []int {
+	pattern := fmt.Sprintf("%s/*", filepath.Join(p, "msi_irqs"))
+	entries, err := filepath.Glob(pattern)
+	if err != nil {
+		panic(err)
+	}
+
+	irqs := []int{}
+	for _, entry := range entries {
+		mode, err := ioutil.ReadFile(entry)
+		if err != nil {
+			panic(err)
+		}
+		// Only the entries flagged as MSI-X are of interest.
+		if strings.TrimSpace(string(mode)) != "msix" {
+			continue
+		}
+
+		// The file name is the IRQ number.
+		irq, err := strconv.Atoi(path.Base(entry))
+		if err != nil {
+			panic(err)
+		}
+		irqs = append(irqs, irq)
+	}
+	return irqs
+}
+
+func NewPCIDevice(path, name string) (PCIDevice, error) {
+	nodeNum := getPCIDeviceNumaNode(path)
+
+	device, err := getPCIDeviceId(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	vendor, err := getPCIDeviceVendor(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	subvendor, err := getPCIDeviceSubsystemVendor(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	subdevice, err := getPCIDeviceSubsystemDevice(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	deviceClass, err := getPCIDeviceClass(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	msix := getPCIDeviceMSIx(path)
+
+	return PCIDevice{
+		ID:        name,
+		Device:    device,
+		Class:     deviceClass,
+		NumaNode:  nodeNum,
+		Vendor:    vendor,
+		SubVendor: subvendor,
+		SubDevice: subdevice,
+		MSIs:      msix,
+	}, nil
+}
diff --git a/tools/numap/numa.go b/tools/numap/numa.go
new file mode 100644
index 0000000..402ea1d
--- /dev/null
+++ b/tools/numap/numa.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"golang.fcuny.net/numap/internal/hwids"
+	"golang.fcuny.net/numap/internal/sysfs"
+)
+
+// node_root is the glob pattern matching the sysfs directory of every
+// NUMA node.
+//
+// The CLASS_* values are compared against the content of a device's
+// sysfs "class" file. Read as base class / sub-class / programming
+// interface bytes (names as listed in the pci.ids class section):
+//   - CLASS_NVMe     0x010802 (67586):  mass storage / non-volatile memory / NVM Express
+//   - CLASS_ETHERNET 0x020000 (131072): network / ethernet
+//   - CLASS_GPU      0x030200 (197120): display / 3D controller
+const (
+	node_root      = "/sys/devices/system/node/node*"
+	CLASS_NVMe     = 0x010802
+	CLASS_ETHERNET = 0x020000
+	CLASS_GPU      = 0x030200
+)
+
+// node describes one NUMA node as it is printed in the JSON output.
+type node struct {
+	// Name is the last element of Path.
+	Name       string      `json:"name"`
+	// Path is the sysfs directory of the node.
+	Path       string      `json:"path"`
+	// CpuList is the content of the node's "cpulist" file, without the
+	// trailing newline.
+	CpuList    string      `json:"cpulist"`
+	// PCIDevices lists the reported PCI devices attached to the node.
+	PCIDevices []PCIDevice `json:"pci_devices"`
+}
+
+// PCIDevice is the human readable description of a PCI device, as
+// resolved through the hardware database.
+type PCIDevice struct {
+	Vendor string `json:"vendor"`
+	Name   string `json:"name"`
+}
+
+// findNodes discovers the NUMA nodes of the host and attaches to each
+// of them the PCI devices reported for it, using hwdb to resolve the
+// vendor and device names.
+//
+// The result is keyed by node name ("node0", "node1", ...). An error
+// is returned when no node directory matches node_root, when a node
+// cannot be read, or when the PCI devices cannot be mapped.
+func findNodes(hwdb hwids.PciDevices) (map[string]node, error) {
+	files, err := filepath.Glob(node_root)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to find NUMA nodes under %s: %+v", node_root, err)
+	}
+	if len(files) == 0 {
+		return nil, fmt.Errorf("Could not find NUMA node in %s", node_root)
+	}
+
+	nodes := make(map[string]node, len(files))
+	for _, f := range files {
+		n, err := newNode(f)
+		if err != nil {
+			return make(map[string]node), err
+		}
+		nodes[n.Name] = n
+	}
+
+	devicesByNode, err := mapPCIDevicesToNumaNode(hwdb)
+	if err != nil {
+		// Hand the failure to the caller, which already deals with
+		// errors, rather than panicking.
+		return nil, err
+	}
+	for k, v := range devicesByNode {
+		// NOTE: a node number without a matching directory (no entry
+		// in nodes) ends up as an entry holding only the devices.
+		nodeName := fmt.Sprintf("node%d", k)
+		n := nodes[nodeName]
+		n.PCIDevices = v
+		nodes[nodeName] = n
+	}
+	return nodes, nil
+}
+
+// mapPCIDevicesToNumaNode scans the PCI devices of the host and groups
+// the NVMe, ethernet and GPU ones by the NUMA node they report.
+//
+// Devices whose vendor is not in hwdb are skipped. When the vendor is
+// known but no entry matches the device ID, the device is still
+// recorded, with an empty vendor and name. The returned error is
+// always nil.
+func mapPCIDevicesToNumaNode(hwdb hwids.PciDevices) (map[int][]PCIDevice, error) {
+	found := sysfs.ScanPCIDevices()
+	byNode := map[int][]PCIDevice{}
+
+	for _, dev := range found {
+		// Only a few classes of devices are reported.
+		switch dev.Class {
+		case CLASS_NVMe, CLASS_ETHERNET, CLASS_GPU:
+		default:
+			continue
+		}
+
+		entries, known := hwdb[uint16(dev.Vendor)]
+		if !known {
+			continue
+		}
+
+		// Use the first database entry matching both IDs.
+		var described PCIDevice
+		for _, e := range entries {
+			if uint64(e.Device) == dev.Device && uint64(e.Vendor) == dev.Vendor {
+				described = PCIDevice{Vendor: e.VendorName, Name: e.DeviceName}
+				break
+			}
+		}
+		byNode[dev.NumaNode] = append(byNode[dev.NumaNode], described)
+	}
+	return byNode, nil
+}
+
+// newNode builds the description of the NUMA node whose sysfs
+// directory is p: the name is the last element of p, the CPU list is
+// read from the directory, and the list of PCI devices starts empty.
+func newNode(p string) (node, error) {
+	cpus, err := cpuList(p)
+	if err != nil {
+		return node{}, err
+	}
+
+	_, base := path.Split(p)
+	n := node{
+		Name:       base,
+		Path:       p,
+		CpuList:    cpus,
+		PCIDevices: []PCIDevice{},
+	}
+	return n, nil
+}
+
+// cpuList returns the content of the "cpulist" file found in the
+// directory p, with any trailing newlines removed.
+func cpuList(p string) (string, error) {
+	lpath := filepath.Join(p, "cpulist")
+
+	raw, err := ioutil.ReadFile(lpath)
+	if err != nil {
+		return "", fmt.Errorf("Failed to open %s: %+v", lpath, err)
+	}
+
+	cpus := strings.TrimRight(string(raw), "\n")
+	return cpus, nil
+}
diff --git a/tools/numap/numap.go b/tools/numap/numap.go
new file mode 100644
index 0000000..c65f1f0
--- /dev/null
+++ b/tools/numap/numap.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"golang.fcuny.net/numap/internal/hwids"
+)
+
+// main loads the PCI hardware database, discovers the NUMA nodes and
+// the PCI devices attached to them, and prints the result to STDOUT
+// as JSON.
+//
+// Errors are written to STDERR, so that they never end up in the JSON
+// stream read by a consumer of STDOUT, and the process exits with
+// status 1.
+func main() {
+	hwdb, err := hwids.Load()
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	nodes, err := findNodes(hwdb)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	out, err := json.Marshal(nodes)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	fmt.Println(string(out))
+}