diff options
author | Franck Cuny <franck@fcuny.net> | 2022-06-19 14:53:12 -0700 |
---|---|---|
committer | Franck Cuny <franck@fcuny.net> | 2022-06-19 14:55:34 -0700 |
commit | a0893edf184aa760236e30e08f0e40154bb405c6 (patch) | |
tree | bce090cdf915817199c3f675981075816e2e6aff /tools/numap | |
parent | feat(tools/schedlatency): add a tool to report scheduler latency (diff) | |
download | world-a0893edf184aa760236e30e08f0e40154bb405c6.tar.gz |
feat(tools/numap): add a tool to report NUMA topology of a host
The tool maps the various PCI devices to the NUMA node they are attached to and print the result to STDOUT in JSON. Only ethernet, NVMe and GPU devices are accounted for at the moment. Change-Id: If32c805e61211f0ef4838a82eabc70d7fc1985fe Reviewed-on: https://cl.fcuny.net/c/world/+/453 Tested-by: CI Reviewed-by: Franck Cuny <franck@fcuny.net>
Diffstat (limited to 'tools/numap')
-rw-r--r-- | tools/numap/README.org | 48 | ||||
-rw-r--r-- | tools/numap/go.mod | 3 | ||||
-rw-r--r-- | tools/numap/internal/hwids/hwids.go | 148 | ||||
-rw-r--r-- | tools/numap/internal/sysfs/parse.go | 21 | ||||
-rw-r--r-- | tools/numap/internal/sysfs/pci.go | 145 | ||||
-rw-r--r-- | tools/numap/numa.go | 116 | ||||
-rw-r--r-- | tools/numap/numap.go | 31 |
7 files changed, 512 insertions, 0 deletions
diff --git a/tools/numap/README.org b/tools/numap/README.org new file mode 100644 index 0000000..5781030 --- /dev/null +++ b/tools/numap/README.org @@ -0,0 +1,48 @@ +#+TITLE: numap + +Print the NUMA topology of a host. + +* Usage +#+BEGIN_SRC sh +./numap |jq . +{ + "node0": { + "name": "node0", + "path": "/sys/devices/system/node/node0", + "cpulist": "0-19,40-59", + "pci_devices": [ + { + "vendor": "Mellanox Technologies", + "name": "MT27710 Family [ConnectX-4 Lx]" + }, + { + "vendor": "Mellanox Technologies", + "name": "MT27710 Family [ConnectX-4 Lx]" + } + ] + }, + "node1": { + "name": "node1", + "path": "/sys/devices/system/node/node1", + "cpulist": "20-39,60-79", + "pci_devices": [ + { + "vendor": "Intel Corporation", + "name": "NVMe Datacenter SSD [3DNAND, Beta Rock Controller]" + } + ] + } +} +#+END_SRC + +The command will scan the host to find the NUMA nodes, and all the PCI devices, and map the PCI devices back to the NUMA node. + +It also provides a way to see the list of CPUs attached to the node. + +* Limitations +** Device class +For now only the following classes of hardware are cared for: +- NVMe +- network +- GPU + diff --git a/tools/numap/go.mod b/tools/numap/go.mod new file mode 100644 index 0000000..92b1885 --- /dev/null +++ b/tools/numap/go.mod @@ -0,0 +1,3 @@ +module golang.fcuny.net/numap + +go 1.17 diff --git a/tools/numap/internal/hwids/hwids.go b/tools/numap/internal/hwids/hwids.go new file mode 100644 index 0000000..6aa9d8a --- /dev/null +++ b/tools/numap/internal/hwids/hwids.go @@ -0,0 +1,148 @@ +package hwids + +import ( + "bufio" + "fmt" + "os" + "strings" +) + +var pciPath = []string{ + "/usr/share/hwdata/pci.ids", + "/usr/share/misc/pci.ids", +} + +type PCIType int + +const ( + PCIVendor PCIType = iota + PCIDevice + PCISubsystem +) + +type PciDevices map[uint16][]PciDevice + +// PciDevice represents a PCI device +type PciDevice struct { + Type PCIType + Vendor, Device uint16 + SubVendor, SubDevice uint16 + VendorName, DeviceName string + SubName string +} + +// Load load the hardware database for PCI devices and return a map of +// vendor -> list of devices. +func Load() (PciDevices, error) { + // if the environment variable HWDATAPATH is set, we add it to the + // list of paths we check for the hardware database. + extraPath := os.Getenv("HWDATA") + if extraPath != "" { + pciPath = append(pciPath, extraPath) + } + + for _, f := range pciPath { + fh, err := os.Open(f) + if err != nil { + continue + } + defer fh.Close() + return parse(fh) + } + return PciDevices{}, fmt.Errorf("hwids: could not find a pci.ids file") +} + +func parse(f *os.File) (PciDevices, error) { + devices := make(PciDevices) + + s := bufio.NewScanner(f) + + // this is to keep track of the current device. The format of the + // file is as follow: + // vendor vendor_name + // device device_name <-- single tab + // subvendor subdevice subsystem_name <-- two tabs + // the variable is to keep track of the current vendor / device + cur := PciDevice{} + + for s.Scan() { + l := s.Text() + // skip empty lines or lines that are a comment + if len(l) == 0 || l[0] == '#' { + continue + } + // lines starting with a C are the classes definitions, and + // they are at the end of the file, which means we're done + // parsing the devices + if l[0] == 'C' { + break + } + + parts := strings.SplitN(l, " ", 2) + if len(parts) != 2 { + return devices, fmt.Errorf("hwids: malformed PCI ID line (missing ID separator): %s", l) + } + + ids, name := parts[0], parts[1] + if len(ids) < 2 || len(name) == 0 { + return devices, fmt.Errorf("hwids: malformed PCI ID line (empty ID or name): %s", l) + } + + cur.Type = PCIVendor + + if ids[0] == '\t' { + if ids[1] == '\t' { + cur.Type = PCISubsystem + } else { + cur.Type = PCIDevice + } + } + + var err error + switch cur.Type { + case PCIVendor: + _, err = fmt.Sscanf(ids, "%x", &cur.Vendor) + cur.VendorName = name + case PCIDevice: + _, err = fmt.Sscanf(ids, "%x", &cur.Device) + cur.DeviceName = name + case PCISubsystem: + _, err = fmt.Sscanf(ids, "%x %x", &cur.SubVendor, &cur.SubDevice) + cur.SubName = name + } + + if err != nil { + return devices, fmt.Errorf("hwids: malformed PCI ID line: %s: %v", l, err) + } + + // This is to reset the state when we are moving to a + // different vendor or device + switch cur.Type { + case PCIVendor: + cur.Device = 0 + cur.DeviceName = "" + fallthrough + case PCIDevice: + cur.SubVendor = 0 + cur.SubDevice = 0 + cur.SubName = "" + } + + _, ok := devices[cur.Vendor] + if ok { + _devices := devices[cur.Vendor] + _devices = append(_devices, cur) + devices[cur.Vendor] = _devices + + } else { + _devices := []PciDevice{cur} + devices[cur.Vendor] = _devices + } + } + + if err := s.Err(); err != nil { + return devices, fmt.Errorf("hwids: failed to read PCI ID line: %v", err) + } + + return devices, nil +} diff --git a/tools/numap/internal/sysfs/parse.go b/tools/numap/internal/sysfs/parse.go new file mode 100644 index 0000000..d518653 --- /dev/null +++ b/tools/numap/internal/sysfs/parse.go @@ -0,0 +1,21 @@ +package sysfs + +import ( + "io/ioutil" + "strconv" + "strings" +) + +// ContentUint64 parses the content of a file in sysfs, and convert +// from hex to uint64. +func ContentUint64(path string) (uint64, error) { + content, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + result, err := strconv.ParseUint(strings.TrimSpace(string(content)), 0, 64) + if err != nil { + return 0, err + } + return result, nil +} diff --git a/tools/numap/internal/sysfs/pci.go b/tools/numap/internal/sysfs/pci.go new file mode 100644 index 0000000..9e714b1 --- /dev/null +++ b/tools/numap/internal/sysfs/pci.go @@ -0,0 +1,145 @@ +package sysfs + +import ( + "fmt" + "io/ioutil" + "path" + "path/filepath" + "strconv" + "strings" +) + +const ( + sysFsPCIDevicesPath = "/sys/bus/pci/devices/" +) + +type PCIDevice struct { + NumaNode int + ID string + Device, Vendor uint64 + SubVendor, SubDevice uint64 + Class uint64 + MSIs []int +} + +func ScanPCIDevices() []PCIDevice { + devices, err := ioutil.ReadDir(sysFsPCIDevicesPath) + if err != nil { + panic(err) + } + + pciDevices := []PCIDevice{} + + for _, device := range devices { + dpath := filepath.Join(sysFsPCIDevicesPath, device.Name()) + pcid, err := NewPCIDevice(dpath, device.Name()) + if err != nil { + panic(err) + } + pciDevices = append(pciDevices, pcid) + } + return pciDevices +} + +func getPCIDeviceClass(path string) (uint64, error) { + return ContentUint64(filepath.Join(path, "class")) +} + +func getPCIDeviceVendor(path string) (uint64, error) { + return ContentUint64(filepath.Join(path, "vendor")) +} + +func getPCIDeviceId(path string) (uint64, error) { + return ContentUint64(filepath.Join(path, "device")) +} + +func getPCIDeviceSubsystemDevice(path string) (uint64, error) { + return ContentUint64(filepath.Join(path, "subsystem_device")) +} + +func getPCIDeviceSubsystemVendor(path string) (uint64, error) { + return ContentUint64(filepath.Join(path, "subsystem_vendor")) +} + +func getPCIDeviceNumaNode(path string) int { + content, err := ioutil.ReadFile(filepath.Join(path, "numa_node")) + if err != nil { + panic(err) + } + nodeNum, err := strconv.Atoi(strings.TrimSpace(string(content))) + if err != nil { + panic(err) + } + return nodeNum +} + +func getPCIDeviceMSIx(p string) []int { + g := fmt.Sprintf("%s/*", filepath.Join(p, "msi_irqs")) + files, err := filepath.Glob(g) + if err != nil { + panic(err) + } + if len(files) == 0 { + return []int{} + } + + msix := []int{} + + for _, f := range files { + content, err := ioutil.ReadFile(f) + if err != nil { + panic(err) + } + if strings.TrimSpace(string(content)) == "msix" { + base := path.Base(f) + v, err := strconv.Atoi(base) + if err != nil { + panic(err) + } + msix = append(msix, v) + } + } + return msix +} + +func NewPCIDevice(path, name string) (PCIDevice, error) { + nodeNum := getPCIDeviceNumaNode(path) + + device, err := getPCIDeviceId(path) + if err != nil { + return PCIDevice{}, err + } + + vendor, err := getPCIDeviceVendor(path) + if err != nil { + return PCIDevice{}, err + } + + subvendor, err := getPCIDeviceSubsystemVendor(path) + if err != nil { + return PCIDevice{}, err + } + + subdevice, err := getPCIDeviceSubsystemDevice(path) + if err != nil { + return PCIDevice{}, err + } + + deviceClass, err := getPCIDeviceClass(path) + if err != nil { + return PCIDevice{}, err + } + + msix := getPCIDeviceMSIx(path) + + return PCIDevice{ + ID: name, + Device: device, + Class: deviceClass, + NumaNode: nodeNum, + Vendor: vendor, + SubVendor: subvendor, + SubDevice: subdevice, + MSIs: msix, + }, nil +} diff --git a/tools/numap/numa.go b/tools/numap/numa.go new file mode 100644 index 0000000..402ea1d --- /dev/null +++ b/tools/numap/numa.go @@ -0,0 +1,116 @@ +package main + +import ( + "fmt" + "io/ioutil" + "path" + "path/filepath" + "strings" + + "golang.fcuny.net/numap/internal/hwids" + "golang.fcuny.net/numap/internal/sysfs" +) + +const ( + node_root = "/sys/devices/system/node/node*" + CLASS_NVMe = 67586 + CLASS_ETHERNET = 131072 + CLASS_GPU = 197120 +) + +type node struct { + Name string `json:"name"` + Path string `json:"path"` + CpuList string `json:"cpulist"` + PCIDevices []PCIDevice `json:"pci_devices"` +} + +type PCIDevice struct { + Vendor string `json:"vendor"` + Name string `json:"name"` +} + +func findNodes(hwdb hwids.PciDevices) (map[string]node, error) { + nodes := make(map[string]node) + + files, err := filepath.Glob(node_root) + if err != nil { + return nil, fmt.Errorf("Failed to find NUMA nodes under %s: %+v", node_root, err) + } + if len(files) == 0 { + return nil, fmt.Errorf("Could not find NUMA node in %s", node_root) + } + + for _, f := range files { + n, err := newNode(f) + if err != nil { + return make(map[string]node), err + } + nodes[n.Name] = n + } + + r, err := mapPCIDevicesToNumaNode(hwdb) + if err != nil { + panic(err) + } + for k, v := range r { + nodeName := fmt.Sprintf("node%d", k) + n := nodes[nodeName] + n.PCIDevices = v + nodes[nodeName] = n + } + return nodes, nil +} + +func mapPCIDevicesToNumaNode(hwdb hwids.PciDevices) (map[int][]PCIDevice, error) { + devices := sysfs.ScanPCIDevices() + r := map[int][]PCIDevice{} + + for _, d := range devices { + if d.Class == CLASS_NVMe || d.Class == CLASS_ETHERNET || d.Class == CLASS_GPU { + _, ok := hwdb[uint16(d.Vendor)] + if ok { + desc := hwdb[uint16(d.Vendor)] + var vendor, name string + for _, m := range desc { + if uint64(m.Device) == d.Device && uint64(m.Vendor) == d.Vendor { + vendor = m.VendorName + name = m.DeviceName + break + } + } + pciDevice := PCIDevice{ + Vendor: vendor, + Name: name, + } + r[d.NumaNode] = append(r[d.NumaNode], pciDevice) + } + } + } + return r, nil +} + +func newNode(p string) (node, error) { + _, name := path.Split(p) + + cpulist, err := cpuList(p) + if err != nil { + return node{}, err + } + + return node{ + Name: name, + Path: p, + CpuList: cpulist, + PCIDevices: []PCIDevice{}, + }, nil +} + +func cpuList(p string) (string, error) { + lpath := filepath.Join(p, "cpulist") + c, err := ioutil.ReadFile(lpath) + if err != nil { + return "", fmt.Errorf("Failed to open %s: %+v", lpath, err) + } + return strings.TrimRight(string(c), "\n"), nil +} diff --git a/tools/numap/numap.go b/tools/numap/numap.go new file mode 100644 index 0000000..c65f1f0 --- /dev/null +++ b/tools/numap/numap.go @@ -0,0 +1,31 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + + "golang.fcuny.net/numap/internal/hwids" +) + +func main() { + hwdb, err := hwids.Load() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + nodes, err := findNodes(hwdb) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + out, err := json.Marshal(nodes) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + fmt.Println(string(out)) +} |