diff options
Diffstat (limited to '')
-rw-r--r-- | tools/numap/README.org | 48 | ||||
-rw-r--r-- | tools/numap/go.mod | 3 | ||||
-rw-r--r-- | tools/numap/internal/hwids/hwids.go | 148 | ||||
-rw-r--r-- | tools/numap/internal/sysfs/parse.go | 21 | ||||
-rw-r--r-- | tools/numap/internal/sysfs/pci.go | 145 | ||||
-rw-r--r-- | tools/numap/numa.go | 116 | ||||
-rw-r--r-- | tools/numap/numap.go | 31 |
7 files changed, 512 insertions, 0 deletions
diff --git a/tools/numap/README.org b/tools/numap/README.org new file mode 100644 index 0000000..5781030 --- /dev/null +++ b/tools/numap/README.org @@ -0,0 +1,48 @@ +#+TITLE: numap + +Print the NUMA topology of a host. + +* Usage +#+BEGIN_SRC sh +./numap |jq . +{ + "node0": { + "name": "node0", + "path": "/sys/devices/system/node/node0", + "cpulist": "0-19,40-59", + "pci_devices": [ + { + "vendor": "Mellanox Technologies", + "name": "MT27710 Family [ConnectX-4 Lx]" + }, + { + "vendor": "Mellanox Technologies", + "name": "MT27710 Family [ConnectX-4 Lx]" + } + ] + }, + "node1": { + "name": "node1", + "path": "/sys/devices/system/node/node1", + "cpulist": "20-39,60-79", + "pci_devices": [ + { + "vendor": "Intel Corporation", + "name": "NVMe Datacenter SSD [3DNAND, Beta Rock Controller]" + } + ] + } +} +#+END_SRC + +The command will scan the host to find the NUMA nodes, and all the PCI devices, and map the PCI devices back to the NUMA node. + +It also provides a way to see the list of CPUs attached to the node. 
// pciPath lists the locations that Load probes, in order, when looking
// for a pci.ids hardware database.
var pciPath = []string{
	"/usr/share/hwdata/pci.ids",
	"/usr/share/misc/pci.ids",
}

// PCIType tells which kind of pci.ids line a PciDevice was built from.
type PCIType int

const (
	// PCIVendor marks an entry built from a vendor line (no leading tab).
	PCIVendor PCIType = iota
	// PCIDevice marks an entry built from a device line (one leading tab).
	PCIDevice
	// PCISubsystem marks an entry built from a subsystem line (two
	// leading tabs).
	PCISubsystem
)

// PciDevices maps a vendor ID to every entry (vendor, device and
// subsystem lines) parsed for that vendor, in file order.
type PciDevices map[uint16][]PciDevice

// PciDevice represents one entry of the PCI ID database. Entries carry
// the context of the lines above them: a device entry also holds its
// vendor ID and name, a subsystem entry also holds its vendor and device.
type PciDevice struct {
	Type                   PCIType
	Vendor, Device         uint16
	SubVendor, SubDevice   uint16
	VendorName, DeviceName string
	SubName                string
}

// Load loads the hardware database for PCI devices and returns a map of
// vendor -> list of devices.
//
// The first file of pciPath that can be opened is used. If the
// environment variable HWDATA is set, its value is probed last. An error
// is returned when no candidate can be opened or when the file is
// malformed.
func Load() (PciDevices, error) {
	// Work on a copy: appending to the package-level slice would make
	// the list of candidates grow on every call.
	paths := make([]string, 0, len(pciPath)+1)
	paths = append(paths, pciPath...)
	if extraPath := os.Getenv("HWDATA"); extraPath != "" {
		paths = append(paths, extraPath)
	}

	for _, p := range paths {
		fh, err := os.Open(p)
		if err != nil {
			// Missing or unreadable candidates are expected; try the next.
			continue
		}
		// We return right away, so the defer fires for this file only.
		defer fh.Close()
		return parse(fh)
	}
	return PciDevices{}, fmt.Errorf("hwids: could not find a pci.ids file")
}

// parse reads a pci.ids formatted file and returns its entries grouped by
// vendor ID.
//
// The format of the file is as follows (IDs are hexadecimal and are
// separated from the name by two spaces):
//
//	vendor  vendor_name
//	<tab>device  device_name
//	<tab><tab>subvendor subdevice  subsystem_name
//
// Parsing stops at the first line starting with 'C': the class
// definitions are at the end of the file and are not loaded. On a
// malformed line the entries parsed so far are returned along with the
// error.
func parse(f *os.File) (PciDevices, error) {
	devices := make(PciDevices)

	// cur keeps track of the current vendor / device so that device and
	// subsystem entries inherit the IDs and names of their parents.
	cur := PciDevice{}

	s := bufio.NewScanner(f)
	for s.Scan() {
		l := s.Text()
		// skip empty lines or lines that are a comment
		if len(l) == 0 || l[0] == '#' {
			continue
		}
		// class definitions: we're done parsing the devices
		if l[0] == 'C' {
			break
		}

		// The ID part and the name are separated by TWO spaces; a
		// single space separates the two IDs of a subsystem line, so
		// splitting on one space would cut subsystem IDs in half.
		parts := strings.SplitN(l, "  ", 2)
		if len(parts) != 2 {
			return devices, fmt.Errorf("hwids: malformed PCI ID line (missing ID separator): %s", l)
		}

		ids, name := parts[0], strings.TrimSpace(parts[1])
		if len(ids) < 2 || len(name) == 0 {
			return devices, fmt.Errorf("hwids: malformed PCI ID line (empty ID or name): %s", l)
		}

		// The number of leading tabs gives the nesting level.
		switch {
		case strings.HasPrefix(ids, "\t\t"):
			cur.Type = PCISubsystem
		case strings.HasPrefix(ids, "\t"):
			cur.Type = PCIDevice
		default:
			cur.Type = PCIVendor
		}
		ids = strings.TrimLeft(ids, "\t")

		// Record the IDs of this line and reset whatever state belongs
		// to the previous vendor or device.
		var err error
		switch cur.Type {
		case PCIVendor:
			_, err = fmt.Sscanf(ids, "%x", &cur.Vendor)
			cur.VendorName = name
			cur.Device, cur.DeviceName = 0, ""
			cur.SubVendor, cur.SubDevice, cur.SubName = 0, 0, ""
		case PCIDevice:
			_, err = fmt.Sscanf(ids, "%x", &cur.Device)
			cur.DeviceName = name
			cur.SubVendor, cur.SubDevice, cur.SubName = 0, 0, ""
		case PCISubsystem:
			_, err = fmt.Sscanf(ids, "%x %x", &cur.SubVendor, &cur.SubDevice)
			cur.SubName = name
		}
		if err != nil {
			return devices, fmt.Errorf("hwids: malformed PCI ID line: %s: %w", l, err)
		}

		// append on a missing key starts from a nil slice, no need to
		// check for the key first.
		devices[cur.Vendor] = append(devices[cur.Vendor], cur)
	}

	if err := s.Err(); err != nil {
		return devices, fmt.Errorf("hwids: failed to read PCI ID line: %w", err)
	}

	return devices, nil
}
// ContentUint64 reads the file at path, trims the surrounding whitespace
// and parses the content as an unsigned integer. The base is derived from
// the prefix of the value, so a "0x" prefixed hexadecimal value is
// accepted.
func ContentUint64(path string) (uint64, error) {
	content, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}
	return strconv.ParseUint(strings.TrimSpace(string(content)), 0, 64)
}

const (
	// sysFsPCIDevicesPath is the directory scanned by ScanPCIDevices;
	// each entry is handed to NewPCIDevice.
	sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
)

// PCIDevice holds the attributes read from the sysfs directory of one PCI
// device.
type PCIDevice struct {
	NumaNode             int    // content of the "numa_node" file
	ID                   string // name given to NewPCIDevice (the directory name when built by ScanPCIDevices)
	Device, Vendor       uint64 // content of the "device" and "vendor" files
	SubVendor, SubDevice uint64 // content of "subsystem_vendor" and "subsystem_device"
	Class                uint64 // content of the "class" file
	MSIs                 []int  // entries of "msi_irqs" whose content is "msix"
}

// ScanPCIDevices returns one PCIDevice per entry of sysFsPCIDevicesPath.
//
// It panics if the directory cannot be listed or if any device fails to
// load: the signature has no error return.
func ScanPCIDevices() []PCIDevice {
	entries, err := ioutil.ReadDir(sysFsPCIDevicesPath)
	if err != nil {
		panic(err)
	}

	pciDevices := make([]PCIDevice, 0, len(entries))
	for _, entry := range entries {
		dpath := filepath.Join(sysFsPCIDevicesPath, entry.Name())
		pcid, err := NewPCIDevice(dpath, entry.Name())
		if err != nil {
			panic(err)
		}
		pciDevices = append(pciDevices, pcid)
	}
	return pciDevices
}

// getPCIDeviceClass reads the "class" attribute of the device at path.
func getPCIDeviceClass(path string) (uint64, error) {
	return ContentUint64(filepath.Join(path, "class"))
}

// getPCIDeviceVendor reads the "vendor" attribute of the device at path.
func getPCIDeviceVendor(path string) (uint64, error) {
	return ContentUint64(filepath.Join(path, "vendor"))
}

// getPCIDeviceId reads the "device" attribute of the device at path.
func getPCIDeviceId(path string) (uint64, error) {
	return ContentUint64(filepath.Join(path, "device"))
}

// getPCIDeviceSubsystemDevice reads the "subsystem_device" attribute of
// the device at path.
func getPCIDeviceSubsystemDevice(path string) (uint64, error) {
	return ContentUint64(filepath.Join(path, "subsystem_device"))
}

// getPCIDeviceSubsystemVendor reads the "subsystem_vendor" attribute of
// the device at path.
func getPCIDeviceSubsystemVendor(path string) (uint64, error) {
	return ContentUint64(filepath.Join(path, "subsystem_vendor"))
}

// getPCIDeviceNumaNode reads the "numa_node" attribute of the device at
// path as a signed decimal integer.
func getPCIDeviceNumaNode(path string) (int, error) {
	content, err := ioutil.ReadFile(filepath.Join(path, "numa_node"))
	if err != nil {
		return 0, err
	}
	nodeNum, err := strconv.Atoi(strings.TrimSpace(string(content)))
	if err != nil {
		return 0, fmt.Errorf("sysfs: invalid numa_node for %s: %w", path, err)
	}
	return nodeNum, nil
}

// getPCIDeviceMSIx returns the names (converted to int) of the files
// under p/msi_irqs whose content is "msix". The result is an empty,
// non-nil slice when there is no such file.
func getPCIDeviceMSIx(p string) ([]int, error) {
	files, err := filepath.Glob(filepath.Join(p, "msi_irqs", "*"))
	if err != nil {
		return nil, err
	}

	msix := []int{}
	for _, f := range files {
		content, err := ioutil.ReadFile(f)
		if err != nil {
			return nil, err
		}
		if strings.TrimSpace(string(content)) != "msix" {
			continue
		}
		// the file name is the IRQ number
		v, err := strconv.Atoi(path.Base(f))
		if err != nil {
			return nil, fmt.Errorf("sysfs: invalid IRQ entry %s: %w", f, err)
		}
		msix = append(msix, v)
	}
	return msix, nil
}

// NewPCIDevice builds a PCIDevice from the sysfs directory at path; name
// is stored as the device ID.
//
// An error is returned as soon as one attribute cannot be read or parsed;
// the returned PCIDevice is then the zero value.
func NewPCIDevice(path, name string) (PCIDevice, error) {
	nodeNum, err := getPCIDeviceNumaNode(path)
	if err != nil {
		return PCIDevice{}, err
	}

	device, err := getPCIDeviceId(path)
	if err != nil {
		return PCIDevice{}, err
	}

	vendor, err := getPCIDeviceVendor(path)
	if err != nil {
		return PCIDevice{}, err
	}

	subvendor, err := getPCIDeviceSubsystemVendor(path)
	if err != nil {
		return PCIDevice{}, err
	}

	subdevice, err := getPCIDeviceSubsystemDevice(path)
	if err != nil {
		return PCIDevice{}, err
	}

	deviceClass, err := getPCIDeviceClass(path)
	if err != nil {
		return PCIDevice{}, err
	}

	msix, err := getPCIDeviceMSIx(path)
	if err != nil {
		return PCIDevice{}, err
	}

	return PCIDevice{
		ID:        name,
		Device:    device,
		Class:     deviceClass,
		NumaNode:  nodeNum,
		Vendor:    vendor,
		SubVendor: subvendor,
		SubDevice: subdevice,
		MSIs:      msix,
	}, nil
}
b/tools/numap/numa.go @@ -0,0 +1,116 @@ +package main + +import ( + "fmt" + "io/ioutil" + "path" + "path/filepath" + "strings" + + "golang.fcuny.net/numap/internal/hwids" + "golang.fcuny.net/numap/internal/sysfs" +) + +const ( + node_root = "/sys/devices/system/node/node*" + CLASS_NVMe = 67586 + CLASS_ETHERNET = 131072 + CLASS_GPU = 197120 +) + +type node struct { + Name string `json:"name"` + Path string `json:"path"` + CpuList string `json:"cpulist"` + PCIDevices []PCIDevice `json:"pci_devices"` +} + +type PCIDevice struct { + Vendor string `json:"vendor"` + Name string `json:"name"` +} + +func findNodes(hwdb hwids.PciDevices) (map[string]node, error) { + nodes := make(map[string]node) + + files, err := filepath.Glob(node_root) + if err != nil { + return nil, fmt.Errorf("Failed to find NUMA nodes under %s: %+v", node_root, err) + } + if len(files) == 0 { + return nil, fmt.Errorf("Could not find NUMA node in %s", node_root) + } + + for _, f := range files { + n, err := newNode(f) + if err != nil { + return make(map[string]node), err + } + nodes[n.Name] = n + } + + r, err := mapPCIDevicesToNumaNode(hwdb) + if err != nil { + panic(err) + } + for k, v := range r { + nodeName := fmt.Sprintf("node%d", k) + n := nodes[nodeName] + n.PCIDevices = v + nodes[nodeName] = n + } + return nodes, nil +} + +func mapPCIDevicesToNumaNode(hwdb hwids.PciDevices) (map[int][]PCIDevice, error) { + devices := sysfs.ScanPCIDevices() + r := map[int][]PCIDevice{} + + for _, d := range devices { + if d.Class == CLASS_NVMe || d.Class == CLASS_ETHERNET || d.Class == CLASS_GPU { + _, ok := hwdb[uint16(d.Vendor)] + if ok { + desc := hwdb[uint16(d.Vendor)] + var vendor, name string + for _, m := range desc { + if uint64(m.Device) == d.Device && uint64(m.Vendor) == d.Vendor { + vendor = m.VendorName + name = m.DeviceName + break + } + } + pciDevice := PCIDevice{ + Vendor: vendor, + Name: name, + } + r[d.NumaNode] = append(r[d.NumaNode], pciDevice) + } + } + } + return r, nil +} + +func newNode(p 
string) (node, error) { + _, name := path.Split(p) + + cpulist, err := cpuList(p) + if err != nil { + return node{}, err + } + + return node{ + Name: name, + Path: p, + CpuList: cpulist, + PCIDevices: []PCIDevice{}, + }, nil +} + +func cpuList(p string) (string, error) { + lpath := filepath.Join(p, "cpulist") + c, err := ioutil.ReadFile(lpath) + if err != nil { + return "", fmt.Errorf("Failed to open %s: %+v", lpath, err) + } + return strings.TrimRight(string(c), "\n"), nil +} diff --git a/tools/numap/numap.go b/tools/numap/numap.go new file mode 100644 index 0000000..c65f1f0 --- /dev/null +++ b/tools/numap/numap.go @@ -0,0 +1,31 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + + "golang.fcuny.net/numap/internal/hwids" +) + +func main() { + hwdb, err := hwids.Load() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + nodes, err := findNodes(hwdb) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + out, err := json.Marshal(nodes) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + fmt.Println(string(out)) +} |