about summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- tools/numap/README.org 48
-rw-r--r-- tools/numap/go.mod 3
-rw-r--r-- tools/numap/internal/hwids/hwids.go 148
-rw-r--r-- tools/numap/internal/sysfs/parse.go 21
-rw-r--r-- tools/numap/internal/sysfs/pci.go 145
-rw-r--r-- tools/numap/numa.go 116
-rw-r--r-- tools/numap/numap.go 31
7 files changed, 512 insertions, 0 deletions
diff --git a/tools/numap/README.org b/tools/numap/README.org
new file mode 100644
index 0000000..5781030
--- /dev/null
+++ b/tools/numap/README.org
@@ -0,0 +1,48 @@
+#+TITLE: numap
+
+Print the NUMA topology of a host.
+
+* Usage
+#+BEGIN_SRC sh
+./numap |jq .
+{
+  "node0": {
+    "name": "node0",
+    "path": "/sys/devices/system/node/node0",
+    "cpulist": "0-19,40-59",
+    "pci_devices": [
+      {
+        "vendor": "Mellanox Technologies",
+        "name": "MT27710 Family [ConnectX-4 Lx]"
+      },
+      {
+        "vendor": "Mellanox Technologies",
+        "name": "MT27710 Family [ConnectX-4 Lx]"
+      }
+    ]
+  },
+  "node1": {
+    "name": "node1",
+    "path": "/sys/devices/system/node/node1",
+    "cpulist": "20-39,60-79",
+    "pci_devices": [
+      {
+        "vendor": "Intel Corporation",
+        "name": "NVMe Datacenter SSD [3DNAND, Beta Rock Controller]"
+      }
+    ]
+  }
+}
+#+END_SRC
+
+The command scans the host for NUMA nodes and PCI devices, then maps each PCI device back to its NUMA node.
+
+It also lists the CPUs attached to each node.
+
+* Limitations
+** Device class
+For now, only the following classes of hardware are reported:
+- NVMe
+- network
+- GPU
+
diff --git a/tools/numap/go.mod b/tools/numap/go.mod
new file mode 100644
index 0000000..92b1885
--- /dev/null
+++ b/tools/numap/go.mod
@@ -0,0 +1,3 @@
+module golang.fcuny.net/numap
+
+go 1.17
diff --git a/tools/numap/internal/hwids/hwids.go b/tools/numap/internal/hwids/hwids.go
new file mode 100644
index 0000000..6aa9d8a
--- /dev/null
+++ b/tools/numap/internal/hwids/hwids.go
@@ -0,0 +1,148 @@
+package hwids
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strings"
+)
+
+var pciPath = []string{
+	"/usr/share/hwdata/pci.ids",
+	"/usr/share/misc/pci.ids",
+}
+
+type PCIType int
+
+const (
+	PCIVendor PCIType = iota
+	PCIDevice
+	PCISubsystem
+)
+
+type PciDevices map[uint16][]PciDevice
+
+// PciDevice represents a PCI device
+type PciDevice struct {
+	Type                   PCIType
+	Vendor, Device         uint16
+	SubVendor, SubDevice   uint16
+	VendorName, DeviceName string
+	SubName                string
+}
+
+// Load loads the hardware database for PCI devices and returns a map of
+// vendor -> list of devices.
+func Load() (PciDevices, error) {
+	// if the environment variable HWDATA is set, we add it to the
+	// list of paths we check for the hardware database.
+	extraPath := os.Getenv("HWDATA")
+	if extraPath != "" {
+		pciPath = append(pciPath, extraPath)
+	}
+
+	for _, f := range pciPath {
+		fh, err := os.Open(f)
+		if err != nil {
+			continue
+		}
+		defer fh.Close()
+		return parse(fh)
+	}
+	return PciDevices{}, fmt.Errorf("hwids: could not find a pci.ids file")
+}
+
+func parse(f *os.File) (PciDevices, error) {
+	devices := make(PciDevices)
+
+	s := bufio.NewScanner(f)
+
+	// this is to keep track of the current device. The format of the
+	// file is as follows:
+	// vendor  vendor_name
+	//       device  device_name                             <-- single tab
+	//               subvendor subdevice  subsystem_name     <-- two tabs
+	// the variable is to keep track of the current vendor / device
+	cur := PciDevice{}
+
+	for s.Scan() {
+		l := s.Text()
+		// skip empty lines or lines that are a comment
+		if len(l) == 0 || l[0] == '#' {
+			continue
+		}
+		// lines starting with a C are the class definitions, and
+		// they are at the end of the file, which means we're done
+		// parsing the devices
+		if l[0] == 'C' {
+			break
+		}
+
+		parts := strings.SplitN(l, "  ", 2)
+		if len(parts) != 2 {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line (missing ID separator): %s", l)
+		}
+
+		ids, name := parts[0], parts[1]
+		if len(ids) < 2 || len(name) == 0 {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line (empty ID or name): %s", l)
+		}
+
+		cur.Type = PCIVendor
+
+		if ids[0] == '\t' {
+			if ids[1] == '\t' {
+				cur.Type = PCISubsystem
+			} else {
+				cur.Type = PCIDevice
+			}
+		}
+
+		var err error
+		switch cur.Type {
+		case PCIVendor:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Vendor)
+			cur.VendorName = name
+		case PCIDevice:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Device)
+			cur.DeviceName = name
+		case PCISubsystem:
+			_, err = fmt.Sscanf(ids, "%x %x", &cur.SubVendor, &cur.SubDevice)
+			cur.SubName = name
+		}
+
+		if err != nil {
+			return devices, fmt.Errorf("hwids: malformed PCI ID line: %s: %v", l, err)
+		}
+
+		// This is to reset the state when we are moving to a
+		// different vendor or device
+		switch cur.Type {
+		case PCIVendor:
+			cur.Device = 0
+			cur.DeviceName = ""
+			fallthrough
+		case PCIDevice:
+			cur.SubVendor = 0
+			cur.SubDevice = 0
+			cur.SubName = ""
+		}
+
+		_, ok := devices[cur.Vendor]
+		if ok {
+			_devices := devices[cur.Vendor]
+			_devices = append(_devices, cur)
+			devices[cur.Vendor] = _devices
+
+		} else {
+			_devices := []PciDevice{cur}
+			devices[cur.Vendor] = _devices
+		}
+	}
+
+	if err := s.Err(); err != nil {
+		return devices, fmt.Errorf("hwids: failed to read PCI ID line: %v", err)
+	}
+
+	return devices, nil
+}
diff --git a/tools/numap/internal/sysfs/parse.go b/tools/numap/internal/sysfs/parse.go
new file mode 100644
index 0000000..d518653
--- /dev/null
+++ b/tools/numap/internal/sysfs/parse.go
@@ -0,0 +1,21 @@
+package sysfs
+
+import (
+	"io/ioutil"
+	"strconv"
+	"strings"
+)
+
+// ContentUint64 reads a file in sysfs and parses its content as a
+// uint64, inferring the base from the prefix (e.g. "0x" for hex).
+func ContentUint64(path string) (uint64, error) {
+	content, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+	result, err := strconv.ParseUint(strings.TrimSpace(string(content)), 0, 64)
+	if err != nil {
+		return 0, err
+	}
+	return result, nil
+}
diff --git a/tools/numap/internal/sysfs/pci.go b/tools/numap/internal/sysfs/pci.go
new file mode 100644
index 0000000..9e714b1
--- /dev/null
+++ b/tools/numap/internal/sysfs/pci.go
@@ -0,0 +1,145 @@
+package sysfs
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+const (
+	sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
+)
+
+type PCIDevice struct {
+	NumaNode             int
+	ID                   string
+	Device, Vendor       uint64
+	SubVendor, SubDevice uint64
+	Class                uint64
+	MSIs                 []int
+}
+
+func ScanPCIDevices() []PCIDevice {
+	devices, err := ioutil.ReadDir(sysFsPCIDevicesPath)
+	if err != nil {
+		panic(err)
+	}
+
+	pciDevices := []PCIDevice{}
+
+	for _, device := range devices {
+		dpath := filepath.Join(sysFsPCIDevicesPath, device.Name())
+		pcid, err := NewPCIDevice(dpath, device.Name())
+		if err != nil {
+			panic(err)
+		}
+		pciDevices = append(pciDevices, pcid)
+	}
+	return pciDevices
+}
+
+func getPCIDeviceClass(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "class"))
+}
+
+func getPCIDeviceVendor(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "vendor"))
+}
+
+func getPCIDeviceId(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "device"))
+}
+
+func getPCIDeviceSubsystemDevice(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "subsystem_device"))
+}
+
+func getPCIDeviceSubsystemVendor(path string) (uint64, error) {
+	return ContentUint64(filepath.Join(path, "subsystem_vendor"))
+}
+
+func getPCIDeviceNumaNode(path string) int {
+	content, err := ioutil.ReadFile(filepath.Join(path, "numa_node"))
+	if err != nil {
+		panic(err)
+	}
+	nodeNum, err := strconv.Atoi(strings.TrimSpace(string(content)))
+	if err != nil {
+		panic(err)
+	}
+	return nodeNum
+}
+
+func getPCIDeviceMSIx(p string) []int {
+	g := fmt.Sprintf("%s/*", filepath.Join(p, "msi_irqs"))
+	files, err := filepath.Glob(g)
+	if err != nil {
+		panic(err)
+	}
+	if len(files) == 0 {
+		return []int{}
+	}
+
+	msix := []int{}
+
+	for _, f := range files {
+		content, err := ioutil.ReadFile(f)
+		if err != nil {
+			panic(err)
+		}
+		if strings.TrimSpace(string(content)) == "msix" {
+			base := path.Base(f)
+			v, err := strconv.Atoi(base)
+			if err != nil {
+				panic(err)
+			}
+			msix = append(msix, v)
+		}
+	}
+	return msix
+}
+
+func NewPCIDevice(path, name string) (PCIDevice, error) {
+	nodeNum := getPCIDeviceNumaNode(path)
+
+	device, err := getPCIDeviceId(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	vendor, err := getPCIDeviceVendor(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	subvendor, err := getPCIDeviceSubsystemVendor(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	subdevice, err := getPCIDeviceSubsystemDevice(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	deviceClass, err := getPCIDeviceClass(path)
+	if err != nil {
+		return PCIDevice{}, err
+	}
+
+	msix := getPCIDeviceMSIx(path)
+
+	return PCIDevice{
+		ID:        name,
+		Device:    device,
+		Class:     deviceClass,
+		NumaNode:  nodeNum,
+		Vendor:    vendor,
+		SubVendor: subvendor,
+		SubDevice: subdevice,
+		MSIs:      msix,
+	}, nil
+}
diff --git a/tools/numap/numa.go b/tools/numap/numa.go
new file mode 100644
index 0000000..402ea1d
--- /dev/null
+++ b/tools/numap/numa.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"golang.fcuny.net/numap/internal/hwids"
+	"golang.fcuny.net/numap/internal/sysfs"
+)
+
+const (
+	node_root      = "/sys/devices/system/node/node*"
+	CLASS_NVMe     = 67586
+	CLASS_ETHERNET = 131072
+	CLASS_GPU      = 197120
+)
+
+type node struct {
+	Name       string      `json:"name"`
+	Path       string      `json:"path"`
+	CpuList    string      `json:"cpulist"`
+	PCIDevices []PCIDevice `json:"pci_devices"`
+}
+
+type PCIDevice struct {
+	Vendor string `json:"vendor"`
+	Name   string `json:"name"`
+}
+
+func findNodes(hwdb hwids.PciDevices) (map[string]node, error) {
+	nodes := make(map[string]node)
+
+	files, err := filepath.Glob(node_root)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to find NUMA nodes under %s: %+v", node_root, err)
+	}
+	if len(files) == 0 {
+		return nil, fmt.Errorf("Could not find NUMA node in %s", node_root)
+	}
+
+	for _, f := range files {
+		n, err := newNode(f)
+		if err != nil {
+			return make(map[string]node), err
+		}
+		nodes[n.Name] = n
+	}
+
+	r, err := mapPCIDevicesToNumaNode(hwdb)
+	if err != nil {
+		panic(err)
+	}
+	for k, v := range r {
+		nodeName := fmt.Sprintf("node%d", k)
+		n := nodes[nodeName]
+		n.PCIDevices = v
+		nodes[nodeName] = n
+	}
+	return nodes, nil
+}
+
+func mapPCIDevicesToNumaNode(hwdb hwids.PciDevices) (map[int][]PCIDevice, error) {
+	devices := sysfs.ScanPCIDevices()
+	r := map[int][]PCIDevice{}
+
+	for _, d := range devices {
+		if d.Class == CLASS_NVMe || d.Class == CLASS_ETHERNET || d.Class == CLASS_GPU {
+			_, ok := hwdb[uint16(d.Vendor)]
+			if ok {
+				desc := hwdb[uint16(d.Vendor)]
+				var vendor, name string
+				for _, m := range desc {
+					if uint64(m.Device) == d.Device && uint64(m.Vendor) == d.Vendor {
+						vendor = m.VendorName
+						name = m.DeviceName
+						break
+					}
+				}
+				pciDevice := PCIDevice{
+					Vendor: vendor,
+					Name:   name,
+				}
+				r[d.NumaNode] = append(r[d.NumaNode], pciDevice)
+			}
+		}
+	}
+	return r, nil
+}
+
+func newNode(p string) (node, error) {
+	_, name := path.Split(p)
+
+	cpulist, err := cpuList(p)
+	if err != nil {
+		return node{}, err
+	}
+
+	return node{
+		Name:       name,
+		Path:       p,
+		CpuList:    cpulist,
+		PCIDevices: []PCIDevice{},
+	}, nil
+}
+
+func cpuList(p string) (string, error) {
+	lpath := filepath.Join(p, "cpulist")
+	c, err := ioutil.ReadFile(lpath)
+	if err != nil {
+		return "", fmt.Errorf("Failed to open %s: %+v", lpath, err)
+	}
+	return strings.TrimRight(string(c), "\n"), nil
+}
diff --git a/tools/numap/numap.go b/tools/numap/numap.go
new file mode 100644
index 0000000..c65f1f0
--- /dev/null
+++ b/tools/numap/numap.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"golang.fcuny.net/numap/internal/hwids"
+)
+
+func main() {
+	hwdb, err := hwids.Load()
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+
+	nodes, err := findNodes(hwdb)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+
+	out, err := json.Marshal(nodes)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+
+	fmt.Println(string(out))
+}