about summary refs log tree commit diff
diff options
context:
space:
mode:
author Franck Cuny <franck@fcuny.net> 2022-06-19 15:11:59 -0700
committer Franck Cuny <franck@fcuny.net> 2022-06-19 15:17:50 -0700
commit c4c1b7af140c1dd99c7b0520178eea02edd63e2b (patch)
tree ad5a5a0244b921d09cfedbd72be758a0c5f9e867
parent feat(tools/numap): add a tool to report NUMA topology of a host (diff)
download world-c4c1b7af140c1dd99c7b0520178eea02edd63e2b.tar.gz
feat(tools/seqstat): add a tool to report stats about a sequence
For example:
```
% echo 1 20 12 32 19 2 | ./seqstat -S
▁▅▃█▅▁
min:   1.000000
max:   32.000000
avg:   14.333333
p50:   19.000000
p90:   32.000000
p99:   32.000000
p999:  32.000000
ordered sequence: [1 2 12 19 20 32]
```

Change-Id: I9303bd7d0e964948143e77c868de8777cd7a9951
Reviewed-on: https://cl.fcuny.net/c/world/+/454
Tested-by: CI
Reviewed-by: Franck Cuny <franck@fcuny.net>
Diffstat (limited to '')
-rw-r--r-- tools/seqstat/go.mod 3
-rw-r--r-- tools/seqstat/seqstat.go 63
-rw-r--r-- tools/seqstat/sequence.go 79
-rw-r--r-- tools/seqstat/sequence_test.go 49
4 files changed, 194 insertions, 0 deletions
diff --git a/tools/seqstat/go.mod b/tools/seqstat/go.mod
new file mode 100644
index 0000000..39f343f
--- /dev/null
+++ b/tools/seqstat/go.mod
@@ -0,0 +1,3 @@
+module golang.fcuny.net/seqstat
+
+go 1.17
diff --git a/tools/seqstat/seqstat.go b/tools/seqstat/seqstat.go
new file mode 100644
index 0000000..8709fa4
--- /dev/null
+++ b/tools/seqstat/seqstat.go
@@ -0,0 +1,63 @@
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// stats holds the -S flag; when true, main prints min, max, avg, percentiles
+// and the sorted sequence after the histogram.
+var stats = flag.Bool("S", false, "Display statistics about the sequence.")
+
+// main draws a histogram of the numbers in the arguments, or stdin when there are none; -S adds statistics.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "usage: [-S] <INPUT>\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // after Usage is installed, so -h and flag errors print it
+
+	elements := argsToElements(flag.Args())
+	if len(elements) < 1 {
+		scanner := bufio.NewScanner(os.Stdin)
+		var e []string
+		for scanner.Scan() {
+			e = append(e, strings.Fields(scanner.Text())...) // Fields skips empty tokens
+		}
+		elements = argsToElements(e)
+	}
+	if len(elements) < 1 { // nothing to plot; newSequence reads elements[0]
+		flag.Usage()
+		os.Exit(2)
+	}
+	seq := newSequence(elements)
+	fmt.Println(string(seq.histogram()))
+	if *stats {
+		fmt.Printf("min:   %f\n", seq.min)
+		fmt.Printf("max:   %f\n", seq.max)
+		fmt.Printf("avg:   %f\n", seq.avg())
+		fmt.Printf("p50:   %f\n", seq.p50())
+		fmt.Printf("p90:   %f\n", seq.p90())
+		fmt.Printf("p99:   %f\n", seq.p99())
+		fmt.Printf("p999:  %f\n", seq.p999())
+		fmt.Printf("ordered sequence: %v\n", seq.elementsSorted)
+	}
+}
+
+// argsToElements parses every string in args as a 64-bit float and returns
+// the values in input order. It panics on the first string that fails to parse.
+func argsToElements(args []string) []float64 {
+	parsed := make([]float64, 0, len(args))
+	for _, raw := range args {
+		value, err := strconv.ParseFloat(raw, 64)
+		if err != nil {
+			panic(err)
+		}
+		parsed = append(parsed, value)
+	}
+	return parsed
+}
diff --git a/tools/seqstat/sequence.go b/tools/seqstat/sequence.go
new file mode 100644
index 0000000..d4ec91b
--- /dev/null
+++ b/tools/seqstat/sequence.go
@@ -0,0 +1,79 @@
+package main
+
+import (
+	"sort"
+)
+
+// ticks holds the eight block characters U+2581..U+2588, lowest bar first;
+// histogram indexes into it.
+var ticks = []rune{'\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586', '\u2587', '\u2588'}
+
+type sequence struct {
+	elements       []float64 // values in input order
+	elementsSorted []float64 // ascending copy of elements, indexed by the percentile methods
+	min            float64   // smallest element
+	max            float64   // largest element
+	factor         int       // histogram divisor: ((int(max)-int(min))<<8)/(len(ticks)-1), at least 1
+	sum            float64   // sum of all elements
+}
+
+// avg returns the arithmetic mean: the stored sum divided by the number of
+// elements.
+func (s *sequence) avg() float64 { return s.sum / float64(len(s.elements)) }
+
+// p50 returns the 50th percentile: the element of the ascending copy at
+// index len*50/100, computed with integer division.
+func (s *sequence) p50() float64 { return s.elementsSorted[len(s.elementsSorted)*50/100] }
+
+// p90 returns the 90th percentile: the element of the ascending copy at
+// index len*90/100, computed with integer division.
+func (s *sequence) p90() float64 { return s.elementsSorted[len(s.elementsSorted)*90/100] }
+
+// p99 returns the 99th percentile: the element of the ascending copy at
+// index len*99/100, computed with integer division.
+func (s *sequence) p99() float64 { return s.elementsSorted[len(s.elementsSorted)*99/100] }
+
+// p999 returns the 99.9th percentile: the element of the ascending copy at
+// index len*999/1000, computed with integer division.
+func (s *sequence) p999() float64 { return s.elementsSorted[len(s.elementsSorted)*999/1000] }
+// histogram returns one tick per element, in input order, chosen by the element's scaled integer offset from min.
+func (s *sequence) histogram() []rune {
+	bars := make([]rune, 0, len(s.elements))
+	for _, num := range s.elements {
+		bars = append(bars, ticks[((int(num)-int(s.min))<<8)/s.factor])
+	}
+	return bars
+}
+
+// newSequence builds a sequence from elements: it records min, max and sum,
+// derives the histogram scaling factor, and keeps an ascending copy for the
+// percentile methods. elements must be non-empty (elements[0] seeds min/max).
+func newSequence(elements []float64) *sequence {
+	seq := &sequence{
+		elements: elements,
+		min:      elements[0],
+		max:      elements[0],
+	}
+
+	for _, value := range seq.elements {
+		if value < seq.min {
+			seq.min = value
+		}
+		if value > seq.max {
+			seq.max = value
+		}
+		seq.sum += value
+	}
+
+	// Integer range scaled by 256 (<<8) and split across the tick steps; kept
+	// at 1 or more so histogram never divides by zero.
+	seq.factor = ((int(seq.max) - int(seq.min)) << 8) / (len(ticks) - 1)
+	if seq.factor < 1 {
+		seq.factor = 1
+	}
+
+	seq.elementsSorted = make([]float64, len(elements))
+	copy(seq.elementsSorted, elements)
+	sort.Float64s(seq.elementsSorted)
+	return seq
+}
diff --git a/tools/seqstat/sequence_test.go b/tools/seqstat/sequence_test.go
new file mode 100644
index 0000000..1299086
--- /dev/null
+++ b/tools/seqstat/sequence_test.go
@@ -0,0 +1,49 @@
+package main
+
+import (
+	"testing"
+)
+
+// TestHistogram checks the tick string rendered by newSequence(in).histogram()
+// for a few fixed inputs.
+func TestHistogram(t *testing.T) {
+	for _, tc := range []struct {
+		in   []float64
+		want string
+	}{
+		{[]float64{1, 2}, "▁█"},
+		{[]float64{1, 10, 4}, "▁█▃"},
+		{[]float64{1, 5, 22, 13, 53}, "▁▁▃▂█"},
+	} {
+		got := string(newSequence(tc.in).histogram())
+		if got != tc.want {
+			t.Errorf("Not matching: got %q, want %q", got, tc.want)
+		}
+	}
+}
+
+// TestStats checks min, max and p999 of a sequence built from a fixed input.
+func TestStats(t *testing.T) {
+	cases := []struct {
+		in   []float64
+		min  float64
+		max  float64
+		p999 float64
+	}{
+		{[]float64{1, 10, 52, 12}, 1, 52, 52},
+	}
+
+	for _, c := range cases {
+		seq := newSequence(c.in)
+		if seq.min != c.min {
+			t.Errorf("Not matching: got min %f want %f", seq.min, c.min)
+		}
+		if seq.max != c.max {
+			t.Errorf("Not matching: got max %f want %f", seq.max, c.max)
+		}
+		// Checked independently of max so a wrong percentile is reported even when max matches.
+		if got := seq.p999(); got != c.p999 {
+			t.Errorf("Not matching: got p999 %f want %f", got, c.p999)
+		}
+	}
+}