about summary refs log tree commit diff
path: root/tools/seqstat
diff options
context:
space:
mode:
author: Franck Cuny <franck@fcuny.net> 2022-10-15 12:32:28 -0700
committer: Franck Cuny <franck@fcuny.net> 2022-10-15 12:32:28 -0700
commit: ce39b5fd65a91e241b2edbb9fd6a9a12967a064d (patch)
tree: bd0ec3519ac3a41c55193023dc3f3bb2705114a1 /tools/seqstat
parent: ci: update the flake once a week (diff)
download: world-ce39b5fd65a91e241b2edbb9fd6a9a12967a064d.tar.gz
ref(seqstat): rewrite from go to python
This is a simple script, there's no benefit in having this in Go. Having
it in Python makes it easier to extend with pandas or other libraries in
the future if I need more statistics too.
Diffstat (limited to 'tools/seqstat')
-rw-r--r-- tools/seqstat/default.nix 24
-rw-r--r-- tools/seqstat/go.mod 3
-rw-r--r-- tools/seqstat/seqstat.go 63
-rwxr-xr-x tools/seqstat/seqstat.py 26
-rw-r--r-- tools/seqstat/sequence.go 79
-rw-r--r-- tools/seqstat/sequence_test.go 49
6 files changed, 50 insertions, 194 deletions
diff --git a/tools/seqstat/default.nix b/tools/seqstat/default.nix
new file mode 100644
index 0000000..d45f4bf
--- /dev/null
+++ b/tools/seqstat/default.nix
@@ -0,0 +1,24 @@
+{ self, lib, python3, stdenvNoCC, pkgs }:
+
+stdenvNoCC.mkDerivation rec {
+  pname = "seqstat";
+  src = ./seqstat.py;
+  version = "0.1.0";
+
+  buildInputs = [ python3 ];
+
+  dontUnpack = true;
+  dontBuild = true;
+
+  installPhase = ''
+    mkdir -p $out/bin
+    cp $src $out/bin/${pname}
+  '';
+
+  meta = with lib; {
+    description = "Display an histogram for a given sequence of numbers.";
+    license = with licenses; [ mit ];
+    platforms = platforms.unix;
+    maintainers = with maintainers; [ fcuny ];
+  };
+}
diff --git a/tools/seqstat/go.mod b/tools/seqstat/go.mod
deleted file mode 100644
index 39f343f..0000000
--- a/tools/seqstat/go.mod
+++ /dev/null
@@ -1,3 +0,0 @@
-module golang.fcuny.net/seqstat
-
-go 1.17
diff --git a/tools/seqstat/seqstat.go b/tools/seqstat/seqstat.go
deleted file mode 100644
index 8709fa4..0000000
--- a/tools/seqstat/seqstat.go
+++ /dev/null
@@ -1,63 +0,0 @@
-package main
-
-import (
-	"bufio"
-	"flag"
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-)
-
-var (
-	stats = flag.Bool("S", false, "Display statistics about the sequence.")
-)
-
-func main() {
-	flag.Parse()
-
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "usage: [-S] <INPUT>")
-		flag.PrintDefaults()
-	}
-
-	elements := argsToElements(flag.Args())
-
-	if len(elements) < 1 {
-		scanner := bufio.NewScanner(os.Stdin)
-		var e []string
-		for scanner.Scan() {
-			e = append(e, strings.Split(scanner.Text(), " ")...)
-		}
-		elements = argsToElements(e)
-	}
-
-	seq := newSequence(elements)
-
-	fmt.Println(string(seq.histogram()))
-
-	if *stats {
-		fmt.Printf("min:   %f\n", seq.min)
-		fmt.Printf("max:   %f\n", seq.max)
-		fmt.Printf("avg:   %f\n", seq.avg())
-		fmt.Printf("p50:   %f\n", seq.p50())
-		fmt.Printf("p90:   %f\n", seq.p90())
-		fmt.Printf("p99:   %f\n", seq.p99())
-		fmt.Printf("p999:  %f\n", seq.p999())
-		fmt.Printf("ordered sequence: %v\n", seq.elementsSorted)
-	}
-}
-
-// converts the input to float64
-func argsToElements(args []string) []float64 {
-	elements := make([]float64, len(args))
-
-	for i, input := range args {
-		num, err := strconv.ParseFloat(input, 64)
-		if err != nil {
-			panic(err)
-		}
-		elements[i] = num
-	}
-	return elements
-}
diff --git a/tools/seqstat/seqstat.py b/tools/seqstat/seqstat.py
new file mode 100755
index 0000000..8450ec8
--- /dev/null
+++ b/tools/seqstat/seqstat.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+import argparse
+
+ticks = ["▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"]
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "numbers", metavar="N", type=float, nargs="+", help="a number for the accumulator"
+)
+args = parser.parse_args()
+
+
+def histogram(sequence):
+    min_val = min(sequence)
+    max_val = max(sequence)
+
+    scale = (int(max_val - min_val) << 8) / (len(ticks) - 1)
+    if scale < 1:
+        scale = 1
+
+    return [ticks[int((int(i - min_val) << 8) / scale)] for i in sequence]
+
+
+h = histogram(args.numbers)
+print("".join(h))
diff --git a/tools/seqstat/sequence.go b/tools/seqstat/sequence.go
deleted file mode 100644
index d4ec91b..0000000
--- a/tools/seqstat/sequence.go
+++ /dev/null
@@ -1,79 +0,0 @@
-package main
-
-import (
-	"sort"
-)
-
-var (
-	ticks = []rune{'\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586', '\u2587', '\u2588'}
-)
-
-type sequence struct {
-	elements       []float64
-	elementsSorted []float64
-	min            float64
-	max            float64
-	factor         int
-	sum            float64
-}
-
-func (s *sequence) avg() float64 {
-	return s.sum / float64(len(s.elements))
-}
-
-func (s *sequence) p50() float64 {
-	return s.elementsSorted[len(s.elementsSorted)*50/100]
-}
-
-func (s *sequence) p90() float64 {
-	return s.elementsSorted[len(s.elementsSorted)*90/100]
-}
-
-func (s *sequence) p99() float64 {
-	return s.elementsSorted[len(s.elementsSorted)*99/100]
-}
-
-func (s *sequence) p999() float64 {
-	return s.elementsSorted[len(s.elementsSorted)*999/1000]
-}
-func (s *sequence) histogram() []rune {
-	histogram := make([]rune, len(s.elements))
-	for i, num := range s.elements {
-		v := (((int(num) - int(s.min)) << 8) / s.factor)
-		histogram[i] = ticks[v]
-	}
-	return histogram
-}
-
-func newSequence(elements []float64) *sequence {
-	s := new(sequence)
-	s.elements = elements
-
-	s.min = s.elements[0]
-	s.max = s.elements[0]
-
-	s.sum = 0
-
-	for _, element := range s.elements {
-		if element > s.max {
-			s.max = element
-		}
-		if element < s.min {
-			s.min = element
-		}
-		s.sum += element
-	}
-
-	s.factor = ((int(s.max) - int(s.min)) << 8) / (len(ticks) - 1)
-
-	if s.factor < 1 {
-		s.factor = 1
-	}
-
-	elementsSorted := make([]float64, len(elements))
-	copy(elementsSorted, elements)
-	sort.Float64s(elementsSorted)
-	s.elementsSorted = elementsSorted
-
-	return s
-}
diff --git a/tools/seqstat/sequence_test.go b/tools/seqstat/sequence_test.go
deleted file mode 100644
index 1299086..0000000
--- a/tools/seqstat/sequence_test.go
+++ /dev/null
@@ -1,49 +0,0 @@
-package main
-
-import (
-	"testing"
-)
-
-func TestHistogram(t *testing.T) {
-	cases := []struct {
-		in        []float64
-		histogram []rune
-	}{
-		{[]float64{1, 2}, []rune{'▁', '█'}},
-		{[]float64{1, 10, 4}, []rune{'▁', '█', '▃'}},
-		{[]float64{1, 5, 22, 13, 53}, []rune{'▁', '▁', '▃', '▂', '█'}},
-	}
-
-	for _, c := range cases {
-		seq := newSequence(c.in)
-		if string(seq.histogram()) != string(c.histogram) {
-			t.Errorf("Not matching: got %q, want %q", string(seq.histogram()), string(c.histogram))
-		}
-	}
-}
-
-func TestStats(t *testing.T) {
-	cases := []struct {
-		in   []float64
-		min  float64
-		max  float64
-		p999 float64
-	}{
-		{[]float64{1, 10, 52, 12}, 1, 52, 52},
-	}
-
-	for _, c := range cases {
-		seq := newSequence(c.in)
-		if seq.min != c.min {
-			t.Errorf("Not matching: got min %f want %f", seq.min, c.min)
-		}
-		if seq.max != c.max {
-			if seq.max != c.max {
-				t.Errorf("Not matching: got max %f want %f", seq.max, c.max)
-			}
-			if seq.p999() != c.p999 {
-				t.Errorf("Not matching: got p999 %f want %f", seq.p999(), c.p999)
-			}
-		}
-	}
-}