about summary refs log tree commit diff
diff options
context:
space:
mode:
author Franck Cuny <franck@fcuny.net> 2022-10-16 19:03:35 -0700
committer Franck Cuny <franck@fcuny.net> 2022-10-16 19:06:45 -0700
commit 9d3fff359b6c2cea4ac69299f3c1621af8bde64c (patch)
tree 95c5db67b6b94e0967404cb30d51f24f79331d72
parent ops(github): archive the repository govanity (diff)
download world-9d3fff359b6c2cea4ac69299f3c1621af8bde64c.tar.gz
ref(tools/git-blame-stats): rewrite the tool in python
The tool calculates statistics about the authors of a git repository. For
each author, it computes the number of lines attributed to them at a given
revision (HEAD by default), and the number of commits.
-rw-r--r-- home/packages/default.nix 1
-rw-r--r-- tools/default.nix 5
-rw-r--r-- tools/git-blame-stats/default.nix 24
-rwxr-xr-x tools/git-blame-stats/git-blame-stats.py 95
-rw-r--r-- tools/git-blame-stats/go.mod 3
-rw-r--r-- tools/git-blame-stats/main.go 86
6 files changed, 113 insertions, 101 deletions
diff --git a/home/packages/default.nix b/home/packages/default.nix
index 4f3c22b..fd75556 100644
--- a/home/packages/default.nix
+++ b/home/packages/default.nix
@@ -36,6 +36,7 @@ in
         album-to-nas
         tools.ipconverter
         tools.seqstat
+        tools.git-blame-stats
         gh-ssh-keys
       ]
       ++ cfg.additionalPackages);
diff --git a/tools/default.nix b/tools/default.nix
index c266842..4078f35 100644
--- a/tools/default.nix
+++ b/tools/default.nix
@@ -2,14 +2,9 @@
 
 pkgs.lib.makeScope pkgs.newScope (pkgs: {
   dnsupdate = pkgs.callPackage ./dnsupdate { };
-
   gerrit-hook = pkgs.callPackage ./gerrit-hook { };
-
   ipconverter = pkgs.callPackage ./ipconverter { };
-
   seqstat = pkgs.callPackage ./seqstat { };
-
   git-blame-stats = pkgs.callPackage ./git-blame-stats { };
-
   sendsms = pkgs.callPackage ./sendsms { };
 })
diff --git a/tools/git-blame-stats/default.nix b/tools/git-blame-stats/default.nix
index 5071f10..767329b 100644
--- a/tools/git-blame-stats/default.nix
+++ b/tools/git-blame-stats/default.nix
@@ -1,15 +1,25 @@
-{ pkgs, buildGoModule, ... }:
+{ self, lib, python3, stdenvNoCC, pkgs }:
+
+stdenvNoCC.mkDerivation rec {
+  pname = "git-blame-stats";
+  src = ./git-blame-stats.py;
+  version = "0.1.1";
+
+  nativeBuildInputs = with pkgs; [ python3 ];
+
+  dontUnpack = true;
+  dontBuild = true;
+
+  installPhase = ''
+    mkdir -p $out/bin
+    cp $src $out/bin/${pname}
+  '';
 
-buildGoModule rec {
-  name = "git-blame-stats";
-  src = ./.;
-  vendorSha256 = "sha256-pQpattmS9VmO3ZIQUFn66az8GSmB4IvYhTTCFn6SUmo=";
-  nativeBuildInputs = with pkgs; [ go ];
 
   meta = with pkgs.lib; {
     description = "CLI to reports git blame statistics per author.";
     license = licenses.mit;
-    platforms = platforms.linux;
+    platforms = platforms.unix;
     maintainers = [ ];
   };
 }
diff --git a/tools/git-blame-stats/git-blame-stats.py b/tools/git-blame-stats/git-blame-stats.py
new file mode 100755
index 0000000..ee52ce4
--- /dev/null
+++ b/tools/git-blame-stats/git-blame-stats.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+import sys
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "rev", metavar="revision", type=str, help="the revision", default="HEAD", nargs="?"
+)
+args = parser.parse_args()
+
+authors = dict()
+max_lenght_author = 0
+max_lenght_email = 0
+
+
+def get_files(rev):
+    """Returns a list of files for the repository, at the given path, for the given revision."""
+    tree = subprocess.run(
+        ["git", "ls-tree", "--name-only", "-r", rev],
+        capture_output=True,
+        check=True,
+        encoding="utf-8",
+    )
+    return tree.stdout.splitlines()
+
+
+def line_info(filename, rev):
+    """Generates a set of commit blocks using `git blame` for a file.
+
+    Each block corresponds to the information about a single line of code."""
+    blame = subprocess.run(
+        ["git", "blame", "-w", "--line-porcelain", rev, "--", filename],
+        capture_output=True,
+        encoding="utf-8",
+        check=True,
+    )
+    block = []
+    for line in blame.stdout.splitlines():
+        block.append(line)
+        if line.startswith("\t"):
+            yield block
+            block = []
+
+
+files = get_files(args.rev)
+
+for filename in files:
+    try:
+        for block in line_info(filename.rstrip(), args.rev):
+            author = None
+            author_email = None
+            commit = None
+            skip = False
+            for i, val in enumerate(block):
+                if i == 0:
+                    commit = val.split()[0]
+                    continue
+                if val.startswith("author "):
+                    author = " ".join(val.split()[1:])
+                    continue
+                if val.startswith("author-mail"):
+                    author_email = " ".join(val.split()[1:])
+                    continue
+                if val.startswith("\t") and val == "\t":
+                    skip == True
+            if skip:
+                continue
+            if authors.get(author, None) == None:
+                authors[author] = {
+                    "email": author_email,
+                    "commits": set(),
+                    "files": set(),
+                    "lines": 0,
+                }
+            authors[author]["commits"].add(commit)
+            authors[author]["files"].add(filename)
+            authors[author]["lines"] += 1
+            if len(author) > max_lenght_author:
+                max_lenght_author = len(author)
+            if len(author_email) > max_lenght_email:
+                max_lenght_email = len(author_email)
+    except Exception as e:
+        continue
+
+for author, stats in authors.items():
+    email = stats["email"]
+    lines = stats["lines"]
+    commits = len(stats["commits"])
+    files = len(stats["files"])
+    print(
+        f"{author:{max_lenght_author}} {email:{max_lenght_email}} {lines:6} {commits:6} {files:6}"
+    )
diff --git a/tools/git-blame-stats/go.mod b/tools/git-blame-stats/go.mod
deleted file mode 100644
index 4738ac4..0000000
--- a/tools/git-blame-stats/go.mod
+++ /dev/null
@@ -1,3 +0,0 @@
-module golang.fcuny.net/git-blame-stats
-
-go 1.17
diff --git a/tools/git-blame-stats/main.go b/tools/git-blame-stats/main.go
deleted file mode 100644
index 8b1bc9a..0000000
--- a/tools/git-blame-stats/main.go
+++ /dev/null
@@ -1,86 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"log"
-	"os"
-	"os/exec"
-	"regexp"
-	"sort"
-	"strings"
-)
-
-func main() {
-	rev := "HEAD"
-	if len(os.Args) == 2 {
-		rev = os.Args[1]
-	}
-
-	files := gitListFiles(rev)
-
-	authors := gitBlameFiles(rev, files)
-
-	sortedAuthors, keys := sortAuthors(authors)
-
-	rank := 1
-
-	for _, k := range keys {
-		for i := 0; i < len(sortedAuthors[k]); i++ {
-			fmt.Printf("%3d - %6d %s\n", rank, k, sortedAuthors[k][i])
-			rank = rank + 1
-		}
-	}
-}
-
-func gitListFiles(rev string) []string {
-	out, err := exec.Command("git", "ls-tree", "--name-only", "-r", rev).Output()
-	if err != nil {
-		log.Fatal(err)
-	}
-	files := strings.Split(string(out), "\n")
-	return files
-}
-
-func gitBlameFiles(rev string, files []string) map[string]int {
-	authors := make(map[string]int)
-
-	for i := 0; i < len(files)-1; i++ {
-		gitBlameFile(rev, files[i], authors)
-	}
-	return authors
-}
-
-func gitBlameFile(rev, file string, authors map[string]int) {
-	out, err := exec.Command("git", "blame", "-e", "-w", rev, "--", file).Output()
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	lines := strings.Split(string(out), "\n")
-
-	authorRegex := regexp.MustCompile(`^.*?\((.*?)\s*\d{4}-\d{2}-\d{2}.*`)
-
-	for j := 0; j < len(lines)-1; j++ {
-		if string(lines[j][0]) != "^" {
-			matched := authorRegex.FindStringSubmatch(string(lines[j]))
-			if len(matched) > 0 {
-				authors[matched[1]] = authors[matched[1]] + 1
-			}
-		}
-	}
-}
-
-func sortAuthors(authors map[string]int) (map[int][]string, []int) {
-	var keys []int
-	sortedAuthors := make(map[int][]string)
-
-	for k, v := range authors {
-		sortedAuthors[v] = append(sortedAuthors[v], k)
-		if len(sortedAuthors[v]) == 1 {
-			keys = append(keys, v)
-		}
-	}
-	sort.Sort(sort.Reverse(sort.IntSlice(keys)))
-
-	return sortedAuthors, keys
-}