about summary refs log tree commit diff
path: root/tools/git-blame-stats/git-blame-stats.py
diff options
context:
space:
mode:
author Franck Cuny <franck@fcuny.net> 2022-10-16 19:03:35 -0700
committer Franck Cuny <franck@fcuny.net> 2022-10-16 19:06:45 -0700
commit 9d3fff359b6c2cea4ac69299f3c1621af8bde64c (patch)
tree 95c5db67b6b94e0967404cb30d51f24f79331d72 /tools/git-blame-stats/git-blame-stats.py
parent ops(github): archive the repository govanity (diff)
download world-9d3fff359b6c2cea4ac69299f3c1621af8bde64c.tar.gz
ref(tools/git-blame-stats): rewrite the tool in python
The tool calculates statistics about the authors of a git repository. For a
given revision (HEAD by default), it computes the number of lines attributed
to each author and the number of commits.
Diffstat (limited to 'tools/git-blame-stats/git-blame-stats.py')
-rwxr-xr-x tools/git-blame-stats/git-blame-stats.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/tools/git-blame-stats/git-blame-stats.py b/tools/git-blame-stats/git-blame-stats.py
new file mode 100755
index 0000000..ee52ce4
--- /dev/null
+++ b/tools/git-blame-stats/git-blame-stats.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+import sys
+
+
+# Command-line interface: one optional positional argument, the revision to
+# inspect, which falls back to HEAD when omitted.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "rev",
+    nargs="?",
+    default="HEAD",
+    type=str,
+    metavar="revision",
+    help="the revision",
+)
+args = parser.parse_args()
+
+# Per-author statistics, keyed by author name.
+authors = {}
+# Widest author name / e-mail seen so far; used to align the report columns.
+# (The "lenght" spelling is kept because the rest of the script uses it.)
+max_lenght_author = max_lenght_email = 0
+
+
+def get_files(rev):
+    """Return the paths of all files recorded in the tree of `rev`.
+
+    Runs `git ls-tree -r` in the current working directory.
+
+    Args:
+        rev: the revision (commit, tag, branch, ...) whose tree is listed.
+
+    Returns:
+        A list of file paths, one string per entry reported by git.
+
+    Raises:
+        subprocess.CalledProcessError: if git exits with a non-zero status
+            (for instance when `rev` does not exist).
+    """
+    tree = subprocess.run(
+        # -z makes git emit NUL-terminated, *unquoted* paths. Without it,
+        # names containing non-ASCII or special characters are C-quoted
+        # ("caf\303\251.txt") and cannot be passed back to other git commands.
+        ["git", "ls-tree", "--name-only", "-r", "-z", rev],
+        capture_output=True,
+        check=True,
+        encoding="utf-8",
+    )
+    # The output ends with a trailing NUL, so drop the empty last field.
+    return [path for path in tree.stdout.split("\0") if path]
+
+
+def line_info(filename, rev):
+    """Yield one block of `git blame --line-porcelain` output per blamed line.
+
+    Each block is a list of strings: the porcelain header lines for one line
+    of `filename`, terminated by the tab-prefixed content line itself.
+
+    Args:
+        filename: path of the file to blame.
+        rev: the revision at which the file is blamed.
+
+    Yields:
+        list[str]: the header lines followed by the "\\t<content>" line.
+
+    Raises:
+        subprocess.CalledProcessError: if git exits with a non-zero status.
+    """
+    blame = subprocess.run(
+        ["git", "blame", "-w", "--line-porcelain", rev, "--", filename],
+        capture_output=True,
+        encoding="utf-8",
+        check=True,
+    )
+    block = []
+    # Split on "\n" only. str.splitlines() also breaks on form feeds, vertical
+    # tabs, U+0085, U+2028, ... which may legitimately appear inside a source
+    # line; the tail of such a line would leak into the next block and be
+    # mistaken for its commit header.
+    for line in blame.stdout.split("\n"):
+        block.append(line)
+        # The content line is the only one starting with a tab and is always
+        # the last line of a porcelain entry.
+        if line.startswith("\t"):
+            yield block
+            block = []
+
+
+files = get_files(args.rev)
+
+# Blame every file and fold each blamed line into the per-author statistics.
+for filename in files:
+    try:
+        for block in line_info(filename.rstrip(), args.rev):
+            author = None
+            author_email = None
+            commit = None
+            skip = False
+            for i, val in enumerate(block):
+                if i == 0:
+                    # First header line starts with the commit hash.
+                    commit = val.split()[0]
+                elif val.startswith("author "):
+                    author = " ".join(val.split()[1:])
+                elif val.startswith("author-mail"):
+                    author_email = " ".join(val.split()[1:])
+                elif val == "\t":
+                    # Content line holding nothing but the tab marker, i.e. a
+                    # blank source line: do not count it.
+                    skip = True
+            if skip:
+                continue
+            if author not in authors:
+                authors[author] = {
+                    "email": author_email,
+                    "commits": set(),
+                    "files": set(),
+                    "lines": 0,
+                }
+            entry = authors[author]
+            entry["commits"].add(commit)
+            entry["files"].add(filename)
+            entry["lines"] += 1
+            max_lenght_author = max(max_lenght_author, len(author))
+            max_lenght_email = max(max_lenght_email, len(author_email))
+    except Exception as e:
+        # Best effort: a file that cannot be blamed must not abort the whole
+        # run, but say so instead of silently dropping it. Lines already
+        # counted for this file before the failure are kept.
+        print(f"warning: skipping {filename}: {e}", file=sys.stderr)
+        continue
+
+# One row per author: name, e-mail, then line, commit and file counts.
+for author, stats in authors.items():
+    email = stats["email"]
+    lines = stats["lines"]
+    commits = len(stats["commits"])
+    file_count = len(stats["files"])
+    print(
+        f"{author:{max_lenght_author}} {email:{max_lenght_email}} {lines:6} {commits:6} {file_count:6}"
+    )