diff options
author | Franck Cuny <franck@fcuny.net> | 2024-03-06 06:29:24 -0800 |
---|---|---|
committer | Franck Cuny <franck@fcuny.net> | 2024-03-06 06:29:24 -0800 |
commit | 1e4a5aa09c1c8f43722c9c260f011398799a8e8f (patch) | |
tree | cd73e0fb8ba53bd21cee6ccf2dcc85639bbbb93f /packages/git-blame-stats/git-blame-stats.py | |
parent | set correct git email in the profiles (diff) | |
download | world-1e4a5aa09c1c8f43722c9c260f011398799a8e8f.tar.gz |
rename `tools` to `packages` to follow convention
The convention is to use `pkgs` or `packages` for overlays and definition of custom packages. Since I'm already using `pkg` for go, I prefer to use `packages` for my scripts.
Diffstat (limited to 'packages/git-blame-stats/git-blame-stats.py')
-rwxr-xr-x | packages/git-blame-stats/git-blame-stats.py | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/packages/git-blame-stats/git-blame-stats.py b/packages/git-blame-stats/git-blame-stats.py new file mode 100755 index 0000000..3cc4f4a --- /dev/null +++ b/packages/git-blame-stats/git-blame-stats.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +import argparse +import subprocess +from typing import Any + + +parser = argparse.ArgumentParser() +parser.add_argument( + "rev", metavar="revision", type=str, help="the revision", default="HEAD", nargs="?" +) +args = parser.parse_args() + +authors: dict[str, Any] = dict() +max_lenght_author = 0 +max_lenght_email = 0 + + +def get_files(rev): + """Returns a list of files for the repository, at the given path, for the given revision.""" + tree = subprocess.run( + ["git", "ls-tree", "--name-only", "-r", rev], + capture_output=True, + check=True, + encoding="utf-8", + ) + return tree.stdout.splitlines() + + +def line_info(filename, rev): + """Generates a set of commit blocks using `git blame` for a file. + + Each block corresponds to the information about a single line of code.""" + blame = subprocess.run( + ["git", "blame", "-w", "--line-porcelain", rev, "--", filename], + capture_output=True, + encoding="utf-8", + check=True, + ) + block = [] + for line in blame.stdout.splitlines(): + block.append(line) + if line.startswith("\t"): + yield block + block = [] + + +files = get_files(args.rev) + +for filename in files: + try: + for block in line_info(filename.rstrip(), args.rev): + author = "" + author_email = "" + commit = "" + skip = False + for i, val in enumerate(block): + if i == 0: + commit = val.split()[0] + continue + if val.startswith("author "): + author = " ".join(val.split()[1:]) + continue + if val.startswith("author-mail"): + author_email = " ".join(val.split()[1:]) + continue + if val.startswith("\t") and val == "\t": + skip = True + if skip: + continue + if authors.get(author, None) is None: + authors[author] = { + "email": author_email, + "commits": set(), + "files": set(), + "lines": 0, + } + authors[author]["commits"].add(commit) + authors[author]["files"].add(filename) + authors[author]["lines"] += 1 + if len(author) > max_lenght_author: + max_lenght_author = len(author) + if len(author_email) > max_lenght_email: + max_lenght_email = len(author_email) + except Exception: + continue + +for author, stats in authors.items(): + email = stats["email"] + lines = stats["lines"] + commits = len(stats["commits"]) + files = len(stats["files"]) + print( + f"{author:{max_lenght_author}} {email:{max_lenght_email}} {lines:6} {commits:6} {files:6}" + ) |