about summary refs log tree commit diff
path: root/tools/git-blame-stats/git-blame-stats.py
blob: 3cc4f4a7906259e35f7e3eee57725397f7bf9293 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3

import argparse
import subprocess
from typing import Any


# Command line: one optional positional argument naming the revision to
# analyse; "HEAD" is used when it is omitted.
parser = argparse.ArgumentParser()
parser.add_argument(
    "rev",
    nargs="?",
    default="HEAD",
    type=str,
    metavar="revision",
    help="the revision",
)
args = parser.parse_args()

# Per-author statistics, keyed by author name.
authors: dict[str, Any] = {}
# Widest author name and e-mail seen so far; the report uses them as column
# widths.
max_lenght_author = 0
max_lenght_email = 0


def get_files(rev):
    """Return the paths recorded in the tree of revision ``rev``.

    The listing comes from ``git ls-tree -r --name-only`` and is requested in
    NUL-terminated form (``-z``).  In that mode git neither quotes nor escapes
    path names, so names containing non-ASCII characters, quotes or even
    newlines come back verbatim and can be handed straight to other git
    commands.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status (for example when ``rev`` does not exist).
    """
    tree = subprocess.run(
        ["git", "ls-tree", "-z", "--name-only", "-r", rev],
        capture_output=True,
        check=True,
        encoding="utf-8",
        # Path names are arbitrary bytes; surrogateescape keeps a name that is
        # not valid UTF-8 from aborting the whole listing.
        errors="surrogateescape",
    )
    # The output ends with a NUL, so the final split element is empty.
    return [path for path in tree.stdout.split("\0") if path]


def line_info(filename, rev):
    """Yield one block of `git blame --line-porcelain` output per source line.

    Each block is a list of strings: the header lines git printed for the
    line, followed by the line's content, which is the first line of the block
    that starts with a tab character.

    ``git blame`` is run (ignoring whitespace changes, ``-w``) when the first
    block is requested.  ``subprocess.CalledProcessError`` is raised if git
    exits with a non-zero status and ``UnicodeDecodeError`` if its output is
    not valid UTF-8.
    """
    blame = subprocess.run(
        ["git", "blame", "-w", "--line-porcelain", rev, "--", filename],
        capture_output=True,
        encoding="utf-8",
        check=True,
    )
    block = []
    # Split on "\n" only.  str.splitlines() also treats form feeds, vertical
    # tabs, U+2028 and similar characters as line boundaries; those can occur
    # inside a source line and would cut its content line in two, leaving
    # every following block misaligned.
    for line in blame.stdout.split("\n"):
        block.append(line)
        if line.startswith("\t"):
            yield block
            block = []


files = get_files(args.rev)

# Credit every non-blank line of every listed file to the author reported for
# it by `git blame`, collecting per-author line, commit and file statistics.
for filename in files:
    try:
        # The name is passed on verbatim: trailing whitespace can be part of a
        # file name.  Draining the generator here means a failing blame is
        # noticed before any statistics are touched for this file.
        blocks = list(line_info(filename, args.rev))
    except (subprocess.CalledProcessError, UnicodeDecodeError):
        # Best effort: entries git cannot blame (non-zero exit status) and
        # output that is not valid UTF-8 are left out of the statistics.
        continue

    for block in blocks:
        # The last line of a block is the tab-prefixed source line; a bare tab
        # means the source line is empty, and empty lines are not counted.
        if block[-1] == "\t":
            continue

        # The first field of the first line is the commit the line belongs to.
        header = block[0].split()
        if not header:
            # Malformed block without a commit field: nothing to attribute.
            continue
        commit = header[0]

        author = ""
        author_email = ""
        for val in block[1:]:
            if val.startswith("author "):
                author = " ".join(val.split()[1:])
            elif val.startswith("author-mail"):
                author_email = " ".join(val.split()[1:])

        stats = authors.get(author)
        if stats is None:
            # First line seen for this author; the e-mail recorded is the one
            # attached to that first line.
            stats = authors[author] = {
                "email": author_email,
                "commits": set(),
                "files": set(),
                "lines": 0,
            }
        stats["commits"].add(commit)
        stats["files"].add(filename)
        stats["lines"] += 1

        # Track the column widths needed by the report.
        max_lenght_author = max(max_lenght_author, len(author))
        max_lenght_email = max(max_lenght_email, len(author_email))

# Print one row per author: name and e-mail padded to the widest value seen,
# followed by the number of lines, distinct commits and distinct files, each
# in a 6-character column.
for name, record in authors.items():
    columns = (
        f"{name:{max_lenght_author}}",
        f"{record['email']:{max_lenght_email}}",
        f"{record['lines']:6}",
        f"{len(record['commits']):6}",
        f"{len(record['files']):6}",
    )
    print(" ".join(columns))