Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some functionality to misc/perf_compare.py #18471

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 73 additions & 20 deletions misc/perf_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ def build_mypy(target_dir: str) -> None:
subprocess.run(cmd, env=env, check=True, cwd=target_dir)


def clone(target_dir: str, commit: str | None) -> None:
heading(f"Cloning mypy to {target_dir}")
repo_dir = os.getcwd()
def clone(target_dir: str, commit: str | None, repo_source: str | None = None) -> None:
source_name = repo_source or "mypy"
heading(f"Cloning {source_name} to {target_dir}")
if repo_source is None:
repo_source = os.getcwd()
if os.path.isdir(target_dir):
print(f"{target_dir} exists: deleting")
shutil.rmtree(target_dir)
subprocess.run(["git", "clone", repo_dir, target_dir], check=True)
subprocess.run(["git", "clone", repo_source, target_dir], check=True)
if commit:
subprocess.run(["git", "checkout", commit], check=True, cwd=target_dir)

Expand All @@ -64,7 +66,7 @@ def edit_python_file(fnam: str) -> None:


def run_benchmark(
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None
) -> float:
cache_dir = os.path.join(compiled_dir, ".mypy_cache")
if os.path.isdir(cache_dir) and not incremental:
Expand All @@ -76,6 +78,8 @@ def run_benchmark(
cmd = [sys.executable, "-m", "mypy"]
if code:
cmd += ["-c", code]
elif foreign:
pass
else:
cmd += ["--config-file", os.path.join(abschk, "mypy_self_check.ini")]
cmd += glob.glob(os.path.join(abschk, "mypy/*.py"))
Expand All @@ -86,18 +90,30 @@ def run_benchmark(
edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
t0 = time.time()
# Ignore errors, since some commits being measured may generate additional errors.
subprocess.run(cmd, cwd=compiled_dir, env=env)
if foreign:
subprocess.run(cmd, cwd=check_dir, env=env)
else:
subprocess.run(cmd, cwd=compiled_dir, env=env)
return time.time() - t0


def main() -> None:
parser = argparse.ArgumentParser()
whole_program_time_0 = time.time()
parser = argparse.ArgumentParser(
epilog="Remember: you usually want the first argument to this command to be 'master'."
)
parser.add_argument(
"--incremental",
default=False,
action="store_true",
help="measure incremental run (fully cached)",
)
parser.add_argument(
"--dont-setup",
default=False,
action="store_true",
help="don't make the dirs or compile mypy, just run the performance measurement benchmark",
)
parser.add_argument(
"--num-runs",
metavar="N",
Expand All @@ -112,6 +128,14 @@ def main() -> None:
type=int,
help="set maximum number of parallel builds (default=8)",
)
parser.add_argument(
"-r",
metavar="FOREIGN_REPOSITORY",
default=None,
type=str,
help="measure time to type check the project at FOREIGN_REPOSITORY instead of mypy self-check; "
+ "provided value must be the URL or path of a git repo",
)
parser.add_argument(
"-c",
metavar="CODE",
Expand All @@ -122,10 +146,12 @@ def main() -> None:
parser.add_argument("commit", nargs="+", help="git revision to measure (e.g. branch name)")
args = parser.parse_args()
incremental: bool = args.incremental
dont_setup: bool = args.dont_setup
commits = args.commit
num_runs: int = args.num_runs + 1
max_workers: int = args.j
code: str | None = args.c
foreign_repo: str | None = args.r

if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
sys.exit("error: Run this the mypy repo root")
Expand All @@ -134,20 +160,28 @@ def main() -> None:
for i, commit in enumerate(commits):
target_dir = f"mypy.{i}.tmpdir"
target_dirs.append(target_dir)
clone(target_dir, commit)
if not dont_setup:
clone(target_dir, commit)

self_check_dir = "mypy.self.tmpdir"
clone(self_check_dir, commits[0])
if foreign_repo:
check_dir = "mypy.foreign.tmpdir"
if not dont_setup:
clone(check_dir, None, foreign_repo)
else:
check_dir = "mypy.self.tmpdir"
if not dont_setup:
clone(check_dir, commits[0])

heading("Compiling mypy")
print("(This will take a while...)")
if not dont_setup:
heading("Compiling mypy")
print("(This will take a while...)")

with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()

print(f"Finished compiling mypy ({len(commits)} builds)")
print(f"Finished compiling mypy ({len(commits)} builds)")

heading("Performing measurements")

Expand All @@ -160,7 +194,13 @@ def main() -> None:
items = list(enumerate(commits))
random.shuffle(items)
for i, commit in items:
tt = run_benchmark(target_dirs[i], self_check_dir, incremental=incremental, code=code)
tt = run_benchmark(
target_dirs[i],
check_dir,
incremental=incremental,
code=code,
foreign=bool(foreign_repo),
)
# Don't record the first warm-up run
if n > 0:
print(f"{commit}: t={tt:.3f}s")
Expand All @@ -171,15 +211,28 @@ def main() -> None:
first = -1.0
for commit in commits:
tt = statistics.mean(results[commit])
# pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
if first < 0:
delta = "0.0%"
first = tt
else:
d = (tt / first) - 1
delta = f"{d:+.1%}"
print(f"{commit:<25} {tt:.3f}s ({delta})")
print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")

t = int(time.time() - whole_program_time_0)
total_time_taken_formatted = ", ".join(
f"{v} {n if v==1 else n+'s'}"
for v, n in ((t // 3600, "hour"), (t // 60 % 60, "minute"), (t % 60, "second"))
if v
)
print(
"Total time taken by the whole benchmarking program (including any setup):",
total_time_taken_formatted,
)

shutil.rmtree(self_check_dir)
shutil.rmtree(check_dir)
for target_dir in target_dirs:
shutil.rmtree(target_dir)

Expand Down
Loading