From 9f8fe3021ec5eeaa66c0a5b14b5e5de32a2a35dc Mon Sep 17 00:00:00 2001
From: matthew-hennefarth <matthew.hennefarth@gmail.com>
Date: Sun, 24 Nov 2024 22:05:08 -0600
Subject: [PATCH] Verify Examples (#2379)

* add verify_examples

* make executable

* minor fix relating to cwd

* add doc string and usage example
---
 tools/verify_examples.py | 257 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100755 tools/verify_examples.py

diff --git a/tools/verify_examples.py b/tools/verify_examples.py
new file mode 100755
index 0000000000..3f9d484c5e
--- /dev/null
+++ b/tools/verify_examples.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+# Copyright 2014-2024 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Verify Examples
+===============
+
+Author: Matthew R. Hennefarth
+
+Script that automatically runs the PySCF example scripts and verifies that they
+terminate successfully. For any example that does not terminate normally, its
+stderr will be printed to the output. This script will exit with 0 only if all
+examples terminate normally.
+
+Initially introduced in [PR 2379](https://github.com/pyscf/pyscf/pull/2379). 
+
+Usage
+-------------
+
+From the main pyscf repository directory, the tests can be run as 
+```sh
+./tools/verify_examples.py examples
+```
+This will run all example files (which can take a long time). To run only a
+subset of examples, provide a path to a subdirectory instead. For example, to
+run only the example files in `pyscf/examples/gto`, use the command
+```sh
+./tools/verify_examples.py examples/gto
+```
+It is also possible to run the examples in parallel using the `-j` or `--jobs`
+flag (this is similar to make). As an example, to run the examples as 4
+parallel jobs,
+```sh
+./tools/verify_examples.py -j 4 examples
+```
+Note that environment variables such as `OMP_NUM_THREADS` should be set to an
+appropriate value so that (number of jobs) * OMP_NUM_THREADS does not exceed
+the number of cores on the computer.
+
+"""
+
+import os
+import sys
+import time
+import subprocess
+import argparse
+import logging
+
+import multiprocessing as mp
+from glob import glob
+from enum import Enum
+
+logging.basicConfig(level=logging.DEBUG, format="%(message)s")
+# Root logger: every message printed by this script goes through it.
+logger = logging.getLogger()
+
+
+class StdOutFilter(logging.Filter):
+    def filter(self, record):  # lets through only records below ERROR
+        return record.levelno < logging.ERROR
+
+
+stdout_handler = logging.StreamHandler(sys.stdout)
+stdout_handler.setLevel(logging.INFO)
+stdout_handler.addFilter(StdOutFilter())
+# Records at ERROR and above are written to stderr instead of stdout.
+stderr_handler = logging.StreamHandler(sys.stderr)
+stderr_handler.setLevel(logging.ERROR)
+# Replace whatever handlers the root logger already has with the two above.
+logger.handlers = []
+logger.addHandler(stdout_handler)
+logger.addHandler(stderr_handler)
+
+
+class ANSIColors(Enum):
+    RESET = "\033[0m"  # SGR 0: reset all attributes
+    RED = "\033[31m"  # SGR 31: red foreground
+    GREEN = "\033[32m"  # SGR 32: green foreground
+
+
+def colorize(text, color):
+    """Return text wrapped in color (an ANSIColors member) if stdout is a terminal."""
+    if not sys.stdout.isatty():
+        return text  # keep piped or redirected output free of escape codes
+    return f"{color.value}{text}{ANSIColors.RESET.value}"  # value is a full escape
+
+
+class Status(Enum):  # member values are the (possibly colored) text shown to the user
+    OK = colorize("ok", ANSIColors.GREEN)
+    FAIL = colorize("FAILED", ANSIColors.RED)
+
+
+def get_path(p):
+    """Return the directory to search for examples, given a user-supplied path.
+
+    Gives p/examples when p is not named "examples" but contains that directory,
+    otherwise p itself. Raises ValueError if p is not an existing directory.
+    """
+    if not os.path.isdir(p):
+        raise ValueError("Path does not point to directory")
+    nested = os.path.join(p, "examples")
+    use_nested = os.path.basename(p) != "examples" and os.path.isdir(nested)
+    return nested if use_nested else p
+
+
+class ExampleResults:
+    """Mutable summary of one verification run, filled in by run_examples."""
+
+    def __init__(self):
+        self.common_prefix = ""  # directory that displayed paths are relative to
+        self.failed_examples = []  # (example path, captured stderr) pairs
+        self.passed = self.failed = self.filtered = 0  # example counts
+        self.time = 0.0  # wall-clock seconds spent running the examples
+        self.status = Status.OK
+
+
+def run_example(progress, nexamples, example, failed_examples, common_prefix):
+    """Run one example script and log a progress line with its outcome.
+
+    progress is an (idx, lock) pair; idx.value counts finished examples and is
+    updated while holding lock. When the script exits with a non-zero status,
+    (example, captured stderr) is appended to failed_examples.
+    """
+    idx, lock = progress
+    status = Status.OK
+    try:
+        # Use the running interpreter, not whichever "python3" is first on PATH.
+        subprocess.run(
+            [sys.executable, os.path.basename(example)],
+            cwd=os.path.dirname(example),
+            stderr=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            check=True,
+            text=True,
+        )
+    except subprocess.CalledProcessError as e:
+        status = Status.FAIL
+        failed_examples.append((example, e.stderr))
+    with lock:
+        idx.value += 1
+        percent = int(100 * idx.value / nexamples)
+    shown = os.path.relpath(example, common_prefix)
+    logger.info(f"[{percent:3}%]: {shown} ... {status.value}")
+
+
+def run_examples(example_path, num_threads):
+    """Run every ``*.py`` file found under example_path and collect the outcome.
+
+    Examples are executed by a pool of num_threads worker processes. Returns an
+    ExampleResults holding pass/fail counts, the stderr of each failed example
+    and the elapsed wall-clock time.
+    """
+    examples = [
+        y for x in os.walk(example_path) for y in glob(os.path.join(x[0], "*.py"))
+    ]
+    # remove symlinks?
+    # examples = list(set([os.path.realpath(e) for e in examples]))
+
+    examples = sorted(examples, key=lambda e: e.split("/"))
+
+    results = ExampleResults()
+    # os.path.commonpath raises ValueError on an empty sequence, so fall back to
+    # the search directory when no example was found.
+    results.common_prefix = (
+        os.path.dirname(os.path.commonpath(examples)) if examples else example_path
+    )
+
+    with mp.Manager() as manager:
+        failed_examples = manager.list()
+        progress = (manager.Value("i", 0), manager.Lock())
+        jobs = [
+            (progress, len(examples), example, failed_examples, results.common_prefix)
+            for example in examples
+        ]
+
+        logger.info("")
+        logger.info(f"running {len(examples)} examples")
+        tic = time.perf_counter()
+        with mp.Pool(num_threads) as pool:
+            pool.starmap(run_example, jobs)
+        results.time = time.perf_counter() - tic
+        results.failed_examples = list(failed_examples)
+
+    results.failed = len(results.failed_examples)
+    results.passed = len(examples) - results.failed
+    results.status = Status.FAIL if results.failed else Status.OK
+    return results
+
+
+def log_failures(results):
+    """Log the captured stderr of each failed example, then list their paths."""
+    prefix = results.common_prefix  # paths are displayed relative to this
+    for line in ("", "failures: ", ""):
+        logger.info(line)
+    for path, stderr in results.failed_examples:
+        logger.info(f"---- {os.path.relpath(path, prefix)} stderr ----")
+        logger.info(stderr)
+
+    for line in ("", "failures:"):
+        logger.info(line)
+    for path, _ in results.failed_examples:
+        logger.info(f"    {os.path.relpath(path, prefix)}")
+
+
+def main():
+    """Parse the command line, run the examples and exit 0 only if all passed."""
+    parser = argparse.ArgumentParser(description="Verify pyscf examples")
+    parser.add_argument(
+        "path",
+        # A positional argument only honours `default` when it is optional, so
+        # nargs="?" is what lets the script be run without giving a path.
+        nargs="?",
+        default="examples",
+        help="Path to examples directory (default: examples)",
+    )
+    parser.add_argument(
+        "-j",
+        "--jobs",
+        type=int,
+        default=1,
+        help="Number of parallel threads (default: 1)",
+    )
+    args = parser.parse_args()
+
+    results = run_examples(get_path(args.path), args.jobs)
+
+    if results.status is Status.FAIL:
+        log_failures(results)
+
+    logger.info("")
+    logger.info(
+        f"example results: {results.status.value}. {results.passed} passed; {results.failed} failed; {results.filtered} filtered out; finished in {results.time:.2f}s"
+    )
+    logger.info("")
+
+    if results.status is Status.OK:
+        sys.exit(0)
+    logger.error(
+        f"{ANSIColors.RED.value}error{ANSIColors.RESET.value}: examples failed"
+    )
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()  # terminates via sys.exit: 0 if every example passed, 1 otherwise