From 19d2d32bb7dca3b79094e57d53715bfd00887bc0 Mon Sep 17 00:00:00 2001 From: nathan Date: Tue, 19 May 2026 14:09:51 -0700 Subject: [PATCH] Add critical log soak QA gate --- AGRARIAN_DEVELOPMENT_ROADMAP.md | 2 +- Docs/QA/MvpQaGates.md | 22 +++++ Scripts/scan_critical_log_spam.py | 101 ++++++++++++++++++++ Scripts/verify_critical_log_spam_qa_gate.py | 66 +++++++++++++ 4 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 Scripts/scan_critical_log_spam.py create mode 100644 Scripts/verify_critical_log_spam_qa_gate.py diff --git a/AGRARIAN_DEVELOPMENT_ROADMAP.md b/AGRARIAN_DEVELOPMENT_ROADMAP.md index 32b3540..0b67b0b 100644 --- a/AGRARIAN_DEVELOPMENT_ROADMAP.md +++ b/AGRARIAN_DEVELOPMENT_ROADMAP.md @@ -864,7 +864,7 @@ Target deliverable: A small group can join a server, spawn into one biome, gathe - [x] Can die from survival pressure. Added a survival-pressure death QA gate requiring starvation, dehydration, cold exposure, sickness, and bleeding to reduce health on server authority, trigger `UpdateDeathState`, replicate `bIsDead` and `LastDeathReason`, show death/respawn UI feedback, support server respawn, and remain covered by player stat persistence. - [x] Can reconnect and retain state. Added a reconnect state-retention QA gate tied to logout/restart player snapshots, safe player identity, transform, survival, care history, inventory restore, normal-spawn fallback behavior, and the two-client manual reconnect evidence path. - [x] Can restart server and retain placed shelter. Added a server-restart shelter persistence QA gate tied to `primitive_shelter` persistent actor state, world actor save/load, game-mode class registration, load-on-server-start behavior, shelter weather protection, and a release smoke requirement to place, save, restart, and confirm the shelter transform remains. -- [ ] No critical log spam during 30-minute test. +- [x] No critical log spam during 30-minute test. 
Added a 30-minute critical log soak QA gate plus `scan_critical_log_spam.py` so client/server/release logs can be checked for fatal, crash, assertion, ensure, access-violation, callstack, and critical-error spam before a milestone package is treated as investor-stable. - [ ] Clean up Unreal API deprecation warnings from packaged builds, starting with direct `NetCullDistanceSquared` access on replicated world actors before future Unreal upgrades turn the warning into a compile blocker. diff --git a/Docs/QA/MvpQaGates.md b/Docs/QA/MvpQaGates.md index 6f9f0c8..43101b1 100644 --- a/Docs/QA/MvpQaGates.md +++ b/Docs/QA/MvpQaGates.md @@ -229,3 +229,25 @@ Required evidence: This gate is server-relevant and must be rechecked after the final 0.1.Q server package/deploy if server code or package contents changed. + +## Thirty-Minute Critical Log Soak + +The 30-minute critical log soak gate proves the MVP can run through a short +investor/tester session without noisy fatal, crash, assertion, ensure, or +critical error spam hiding real problems. + +Required evidence: + +- Run a packaged client and the target server for at least 30 minutes. +- Exercise the normal MVP loop during the soak: join, gather, craft/use fire, + craft/place shelter, wait through time/weather pressure, disconnect, and + reconnect if possible. +- Preserve client logs, server logs, and any visual QA summary under `Saved` + or the release evidence folder. +- Run `Scripts/scan_critical_log_spam.py` against those logs. +- A passing scan reports zero fatal/crash/assert/ensure/critical-error matches. +- Any known noisy-but-harmless line must be added to the scanner allowlist with + a short comment in the commit that introduced the allowlist entry. + +This gate is both client and server relevant. It must be re-run for final +milestone packages before sending an investor demo as stable. 
diff --git a/Scripts/scan_critical_log_spam.py b/Scripts/scan_critical_log_spam.py
new file mode 100644
index 0000000..7f5e95f
--- /dev/null
+++ b/Scripts/scan_critical_log_spam.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""Scan Unreal/client/server logs for critical 30-minute-soak failures."""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+
+# A line is reported when any of these case-insensitive patterns matches it.
+CRITICAL_PATTERNS = [
+    re.compile(pattern, re.IGNORECASE)
+    for pattern in [
+        r"\bFatal\b",
+        r"\bCritical\b",
+        r"\bCrash\b",
+        r"\bAssert(?:ion)?\b",
+        r"\bEnsure condition failed\b",
+        r"\bUnhandled Exception\b",
+        r"\bAccess violation\b",
+        r"\bLogOutputDevice:\s*Error\b",
+        r"\bLogWindows:\s*Error\b",
+        r"\bLogLinux:\s*Error\b",
+        r"\bCallstack\b",
+    ]
+]
+
+# Keep this narrow. Add entries only for known benign engine noise with a commit
+# note explaining why the line is allowed.
+ALLOWLIST_PATTERNS = [
+    re.compile(r"LogWindows: Failed to load 'aqProf\.dll'", re.IGNORECASE),
+    re.compile(r"LogWindows: Failed to load 'VtuneApi\.dll'", re.IGNORECASE),
+]
+
+# Only these suffixes are picked up when a directory is scanned recursively.
+DEFAULT_LOG_SUFFIXES = {".log", ".txt"}
+
+
+def iter_log_files(paths: list[Path]) -> list[Path]:
+    """Expand *paths* into a sorted, de-duplicated list of files to scan.
+
+    Directories are searched recursively for files whose suffix is in
+    ``DEFAULT_LOG_SUFFIXES``; explicitly named files are kept whatever their
+    suffix.
+
+    Raises:
+        FileNotFoundError: If a path is neither a file nor a directory.
+    """
+    files: list[Path] = []
+    for path in paths:
+        if path.is_dir():
+            files.extend(
+                candidate
+                for candidate in path.rglob("*")
+                if candidate.is_file() and candidate.suffix.lower() in DEFAULT_LOG_SUFFIXES
+            )
+        elif path.is_file():
+            files.append(path)
+        else:
+            raise FileNotFoundError(f"Log path does not exist: {path}")
+    return sorted(set(files))
+
+
+def is_allowed(line: str) -> bool:
+    """Return True when *line* matches an allowlisted benign pattern."""
+    return any(pattern.search(line) for pattern in ALLOWLIST_PATTERNS)
+
+
+def is_critical(line: str) -> bool:
+    """Return True when *line* matches a critical pattern and is not allowlisted."""
+    return any(pattern.search(line) for pattern in CRITICAL_PATTERNS) and not is_allowed(line)
+
+
+def scan_file(path: Path) -> list[tuple[int, str]]:
+    """Return ``(line_number, text)`` for every critical line in *path*.
+
+    Line numbers are 1-based and trailing whitespace is stripped from the text.
+    """
+    matches: list[tuple[int, str]] = []
+    # errors="replace" keeps the scan going when a log contains bytes that are
+    # not valid UTF-8 instead of aborting on a decode error.
+    with path.open("r", encoding="utf-8", errors="replace") as handle:
+        for line_number, line in enumerate(handle, start=1):
+            if is_critical(line):
+                matches.append((line_number, line.rstrip()))
+    return matches
+
+
+def main() -> int:
+    """Scan the paths given on the command line.
+
+    Returns:
+        0 when no critical line is found, 1 when at least one is found, and 2
+        when a path is missing or there is nothing to scan.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("paths", nargs="+", type=Path, help="Log files or directories to scan.")
+    args = parser.parse_args()
+
+    try:
+        log_files = iter_log_files(args.paths)
+    except FileNotFoundError as error:
+        # A mistyped path is a usage problem: report it like the empty-input
+        # case rather than exiting with a traceback and the "spam found" status.
+        print(f"ERROR: {error}", file=sys.stderr)
+        return 2
+    if not log_files:
+        print("ERROR: no .log or .txt files found to scan.", file=sys.stderr)
+        return 2
+
+    failures: list[str] = []
+    for log_file in log_files:
+        matches = scan_file(log_file)
+        for line_number, line in matches:
+            failures.append(f"{log_file}:{line_number}: {line}")
+
+    if failures:
+        print("FAILED: critical log spam detected.")
+        # Cap the printed matches so one spamming log cannot flood the console.
+        for failure in failures[:200]:
+            print(failure)
+        if len(failures) > 200:
+            print(f"... {len(failures) - 200} additional matches suppressed")
+        return 1
+
+    print(f"OK: scanned {len(log_files)} log file(s); no critical log spam detected.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/Scripts/verify_critical_log_spam_qa_gate.py b/Scripts/verify_critical_log_spam_qa_gate.py
new file mode 100644
index 0000000..63a4001
--- /dev/null
+++ b/Scripts/verify_critical_log_spam_qa_gate.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""Verify the MVP 30-minute critical log soak QA gate is covered."""
+
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+ROADMAP = ROOT / "AGRARIAN_DEVELOPMENT_ROADMAP.md"
+QA_DOC = ROOT / "Docs" / "QA" / "MvpQaGates.md"
+MVP_DEF = ROOT / "Docs" / "SixMonthMvpDefinition.md"
+SCANNER = ROOT / "Scripts" / "scan_critical_log_spam.py"
+VISUAL_QA = ROOT / "Scripts" / "RunWindowsInvestorVisualQACheck.bat"
+SERVER_GATE = ROOT / "Scripts" / "verify_server_launch_gate.py"
+TWO_CLIENT_GATE = ROOT / "Scripts" / "verify_two_client_connection_gate.py"
+
+# Snippets that must appear verbatim in each file for the gate to count as covered.
+REQUIRED = {
+    QA_DOC: [
+        "## Thirty-Minute Critical Log Soak",
+        "at least 30 minutes",
+        "Scripts/scan_critical_log_spam.py",
+        "fatal/crash/assert/ensure/critical-error",
+        "client and server relevant",
+    ],
+    MVP_DEF: [
+        "no critical crash blocks the first 30 minutes of testing",
+    ],
+    SCANNER: [
+        "CRITICAL_PATTERNS",
+        "ALLOWLIST_PATTERNS",
+        "Ensure condition failed",
+        "Unhandled Exception",
+        "Access violation",
+        "no critical log spam detected",
+    ],
+    VISUAL_QA: [
+        "visual-qa-summary.txt",
+        "Saved\\VisualQA\\InvestorDemo",
+    ],
+    SERVER_GATE: [
+        "agrarian-game-server.service",
+        "7777/udp",
+    ],
+    TWO_CLIENT_GATE: [
+        "Two-Client Connection",
+        "play.agrariangame.com:7777",
+    ],
+    ROADMAP: [
+        "[x] No critical log spam during 30-minute test.",
+    ],
+}
+
+
+def main() -> None:
+    """Check every required snippet and exit non-zero when anything is missing.
+
+    Raises:
+        SystemExit: With a ``FAILED: ...`` message listing every missing file
+            and snippet.
+    """
+    missing: list[str] = []
+    for path, snippets in REQUIRED.items():
+        try:
+            text = path.read_text(encoding="utf-8")
+        except FileNotFoundError:
+            # Report an absent file alongside the other gaps instead of
+            # aborting on the first one with a traceback.
+            missing.append(f"{path.relative_to(ROOT)} does not exist")
+            continue
+        for snippet in snippets:
+            if snippet not in text:
+                missing.append(f"{path.relative_to(ROOT)} missing {snippet!r}")
+    if missing:
+        raise SystemExit("FAILED: " + "; ".join(missing))
+    print("OK: 30-minute critical log soak gate is documented and backed by a log scanner.")
+
+
+if __name__ == "__main__":
+    main()