From b53295d4d4e71a6c77b7bdd585cb08585a12da43 Mon Sep 17 00:00:00 2001 From: Poyraxx Date: Mon, 1 Jun 2026 21:01:20 +0300 Subject: [PATCH 1/4] fix bench restore output handling --- agent/site.py | 14 ++++---- agent/tests/test_site.py | 69 ++++++++++++++++++++++++++++++++++++++++ agent/utils.py | 27 ++++++++++++++++ 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/agent/site.py b/agent/site.py index b6d21f19..2b981712 100644 --- a/agent/site.py +++ b/agent/site.py @@ -16,7 +16,7 @@ from agent.base import AgentException, Base from agent.database import Database from agent.job import job, step -from agent.utils import b2mb, compute_file_hash, db_client_cli, db_dump_cli, get_size +from agent.utils import b2mb, compute_file_hash, db_client_cli, db_dump_cli, get_size, parse_json_output if TYPE_CHECKING: from agent.bench import Bench @@ -125,7 +125,7 @@ def restore_site( ) try: return self.bench_execute( - "--force restore " + "--force restore --verbose " f"--mariadb-root-username {temp_user} " f"--mariadb-root-password {temp_password} " f"--admin-password {admin_password} " @@ -645,7 +645,9 @@ def clear_website_cache(self): @step("Uninstall Unavailable Apps") def uninstall_unavailable_apps(self, apps_to_keep): - installed_apps = json.loads(self.bench_execute("execute frappe.get_installed_apps")["output"]) + installed_apps = parse_json_output( + self.bench_execute("execute frappe.get_installed_apps")["output"] + ) for app in installed_apps: if app not in apps_to_keep: self.bench_execute(f"remove-from-installed-apps '{app}'") @@ -928,7 +930,7 @@ def get_usage(self): def get_analytics(self): analytics = self.bench_execute("execute frappe.utils.get_site_info")["output"] - return json.loads(analytics) + return parse_json_output(analytics) def get_database_size(self): config = {} @@ -974,7 +976,7 @@ def describe_database_table(self, doctype, columns=None): command += f"--column {column} " try: output = self.bench_execute(command)["output"] - return json.loads(output) + return parse_json_output(output) except Exception: return {} @@ -984,7 +986,7 @@ def apps(self): @property def apps_as_json(self): - return json.loads(self.bench_execute("list-apps -f json")["output"])[self.name] + return parse_json_output(self.bench_execute("list-apps -f json")["output"])[self.name] @job("Add Database Index") def add_database_index(self, doctype, columns=None): diff --git a/agent/tests/test_site.py b/agent/tests/test_site.py index fe24dac1..9c9ac115 100644 --- a/agent/tests/test_site.py +++ b/agent/tests/test_site.py @@ -11,6 +11,7 @@ from agent.bench import Bench, _get_cors_origins, _normalize_cors_origins from agent.server import Server from agent.site import Site +from agent.utils import parse_json_output class TestSite(unittest.TestCase): @@ -74,6 +75,11 @@ def _get_test_bench(self) -> Bench: server.benches_directory = self.benches_directory return Bench(self.bench_name, server) + def _get_test_site(self, site_name: str = "test.local") -> Site: + self._create_test_site(site_name) + self._make_site_config(site_name) + return Site(site_name, self._get_test_bench()) + def _render_bench_nginx(self, cors_origins, standalone=False): output_file = os.path.join(self.test_dir, "nginx.conf") with patch.object(Server, "__init__", new=lambda x: None): @@ -283,6 +289,69 @@ def test_normalize_cors_origins_strips_path_from_url(self): ["https://example.com"], ) + def test_parse_json_output_extracts_json_from_noisy_logs(self): + output = 'Duckwalk override: custom app loaded\n ["frappe", "erpnext", "insights"]' + + self.assertEqual(parse_json_output(output), ["frappe", "erpnext", "insights"]) + + def test_restore_site_uses_verbose_flag(self): + site = self._get_test_site("restore.local") + + with ( + patch.object(site.bench, "create_mariadb_user", return_value=("db", "temp-user", "temp-pass")), + patch.object(site.bench, "drop_mariadb_user"), + patch.object(Site, "bench_execute", return_value={"output": ""}) as bench_execute, + patch.object(Site, "restore_site", new=Site.restore_site.__wrapped__), + ): + site.restore_site( + "root-pass", + "admin-pass", + os.path.join(site.bench.sites_directory, "restore.local", "private", "backups", "db.sql.gz"), + "", + "", + ) + + self.assertIn("--force restore --verbose", bench_execute.call_args.args[0]) + + def test_uninstall_unavailable_apps_parses_noisy_installed_apps_output(self): + site = self._get_test_site("apps.local") + + with ( + patch.object( + Site, + "bench_execute", + side_effect=[ + { + "output": ( + "Duckwalk override: custom app loaded\n" + '["frappe", "erpnext", "insights", "greendigit"]' + ) + }, + {"output": ""}, + {"output": ""}, + {"output": ""}, + {"output": ""}, + ], + ) as bench_execute, + patch.object( + Site, + "uninstall_unavailable_apps", + new=Site.uninstall_unavailable_apps.__wrapped__, + ), + ): + site.uninstall_unavailable_apps(["frappe", "erpnext"]) + + self.assertEqual( + [call.args[0] for call in bench_execute.call_args_list], + [ + "execute frappe.get_installed_apps", + "remove-from-installed-apps 'insights'", + "clear-cache", + "remove-from-installed-apps 'greendigit'", + "clear-cache", + ], + ) + def test_get_cors_origins_with_quoted_allow_cors(self): sites = [ SimpleNamespace( diff --git a/agent/utils.py b/agent/utils.py index 0498c2a2..e7a53f09 100644 --- a/agent/utils.py +++ b/agent/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import json import os import re import shutil @@ -164,6 +165,32 @@ def end_execution( return res +def parse_json_output(output: str): + """ + Parse JSON from command output that may contain extra logs around the payload. + """ + try: + return json.loads(output) + except json.JSONDecodeError: + decoder = json.JSONDecoder() + candidate = None + + for match in re.finditer(r"[\[{]", output): + try: + value, end = decoder.raw_decode(output[match.start() :]) + except json.JSONDecodeError: + continue + + candidate = value + if not output[match.start() + end :].strip(): + return value + + if candidate is not None: + return candidate + + raise + + def compute_file_hash(file_path, algorithm="sha256", raise_exception=True): try: """Compute the hash of a file using the specified algorithm.""" From 577d959331368f098212ef37e163da16c62f6b29 Mon Sep 17 00:00:00 2001 From: Poyraxx Date: Tue, 2 Jun 2026 16:56:11 +0300 Subject: [PATCH 2/4] avoid ambiguous bench json parsing --- agent/site.py | 14 +++++++++++--- agent/tests/test_site.py | 35 ++++++++++++++++++++++++++++++++++- agent/utils.py | 25 ++++++++++++++++++++----- 3 files changed, 65 insertions(+), 9 deletions(-) diff --git a/agent/site.py b/agent/site.py index 2b981712..d88f49ac 100644 --- a/agent/site.py +++ b/agent/site.py @@ -646,7 +646,8 @@ def clear_website_cache(self): @step("Uninstall Unavailable Apps") def uninstall_unavailable_apps(self, apps_to_keep): installed_apps = parse_json_output( - self.bench_execute("execute frappe.get_installed_apps")["output"] + self.bench_execute("execute frappe.get_installed_apps")["output"], + validator=lambda value: isinstance(value, list), ) for app in installed_apps: if app not in apps_to_keep: @@ -976,7 +977,11 @@ def describe_database_table(self, doctype, columns=None): command += f"--column {column} " try: output = self.bench_execute(command)["output"] - return parse_json_output(output) + return parse_json_output( + output, + validator=lambda value: isinstance(value, dict) + and {"table_name", "schema", "indexes"}.issubset(value), + ) except Exception: return {} @@ -986,7 +991,10 @@ def apps(self): @property def apps_as_json(self): - return parse_json_output(self.bench_execute("list-apps -f json")["output"])[self.name] + return parse_json_output( + self.bench_execute("list-apps -f json")["output"], + validator=lambda value: isinstance(value, dict) and self.name in value, + )[self.name] @job("Add Database Index") def add_database_index(self, doctype, columns=None): diff --git a/agent/tests/test_site.py b/agent/tests/test_site.py index 9c9ac115..88fd610b 100644 --- a/agent/tests/test_site.py +++ b/agent/tests/test_site.py @@ -294,6 +294,23 @@ def test_parse_json_output_extracts_json_from_noisy_logs(self): self.assertEqual(parse_json_output(output), ["frappe", "erpnext", "insights"]) + def test_parse_json_output_uses_validator_to_ignore_trailing_json_logs(self): + output = '["frappe", "erpnext"]\n{"status": "done"}' + + self.assertEqual( + parse_json_output(output, validator=lambda value: isinstance(value, list)), + ["frappe", "erpnext"], + ) + + def test_parse_json_output_raises_for_ambiguous_matching_json(self): + output = '{"status": "running"}\n{"status": "done"}' + + with self.assertRaises(ValueError): + parse_json_output( + output, + validator=lambda value: isinstance(value, dict) and "status" in value, + ) + def test_restore_site_uses_verbose_flag(self): site = self._get_test_site("restore.local") @@ -324,7 +341,8 @@ def test_uninstall_unavailable_apps_parses_noisy_installed_apps_output(self): { "output": ( "Duckwalk override: custom app loaded\n" - '["frappe", "erpnext", "insights", "greendigit"]' + '["frappe", "erpnext", "insights", "greendigit"]\n' + '{"status": "done"}' ) }, {"output": ""}, @@ -352,6 +370,21 @@ def test_uninstall_unavailable_apps_parses_noisy_installed_apps_output(self): ], ) + def test_apps_as_json_ignores_trailing_json_logs(self): + site = self._get_test_site("apps-json.local") + + with patch.object( + Site, + "bench_execute", + return_value={ + "output": ( + '{"apps-json.local": [{"app": "frappe"}]}\n' + '{"status": "done"}' + ) + }, + ): + self.assertEqual(site.apps_as_json, [{"app": "frappe"}]) + def test_get_cors_origins_with_quoted_allow_cors(self): sites = [ SimpleNamespace( diff --git a/agent/utils.py b/agent/utils.py index e7a53f09..f643734f 100644 --- a/agent/utils.py +++ b/agent/utils.py @@ -165,13 +165,19 @@ def end_execution( return res -def parse_json_output(output: str): +def parse_json_output(output: str, validator=None): """ Parse JSON from command output that may contain extra logs around the payload. + + If multiple matching JSON values are found, raise instead of guessing. """ try: - return json.loads(output) - except json.JSONDecodeError: + value = json.loads(output) + if validator and not validator(value): + raise ValueError("Parsed JSON did not match expected shape") + return value + except json.JSONDecodeError as e: + original_error = e decoder = json.JSONDecoder() candidate = None @@ -181,14 +187,23 @@ def parse_json_output(output: str): except json.JSONDecodeError: continue - candidate = value + if validator and not validator(value): + continue + if not output[match.start() + end :].strip(): + if candidate is not None: + raise ValueError("Ambiguous JSON output") return value + if candidate is not None: + raise ValueError("Ambiguous JSON output") + + candidate = value + if candidate is not None: return candidate - raise + raise original_error def compute_file_hash(file_path, algorithm="sha256", raise_exception=True): From b594a9f43c041ae371d5772898b169b8cd4f12bf Mon Sep 17 00:00:00 2001 From: Poyraxx Date: Tue, 2 Jun 2026 17:13:05 +0300 Subject: [PATCH 3/4] prefer trailing bench json payload --- agent/site.py | 14 +++++++++++++- agent/tests/test_site.py | 38 +++++++++++++++++++++++++++++++++++++- agent/utils.py | 31 ++++++++++++++++++++----------- 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/agent/site.py b/agent/site.py index d88f49ac..2abc147b 100644 --- a/agent/site.py +++ b/agent/site.py @@ -931,7 +931,19 @@ def get_usage(self): def get_analytics(self): analytics = self.bench_execute("execute frappe.utils.get_site_info")["output"] - return parse_json_output(analytics) + return parse_json_output( + analytics, + validator=lambda value: isinstance(value, dict) + and { + "installed_apps", + "users", + "country", + "language", + "time_zone", + "setup_complete", + "scheduler_enabled", + }.issubset(value), + ) def get_database_size(self): config = {} diff --git a/agent/tests/test_site.py b/agent/tests/test_site.py index 88fd610b..987ead00 100644 --- a/agent/tests/test_site.py +++ b/agent/tests/test_site.py @@ -302,8 +302,16 @@ def test_parse_json_output_uses_validator_to_ignore_trailing_json_logs(self): ["frappe", "erpnext"], ) + def test_parse_json_output_prefers_clean_trailing_payload_over_prefix_json_logs(self): + output = '[200]\n["frappe", "erpnext"]' + + self.assertEqual( + parse_json_output(output, validator=lambda value: isinstance(value, list)), + ["frappe", "erpnext"], + ) + def test_parse_json_output_raises_for_ambiguous_matching_json(self): - output = '{"status": "running"}\n{"status": "done"}' + output = '{"status": "running"}\nlog line\n{"status": "done"}\nmore logs' with self.assertRaises(ValueError): parse_json_output( @@ -385,6 +393,34 @@ def test_apps_as_json_ignores_trailing_json_logs(self): ): self.assertEqual(site.apps_as_json, [{"app": "frappe"}]) + def test_get_analytics_ignores_prefix_and_trailing_json_logs(self): + site = self._get_test_site("analytics.local") + + with patch.object( + Site, + "bench_execute", + return_value={ + "output": ( + '[200]\n' + '{"installed_apps": [], "users": [], "country": "IN", "language": "english", ' + '"time_zone": "UTC", "setup_complete": true, "scheduler_enabled": true}\n' + '{"status": "done"}' + ) + }, + ): + self.assertEqual( + site.get_analytics(), + { + "installed_apps": [], + "users": [], + "country": "IN", + "language": "english", + "time_zone": "UTC", + "setup_complete": True, + "scheduler_enabled": True, + }, + ) + def test_get_cors_origins_with_quoted_allow_cors(self): sites = [ SimpleNamespace( diff --git a/agent/utils.py b/agent/utils.py index f643734f..bbb4fe13 100644 --- a/agent/utils.py +++ b/agent/utils.py @@ -169,17 +169,22 @@ def parse_json_output(output: str, validator=None): """ Parse JSON from command output that may contain extra logs around the payload. - If multiple matching JSON values are found, raise instead of guessing. + Prefer a clean-trailing payload when present. If multiple candidates remain + after that preference, raise instead of guessing. """ + def is_valid(value): + return not validator or validator(value) + try: value = json.loads(output) - if validator and not validator(value): + if not is_valid(value): raise ValueError("Parsed JSON did not match expected shape") return value - except json.JSONDecodeError as e: + except (json.JSONDecodeError, ValueError) as e: original_error = e decoder = json.JSONDecoder() - candidate = None + trailing_candidate = None + dirty_candidate = None for match in re.finditer(r"[\[{]", output): try: @@ -187,21 +192,25 @@ def parse_json_output(output: str, validator=None): except json.JSONDecodeError: continue - if validator and not validator(value): + if not is_valid(value): continue if not output[match.start() + end :].strip(): - if candidate is not None: + if trailing_candidate is not None: raise ValueError("Ambiguous JSON output") - return value + trailing_candidate = value + continue - if candidate is not None: + if dirty_candidate is not None: raise ValueError("Ambiguous JSON output") - candidate = value + dirty_candidate = value + + if trailing_candidate is not None: + return trailing_candidate - if candidate is not None: - return candidate + if dirty_candidate is not None: + return dirty_candidate raise original_error From c7889be7d8bc23dd28d701640e925b878725b644 Mon Sep 17 00:00:00 2001 From: Poyraxx Date: Tue, 2 Jun 2026 17:22:58 +0300 Subject: [PATCH 4/4] narrow installed app payload validation --- agent/site.py | 6 +++++- agent/tests/test_site.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/agent/site.py b/agent/site.py index 2abc147b..2c97c459 100644 --- a/agent/site.py +++ b/agent/site.py @@ -647,7 +647,11 @@ def clear_website_cache(self): def uninstall_unavailable_apps(self, apps_to_keep): installed_apps = parse_json_output( self.bench_execute("execute frappe.get_installed_apps")["output"], - validator=lambda value: isinstance(value, list), + validator=lambda value: ( + isinstance(value, list) + and all(isinstance(item, str) for item in value) + and "frappe" in value + ), ) for app in installed_apps: if app not in apps_to_keep: diff --git a/agent/tests/test_site.py b/agent/tests/test_site.py index 987ead00..241d107d 100644 --- a/agent/tests/test_site.py +++ b/agent/tests/test_site.py @@ -348,6 +348,7 @@ def test_uninstall_unavailable_apps_parses_noisy_installed_apps_output(self): side_effect=[ { "output": ( + "[200]\n" "Duckwalk override: custom app loaded\n" '["frappe", "erpnext", "insights", "greendigit"]\n' '{"status": "done"}'