diff --git a/agent/site.py b/agent/site.py index b6d21f19..2c97c459 100644 --- a/agent/site.py +++ b/agent/site.py @@ -16,7 +16,7 @@ from agent.base import AgentException, Base from agent.database import Database from agent.job import job, step -from agent.utils import b2mb, compute_file_hash, db_client_cli, db_dump_cli, get_size +from agent.utils import b2mb, compute_file_hash, db_client_cli, db_dump_cli, get_size, parse_json_output if TYPE_CHECKING: from agent.bench import Bench @@ -125,7 +125,7 @@ def restore_site( ) try: return self.bench_execute( - "--force restore " + "--force restore --verbose " f"--mariadb-root-username {temp_user} " f"--mariadb-root-password {temp_password} " f"--admin-password {admin_password} " @@ -645,7 +645,14 @@ def clear_website_cache(self): @step("Uninstall Unavailable Apps") def uninstall_unavailable_apps(self, apps_to_keep): - installed_apps = json.loads(self.bench_execute("execute frappe.get_installed_apps")["output"]) + installed_apps = parse_json_output( + self.bench_execute("execute frappe.get_installed_apps")["output"], + validator=lambda value: ( + isinstance(value, list) + and all(isinstance(item, str) for item in value) + and "frappe" in value + ), + ) for app in installed_apps: if app not in apps_to_keep: self.bench_execute(f"remove-from-installed-apps '{app}'") @@ -928,7 +935,19 @@ def get_usage(self): def get_analytics(self): analytics = self.bench_execute("execute frappe.utils.get_site_info")["output"] - return json.loads(analytics) + return parse_json_output( + analytics, + validator=lambda value: isinstance(value, dict) + and { + "installed_apps", + "users", + "country", + "language", + "time_zone", + "setup_complete", + "scheduler_enabled", + }.issubset(value), + ) def get_database_size(self): config = {} @@ -974,7 +993,11 @@ def describe_database_table(self, doctype, columns=None): command += f"--column {column} " try: output = self.bench_execute(command)["output"] - return json.loads(output) + return parse_json_output( + output, + validator=lambda value: isinstance(value, dict) + and {"table_name", "schema", "indexes"}.issubset(value), + ) except Exception: return {} @@ -984,7 +1007,10 @@ def apps(self): @property def apps_as_json(self): - return json.loads(self.bench_execute("list-apps -f json")["output"])[self.name] + return parse_json_output( + self.bench_execute("list-apps -f json")["output"], + validator=lambda value: isinstance(value, dict) and self.name in value, + )[self.name] @job("Add Database Index") def add_database_index(self, doctype, columns=None): diff --git a/agent/tests/test_site.py b/agent/tests/test_site.py index fe24dac1..241d107d 100644 --- a/agent/tests/test_site.py +++ b/agent/tests/test_site.py @@ -11,6 +11,7 @@ from agent.bench import Bench, _get_cors_origins, _normalize_cors_origins from agent.server import Server from agent.site import Site +from agent.utils import parse_json_output class TestSite(unittest.TestCase): @@ -74,6 +75,11 @@ def _get_test_bench(self) -> Bench: server.benches_directory = self.benches_directory return Bench(self.bench_name, server) + def _get_test_site(self, site_name: str = "test.local") -> Site: + self._create_test_site(site_name) + self._make_site_config(site_name) + return Site(site_name, self._get_test_bench()) + def _render_bench_nginx(self, cors_origins, standalone=False): output_file = os.path.join(self.test_dir, "nginx.conf") with patch.object(Server, "__init__", new=lambda x: None): @@ -283,6 +289,139 @@ def test_normalize_cors_origins_strips_path_from_url(self): ["https://example.com"], ) + def test_parse_json_output_extracts_json_from_noisy_logs(self): + output = 'Duckwalk override: custom app loaded\n ["frappe", "erpnext", "insights"]' + + self.assertEqual(parse_json_output(output), ["frappe", "erpnext", "insights"]) + + def test_parse_json_output_uses_validator_to_ignore_trailing_json_logs(self): + output = '["frappe", "erpnext"]\n{"status": "done"}' + + self.assertEqual( + parse_json_output(output, validator=lambda value: isinstance(value, list)), + ["frappe", "erpnext"], + ) + + def test_parse_json_output_prefers_clean_trailing_payload_over_prefix_json_logs(self): + output = '[200]\n["frappe", "erpnext"]' + + self.assertEqual( + parse_json_output(output, validator=lambda value: isinstance(value, list)), + ["frappe", "erpnext"], + ) + + def test_parse_json_output_raises_for_ambiguous_matching_json(self): + output = '{"status": "running"}\nlog line\n{"status": "done"}\nmore logs' + + with self.assertRaises(ValueError): + parse_json_output( + output, + validator=lambda value: isinstance(value, dict) and "status" in value, + ) + + def test_restore_site_uses_verbose_flag(self): + site = self._get_test_site("restore.local") + + with ( + patch.object(site.bench, "create_mariadb_user", return_value=("db", "temp-user", "temp-pass")), + patch.object(site.bench, "drop_mariadb_user"), + patch.object(Site, "bench_execute", return_value={"output": ""}) as bench_execute, + patch.object(Site, "restore_site", new=Site.restore_site.__wrapped__), + ): + site.restore_site( + "root-pass", + "admin-pass", + os.path.join(site.bench.sites_directory, "restore.local", "private", "backups", "db.sql.gz"), + "", + "", + ) + + self.assertIn("--force restore --verbose", bench_execute.call_args.args[0]) + + def test_uninstall_unavailable_apps_parses_noisy_installed_apps_output(self): + site = self._get_test_site("apps.local") + + with ( + patch.object( + Site, + "bench_execute", + side_effect=[ + { + "output": ( + "[200]\n" + "Duckwalk override: custom app loaded\n" + '["frappe", "erpnext", "insights", "greendigit"]\n' + '{"status": "done"}' + ) + }, + {"output": ""}, + {"output": ""}, + {"output": ""}, + {"output": ""}, + ], + ) as bench_execute, + patch.object( + Site, + "uninstall_unavailable_apps", + new=Site.uninstall_unavailable_apps.__wrapped__, + ), + ): + site.uninstall_unavailable_apps(["frappe", "erpnext"]) + + self.assertEqual( + [call.args[0] for call in bench_execute.call_args_list], + [ + "execute frappe.get_installed_apps", + "remove-from-installed-apps 'insights'", + "clear-cache", + "remove-from-installed-apps 'greendigit'", + "clear-cache", + ], + ) + + def test_apps_as_json_ignores_trailing_json_logs(self): + site = self._get_test_site("apps-json.local") + + with patch.object( + Site, + "bench_execute", + return_value={ + "output": ( + '{"apps-json.local": [{"app": "frappe"}]}\n' + '{"status": "done"}' + ) + }, + ): + self.assertEqual(site.apps_as_json, [{"app": "frappe"}]) + + def test_get_analytics_ignores_prefix_and_trailing_json_logs(self): + site = self._get_test_site("analytics.local") + + with patch.object( + Site, + "bench_execute", + return_value={ + "output": ( + '[200]\n' + '{"installed_apps": [], "users": [], "country": "IN", "language": "english", ' + '"time_zone": "UTC", "setup_complete": true, "scheduler_enabled": true}\n' + '{"status": "done"}' + ) + }, + ): + self.assertEqual( + site.get_analytics(), + { + "installed_apps": [], + "users": [], + "country": "IN", + "language": "english", + "time_zone": "UTC", + "setup_complete": True, + "scheduler_enabled": True, + }, + ) + def test_get_cors_origins_with_quoted_allow_cors(self): sites = [ SimpleNamespace( diff --git a/agent/utils.py b/agent/utils.py index 0498c2a2..bbb4fe13 100644 --- a/agent/utils.py +++ b/agent/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import json import os import re import shutil @@ -164,6 +165,56 @@ def end_execution( return res +def parse_json_output(output: str, validator=None): + """ + Parse JSON from command output that may contain extra logs around the payload. + + Prefer a clean-trailing payload when present. If multiple candidates remain + after that preference, raise instead of guessing. + """ + def is_valid(value): + return not validator or validator(value) + + try: + value = json.loads(output) + if not is_valid(value): + raise ValueError("Parsed JSON did not match expected shape") + return value + except (json.JSONDecodeError, ValueError) as e: + original_error = e + decoder = json.JSONDecoder() + trailing_candidate = None + dirty_candidate = None + + for match in re.finditer(r"[\[{]", output): + try: + value, end = decoder.raw_decode(output[match.start() :]) + except json.JSONDecodeError: + continue + + if not is_valid(value): + continue + + if not output[match.start() + end :].strip(): + if trailing_candidate is not None: + raise ValueError("Ambiguous JSON output") + trailing_candidate = value + continue + + if dirty_candidate is not None: + raise ValueError("Ambiguous JSON output") + + dirty_candidate = value + + if trailing_candidate is not None: + return trailing_candidate + + if dirty_candidate is not None: + return dirty_candidate + + raise original_error + + def compute_file_hash(file_path, algorithm="sha256", raise_exception=True): try: """Compute the hash of a file using the specified algorithm."""