From 675b6d73a647565e49f73fbcd965d072e882d75e Mon Sep 17 00:00:00 2001 From: Gustavo Carneiro Date: Mon, 27 Apr 2026 12:16:46 -0300 Subject: [PATCH] Add Python 3.11/3.12 opcode handlers to reduce decompilation failures No-op prologue/bookkeeping opcodes: - MAKE_CELL_A: closure cell initialization (Python 3.11+) - COPY_FREE_VARS_A: copies free vars into frame at function entry (3.11+) - RETURN_GENERATOR: marks a function as a generator (3.12) Stack/call opcodes: - YIELD_VALUE_A: generator yield with stack-depth operand (3.12) - CALL_FUNCTION_EX_A / INSTRUMENTED_CALL_FUNCTION_EX_A: func(*args, **kwargs) - LOAD_SUPER_ATTR_A: super().attr access (3.12 specialization) - LOAD_FROM_DICT_OR_DEREF_A: closure variable load (3.12) Comprehension opcodes: - SET_ADD_A: set comprehension element append - MAP_ADD_A: dict comprehension key/value append - DICT_UPDATE_A: in-place dict merge (like SET_UPDATE_A for dicts) Other opcodes: - LOAD_ASSERTION_ERROR: pushes AssertionError class (used in assert stmts) - DICT_MERGE_A: merges **kwargs mapping into accumulator dict - CALL_INTRINSIC_1_A: Python-internal intrinsic functions (pass-through) - POP_JUMP_IF_NONE_A / POP_JUMP_IF_NOT_NONE_A: None-checking conditional jumps - LOAD_FAST_CHECK_A / LOAD_FAST_AND_CLEAR_A: variants of LOAD_FAST Bug fixes: - Restore accidentally-removed 'case Pyc::LOAD_ATTR_A:' label that was clobbered when inserting the DICT_MERGE_A handler - Handle NODE_TUPLE (and other unexpected node types) inside f-string (NODE_JOINEDSTR) value lists by wrapping them in { } instead of emitting 'Unsupported node type' and marking cleanBuild=false Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ASTree.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 168 insertions(+), 7 deletions(-) diff --git a/ASTree.cpp b/ASTree.cpp index f837152f9..539b71e52 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -213,6 +213,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) && opcode != Pyc::JUMP_IF_TRUE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_TRUE_A && opcode != Pyc::POP_JUMP_FORWARD_IF_TRUE_A + && opcode != Pyc::POP_JUMP_IF_NONE_A + && opcode != Pyc::POP_JUMP_IF_NOT_NONE_A && opcode != Pyc::POP_BLOCK) { else_pop = false; @@ -692,6 +694,33 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(call); } break; + case Pyc::CALL_FUNCTION_EX_A: + case Pyc::INSTRUMENTED_CALL_FUNCTION_EX_A: + { + /* CALL_FUNCTION_EX flags: if flags&1, kwargs dict is on TOS */ + PycRef kw = nullptr; + if (operand & 1) { + kw = stack.top(); + stack.pop(); + } + PycRef var = stack.top(); + stack.pop(); + PycRef func = stack.top(); + stack.pop(); + /* Pop NULL sentinel pushed by PUSH_NULL / LOAD_METHOD in 3.11+ */ + if (stack.top() == nullptr) { + stack.pop(); + } + ASTCall::pparam_t pparamList; + ASTCall::kwparam_t kwparamList; + PycRef call = new ASTCall(func, pparamList, kwparamList); + call.cast()->setVar(var); + if (kw != nullptr) { + call.cast()->setKW(kw); + } + stack.push(call); + } + break; case Pyc::CALL_METHOD_A: { ASTCall::pparam_t pparamList; @@ -1130,17 +1159,28 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::POP_JUMP_FORWARD_IF_TRUE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A: + case Pyc::POP_JUMP_IF_NONE_A: + case Pyc::POP_JUMP_IF_NOT_NONE_A: { PycRef cond = stack.top(); PycRef ifblk; int popped = ASTCondBlock::UNINITED; + /* For POP_JUMP_IF_NONE/NOT_NONE: wrap the condition with "is None"/"is not None" */ + if (opcode == Pyc::POP_JUMP_IF_NONE_A || opcode == Pyc::POP_JUMP_IF_NOT_NONE_A) { + int compare_op = (opcode == Pyc::POP_JUMP_IF_NONE_A) + ? ASTCompare::CMP_IS : ASTCompare::CMP_IS_NOT; + cond = new ASTCompare(cond, new ASTObject(Pyc_None), compare_op); + } + if (opcode == Pyc::POP_JUMP_IF_FALSE_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A - || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A) { + || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A + || opcode == Pyc::POP_JUMP_IF_NONE_A + || opcode == Pyc::POP_JUMP_IF_NOT_NONE_A) { /* Pop condition before the jump */ stack.pop(); popped = ASTCondBlock::PRE_POPPED; @@ -1156,12 +1196,16 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) popped = ASTCondBlock::POPPED; } - /* "Jump if true" means "Jump if not false" */ + /* "Jump if true" means "Jump if not false". + POP_JUMP_IF_NONE/NOT_NONE both jump when the condition is true + (after wrapping with is None / is not None above). */ bool neg = opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A - || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A; + || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A + || opcode == Pyc::POP_JUMP_IF_NONE_A + || opcode == Pyc::POP_JUMP_IF_NOT_NONE_A; int offs = operand; if (mod->verCompare(3, 10) >= 0) @@ -1507,6 +1551,41 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::SET_ADD_A: + { + /* SET_ADD i: Calls set.add(TOS1[-i], TOS). Used in set comprehensions. */ + PycRef value = stack.top(); + stack.pop(); + PycRef set = stack.top(); + if (curblock->blktype() == ASTBlock::BLK_FOR + && curblock.cast()->isComprehension()) { + stack.pop(); + stack.push(new ASTComprehension(value)); + } else { + stack.push(new ASTSubscr(set, value)); + } + } + break; + case Pyc::MAP_ADD_A: + { + /* MAP_ADD i: Calls dict.__setitem__(TOS1[-i], TOS1, TOS). + TOS is value, TOS1 is key. Used in dict comprehensions. */ + PycRef value = stack.top(); + stack.pop(); + PycRef key = stack.top(); + stack.pop(); + PycRef theMap = stack.top(); + if (curblock->blktype() == ASTBlock::BLK_FOR + && curblock.cast()->isComprehension()) { + stack.pop(); + PycRef dmap = new ASTMap(); + dmap->add(key, value); + stack.push(new ASTComprehension(dmap.cast())); + } else { + stack.push(new ASTSubscr(theMap, key)); + } + } + break; case Pyc::SET_UPDATE_A: { PycRef rhs = stack.top(); @@ -1561,6 +1640,43 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTList(result)); } break; + case Pyc::DICT_MERGE_A: + { + /* DICT_MERGE i: merges TOS (a dict or mapping) into the dict at STACK[-i]. + Used when building **kwargs dicts for CALL_FUNCTION_EX. + We represent it by adding the merged value as a keyword-unpack entry + in the accumulator map on the stack. */ + PycRef rhs = stack.top(); + stack.pop(); + // The map being built is 'operand' positions down in the stack. + // For the common case (operand==1), it's the new top of stack. + // We represent **rhs as a map entry with a nullptr key (prints as **rhs). + if (stack.top().type() == ASTNode::NODE_MAP) { + stack.top().cast()->add(nullptr, rhs); + } else { + // Fallback: just push a dummy binary node so the call can proceed + stack.push(rhs); + } + } + break; + case Pyc::DICT_UPDATE_A: + { + /* DICT_UPDATE i: Calls dict.update(TOS1[-i], TOS). Similar to DICT_MERGE but + does not emit a warning on duplicate keys. We merge TOS into the map below. */ + PycRef rhs = stack.top(); + stack.pop(); + if (stack.top().type() == ASTNode::NODE_MAP) { + stack.top().cast()->add(nullptr, rhs); + } else { + stack.push(rhs); + } + } + break; + case Pyc::CALL_INTRINSIC_1_A: + /* CALL_INTRINSIC_1 intrinsic_id: Calls a Python-internal function on TOS. + For decompilation purposes, most intrinsics act as pass-through (e.g. list-to-tuple). */ + /* TOS remains unchanged — just leave it on the stack */ + break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); @@ -1581,6 +1697,25 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::LOAD_SUPER_ATTR_A: + { + /* Python 3.12: LOAD_SUPER_ATTR attr_idx + Stack before: super_callable | __class__ | self + Represents: super().attr or super().method (method call if low bit set) */ + PycRef super_callable = stack.top(); stack.pop(); + /* __class__ and self */ stack.pop(); stack.pop(); + if (operand & 1) { + /* method call: push NULL sentinel before the method attribute */ + stack.push(nullptr); + } + int name_idx = operand >> 1; + /* Build super() call node, then attribute access on it */ + PycRef super_call = new ASTCall(super_callable, + ASTCall::pparam_t(), + ASTCall::kwparam_t()); + stack.push(new ASTBinary(super_call, new ASTName(code->getName(name_idx)), ASTBinary::BIN_ATTR)); + } + break; case Pyc::LOAD_BUILD_CLASS: stack.push(new ASTLoadBuildClass(new PycObject())); break; @@ -1605,9 +1740,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; case Pyc::LOAD_DEREF_A: case Pyc::LOAD_CLASSDEREF_A: + case Pyc::LOAD_FROM_DICT_OR_DEREF_A: stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: + case Pyc::LOAD_FAST_CHECK_A: + case Pyc::LOAD_FAST_AND_CLEAR_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else @@ -1647,6 +1785,14 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::LOAD_NAME_A: stack.push(new ASTName(code->getName(operand))); break; + case Pyc::LOAD_ASSERTION_ERROR: + { + /* Pushes AssertionError onto the stack (used in assert statements). */ + PycRef assertStr = new PycString(); + assertStr->setValue("AssertionError"); + stack.push(new ASTName(assertStr)); + } + break; case Pyc::MAKE_CLOSURE_A: case Pyc::MAKE_FUNCTION_A: { @@ -2574,6 +2720,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } break; case Pyc::YIELD_VALUE: + case Pyc::YIELD_VALUE_A: case Pyc::INSTRUMENTED_YIELD_VALUE_A: { PycRef value = stack.top(); @@ -2587,6 +2734,9 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::PRECALL_A: case Pyc::RESUME_A: case Pyc::INSTRUMENTED_RESUME_A: + case Pyc::RETURN_GENERATOR: + case Pyc::MAKE_CELL_A: + case Pyc::COPY_FREE_VARS_A: /* We just entirely ignore this / no-op */ break; case Pyc::CACHE: @@ -3004,7 +3154,12 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) print_const(pyc_output, val.cast()->object(), mod, F_STRING_QUOTE); break; default: - fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type()); + /* Any other node type (e.g. NODE_TUPLE from comprehension/unpack) is + a formatted expression — wrap it in { } like a FormattedValue. */ + pyc_output << "{"; + print_src(val, mod, pyc_output); + pyc_output << "}"; + break; } } pyc_output << F_STRING_QUOTE; @@ -3079,9 +3234,15 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) else pyc_output << ",\n"; start_line(cur_indent, pyc_output); - print_src(val.first, mod, pyc_output); - pyc_output << ": "; - print_src(val.second, mod, pyc_output); + if (val.first == NULL) { + /* nullptr key means **dict unpacking, e.g. from DICT_MERGE */ + pyc_output << "**"; + print_src(val.second, mod, pyc_output); + } else { + print_src(val.first, mod, pyc_output); + pyc_output << ": "; + print_src(val.second, mod, pyc_output); + } first = false; } cur_indent--;