3-最好的语言 Reverse 题解writeup

3-最好的语言 Reverse 题解 writeup

首先使用 010 Editor 打开，发现是格式类似 XML 的文本文件，头部有 magic 字段：这是 Python 字节码的反汇编文本，不是 ELF，也不是 PE。

编写脚本提取代码（../../images/pyc_text_to_pseudo.py）：

```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import ast
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple


@dataclass
class CodeObj:
    argcount: int = 0
    nlocals: int = 0
    stacksize: int = 0
    flags: int = 0
    code_hex: str = ""
    names: Tuple[Any, ...] = ()
    varnames: Tuple[Any, ...] = ()
    freevars: Tuple[Any, ...] = ()
    cellvars: Tuple[Any, ...] = ()
    filename: str = ""
    name: str = "<code>"
    firstlineno: int = 0
    consts: List[Any] = field(default_factory=list)
    lnotab: str = ""


@dataclass
class Instr:
    offset: int
    op: int
    opname: str
    arg: Optional[int]
    argval: Any
    size: int
    next_offset: int


OPNAME: Dict[int, str] = {
    1: "POP_TOP",
    17: "BINARY_ADD",
    22: "BINARY_MODULO",
    23: "BINARY_ADD",
    25: "BINARY_SUBSCR",
    26: "BINARY_FLOOR_DIVIDE",
    27: "BINARY_TRUE_DIVIDE",
    30: "SLICE+0",
    31: "SLICE+1",
    32: "SLICE+2",
    33: "SLICE+3",
    55: "INPLACE_ADD",
    65: "BINARY_XOR",
    68: "GET_ITER",
    71: "PRINT_ITEM",
    72: "PRINT_NEWLINE",
    80: "BREAK_LOOP",
    83: "RETURN_VALUE",
    87: "POP_BLOCK",
    90: "STORE_NAME",
    92: "UNPACK_SEQUENCE",
    93: "FOR_ITER",
    100: "LOAD_CONST",
    101: "LOAD_NAME",
    102: "BUILD_TUPLE",
    103: "BUILD_LIST",
    106: "LOAD_ATTR",
    107: "COMPARE_OP",
    108: "IMPORT_NAME",
    109: "IMPORT_FROM",
    110: "JUMP_FORWARD",
    111: "JUMP_IF_FALSE_OR_POP",
    112: "JUMP_IF_TRUE_OR_POP",
    113: "JUMP_ABSOLUTE",
    114: "POP_JUMP_IF_FALSE",
    115: "POP_JUMP_IF_TRUE",
    116: "LOAD_GLOBAL",
    120: "SETUP_LOOP",
    124: "LOAD_FAST",
    125: "STORE_FAST",
    131: "CALL_FUNCTION",
    132: "MAKE_FUNCTION",
}

HAVE_ARGUMENT = 90


def parse_tag_int(line: str, tag: str) -> Optional[int]:
    m = re.search(rf"<{tag}>\s*([0-9a-fA-Fx]+)\s*</{tag}>", line)
    if not m:
        return None
    raw = m.group(1)
    if raw.lower().startswith("0x"):
        return int(raw, 16)
    if all(c in "0123456789abcdefABCDEF" for c in raw) and any(c in "abcdefABCDEF" for c in raw):
        return int(raw, 16)
    return int(raw, 10)


def parse_tag_literal(line: str, tag: str) -> Any:
    m = re.search(rf"<{tag}>\s*(.*?)\s*</{tag}>", line)
    if not m:
        return None
    text = m.group(1)
    try:
        return ast.literal_eval(text)
    except Exception:
        return text


def parse_consts(lines: List[str], i: int) -> Tuple[List[Any], int]:
    out: List[Any] = []
    while i < len(lines):
        s = lines[i].strip()
        if s == "</consts>":
            return out, i + 1
        if not s:
            i += 1
            continue
        if s == "<code>":
            child, i = parse_codeobj(lines, i)
            out.append(child)
            continue
        if s.startswith("<"):
            i += 1
            continue
        try:
            out.append(ast.literal_eval(s))
        except Exception:
            out.append(s)
        i += 1
    return out, i


def parse_bytecode_hex(lines: List[str], i: int) -> Tuple[str, int]:
    hex_chunks: List[str] = []
    i += 1  # skip opening <code>
    while i < len(lines):
        s = lines[i].strip()
        if s == "</code>":
            break
        hex_chunks.extend(re.findall(r"[0-9a-fA-F]+", s))
        i += 1
    return "".join(hex_chunks), i + 1


def parse_codeobj(lines: List[str], i: int) -> Tuple[CodeObj, int]:
    # lines[i] should be <code>
    obj = CodeObj()
    i += 1
    while i < len(lines):
        s = lines[i].strip()
        if s == "</code>":
            return obj, i + 1
        if not s:
            i += 1
            continue
        if s == "<code>":
            # bytecode payload section
            obj.code_hex, i = parse_bytecode_hex(lines, i)
            continue
        if s == "<consts>":
            obj.consts, i = parse_consts(lines, i + 1)
            continue
        for tag, attr in (
            ("argcount", "argcount"),
            ("nlocals", "nlocals"),
            ("stacksize", "stacksize"),
            ("flags", "flags"),
            ("firstlineno", "firstlineno"),
        ):
            if s.startswith(f"<{tag}>"):
                val = parse_tag_int(s, tag)
                if val is not None:
                    setattr(obj, attr, val)
                break
        else:
            if s.startswith("<names>"):
                val = parse_tag_literal(s, "names")
                obj.names = tuple(val) if isinstance(val, (list, tuple)) else ()
            elif s.startswith("<varnames>"):
                val = parse_tag_literal(s, "varnames")
                obj.varnames = tuple(val) if isinstance(val, (list, tuple)) else ()
            elif s.startswith("<freevars>"):
                val = parse_tag_literal(s, "freevars")
                obj.freevars = tuple(val) if isinstance(val, (list, tuple)) else ()
            elif s.startswith("<cellvars>"):
                val = parse_tag_literal(s, "cellvars")
                obj.cellvars = tuple(val) if isinstance(val, (list, tuple)) else ()
            elif s.startswith("<filename>"):
                val = parse_tag_literal(s, "filename")
                obj.filename = str(val)
            elif s.startswith("<name>"):
                val = parse_tag_literal(s, "name")
                obj.name = str(val)
            elif s.startswith("<lnotab>"):
                val = parse_tag_literal(s, "lnotab")
                obj.lnotab = str(val)
            i += 1
            continue
        i += 1
    return obj, i


def parse_pyc_text(path: str) -> CodeObj:
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        lines = [ln.rstrip("\n") for ln in f]
    start = None
    for idx, ln in enumerate(lines):
        if ln.strip() == "<code>":
            start = idx
            break
    if start is None:
        raise ValueError("No top-level <code> block found")
    obj, _ = parse_codeobj(lines, start)
    return obj


def const_repr(c: Any) -> str:
    if isinstance(c, CodeObj):
        return f"<code:{c.name}>"
    return repr(c)


def arg_value(co: CodeObj, op: int, arg: int) -> Any:
    if op == 100:
        return co.consts[arg] if 0 <= arg < len(co.consts) else f"<const#{arg}>"
    if op in (90, 101, 106, 108, 109, 116):
        return co.names[arg] if 0 <= arg < len(co.names) else f"<name#{arg}>"
    if op in (124, 125):
        return co.varnames[arg] if 0 <= arg < len(co.varnames) else f"<var#{arg}>"
    if op in (110, 113, 114, 115):
        return arg
    return arg


def disassemble_py2(co: CodeObj) -> List[Instr]:
    code = bytes.fromhex(co.code_hex) if co.code_hex else b""
    out: List[Instr] = []
    i = 0
    while i < len(code):
        off = i
        op = code[i]
        i += 1
        arg = None
        size = 1
        if op >= HAVE_ARGUMENT:
            if i + 1 >= len(code):
                break
            arg = code[i] | (code[i + 1] << 8)
            i += 2
            size = 3
        name = OPNAME.get(op, f"OP_{op}")
        av = arg_value(co, op, arg) if arg is not None else None
        out.append(Instr(off, op, name, arg, av, size, i))
    return out


def safe_pop(stack: List[str], default: str = "<expr>") -> str:
    return stack.pop() if stack else default


def emit_pseudocode(co: CodeObj) -> List[str]:
    ins = disassemble_py2(co)
    lines: List[str] = []
    stack: List[str] = []
    loop_end_stack: List[int] = []
    pending_print: Optional[str] = None
    suppress_store_offsets = set()

    def indent() -> str:
        return "    " * len(loop_end_stack)

    # map offsets for lookahead
    off2idx = {x.offset: idx for idx, x in enumerate(ins)}

    for idx, x in enumerate(ins):
        while loop_end_stack and x.offset >= loop_end_stack[-1]:
            loop_end_stack.pop()
        op = x.op
        if op == 100:  # LOAD_CONST
            stack.append(const_repr(x.argval))
            continue
        if op in (101, 116, 124):  # LOAD_NAME/GLOBAL/FAST
            stack.append(str(x.argval))
            continue
        if op in (90, 125):  # STORE_NAME/FAST
            if x.offset in suppress_store_offsets:
                continue
            rhs = safe_pop(stack)
            lines.append(f"{indent()}{x.argval} = {rhs}")
            continue
        if op == 106:  # LOAD_ATTR
            obj = safe_pop(stack)
            stack.append(f"{obj}.{x.argval}")
            continue
        if op == 131:  # CALL_FUNCTION
            argc = x.arg or 0
            args = [safe_pop(stack) for _ in range(argc)][::-1]
            func = safe_pop(stack, "<func>")
            stack.append(f"{func}({', '.join(args)})")
            continue
        if op in (23, 55):  # BINARY_ADD / INPLACE_ADD
            b = safe_pop(stack)
            a = safe_pop(stack)
            stack.append(f"({a} + {b})")
            continue
        if op == 22:  # BINARY_MODULO
            b = safe_pop(stack)
            a = safe_pop(stack)
            stack.append(f"({a} % {b})")
            continue
        if op == 65:  # BINARY_XOR
            b = safe_pop(stack)
            a = safe_pop(stack)
            stack.append(f"({a} ^ {b})")
            continue
        if op == 25:  # BINARY_SUBSCR
            idx_expr = safe_pop(stack)
            obj = safe_pop(stack)
            stack.append(f"{obj}[{idx_expr}]")
            continue
        if op == 30:  # SLICE+0
            obj = safe_pop(stack)
            stack.append(f"{obj}[:]")
            continue
        if op == 31:  # SLICE+1
            lo = safe_pop(stack)
            obj = safe_pop(stack)
            stack.append(f"{obj}[{lo}:]")
            continue
        if op == 32:  # SLICE+2
            hi = safe_pop(stack)
            obj = safe_pop(stack)
            stack.append(f"{obj}[:{hi}]")
            continue
        if op == 33:  # SLICE+3
            hi = safe_pop(stack)
            lo = safe_pop(stack)
            obj = safe_pop(stack)
            stack.append(f"{obj}[{lo}:{hi}]")
            continue
        if op == 108:  # IMPORT_NAME
            _from = safe_pop(stack)
            _lvl = safe_pop(stack)
            from_expr = _from.strip()
            mod = str(x.argval)
            if from_expr in ("None", "()", ""):
                stack.append(f"import {mod}")
            else:
                stack.append(mod)
            continue
        if op == 109:  # IMPORT_FROM
            mod = stack[-1] if stack else "<module>"
            mod_name = str(mod).replace("import ", "").strip()
            stack.append(f"from {mod_name} import {x.argval}")
            continue
        if op == 1:  # POP_TOP
            expr = safe_pop(stack, "")
            if expr:
                lines.append(f"{indent()}{expr}")
            continue
        if op == 132:  # MAKE_FUNCTION
            defaults = x.arg or 0
            for _ in range(defaults):
                safe_pop(stack)
            code_obj = safe_pop(stack, "<code_obj>")
            stack.append(f"<function {code_obj}>")
            continue
        if op == 120:  # SETUP_LOOP
            continue
        if op == 68:  # GET_ITER
            it = safe_pop(stack)
            stack.append(it)
            continue
        if op == 93:  # FOR_ITER
            it = stack[-1] if stack else "<iterable>"
            nxt = ins[idx + 1] if idx + 1 < len(ins) else None
            var = "item"
            if nxt and nxt.op in (125, 90):
                var = str(nxt.argval)
                suppress_store_offsets.add(nxt.offset)
            loop_end = x.next_offset + (x.arg or 0)
            lines.append(f"{indent()}for {var} in {it}:")
            loop_end_stack.append(loop_end)
            continue
        if op == 113:  # JUMP_ABSOLUTE
            continue
        if op == 87:  # POP_BLOCK
            continue
        if op == 71:  # PRINT_ITEM
            pending_print = safe_pop(stack)
            continue
        if op == 72:  # PRINT_NEWLINE
            if pending_print is not None:
                lines.append(f"{indent()}print({pending_print})")
                pending_print = None
            else:
                lines.append(f"{indent()}print()")
            continue
        if op == 83:  # RETURN_VALUE
            ret = safe_pop(stack, "None")
            lines.append(f"{indent()}return {ret}")
            continue
        # fallback line for unsupported opcodes
        if x.arg is None:
            lines.append(f"{indent()}# {x.offset:04}: {x.opname}")
        else:
            lines.append(f"{indent()}# {x.offset:04}: {x.opname} {x.argval!r}")
    return lines


def dump_codeobj(co: CodeObj, depth: int = 0) -> List[str]:
    pad = "    " * depth
    out: List[str] = []
    out.append(f"{pad}# ---- code object: {co.name} ({co.filename}:{co.firstlineno}) ----")
    if co.name == "<module>":
        out.extend(f"{pad}{ln}" for ln in emit_pseudocode(co))
    else:
        args = ", ".join(str(x) for x in co.varnames[: co.argcount])
        out.append(f"{pad}def {co.name}({args}):")
        body = emit_pseudocode(co)
        if body:
            out.extend(f"{pad}    {ln}" for ln in body)
        else:
            out.append(f"{pad}    pass")
    for c in co.consts:
        if isinstance(c, CodeObj):
            out.append("")
            out.extend(dump_codeobj(c, depth=depth))
    return out


def main() -> int:
    ap = argparse.ArgumentParser(description="Translate pyc-text dump to Python-like pseudocode")
    ap.add_argument("input", help="Path to pyc-text dump file")
    ap.add_argument("-o", "--output", default="", help="Output file path, default prints to stdout")
    args = ap.parse_args()
    co = parse_pyc_text(args.input)
    text = "\n".join(dump_codeobj(co)) + "\n"
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(text)
        print(f"[OK] pseudo saved to: {args.output}")
    else:
        print(text, end="")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
```

分析图中代码：`f = 'flag{*******}'` 告诉我们 flag 的格式，后面可以据此逆推 key；`_ = <function <code:_>>` 和 ____ = <function
<code:____>> 告诉我们有两个函数：短横 `_` 和长横 `____`。下方代码：

```python
def _(b):
    __ = ''.join(random.sample(string.digits, 4))  # 从里面随机取 4 个不重复字符，返回列表，再拼成字符串
    ___ = ''
    for i in range(len(b)):  # 按顺序遍历索引
        ___ = (___ + chr((ord(b[i]) ^ ord(__[(i % 4)]))))  # 循环异或做拼接
    return ___

def ____(a):
    ___ = md5()
    ___.update(a)
    return ___.digest()  # 返回 MD5 16 字节二进制摘要，不是可读字符串
```

`e = ((_(f[:12]) + ____(f[12:19])) + _(f[19:]))` 告诉我们 e 是一个字符串，由三段拼接而成：用一个四位密钥异或前 12 位；对中间 7 位计算 MD5；再用另一个四位密钥异或从第 20 个字符起的所有内容。

第1步：先把目标密文 base64 解开

目标是 `U1VQU05pSHdqCEJrQu7FS7Vngk1OTQ58qqghXmt2AUdrcFBBUEU=`，跑这个：

```python
import base64
c = base64.b64decode('U1VQU05pSHdqCEJrQu7FS7Vngk1OTQ58qqghXmt2AUdrcFBBUEU=')
print(len(c), c)
```

或者使用 CyberChef 的 Base64 和 Count occurrences 解码并计算长度：在 Regex 输入框中填入一个点号 `.`，你会得到长度 38 字节。

第2步：按代码结构切段

为什么切成 12 + 16 + 10？`f[:12]` 明确是 12 字符；`md5(...).digest()` 固定 16 字节；总长 38，所以剩下 38 - 12 - 16 = 10。所以：

```python
c1 = c[:12]    # XOR 段1
c2 = c[12:28]  # MD5 digest 段
c3 = c[28:]    # XOR 段2
```

第3步：解前 12 字节（最容易）

`_()` 函数本质是 4 位数字循环 XOR。flag 通常以 `flag{` 开头，所以可用已知明文反推 key。

```python
known = b'flag{'
key_guess = bytes([c1[i] ^ known[i] for i in range(4)])  # 只取前4位
print(key_guess)  # b'5914'
```

逐字符写出来就是（enc 为密文字符串）：

```python
k0 = chr(ord(enc[0]) ^ ord('f'))
k1 = chr(ord(enc[1]) ^ ord('l'))
k2 = chr(ord(enc[2]) ^ ord('a'))
k3 = chr(ord(enc[3]) ^ ord('g'))
key = k0 + k1 + k2 + k3
```

然后用这个 key 解整个 c1：

```python
k1 = b'5914'
p1 = bytes([c1[i] ^ k1[i % 4] for i in range(len(c1))])
print(p1)  # b'flag{PyC_1s_'
```

前半就出来了：`flag{PyC_1s_`

第4步：解后 10 字节（第二个 XOR 段）

注意后段是另一次调用 `_()`，key 可能和前段不同。所以要爆破 4 位不重复数字（`random.sample(string.digits, 4)`）。

```python
import itertools, string

def dec(seg, key):
    kb = key.encode()
    return bytes([seg[i] ^ kb[i % 4] for i in range(len(seg))])

for p in itertools.permutations('0123456789', 4):
    k = ''.join(p)
    x = dec(c3, k)
    # 筛选：可打印 + 以 } 结尾（flag 常见）
    if all(32 <= b < 127 for b in x) and x.endswith(b'}'):
        s = x.decode()
        if 'Hard' in s or s.startswith('_'):
            print(k, s)
```

会筛到正确结果，key: `4813`，明文: `_N0t_Hard}`

第5步：处理中间 MD5 digest 段

c2 是 16 字节原始摘要，先转十六进制：

```python
print(c2.hex())  # 42eec54bb567824d4e4d0e7caaa8215e
```

这就是 `f[12:19]`（长度 7）的 md5 值。md5 不可逆，只能“猜明文再比对”。直接网页搜一下（在线 MD5 查询），得到明文 `613u21i`。

第6步：拼接最终 flag

`flag{PyC_1s_` + `613u21i` + `_N0t_Hard}`

flag{PyC_1s_613u21i_N0t_Hard}