从一个问题出发：printf 处理格式化字符串（fmtstr）部分的源码分析

0x00

在第六届强网拟态线下赛的一道格式化字符串漏洞题目中，遇到了一个当时没有想通的现象。事后找时间分析了一下 printf 的源码（glibc 2.31），终于把原因弄清楚了。

0x01 issue

奇怪但合理的现象

payload = '%{}c'.format(printf_ret).encode() + b"%11$hn" + \
        '%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

最初我是这样写的：先用 %11$hn 修改跳板，再用 %39$hhn 通过跳板去修改目标。调试发现只有跳板被成功修改，而真正的目标——printf 的返回地址——却没有被改到。

payload = b'%p'*9 + '%{}c'.format(printf_ret - 90).encode() + b"%hn" + \
        '%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

而换一种写法：先用 9 个 %p 加上随后的 %c 按顺序消耗掉前 10 个参数，再用不带 $ 的 %hn 去写第 11 个参数指向的跳板，最后仍用 %39$hhn 写目标，这样两处却都能修改成功。

0x02 why？

下面探究一下：按顺序取参的 % 格式符，与用 $ 指定参数位置的格式符，在格式化字符串的处理过程中有什么差异。

我们找到源码中处理格式化字符串的部分（glibc2.31）

/* Process whole format string.  */
  do
    {
      STEP0_3_TABLE;
      STEP4_TABLE;

      union printf_arg *args_value;	/* This is not used here but ... */
      int is_negative;	/* Flag for negative number.  */
      union
      {
	unsigned long long int longlong;
	unsigned long int word;
      } number;
      int base;
      union printf_arg the_arg;
      CHAR_T *string;	/* Pointer to argument string.  */
      int alt = 0;	/* Alternate format.  */
      int space = 0;	/* Use space prefix if no sign is needed.  */
      int left = 0;	/* Left-justify output.  */
      int showsign = 0;	/* Always begin with plus or minus sign.  */
      int group = 0;	/* Print numbers according grouping rules.  */
      int is_long_double = 0; /* Argument is long double/ long long int.  */
      int is_short = 0;	/* Argument is short int.  */
      int is_long = 0;	/* Argument is long int.  */
      int is_char = 0;	/* Argument is promoted (unsigned) char.  */
      int width = 0;	/* Width of output; 0 means none specified.  */
      int prec = -1;	/* Precision of output; -1 means none specified.  */
      /* This flag is set by the 'I' modifier and selects the use of the
	 `outdigits' as determined by the current locale.  */
      int use_outdigits = 0;
      UCHAR_T pad = L_(' ');/* Padding character.  */
      CHAR_T spec;

      workstart = NULL;
      workend = work_buffer + WORK_BUFFER_SIZE;

      /* Get current character in format string.  */
      JUMP (*++f, step0_jumps);

      /* ' ' flag.  */
    LABEL (flag_space):
      space = 1;
      JUMP (*++f, step0_jumps);

      /* '+' flag.  */
    LABEL (flag_plus):
      showsign = 1;
      JUMP (*++f, step0_jumps);

      /* The '-' flag.  */
    LABEL (flag_minus):
      left = 1;
      pad = L_(' ');
      JUMP (*++f, step0_jumps);

      /* The '#' flag.  */
    LABEL (flag_hash):
      alt = 1;
      JUMP (*++f, step0_jumps);

      /* The '0' flag.  */
    LABEL (flag_zero):
      if (!left)
	pad = L_('0');
      JUMP (*++f, step0_jumps);

      /* The '\'' flag.  */
    LABEL (flag_quote):
      group = 1;

      if (grouping == (const char *) -1)
	{
#ifdef COMPILE_WPRINTF
	  thousands_sep = _NL_CURRENT_WORD (LC_NUMERIC,
					    _NL_NUMERIC_THOUSANDS_SEP_WC);
#else
	  thousands_sep = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
#endif

	  grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
	  if (*grouping == '\0' || *grouping == CHAR_MAX
#ifdef COMPILE_WPRINTF
	      || thousands_sep == L'\0'
#else
	      || *thousands_sep == '\0'
#endif
	      )
	    grouping = NULL;
	}
      JUMP (*++f, step0_jumps);

    LABEL (flag_i18n):
      use_outdigits = 1;
      JUMP (*++f, step0_jumps);

      /* Get width from argument.  */
    LABEL (width_asterics):
      {
	const UCHAR_T *tmp;	/* Temporary value.  */

	tmp = ++f;
	if (ISDIGIT (*tmp))
	  {
	    int pos = read_int (&tmp);

	    if (pos == -1)
	      {
		__set_errno (EOVERFLOW);
		done = -1;
		goto all_done;
	      }

	    if (pos && *tmp == L_('$'))
	      /* The width comes from a positional parameter.  */
	      goto do_positional;
	  }
	width = va_arg (ap, int);

	/* Negative width means left justified.  */
	if (width < 0)
	  {
	    width = -width;
	    pad = L_(' ');
	    left = 1;
	  }

	if (__glibc_unlikely (width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
	  {
	    __set_errno (EOVERFLOW);
	    done = -1;
	    goto all_done;
	  }

	if (width >= WORK_BUFFER_SIZE - EXTSIZ)
	  {
	    /* We have to use a special buffer.  */
	    size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
	    if (__libc_use_alloca (needed))
	      workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
	    else
	      {
		workstart = (CHAR_T *) malloc (needed);
		if (workstart == NULL)
		  {
		    done = -1;
		    goto all_done;
		  }
		workend = workstart + width + EXTSIZ;
	      }
	  }
      }
      JUMP (*f, step1_jumps);

      /* Given width in format string.  */
    LABEL (width):
      width = read_int (&f);

      if (__glibc_unlikely (width == -1
			    || width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
	{
	  __set_errno (EOVERFLOW);
	  done = -1;
	  goto all_done;
	}

      if (width >= WORK_BUFFER_SIZE - EXTSIZ)
	{
	  /* We have to use a special buffer.  */
	  size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
	  if (__libc_use_alloca (needed))
	    workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
	  else
	    {
	      workstart = (CHAR_T *) malloc (needed);
	      if (workstart == NULL)
		{
		  done = -1;
		  goto all_done;
		}
	      workend = workstart + width + EXTSIZ;
	    }
	}
      if (*f == L_('$'))
	/* Oh, oh.  The argument comes from a positional parameter.  */
	goto do_positional;
      JUMP (*f, step1_jumps);

    LABEL (precision):
      ++f;
      if (*f == L_('*'))
	{
	  const UCHAR_T *tmp;	/* Temporary value.  */

	  tmp = ++f;
	  if (ISDIGIT (*tmp))
	    {
	      int pos = read_int (&tmp);

	      if (pos == -1)
		{
		  __set_errno (EOVERFLOW);
		  done = -1;
		  goto all_done;
		}

	      if (pos && *tmp == L_('$'))
		/* The precision comes from a positional parameter.  */
		goto do_positional;
	    }
	  prec = va_arg (ap, int);

	  /* If the precision is negative the precision is omitted.  */
	  if (prec < 0)
	    prec = -1;
	}
      else if (ISDIGIT (*f))
	{
	  prec = read_int (&f);

	  /* The precision was specified in this case as an extremely
	     large positive value.  */
	  if (prec == -1)
	    {
	      __set_errno (EOVERFLOW);
	      done = -1;
	      goto all_done;
	    }
	}
      else
	prec = 0;
      if (prec > width && prec > WORK_BUFFER_SIZE - EXTSIZ)
	{
	  /* Deallocate any previously allocated buffer because it is
	     too small.  */
	  if (__glibc_unlikely (workstart != NULL))
	    free (workstart);
	  workstart = NULL;
	  if (__glibc_unlikely (prec >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
	    {
	      __set_errno (EOVERFLOW);
	      done = -1;
	      goto all_done;
	    }
	  size_t needed = ((size_t) prec + EXTSIZ) * sizeof (CHAR_T);

	  if (__libc_use_alloca (needed))
	    workend = (CHAR_T *) alloca (needed) + prec + EXTSIZ;
	  else
	    {
	      workstart = (CHAR_T *) malloc (needed);
	      if (workstart == NULL)
		{
		  done = -1;
		  goto all_done;
		}
	      workend = workstart + prec + EXTSIZ;
	    }
	}
      JUMP (*f, step2_jumps);

      /* Process 'h' modifier.  There might another 'h' following.  */
    LABEL (mod_half):
      is_short = 1;
      JUMP (*++f, step3a_jumps);

      /* Process 'hh' modifier.  */
    LABEL (mod_halfhalf):
      is_short = 0;
      is_char = 1;
      JUMP (*++f, step4_jumps);

      /* Process 'l' modifier.  There might another 'l' following.  */
    LABEL (mod_long):
      is_long = 1;
      JUMP (*++f, step3b_jumps);

      /* Process 'L', 'q', or 'll' modifier.  No other modifier is
	 allowed to follow.  */
    LABEL (mod_longlong):
      is_long_double = 1;
      is_long = 1;
      JUMP (*++f, step4_jumps);

    LABEL (mod_size_t):
      is_long_double = sizeof (size_t) > sizeof (unsigned long int);
      is_long = sizeof (size_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

    LABEL (mod_ptrdiff_t):
      is_long_double = sizeof (ptrdiff_t) > sizeof (unsigned long int);
      is_long = sizeof (ptrdiff_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

    LABEL (mod_intmax_t):
      is_long_double = sizeof (intmax_t) > sizeof (unsigned long int);
      is_long = sizeof (intmax_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

      /* Process current format.  */
      while (1)
	{
	  process_arg (((struct printf_spec *) NULL));
	  process_string_arg (((struct printf_spec *) NULL));

	LABEL (form_unknown):
	  if (spec == L_('\0'))
	    {
	      /* The format string ended before the specifier is complete.  */
	      __set_errno (EINVAL);
	      done = -1;
	      goto all_done;
	    }

	  /* If we are in the fast loop force entering the complicated
	     one.  */
	  goto do_positional;
	}

      /* The format is correctly handled.  */
      ++nspecs_done;

      if (__glibc_unlikely (workstart != NULL))
	free (workstart);
      workstart = NULL;

      /* Look for next format specifier.  */
#ifdef COMPILE_WPRINTF
      f = __find_specwc ((end_of_spec = ++f));
#else
      f = __find_specmb ((end_of_spec = ++f));
#endif

      /* Write the following constant string.  */
      outstring (end_of_spec, f - end_of_spec);
    }
  while (*f != L_('\0'));

其中涉及到的几个跳转表结构都在宏STEP0_3_TABLE以及STEP4_TABLE里，定义如下

#define STEP0_3_TABLE							      \
    /* Step 0: at the beginning.  */					      \
    static JUMP_TABLE_TYPE step0_jumps[30] =				      \
    {									      \
      REF (form_unknown),						      \
      REF (flag_space),		/* for ' ' */				      \
      REF (flag_plus),		/* for '+' */				      \
      REF (flag_minus),		/* for '-' */				      \
      REF (flag_hash),		/* for '<hash>' */			      \
      REF (flag_zero),		/* for '0' */				      \
      REF (flag_quote),		/* for '\'' */				      \
      REF (width_asterics),	/* for '*' */				      \
      REF (width),		/* for '1'...'9' */			      \
      REF (precision),		/* for '.' */				      \
      REF (mod_half),		/* for 'h' */				      \
      REF (mod_long),		/* for 'l' */				      \
      REF (mod_longlong),	/* for 'L', 'q' */			      \
      REF (mod_size_t),		/* for 'z', 'Z' */			      \
      REF (form_percent),	/* for '%' */				      \
      REF (form_integer),	/* for 'd', 'i' */			      \
      REF (form_unsigned),	/* for 'u' */				      \
      REF (form_octal),		/* for 'o' */				      \
      REF (form_hexa),		/* for 'X', 'x' */			      \
      REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
      REF (form_character),	/* for 'c' */				      \
      REF (form_string),	/* for 's', 'S' */			      \
      REF (form_pointer),	/* for 'p' */				      \
      REF (form_number),	/* for 'n' */				      \
      REF (form_strerror),	/* for 'm' */				      \
      REF (form_wcharacter),	/* for 'C' */				      \
      REF (form_floathex),	/* for 'A', 'a' */			      \
      REF (mod_ptrdiff_t),      /* for 't' */				      \
      REF (mod_intmax_t),       /* for 'j' */				      \
      REF (flag_i18n),		/* for 'I' */				      \
    };
    /* Step 1: after processing width.  */				      \
    static JUMP_TABLE_TYPE step1_jumps[30] =
    {
        ...
    }
    /* Step 2: after processing precision.  */				      \
    static JUMP_TABLE_TYPE step2_jumps[30] =
    {
        ...
    }
    /* Step 3a: after processing first 'h' modifier.  */		      \
    static JUMP_TABLE_TYPE step3a_jumps[30] =
    {
        ...
    }
    /* Step 3b: after processing first 'l' modifier.  */		      \
    static JUMP_TABLE_TYPE step3b_jumps[30] =
    {
        ...
    }
    
#define STEP4_TABLE							      \
    /* Step 4: processing format specifier.  */				      \
    static JUMP_TABLE_TYPE step4_jumps[30] =				      \
    {									      \
      REF (form_unknown),						      \
      REF (form_unknown),	/* for ' ' */				      \
      REF (form_unknown),	/* for '+' */				      \
      REF (form_unknown),	/* for '-' */				      \
      REF (form_unknown),	/* for '<hash>' */			      \
      REF (form_unknown),	/* for '0' */				      \
      REF (form_unknown),	/* for '\'' */				      \
      REF (form_unknown),	/* for '*' */				      \
      REF (form_unknown),	/* for '1'...'9' */			      \
      REF (form_unknown),	/* for '.' */				      \
      REF (form_unknown),	/* for 'h' */				      \
      REF (form_unknown),	/* for 'l' */				      \
      REF (form_unknown),	/* for 'L', 'q' */			      \
      REF (form_unknown),	/* for 'z', 'Z' */			      \
      REF (form_percent),	/* for '%' */				      \
      REF (form_integer),	/* for 'd', 'i' */			      \
      REF (form_unsigned),	/* for 'u' */				      \
      REF (form_octal),		/* for 'o' */				      \
      REF (form_hexa),		/* for 'X', 'x' */			      \
      REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
      REF (form_character),	/* for 'c' */				      \
      REF (form_string),	/* for 's', 'S' */			      \
      REF (form_pointer),	/* for 'p' */				      \
      REF (form_number),	/* for 'n' */				      \
      REF (form_strerror),	/* for 'm' */				      \
      REF (form_wcharacter),	/* for 'C' */				      \
      REF (form_floathex),	/* for 'A', 'a' */			      \
      REF (form_unknown),       /* for 't' */				      \
      REF (form_unknown),       /* for 'j' */				      \
      REF (form_unknown)        /* for 'I' */				      \
    }

省略的部分基本都长一个样：按照顺序先处理标志、宽度、精度、长度修饰符等，再根据具体的转换说明符执行相应操作。
下面以 %n 为例分析一下处理流程，对应宏 process_arg(fspec) 中的这一段：

LABEL (form_number):						      \
      if ((mode_flags & PRINTF_FORTIFY) != 0)				      \
	{								      \
	  if (! readonly_format)					      \
	    {								      \
	      extern int __readonly_area (const void *, size_t)		      \
		attribute_hidden;					      \
	      readonly_format						      \
		= __readonly_area (format, ((STR_LEN (format) + 1)	      \
					    * sizeof (CHAR_T)));	      \
	    }								      \
	  if (readonly_format < 0)					      \
	    __libc_fatal ("*** %n in writable segment detected ***\n");	      \
	}								      \
      /* Answer the count of characters written.  */			      \
      if (fspec == NULL)						      \
	{								      \
	  if (is_longlong)						      \
	    *(long long int *) va_arg (ap, void *) = done;		      \
	  else if (is_long_num)						      \
	    *(long int *) va_arg (ap, void *) = done;			      \
	  else if (is_char)						      \
	    *(char *) va_arg (ap, void *) = done;			      \
	  else if (!is_short)						      \
	    *(int *) va_arg (ap, void *) = done;			      \
	  else								      \
	    *(short int *) va_arg (ap, void *) = done;			      \
	}								      \
      else								      \
	if (is_longlong)						      \
	  *(long long int *) args_value[fspec->data_arg].pa_pointer = done;   \
	else if (is_long_num)						      \
	  *(long int *) args_value[fspec->data_arg].pa_pointer = done;	      \
	else if (is_char)						      \
	  *(char *) args_value[fspec->data_arg].pa_pointer = done;	      \
	else if (!is_short)						      \
	  *(int *) args_value[fspec->data_arg].pa_pointer = done;	      \
	else								      \
	  *(short int *) args_value[fspec->data_arg].pa_pointer = done;	      \
      break;

可以看到这里会判断 fspec：若为 NULL，则直接通过 va_arg(ap, type) 按顺序逐个取参，也就是线性地先取寄存器中的参数、再取栈上的参数，并且 %n 的写入是立即生效的（**在 x86-64 上，rdi 存放的是格式化字符串本身的地址，% 占位符对应的参数从 rsi 开始，即先是 rsi、rdx、rcx、r8、r9 这五个寄存器，之后才是栈上的内存单元**）。
如果不为 NULL，则从 args_value[fspec->data_arg] 取参数，这里的下标是提前确定好的。在哪里确定呢？当格式串中使用 $ 来指定参数位置时，会跳转到 do_positional，进入一个专门的处理函数：

if (*f == L_('$'))
	/* Oh, oh.  The argument comes from a positional parameter.  */
	goto do_positional;

do_positional:
  if (__glibc_unlikely (workstart != NULL))
    {
      free (workstart);
      workstart = NULL;
    }
  done = printf_positional (s, format, readonly_format, ap, &ap_save,
			    done, nspecs_done, lead_str_end, work_buffer,
			    save_errno, grouping, thousands_sep, mode_flags);

 all_done:
  if (__glibc_unlikely (workstart != NULL))
    free (workstart);
  /* Unlock the stream.  */
  _IO_funlockfile (s);
  _IO_cleanup_region_end (0);

  return done;
}

这里会调用 printf_positional 函数。这个函数长得要命，就不全贴了，只展示关键流程。
先是对参数存储区域的初始化，不赘述；然后确定每个参数的类型，再依次用 va_arg 从 ap_save（进入 vfprintf 时保存的 va_list 副本）中把参数取出，放入 args_value[]：

/* Fill in the types of all the arguments.  */
  for (cnt = 0; cnt < nspecs; ++cnt)
    {
      /* If the width is determined by an argument this is an int.  */
      if (specs[cnt].width_arg != -1)
	args_type[specs[cnt].width_arg] = PA_INT;

      /* If the precision is determined by an argument this is an int.  */
      if (specs[cnt].prec_arg != -1)
	args_type[specs[cnt].prec_arg] = PA_INT;

      switch (specs[cnt].ndata_args)
	{
	case 0:		/* No arguments.  */
	  break;
	case 1:		/* One argument; we already have the
			   type and size.  */
	  args_type[specs[cnt].data_arg] = specs[cnt].data_arg_type;
	  args_size[specs[cnt].data_arg] = specs[cnt].size;
	  break;
	default:
	  /* We have more than one argument for this format spec.
	     We must call the arginfo function again to determine
	     all the types.  */
	  (void) (*__printf_arginfo_table[specs[cnt].info.spec])
	    (&specs[cnt].info,
	     specs[cnt].ndata_args, &args_type[specs[cnt].data_arg],
	     &args_size[specs[cnt].data_arg]);
	  break;
	}
    }

  /* Now we know all the types and the order.  Fill in the argument
     values.  */
  for (cnt = 0; cnt < nargs; ++cnt)
    switch (args_type[cnt])
      {
#define T(tag, mem, type)				\
	case tag:					\
	  args_value[cnt].mem = va_arg (*ap_savep, type); \
	  break

	T (PA_WCHAR, pa_wchar, wint_t);
      case PA_CHAR:				/* Promoted.  */
      case PA_INT|PA_FLAG_SHORT:		/* Promoted.  */
#if LONG_MAX == INT_MAX
      case PA_INT|PA_FLAG_LONG:
#endif
	T (PA_INT, pa_int, int);
#if LONG_MAX == LONG_LONG_MAX
      case PA_INT|PA_FLAG_LONG:
#endif
	T (PA_INT|PA_FLAG_LONG_LONG, pa_long_long_int, long long int);
#if LONG_MAX != INT_MAX && LONG_MAX != LONG_LONG_MAX
# error "he?"
#endif
      case PA_FLOAT:				/* Promoted.  */
	T (PA_DOUBLE, pa_double, double);
      case PA_DOUBLE|PA_FLAG_LONG_DOUBLE:
	if (__glibc_unlikely ((mode_flags & PRINTF_LDBL_IS_DBL) != 0))
	  {
	    args_value[cnt].pa_double = va_arg (*ap_savep, double);
	    args_type[cnt] &= ~PA_FLAG_LONG_DOUBLE;
	  }
#if __HAVE_FLOAT128_UNLIKE_LDBL
	else if ((mode_flags & PRINTF_LDBL_USES_FLOAT128) != 0)
	  args_value[cnt].pa_float128 = va_arg (*ap_savep, _Float128);
#endif
	else
	  args_value[cnt].pa_long_double = va_arg (*ap_savep, long double);
	break;
      case PA_STRING:				/* All pointers are the same */
      case PA_WSTRING:			/* All pointers are the same */
	T (PA_POINTER, pa_pointer, void *);
#undef T
      default:
	if ((args_type[cnt] & PA_FLAG_PTR) != 0)
	  args_value[cnt].pa_pointer = va_arg (*ap_savep, void *);
	else if (__glibc_unlikely (__printf_va_arg_table != NULL)
		 && __printf_va_arg_table[args_type[cnt] - PA_LAST] != NULL)
	  {
	    args_value[cnt].pa_user = alloca (args_size[cnt]);
	    (*__printf_va_arg_table[args_type[cnt] - PA_LAST])
	      (args_value[cnt].pa_user, ap_savep);
	  }
	else
	  memset (&args_value[cnt], 0, sizeof (args_value[cnt]));
	break;
      case -1:
	/* Error case.  Not all parameters appear in N$ format
	   strings.  We have no way to determine their type.  */
	assert ((mode_flags & PRINTF_FORTIFY) != 0);
	__libc_fatal ("*** invalid %N$ use detected ***\n");
      }

这相当于在进入 printf_positional 的那一刻建立了一份“参数快照”：所有参数的值被一次性读入 args_value[]，之后不会再从寄存器或栈上重新读取。随后函数会重新解析整个 fmtstr，并从尚未处理的格式符（下标 nspecs_done）开始继续处理：

/* Now walk through all format specifiers and process them.  */
 for (; (size_t) nspecs_done < nspecs; ++nspecs_done)
   {
     STEP4_TABLE;
     ...
       process_arg ((&specs[nspecs_done]));
    process_string_arg ((&specs[nspecs_done]));
     ...
   }

这里也是使用了一样的处理的宏，见此前处理流程

do
{
	...
	process_arg (((struct printf_spec *) NULL));
	process_string_arg (((struct printf_spec *) NULL));
	...
} while();

但是 printf_positional 传入的 fspec 不再是 NULL，而是 (&specs[nspecs_done])，此时所有参数（包括 %n 要写入的指针）都从 args_value[] 中取；快照建立之后，即使栈上的参数被 %n 改掉，也不会反映到已经建立的“参数快照”里。关键在于快照建立的时机：只有解析到第一个带 $ 的格式符时，才会进入 printf_positional 并建立快照。这也就解释了

payload = b'%p'*9 + '%{}c'.format(printf_ret - 90).encode() + b"%hn" + \
        '%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

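这样可以一次性改成——不带 $ 的 %hn 走的是 fspec == NULL 的分支，跳板被立即改写；之后解析到 %39$hhn 才进入 printf_positional 建立快照，快照中的第 39 个参数已经是改写后的跳板值。而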

payload = '%{}c'.format(printf_ret).encode() + b"%11$hn" + \
        '%{}c'.format(0x10000 - printf_ret + 0x23).encode() + b"%39$hhn"
io.send(payload)

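这样却不行——解析到 %11$hn 时就已经进入 printf_positional 建立了快照，此时跳板还没有被修改，%39$hhn 从快照里取到的仍是旧指针，于是数据被写到了旧指针指向的位置，而不是 printf 的返回地址。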