关于exit的利用以及一个程序的开始到结束

0x00

迎新练习赛@powchan出了一个神人题目It's my 钩，主要是打exit()的几个hook，同时打lilctf2025的一道静态堆题的时候也找到了几篇关于fini_array的利用，也是exit()的一个利用点，于是催生了我记录一下exit()的利用的想法
参考博客exit()分析与利用-安全KER - 安全资讯平台
以下贴出的glibc源码皆为2.35版本

0x01 exit()的流程

注意内核层面的系统调用_exit()和用户层面的库函数exit()是不同的：前者直接终止进程，由内核回收内核层面的资源，比如文件描述符等；后者则负责先清理用户层面的资源，比如刷新io流的缓冲区、执行tls析构函数和已注册的退出函数等等，最后再调用_exit()
我们所利用的正是用户层面的exit()
现在我们找到exit()的源码

void
exit (int status)
{
  __run_exit_handlers (status, &__exit_funcs, true, true);
}

其中的__exit_funcs我们找到定义

struct exit_function
  {
    /* `flavour' should be of type of the `enum' above but since we need
       this element in an atomic operation we have to use `long int'.  */
    long int flavor;
    union
      {
	void (*at) (void);
	struct
	  {
	    void (*fn) (int status, void *arg);
	    void *arg;
	  } on;
	struct
	  {
	    void (*fn) (void *arg, int status);
	    void *arg;
	    void *dso_handle;
	  } cxa;
      } func;
  };
struct exit_function_list
  {
    struct exit_function_list *next;
    size_t idx;
    struct exit_function fns[32];
  };
extern struct exit_function_list *__exit_funcs attribute_hidden;

是一个用于管理析构函数的结构，先按下不表
我们看到exit()就是对 __run_exit_handlers()的封装，我们找到 __run_exit_handlers()

void
attribute_hidden
__run_exit_handlers (int status, struct exit_function_list **listp,
		     bool run_list_atexit, bool run_dtors)
{
  /* First, call the TLS destructors.  */
#ifndef SHARED
  if (&__call_tls_dtors != NULL)
#endif
    if (run_dtors)
      __call_tls_dtors ();

  __libc_lock_lock (__exit_funcs_lock);

  /* We do it this way to handle recursive calls to exit () made by
     the functions registered with `atexit' and `on_exit'. We call
     everyone on the list and use the status value in the last
     exit (). */
  while (true)
    {
      struct exit_function_list *cur = *listp;

      if (cur == NULL)
	{
	  /* Exit processing complete.  We will not allow any more
	     atexit/on_exit registrations.  */
	  __exit_funcs_done = true;
	  break;
	}

      while (cur->idx > 0)
	{
	  struct exit_function *const f = &cur->fns[--cur->idx];
	  const uint64_t new_exitfn_called = __new_exitfn_called;

	  switch (f->flavor)
	    {
	      void (*atfct) (void);
	      void (*onfct) (int status, void *arg);
	      void (*cxafct) (void *arg, int status);
	      void *arg;

	    case ef_free:
	    case ef_us:
	      break;
	    case ef_on:
	      onfct = f->func.on.fn;
	      arg = f->func.on.arg;
#ifdef PTR_DEMANGLE
	      PTR_DEMANGLE (onfct);
#endif
	      /* Unlock the list while we call a foreign function.  */
	      __libc_lock_unlock (__exit_funcs_lock);
	      onfct (status, arg);
	      __libc_lock_lock (__exit_funcs_lock);
	      break;
	    case ef_at:
	      atfct = f->func.at;
#ifdef PTR_DEMANGLE
	      PTR_DEMANGLE (atfct);
#endif
	      /* Unlock the list while we call a foreign function.  */
	      __libc_lock_unlock (__exit_funcs_lock);
	      atfct ();
	      __libc_lock_lock (__exit_funcs_lock);
	      break;
	    case ef_cxa:
	      /* To avoid dlclose/exit race calling cxafct twice (BZ 22180),
		 we must mark this function as ef_free.  */
	      f->flavor = ef_free;
	      cxafct = f->func.cxa.fn;
	      arg = f->func.cxa.arg;
#ifdef PTR_DEMANGLE
	      PTR_DEMANGLE (cxafct);
#endif
	      /* Unlock the list while we call a foreign function.  */
	      __libc_lock_unlock (__exit_funcs_lock);
	      cxafct (arg, status);
	      __libc_lock_lock (__exit_funcs_lock);
	      break;
	    }

	  if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))
	    /* The last exit function, or another thread, has registered
	       more exit functions.  Start the loop over.  */
            continue;
	}

      *listp = cur->next;
      if (*listp != NULL)
	/* Don't free the last element in the chain, this is the statically
	   allocate element.  */
	free (cur);
    }

  __libc_lock_unlock (__exit_funcs_lock);

  if (run_list_atexit)
    RUN_HOOK (__libc_atexit, ());

  _exit (status);
}

其主要流程为（感谢chatgpt帮我画图）

用户调用 exit(status)
        │
        ▼
┌───────────────────────────────────────────┐
│        __run_exit_handlers(status)        │
└───────────────────────────────────────────┘
        │
        ▼
   调用 TLS 析构函数 (__call_tls_dtors)
        │
        ▼
   遍历 exit_function_list 链表
        │
        ├── ef_cxa → 调用 __cxa_atexit 注册的函数 (C++ 析构)
        │
        ├── ef_at  → 调用 atexit 注册的函数
        │
        ├── ef_on  → 调用 on_exit 注册的函数 (带 status, arg)
        │
        └── (其他类型跳过)
        │
        ▼
   若执行期间有新的回调注册 → 回到链表开头重新执行
        │
        ▼
   释放动态分配的回调节点
        │
        ▼
   如果 run_list_atexit = true → 执行 __libc_atexit 钩子
        │
        ▼
   最终调用 _exit(status)
        │
        ▼
   内核：彻底终止进程

以pwn的眼光来看，自然在这流程中发现许多可以利用的点

0x02 __exit_funcs

劫持__exit_funcs链表
既然__run_exit_handlers()会遍历__exit_funcs链表并执行其中函数，那么我们劫持这个链表就可以劫持程序执行流了….吗？
我们调试一下便知

0x7ffff7c455f0 <exit>         endbr64
0x7ffff7c455f4 <exit+4>       push   rax
0x7ffff7c455f5 <exit+5>       pop    rax                       RAX => 0x555555555149 (main)
0x7ffff7c455f6 <exit+6>       mov    ecx, 1                    ECX => 1
0x7ffff7c455fb <exit+11>      mov    edx, 1                    EDX => 1
0x7ffff7c45600 <exit+16>      lea    rsi, [rip + 0x1d5231]     RSI => 0x7ffff7e1a838 (__exit_funcs) —▸ 0x7ffff7e1bf00 (initial) ◂— 0
0x7ffff7c45607 <exit+23>      sub    rsp, 8                    RSP => 0x7fffffffdb50 (0x7fffffffdb58 - 0x8)
0x7ffff7c4560b <exit+27>      call   __run_exit_handlers         <__run_exit_handlers>

我们看调用__run_exit_handlers前的rsi也就是第二个参数&__exit_funcs，我们由此找到__exit_funcs链表查看

pwndbg> p *(struct exit_function_list*) 0x7ffff7e1bf00
$2 = {
  next = 0x0,
  idx = 1,
  fns = {{
      flavor = 4,
      func = {
        at = 0xf52c223ea4f375e4,
        on = {
          fn = 0xf52c223ea4f375e4,
          arg = 0x0
        },
        cxa = {
          fn = 0xf52c223ea4f375e4,
          arg = 0x0,
          dso_handle = 0x0
        }
      }
    }, {
      flavor = 0,
      func = {
        at = 0x0,
        on = {
          fn = 0x0,
          arg = 0x0
        },
        cxa = {
          fn = 0x0,
          arg = 0x0,
          dso_handle = 0x0
        }
      }
    } <repeats 31 times>}
}

看到0xf52c223ea4f375e4这个诡异的值（flavor = 4即ef_cxa，对应的指针是fns[0].func.cxa.fn），显然不是函数指针该有的值，是被PTR_MANGLE加密后的值

0x00007ffff7c4541a <+138>:   mov    rax,QWORD PTR [rdx+0x18]
0x00007ffff7c4541e <+142>:   mov    rsi,QWORD PTR [rdx+0x20]
0x00007ffff7c45422 <+146>:   mov    edx,ebx
0x00007ffff7c45424 <+148>:   ror    rax,0x11
0x00007ffff7c45428 <+152>:   xor    rax,QWORD PTR fs:0x30
0x00007ffff7c45431 <+161>:   xchg   DWORD PTR [r14],edx
0x00007ffff7c45434 <+164>:   cmp    edx,0x1
0x00007ffff7c45437 <+167>:   jg     0x7ffff7c45558 <__run_exit_handlers+456>
0x00007ffff7c4543d <+173>:   mov    edi,ebp
0x00007ffff7c4543f <+175>:   call   rax

看到这里自然释然了，原来是先循环右移0x11位，再与fs:0x30处的值异或来解密（即PTR_DEMANGLE；注册时的PTR_MANGLE则相反，先异或再循环左移0x11位）
fs是一个段寄存器, x86架构没有对fs怎么使用做出明确规定, x86-64的linux/glibc中让fs指向当前线程的控制块, 也就是tcbhead_t结构体, 也就是说 fs:0x30 读到的就是该结构体偏移0x30处的成员，tcbhead_t结构体定义如下，位于tls.h

typedef struct
{
  void *tcb;		/* Pointer to the TCB.  Not necessarily the
			   thread descriptor used by libpthread.  */
  dtv_t *dtv;
  void *self;		/* Pointer to the thread descriptor.  */
  int multiple_threads;
  int gscope_flag;
  uintptr_t sysinfo;
  uintptr_t stack_guard;
  uintptr_t pointer_guard;
  unsigned long int unused_vgetcpu_cache[2];
  /* Bit 0: X86_FEATURE_1_IBT.
     Bit 1: X86_FEATURE_1_SHSTK.
   */
  unsigned int feature_1;
  int __glibc_unused1;
  /* Reservation of some values for the TM ABI.  */
  void *__private_tm[4];
  /* GCC split stack support.  */
  void *__private_ss;
  /* The lowest address of shadow stack,  */
  unsigned long long int ssp_base;
  /* Must be kept even if it is no longer used by glibc since programs,
     like AddressSanitizer, depend on the size of tcbhead_t.  */
  __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32)));

  void *__padding[8];
} tcbhead_t;

可以看到fs:0x28也就是tcbhead_t偏移0x28处正是canary保护所使用的值stack_guard，我们这里解密使用的是fs:0x30的pointer_guard，只有泄露（或者直接改写）这个值，才能伪造出合法的加密指针，进而劫持__exit_funcs链表。

0x03 exit_function注册以及 elf程序的start与exit

我们再回头看流程，遍历链表执行的是atexit等函数注册的函数，我们找到atexit

/* Register FUNC to be executed by `exit'.  */
int
#ifndef atexit
attribute_hidden
#endif
atexit (void (*func) (void))
{
  return __cxa_atexit ((void (*) (void *)) func, NULL, __dso_handle);
}

发现只是对__cxa_atexit的封装

/* Register a function to be called by exit or when a shared library
   is unloaded.  This function is only called from code generated by
   the C++ compiler.  */
int
__cxa_atexit (void (*func) (void *), void *arg, void *d)
{
  return __internal_atexit (func, arg, d, &__exit_funcs);
}
libc_hidden_def (__cxa_atexit)

发现还是封装…

int
attribute_hidden
__internal_atexit (void (*func) (void *), void *arg, void *d,
		   struct exit_function_list **listp)
{
  struct exit_function *new;

  /* As a QoI issue we detect NULL early with an assertion instead
     of a SIGSEGV at program exit when the handler is run (bug 20544).  */
  assert (func != NULL);

  __libc_lock_lock (__exit_funcs_lock);
  new = __new_exitfn (listp);

  if (new == NULL)
    {
      __libc_lock_unlock (__exit_funcs_lock);
      return -1;
    }

#ifdef PTR_MANGLE
  PTR_MANGLE (func);
#endif
  new->func.cxa.fn = (void (*) (void *, int)) func;
  new->func.cxa.arg = arg;
  new->func.cxa.dso_handle = d;
  new->flavor = ef_cxa;
  __libc_lock_unlock (__exit_funcs_lock);
  return 0;
}

大概就是用__new_exitfn在__exit_funcs链表上找到一个合适的位置，然后把经过PTR_MANGLE加密的函数指针连同参数写入，并把flavor设为ef_cxa，我们看__new_exitfn函数

/* Must be called with __exit_funcs_lock held.  */
struct exit_function *
__new_exitfn (struct exit_function_list **listp)
{
  struct exit_function_list *p = NULL;
  struct exit_function_list *l;
  struct exit_function *r = NULL;
  size_t i = 0;

  if (__exit_funcs_done)
    /* Exit code is finished processing all registered exit functions,
       therefore we fail this registration.  */
    return NULL;

  for (l = *listp; l != NULL; p = l, l = l->next)
    {
      for (i = l->idx; i > 0; --i)
	if (l->fns[i - 1].flavor != ef_free)
	  break;

      if (i > 0)
	break;

      /* This block is completely unused.  */
      l->idx = 0;
    }

  if (l == NULL || i == sizeof (l->fns) / sizeof (l->fns[0]))
    {
      /* The last entry in a block is used.  Use the first entry in
	 the previous block if it exists.  Otherwise create a new one.  */
      if (p == NULL)
	{
	  assert (l != NULL);
	  p = (struct exit_function_list *)
	    calloc (1, sizeof (struct exit_function_list));
	  if (p != NULL)
	    {
	      p->next = *listp;
	      *listp = p;
	    }
	}

      if (p != NULL)
	{
	  r = &p->fns[0];
	  p->idx = 1;
	}
    }
  else
    {
      /* There is more room in the block.  */
      r = &l->fns[i];
      l->idx = i + 1;
    }

  /* Mark entry as used, but we don't know the flavor now.  */
  if (r != NULL)
    {
      r->flavor = ef_us;
      ++__new_exitfn_called;
    }

  return r;
}

先尝试在__exit_funcs链表中找到一个flavor为ef_free的exit_function位置, ef_free代表着此位置空闲
如果没找到, 就用calloc新建一个exit_function_list节点, 使用头插法插入__exit_funcs链表, 并使用新节点的第一个位置作为分配到的exit_function结构体。最后将找到的exit_function的flavor设置为ef_us, 表示正在使用中, 并返回
这里只是找位置，那么注册的是什么函数呢？这些函数在main之前就被注册了。未知生，焉知死？我们看一下程序的入口_start

ENTRY (_start)
	/* Clearing frame pointer is insufficient, use CFI.  */
	cfi_undefined (rip)
	/* Clear the frame pointer.  The ABI suggests this be done, to mark
	   the outermost frame obviously.  */
	xorl %ebp, %ebp

	/* Extract the arguments as encoded on the stack and set up
	   the arguments for __libc_start_main (int (*main) (int, char **, char **),
		   int argc, char *argv,
		   void (*init) (void), void (*fini) (void),
		   void (*rtld_fini) (void), void *stack_end).
	   The arguments are passed via registers and on the stack:
	main:		%rdi
	argc:		%rsi
	argv:		%rdx
	init:		%rcx
	fini:		%r8
	rtld_fini:	%r9
	stack_end:	stack.	*/

	mov %RDX_LP, %R9_LP	/* Address of the shared library termination
				   function.  */
#ifdef __ILP32__
	mov (%rsp), %esi	/* Simulate popping 4-byte argument count.  */
	add $4, %esp
#else
	popq %rsi		/* Pop the argument count.  */
#endif
	/* argv starts just at the current stack top.  */
	mov %RSP_LP, %RDX_LP
	/* Align the stack to a 16 byte boundary to follow the ABI.  */
	and  $~15, %RSP_LP

	/* Push garbage because we push 8 more bytes.  */
	pushq %rax

	/* Provide the highest stack address to the user code (for stacks
	   which grow downwards).  */
	pushq %rsp

	/* These used to be the addresses of .fini and .init.  */
	xorl %r8d, %r8d
	xorl %ecx, %ecx

#ifdef PIC
	mov main@GOTPCREL(%rip), %RDI_LP
#else
	mov $main, %RDI_LP
#endif

	/* Call the user's main function, and exit with its value.
	   But let the libc call main.  Since __libc_start_main in
	   libc.so is called very early, lazy binding isn't relevant
	   here.  Use indirect branch via GOT to avoid extra branch
	   to PLT slot.  In case of static executable, ld in binutils
	   2.26 or above can convert indirect branch into direct
	   branch.  */
	call *__libc_start_main@GOTPCREL(%rip)

	hlt			/* Crash if somehow `exit' does return.	 */
END (_start)

/* Define a symbol for the first piece of initialized data.  */
	.data
	.globl __data_start
__data_start:
	.long 0
	.weak data_start
	data_start = __data_start

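我们关注其传递给__libc_start_main的参数main，argc，argv，init，fini，rtld_fini，stack_end，前三个不用赘述，重点是init，fini，rtld_fini。注意这里rcx和r8被清零，也就是init和fini传的都是NULL，而rtld_fini来自进入_start时rdx中的值（动态链接时由ld.so传入）。我们再看__libc_start_main的源码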

/* Note: The init and fini parameters are no longer used.  fini is
   completely unused, init is still called if not NULL, but the
   current startup code always passes NULL.  (In the future, it would
   be possible to use fini to pass a version code if init is NULL, to
   indicate the link-time glibc without introducing a hard
   incompatibility for new programs with older glibc versions.)

   For dynamically linked executables, the dynamic segment is used to
   locate constructors and destructors.  For statically linked
   executables, the relevant symbols are access directly.  */
STATIC int
LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
		 int argc, char **argv,
#ifdef LIBC_START_MAIN_AUXVEC_ARG
		 ElfW(auxv_t) *auxvec,
#endif
		 __typeof (main) init,
		 void (*fini) (void),
		 void (*rtld_fini) (void), void *stack_end)
{
#ifndef SHARED
  char **ev = &argv[argc + 1];

  __environ = ev;

  /* Store the lowest stack address.  This is done in ld.so if this is
     the code for the DSO.  */
  __libc_stack_end = stack_end;

# ifdef HAVE_AUX_VECTOR
  /* First process the auxiliary vector since we need to find the
     program header to locate an eventually present PT_TLS entry.  */
#  ifndef LIBC_START_MAIN_AUXVEC_ARG
  ElfW(auxv_t) *auxvec;
  {
    char **evp = ev;
    while (*evp++ != NULL)
      ;
    auxvec = (ElfW(auxv_t) *) evp;
  }
#  endif
  _dl_aux_init (auxvec);
  if (GL(dl_phdr) == NULL)
# endif
    {
      /* Starting from binutils-2.23, the linker will define the
         magic symbol __ehdr_start to point to our own ELF header
         if it is visible in a segment that also includes the phdrs.
         So we can set up _dl_phdr and _dl_phnum even without any
         information from auxv.  */

      extern const ElfW(Ehdr) __ehdr_start
# if BUILD_PIE_DEFAULT
	__attribute__ ((visibility ("hidden")));
# else
	__attribute__ ((weak, visibility ("hidden")));
      if (&__ehdr_start != NULL)
# endif
        {
          assert (__ehdr_start.e_phentsize == sizeof *GL(dl_phdr));
          GL(dl_phdr) = (const void *) &__ehdr_start + __ehdr_start.e_phoff;
          GL(dl_phnum) = __ehdr_start.e_phnum;
        }
    }

  /* Initialize very early so that tunables can use it.  */
  __libc_init_secure ();

  __tunables_init (__environ);

  ARCH_INIT_CPU_FEATURES ();

  /* Do static pie self relocation after tunables and cpu features
     are setup for ifunc resolvers. Before this point relocations
     must be avoided.  */
  _dl_relocate_static_pie ();

  /* Perform IREL{,A} relocations.  */
  ARCH_SETUP_IREL ();

  /* The stack guard goes into the TCB, so initialize it early.  */
  ARCH_SETUP_TLS ();

  /* In some architectures, IREL{,A} relocations happen after TLS setup in
     order to let IFUNC resolvers benefit from TCB information, e.g. powerpc's
     hwcap and platform fields available in the TCB.  */
  ARCH_APPLY_IREL ();

  /* Set up the stack checker's canary.  */
  uintptr_t stack_chk_guard = _dl_setup_stack_chk_guard (_dl_random);
# ifdef THREAD_SET_STACK_GUARD
  THREAD_SET_STACK_GUARD (stack_chk_guard);
# else
  __stack_chk_guard = stack_chk_guard;
# endif

# ifdef DL_SYSDEP_OSCHECK
  {
    /* This needs to run to initiliaze _dl_osversion before TLS
       setup might check it.  */
    DL_SYSDEP_OSCHECK (__libc_fatal);
  }
# endif

  /* Initialize libpthread if linked in.  */
  if (__pthread_initialize_minimal != NULL)
    __pthread_initialize_minimal ();

  /* Set up the pointer guard value.  */
  uintptr_t pointer_chk_guard = _dl_setup_pointer_guard (_dl_random,
							 stack_chk_guard);
# ifdef THREAD_SET_POINTER_GUARD
  THREAD_SET_POINTER_GUARD (pointer_chk_guard);
# else
  __pointer_chk_guard_local = pointer_chk_guard;
# endif

#endif /* !SHARED  */

  /* Register the destructor of the dynamic linker if there is any.  */
  if (__glibc_likely (rtld_fini != NULL))
    __cxa_atexit ((void (*) (void *)) rtld_fini, NULL, NULL);

#ifndef SHARED
  /* Perform early initialization.  In the shared case, this function
     is called from the dynamic loader as early as possible.  */
  __libc_early_init (true);

  /* Call the initializer of the libc.  This is only needed here if we
     are compiling for the static library in which case we haven't
     run the constructors in `_dl_start_user'.  */
  __libc_init_first (argc, argv, __environ);

  /* Register the destructor of the statically-linked program.  */
  __cxa_atexit (call_fini, NULL, NULL);

  /* Some security at this point.  Prevent starting a SUID binary where
     the standard file descriptors are not opened.  We have to do this
     only for statically linked applications since otherwise the dynamic
     loader did the work already.  */
  if (__builtin_expect (__libc_enable_secure, 0))
    __libc_check_standard_fds ();
#endif /* !SHARED */

  /* Call the initializer of the program, if any.  */
#ifdef SHARED
  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
    GLRO(dl_debug_printf) ("\ninitialize program: %s\n\n", argv[0]);

  if (init != NULL)
    /* This is a legacy program which supplied its own init
       routine.  */
    (*init) (argc, argv, __environ MAIN_AUXVEC_PARAM);
  else
    /* This is a current program.  Use the dynamic segment to find
       constructors.  */
    call_init (argc, argv, __environ);

  /* Auditing checkpoint: we have a new object.  */
  _dl_audit_preinit (GL(dl_ns)[LM_ID_BASE]._ns_loaded);

  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS))
    GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]);
#else /* !SHARED */
  call_init (argc, argv, __environ);

  _dl_debug_initialize (0, LM_ID_BASE);
#endif

  __libc_start_call_main (main, argc, argv MAIN_AUXVEC_PARAM);
}

/* Starting with glibc 2.34, the init parameter is always NULL.  Older
   libcs are not prepared to handle that.  The macro
   DEFINE_LIBC_START_MAIN_VERSION creates GLIBC_2.34 alias, so that
   newly linked binaries reflect that dependency.  The macros below
   expect that the exported function is called
   __libc_start_main_impl.  */

从注释我们知道，自glibc 2.34起，init和fini两个参数已经废弃（init恒为NULL，fini完全不使用），可以看到，其内部自行使用了call_init函数，我们找到源码

/* Initialization for dynamic executables.  Find the main executable
   link map and run its init functions.  */
static void
call_init (int argc, char **argv, char **env)
{
  /* Obtain the main map of the executable.  */
  struct link_map *l = GL(dl_ns)[LM_ID_BASE]._ns_loaded;

  /* DT_PREINIT_ARRAY is not processed here.  It is already handled in
     _dl_init in elf/dl-init.c.  Also see the call_init function in
     the same file.  */

  if (ELF_INITFINI && l->l_info[DT_INIT] != NULL)
    DL_CALL_DT_INIT(l, l->l_addr + l->l_info[DT_INIT]->d_un.d_ptr,
		    argc, argv, env);

  ElfW(Dyn) *init_array = l->l_info[DT_INIT_ARRAY];
  if (init_array != NULL)
    {
      unsigned int jm
	= l->l_info[DT_INIT_ARRAYSZ]->d_un.d_val / sizeof (ElfW(Addr));
      ElfW(Addr) *addrs = (void *) (init_array->d_un.d_ptr + l->l_addr);
      for (unsigned int j = 0; j < jm; ++j)
	((dl_init_t) addrs[j]) (argc, argv, env);
    }
}

/* Initialization for static executables.  There is no dynamic
   segment, so we access the symbols directly.  */
static void
call_init (int argc, char **argv, char **envp)
{
  /* For static executables, preinit happens right before init.  */
  {
    const size_t size = __preinit_array_end - __preinit_array_start;
    size_t i;
    for (i = 0; i < size; i++)
      (*__preinit_array_start [i]) (argc, argv, envp);
  }

# if ELF_INITFINI
  _init ();
# endif

  const size_t size = __init_array_end - __init_array_start;
  for (size_t i = 0; i < size; i++)
      (*__init_array_start [i]) (argc, argv, envp);
}

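上面贴出了两个call_init：第一个用于动态链接程序，先获取主程序的link_map，然后执行其.init，再遍历其.init_array函数数组，只负责主程序自身的构造函数；第二个用于静态链接程序，直接通过符号依次遍历.preinit_array、执行_init、遍历.init_array。而动态链接时各共享库的构造函数（以及主程序的.preinit_array）则由ld.so中的另一个函数_dl_init调用elf/dl-init.c里同名的call_init执行，这个函数如下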

void
_dl_init (struct link_map *main_map, int argc, char **argv, char **env)
{
  ElfW(Dyn) *preinit_array = main_map->l_info[DT_PREINIT_ARRAY];
  ElfW(Dyn) *preinit_array_size = main_map->l_info[DT_PREINIT_ARRAYSZ];
  unsigned int i;

  if (__glibc_unlikely (GL(dl_initfirst) != NULL))
    {
      call_init (GL(dl_initfirst), argc, argv, env);
      GL(dl_initfirst) = NULL;
    }

  /* Don't do anything if there is no preinit array.  */
  if (__builtin_expect (preinit_array != NULL, 0)
      && preinit_array_size != NULL
      && (i = preinit_array_size->d_un.d_val / sizeof (ElfW(Addr))) > 0)
    {
      ElfW(Addr) *addrs;
      unsigned int cnt;

      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS))
	_dl_debug_printf ("\ncalling preinit: %s\n\n",
			  DSO_FILENAME (main_map->l_name));

      addrs = (ElfW(Addr) *) (preinit_array->d_un.d_ptr + main_map->l_addr);
      for (cnt = 0; cnt < i; ++cnt)
	((dl_init_t) addrs[cnt]) (argc, argv, env);
    }

  /* Stupid users forced the ELF specification to be changed.  It now
     says that the dynamic loader is responsible for determining the
     order in which the constructors have to run.  The constructors
     for all dependencies of an object must run before the constructor
     for the object itself.  Circular dependencies are left unspecified.

     This is highly questionable since it puts the burden on the dynamic
     loader which has to find the dependencies at runtime instead of
     letting the user do it right.  Stupidity rules!  */

  i = main_map->l_searchlist.r_nlist;
  while (i-- > 0)
    call_init (main_map->l_initfini[i], argc, argv, env);

#ifndef HAVE_INLINED_SYSCALLS
  /* Finished starting up.  */
  _dl_starting_up = 0;
#endif
}

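_dl_init又由谁调用呢？这里发现另一个_start(?)，位于dl-start.S（动态链接器的入口点），上文的_start位于start.S（程序的入口点）。注意下面贴出的是OpenRISC(or1k)架构的版本（l.ori、l.jal等是or1k指令），x86_64上对应的入口是sysdeps/x86_64/dl-machine.h中的RTLD_START宏，流程相同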

/* Initial entry point code for the dynamic linker.
   The function _dl_start is the real entry point;
   it's return value is the user program's entry point.  */
ENTRY (_start)
	/* Count arguments in r11 */
	l.ori	r3, r1, 0
	l.movhi	r11, 0
1:
	l.addi	r3, r3, 4
	l.lwz	r12, 0(r3)
	l.sfnei	r12, 0
	l.addi	r11, r11, 1
	l.bf	1b
	 l.nop
	l.addi	r11, r11, -1
	/* store argument counter to stack.  */
	l.sw	0(r1), r11

	/* Load the PIC register.  */
	l.jal	0x8
	 l.movhi r16, gotpchi(_GLOBAL_OFFSET_TABLE_-4)
	l.ori	r16, r16, gotpclo(_GLOBAL_OFFSET_TABLE_+0)
	l.add	r16, r16, r9

	l.ori	r3, r1, 0

	l.jal	_dl_start
	 l.nop
	/* Save user entry in a call saved reg.  */
	l.ori	r22, r11, 0
	/* Fall through to _dl_start_user.  */

_dl_start_user:
	/* Set up for _dl_init.  */

	/* Load _rtld_local (a.k.a _dl_loaded).  */
	l.lwz	r12, got(_rtld_local)(r16)
	l.lwz	r3, 0(r12)

	/* Load argc */
	l.lwz	r18, got(_dl_argc)(r16)
	l.lwz	r4, 0(r18)

	/* Load argv */
	l.lwz	r20, got(_dl_argv)(r16)
	l.lwz	r5, 0(r20)

	/* Load envp = &argv[argc + 1].  */
	l.slli	r6, r4, 2
	l.addi	r6, r6, 4
	l.add	r6, r6, r5

	l.jal	plt(_dl_init)
	 l.nop

	/* Now set up for user entry.
	   The already defined ABI loads argc and argv from the stack.

	   argc = 0(r1)
	   argv = r1 + 4
	*/

	/* Load SP as argv - 4.  */
	l.lwz	r3, 0(r20)
	l.addi	r1, r3, -4

	/* Save argc.  */
	l.lwz	r3, 0(r18)
	l.sw	0(r1), r3

	/* Pass _dl_fini function address to _start.
	   Next start.S will then pass this as rtld_fini to __libc_start_main.  */
	l.lwz	r3, got(_dl_fini)(r16)

	l.jr	r22
	 l.nop

END (_start)

发现正是这里调用了_dl_start和_dl_init
如此完成初始化构造，可以看到call_fini（静态链接程序），rtld_fini（动态链接程序）也是在__libc_start_main完成注册的

__cxa_atexit ((void (*) (void *)) rtld_fini, NULL, NULL);
...
/* Register the destructor of the statically-linked program.  */
__cxa_atexit (call_fini, NULL, NULL);

在__libc_start_main的最后

__libc_start_call_main (main, argc, argv MAIN_AUXVEC_PARAM);

我们找到__libc_start_call_main

_Noreturn static __always_inline void
__libc_start_call_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
                        int argc, char **argv MAIN_AUXVEC_DECL)
{
  exit (main (argc, argv, __environ MAIN_AUXVEC_PARAM));
}

正是它最终调用main以及exit，同时这也解释了为什么main函数返回地址总是在__libc_start_call_main的一定偏移处。
现在我们再看被注册的rtld_fini，其实际就是_dl_fini函数（ld.so在跳转到程序入口前把_dl_fini的地址传给_start），作用是调用进程空间中所有模块的析构函数，也就是遍历.fini_array，看其源码的这一段

/* Is there a destructor function?  */
if (l->l_info[DT_FINI_ARRAY] != NULL
    || (ELF_INITFINI && l->l_info[DT_FINI] != NULL))
{
    /* When debugging print a message first.  */
    if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
        _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
                          DSO_FILENAME (l->l_name),
                          ns);

    /* First see whether an array is given.  */
    if (l->l_info[DT_FINI_ARRAY] != NULL)
    {
        ElfW(Addr) *array =
            (ElfW(Addr) *) (l->l_addr + l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
        unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
                          / sizeof (ElfW(Addr)));
        while (i-- > 0)
            ((fini_t) array[i]) ();
    }

    /* Next try the old-style destructor.  */
    if (ELF_INITFINI && l->l_info[DT_FINI] != NULL)
        DL_CALL_DT_FINI
            (l, l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr);
}

这里先逆序遍历了.fini_array，然后才执行.fini。ok到这里elf程序从生到死就差不多到此为止，总结梳理一下

内核执行 execve() 系统调用
    |
    V
加载 ELF 可执行文件
    |
    ├─ 动态链接程序：发现 .interp 段
    |       |
    |       V
    |   内核加载动态链接器 ld.so
    |       |
    |       V
    |   跳转到 ld.so 入口地址 -> _start (dl-start.S)
    |           -> _dl_start
    |       |
    |       V
    |   ld.so 加载依赖库 (libc.so 等) 并重定位
    |       |
    |       V
    |   _dl_start_user -> _dl_init
    |                 -> call_init (按依赖顺序执行各共享库的 .init/.init_array)
    |       |
    |       V
    |   跳转到程序入口 -> _start (start.S)
    |
    └─ 静态链接程序：直接跳转到 _start (start.S)
    
    |
    V
_start
    -> __libc_start_main
         -> 注册析构函数：
              - 静态链接：__cxa_atexit(call_fini)
                    [程序自身析构器]
              - 动态链接：__cxa_atexit(rtld_fini)
                    [动态链接器统一收尾 -> 调用 _dl_fini]
         -> call_init (执行程序自身的 .init/.init_array；动态链接时 libc 等共享库的构造函数已由 _dl_init 执行)
    |
    V
__libc_start_call_main
    -> 调用 main()
    -> exit(main())
    |
    V
用户调用 exit(status)
    |
    V
__run_exit_handlers(status)
    |
    ├─ 调用 TLS 析构函数 (__call_tls_dtors)
    |
    ├─ 遍历 exit_function_list
    |     |
    |     ├─ ef_cxa:
    |     |     - 静态程序：call_fini
    |     |           [执行程序自身 .fini_array]
    |     |     - 动态程序：rtld_fini
    |     |           -> _dl_fini
    |     |                [按依赖顺序执行共享库 .fini_array/DT_FINI]
    |     |                [清理动态链接器资源]
    |     |
    |     ├─ ef_at  → atexit 注册的函数
    |     ├─ ef_on  → on_exit 注册的函数
    |     └─ 其他类型忽略
    |
    ├─ 若执行期间有新回调注册 → 回到链表开头
    |
    ├─ 释放动态分配的回调节点
    |
    ├─ 若 run_list_atexit = true → 执行 __libc_atexit 钩子：默认为_IO_cleanup()
    |
    V
_exit(status)
    |
    V
内核：彻底终止进程

0x04 _rtld_global

exit_hook

攻击_rtld_global._dl_rtld_lock_recursive 或 _rtld_global._dl_rtld_unlock_recursive这两个函数指针，偏移分别为0xf08和0xf10（偏移随glibc版本变化，需自行调试确认），至于_rtld_global的地址需要泄露libc后自行调试确定，注意这个打法在glibc2.34+就失效了，因为这两个函数被写死了，不再是结构体中的钩子（~~笔者调试的时候找了半天这两个钩子，突然想起来自己版本是glibc2.35~~）例题参考whuctf2025迎新练习赛的It's my 钩，不赘述，攻击示例如下

ld_base = libc_base+0x213000 #此偏移需要自行调试
_rtld_global = ld_base + ld.sym['_rtld_global']
_dl_rtld_lock_recursive = _rtld_global + 0xf08
_dl_rtld_unlock_recursive = _rtld_global + 0xf10

l_info

劫持l_info，这里分析一下劫持原理（~~走一下流水账~~），至于检查绕过等攻击细节，在house of banana分析
从前面分析我们不难发现，在执行.init，.fini，遍历.init_array和.fini_array的时候，就以.fini_array为例，

/* First see whether an array is given.  */
    if (l->l_info[DT_FINI_ARRAY] != NULL)
    {
        ElfW(Addr) *array =
            (ElfW(Addr) *) (l->l_addr + l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
        unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
                          / sizeof (ElfW(Addr)));
        while (i-- > 0)
            ((fini_t) array[i]) ();
    }

发现使用l->l_addr + l->l_info[xxx]->d_un.d_ptr来定位各个函数（数组），这个关系是什么样呢？

_rtld_global (全局动态链接器状态)
┌───────────────────────────────┐
│ head -> link_map 链表          │  <─ 链表中每个节点对应一个库或可执行文件
│ exit_done / lock / hook 等     │
└───────────────────────────────┘
           │
           ▼
+--------------------------+
| link_map (每个库/程序)     |
|--------------------------|
| l_addr   : 基址           |
| l_name   : 库名           |
| l_ld     : .dynamic 指针  |
| l_info[] : 动态段 tag数组  |
| ...                      |
+--------------------------+
           │
   ┌───────┴─────────┐
   ▼                 ▼
l_info[DT_FINI]      l_info[DT_FINI_ARRAY] / l_info[DT_FINI_ARRAYSZ]  
  |                      |
  |                      ▼
  |                  .fini_array (函数指针数组)
  |                  +--------------------------+
  |                  | fini1()                  |
  |                  | fini2()                  |
  |                  | ...                      |
  |                  +--------------------------+
  ▼                      │
.fini (单个函数)           │
+-----------------+       │
| _fini()         |       │
+-----------------+       │
           │              │
           ▼              ▼
         程序退出时调用 _dl_fini 
           │
           ▼
遍历 link_map 链表：
    对每个 l：
        # 先逆序遍历 fini_array
        if (l->l_info[DT_FINI_ARRAY] != NULL)
            ElfW(Addr)* array = (ElfW(Addr)*)(l->l_addr + l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
            unsigned int n = l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val / sizeof(ElfW(Addr));
            while (n-- > 0)
                ((fini_t) array[n])();
        # 再调用单个 fini 函数
        if (l->l_info[DT_FINI] != NULL)
            ((fini_t)(l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr))();
           │
           ▼
执行每个 fini 函数 / fini_array 中的函数

那么call_init遍历.init_array的时候同理，我们看call_init中的关键处

 /* Obtain the main map of the executable.  */
  struct link_map *l = GL(dl_ns)[LM_ID_BASE]._ns_loaded;

  /* DT_PREINIT_ARRAY is not processed here.  It is already handled in
     _dl_init in elf/dl-init.c.  Also see the call_init function in
     the same file.  */

  if (ELF_INITFINI && l->l_info[DT_INIT] != NULL)
    DL_CALL_DT_INIT(l, l->l_addr + l->l_info[DT_INIT]->d_un.d_ptr,
		    argc, argv, env);

  ElfW(Dyn) *init_array = l->l_info[DT_INIT_ARRAY];
  if (init_array != NULL)
    {
      unsigned int jm
	= l->l_info[DT_INIT_ARRAYSZ]->d_un.d_val / sizeof (ElfW(Addr));
      ElfW(Addr) *addrs = (void *) (init_array->d_un.d_ptr + l->l_addr);
      for (unsigned int j = 0; j < jm; ++j)
	((dl_init_t) addrs[j]) (argc, argv, env);
    }
}

这里也是同样方式索引的，最后我们整合出一张示意图（感谢chatgpt）

_rtld_global (全局动态链接器状态)
┌───────────────────────────────┐
│ head -> link_map 链表          │  <─ 每个节点对应一个库/程序
│ exit_done / lock / hook 等     │
└───────────────────────────────┘
           │
           ▼
+--------------------------+
| link_map (每个库/程序)     |
|--------------------------|
| l_addr   : 基址           |
| l_name   : 库名           |
| l_ld     : .dynamic 指针  |
| l_info[] : 动态段 tag数组  |
| ...                      |
+--------------------------+
           │
   ┌───────┴────────┐
   ▼                ▼
【程序启动】        【程序退出】
  .init / .init_array   .fini / .fini_array
           │                │
           ▼                ▼
l_info[DT_INIT] → 单个 init 函数      l_info[DT_FINI] → 单个 fini 函数
l_info[DT_INIT_ARRAY] → init_array    l_info[DT_FINI_ARRAY] → fini_array
l_info[DT_INIT_ARRAYSZ] → 数量        l_info[DT_FINI_ARRAYSZ] → 数量
           │                │
           ▼                ▼
遍历 init_array              遍历 fini_array
addrs[j] = l_addr +偏移       addrs[i] = l_addr +偏移
for j = 0..N-1:                for i = N-1..0:
    addrs[j](argc,argv,env)       addrs[i]()
           │                │
           ▼                ▼
初始化每个库/程序          清理每个库/程序

注意这里.init_array是顺序遍历，.fini_array是逆序遍历
到这里可能仍然疑惑，link_map链表是什么样的呢？根据实际调试得到以下示意图

_rtld_global._dl_ns[0]._ns_loaded (链表头) #也可使用_r_debug.r_map获取
       │
       ▼
┌─────────────────────────────────────────┐
│ link_map (主程序 pwn)                    │
│-----------------------------------------│
│ l_addr   → 0x555555554000 (基地址)       │
│ l_name   → 0x7ffff7ffe888 (动态库路径) ->0x0 │
│ l_ld     → 0x555555557dc8 (.dynamic段)  │
│ l_prev   → 0x0 (链表头无前驱)             │
│ l_next   → 0x7ffff7ffe890 (→vdso.so.1)  │
└─────────────────────────────────────────┘
            │
            │ l_next
            ▼
┌─────────────────────────────────────────┐
│ link_map (vdso.so.1)                    │
│-----------------------------------------│
│ l_addr   → 0x7ffff7fc1000               │
│ l_name   → 0x7ffff7fc1371->"linux-vdso.so.1"│
│ l_ld     → 0x7ffff7fc13e0               │
│ l_prev   → 0x7ffff7ffe2e0 (←pwn)        │
│ l_next   → 0x7ffff7fbb160 (→libc.so.6)  │
└─────────────────────────────────────────┘
            │
            │ l_next
            ▼
┌─────────────────────────────────────────┐
│ link_map (libc.so.6)                    │
│-----------------------------------------│
│ l_addr   → 0x7ffff7c00000               │
│ l_name   → 0x7ffff7fbb140->"/lib/x86_64-linux-gnu/libc.so.6"│
│ l_ld     → 0x7ffff7e19bc0               │
│ l_prev   → 0x7ffff7ffe890 (←vdso.so.1)  │
│ l_next   → 0x7ffff7ffdaf0 (→ld.so)      │
└─────────────────────────────────────────┘
            │
            │ l_next
            ▼
┌─────────────────────────────────────────┐
│ link_map (ld-linux-x86-64.so.2)         │
│-----------------------------------------│
│ l_addr   → 0x7ffff7fc3000               │
│ l_name   → 0x555555554318->"/lib64/ld-linux-x86-64.so.2"│
│ l_ld     → 0x7ffff7ffce80               │
│ l_prev   → 0x7ffff7fbb160 (←libc.so.6)  │
│ l_next   → 0x0 (链表尾无后继)             │
└─────────────────────────────────────────┘

可以看到，elf程序本身、libc库、ld链接器各自link_map的l_addr就分别对应着elf_base，libc_base，ld_base
所以我们知道了是如何遍历执行的，那么如果我们能劫持l_info，不就可以劫持程序执行了吗？既然劫持l_info，索引的时候用了几个宏，宏是多少呢？我们找到

#define DT_INIT		12		/* Address of init function */
#define DT_FINI		13		/* Address of termination function */
...
#define	DT_INIT_ARRAY	25		/* Array with addresses of init fct */
#define	DT_FINI_ARRAY	26		/* Array with addresses of fini fct */
#define	DT_INIT_ARRAYSZ	27		/* Size in bytes of DT_INIT_ARRAY */
#define	DT_FINI_ARRAYSZ	28		/* Size in bytes of DT_FINI_ARRAY */

我们这里是针对exit的利用，自然是伪造.fini_array或者.fini

fini_array

我们调试看看

pwndbg> p _rtld_global
$14 = {
  _dl_ns = {{
      _ns_loaded = 0x7ffff7ffe2e0, #link_map头结点，也就是elf程序本身的link_map结点
      _ns_nloaded = 4, #link_map结点数量
      _ns_main_searchlist = 0x7ffff7ffe5a0,
      _ns_global_scope_alloc = 0,
      _ns_global_scope_pending_adds = 0,
      libc_map = 0x7ffff7fbb160, #libc库对应的link_map结点
      ...
      }
      ...
   }
   ...
}

pwndbg> p *(struct link_map *) 0x7ffff7ffe2e0
$15 = {
  l_addr = 0x555555554000,
  l_name = 0x7ffff7ffe888 "",
  l_ld = 0x555555557dc8,
  l_next = 0x7ffff7ffe890,
  l_prev = 0x0,
  l_real = 0x7ffff7ffe2e0,
  l_ns = 0,
  l_libname = 0x7ffff7ffe870,
  l_info = {0x0, 0x555555557dc8, 0x555555557ea8, 0x555555557e98, 0x0, 0x555555557e48, 0x555555557e58, 0x555555557ed8, 0x555555557ee8, 0x555555557ef8, 0x555555557e68, 0x555555557e78, 0x555555557dd8, 0x555555557de8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x555555557eb8, 0x555555557e88, 0x0, 0x555555557ec8, 0x555555557f18, 0x555555557df8, 0x555555557e18, 0x555555557e08, 0x555555557e28, 0x0, 0x555555557f08, 0x0, 0x0, 0x0, 0x0, 0x555555557f38, 0x555555557f28, 0x0, 0x0, 0x555555557f18, 0x0, 0x555555557f58, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x555555557f48, 0x0 <repeats 25 times>, 0x555555557e38},
  ...
  ...
}

_rtld_global中第一个_ns_loaded正是link_map的头结点，也就是elf程序的link_map结点

pwndbg> tele 0x7ffff7ffe2e0+0x40 30
00:0000│  0x7ffff7ffe320 ◂— 0
01:0008│  0x7ffff7ffe328 —▸ 0x555555557dc8 (_DYNAMIC) ◂— 1
02:0010│  0x7ffff7ffe330 —▸ 0x555555557ea8 (_DYNAMIC+224) ◂— 2
03:0018│  0x7ffff7ffe338 —▸ 0x555555557e98 (_DYNAMIC+208) ◂— 3
04:0020│  0x7ffff7ffe340 ◂— 0
05:0028│  0x7ffff7ffe348 —▸ 0x555555557e48 (_DYNAMIC+128) ◂— 5
06:0030│  0x7ffff7ffe350 —▸ 0x555555557e58 (_DYNAMIC+144) ◂— 6
07:0038│  0x7ffff7ffe358 —▸ 0x555555557ed8 (_DYNAMIC+272) ◂— 7
08:0040│  0x7ffff7ffe360 —▸ 0x555555557ee8 (_DYNAMIC+288) ◂— 8
09:0048│  0x7ffff7ffe368 —▸ 0x555555557ef8 (_DYNAMIC+304) ◂— 9 /* '\t' */
0a:0050│  0x7ffff7ffe370 —▸ 0x555555557e68 (_DYNAMIC+160) ◂— 0xa /* '\n' */
0b:0058│  0x7ffff7ffe378 —▸ 0x555555557e78 (_DYNAMIC+176) ◂— 0xb /* '\x0b' */
0c:0060│  0x7ffff7ffe380 —▸ 0x555555557dd8 (_DYNAMIC+16) ◂— 0xc /* '\x0c' */
0d:0068│  0x7ffff7ffe388 —▸ 0x555555557de8 (_DYNAMIC+32) ◂— 0xd /* '\r' */
0e:0070│  0x7ffff7ffe390 ◂— 0
... ↓     5 skipped
14:00a0│  0x7ffff7ffe3c0 —▸ 0x555555557eb8 (_DYNAMIC+240) ◂— 0x14
15:00a8│  0x7ffff7ffe3c8 —▸ 0x555555557e88 (_DYNAMIC+192) ◂— 0x15
16:00b0│  0x7ffff7ffe3d0 ◂— 0
17:00b8│  0x7ffff7ffe3d8 —▸ 0x555555557ec8 (_DYNAMIC+256) ◂— 0x17
18:00c0│  0x7ffff7ffe3e0 —▸ 0x555555557f18 (_DYNAMIC+336) ◂— 0x6ffffffb
19:00c8│  0x7ffff7ffe3e8 —▸ 0x555555557df8 (_DYNAMIC+48) ◂— 0x19
1a:00d0│  0x7ffff7ffe3f0 —▸ 0x555555557e18 (_DYNAMIC+80) ◂— 0x1a
1b:00d8│  0x7ffff7ffe3f8 —▸ 0x555555557e08 (_DYNAMIC+64) ◂— 0x1b
1c:00e0│  0x7ffff7ffe400 —▸ 0x555555557e28 (_DYNAMIC+96) ◂— 0x1c
1d:00e8│  0x7ffff7ffe408 ◂— 0

这样找到l_info数组来查看数据，我们定位到0x1a和0x1c处

pwndbg> tele  0x555555557e18 2
00:0000│  0x555555557e18 (_DYNAMIC+80) ◂— 0x1a
01:0008│  0x555555557e20 (_DYNAMIC+88) ◂— 0x3dc0
pwndbg> tele  0x555555557e28 2
00:0000│  0x555555557e28 (_DYNAMIC+96) ◂— 0x1c
01:0008│  0x555555557e30 (_DYNAMIC+104) ◂— 8

这里的0x3dc0正是fini_array的偏移，而8则是对应的fini_array段的大小，单位为byte，我们看

pwndbg> tele 0x555555554000+0x3dc0
00:0000│ rcx r14 0x555555557dc0 (__do_global_dtors_aux_fini_array_entry) —▸ 0x555555555100 (__do_global_dtors_aux) ◂— endbr64
01:0008│         0x555555557dc8 (_DYNAMIC) ◂— 1
02:0010│         0x555555557dd0 (_DYNAMIC+8) ◂— 0x27 /* "'" */
03:0018│         0x555555557dd8 (_DYNAMIC+16) ◂— 0xc /* '\x0c' */
04:0020│         0x555555557de0 (_DYNAMIC+24) ◂— 0x1000
05:0028│         0x555555557de8 (_DYNAMIC+32) ◂— 0xd /* '\r' */
06:0030│         0x555555557df0 (_DYNAMIC+40) ◂— 0x115c
07:0038│         0x555555557df8 (_DYNAMIC+48) ◂— 0x19

的确如此，默认只有一个函数。所以攻击方法也就清晰了
DT_FINI_ARRAY的值为26（0x1a），DT_FINI_ARRAYSZ的值为28（0x1c）。l_info并不直接位于_rtld_global中，而是位于_rtld_global._dl_ns[0]._ns_loaded所指向的link_map里；我们修改这个link_map中的l_info[0x1a]为addrA，修改l_info[0x1c]为addrB，然后布置

addrA: flat(0x1a, addrC)
addrB: flat(0x1c, N)

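这样也就伪造好了fini_array：addrA和addrB处各是一个伪造的Elf64_Dyn项（d_tag, d_un）。实际的函数指针数组位于l_addr + addrC，因此我们把l_addr设置为0，再在addrC处依次写入函数地址即可；而这里的N就写需要执行的函数数*8（单位为字节）即可。注意_dl_fini是从数组末尾向前逆序调用这些函数的，所以希望最先执行的函数要放在数组的最后。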
既然我们可以控制执行多个函数，那么怎么进行ROP呢？我们分析得知fini_array中多个函数调用之间，只有rdx, r13会被破坏，同时rdi总指向可读写区域
我们让fini_array先调用gets()函数，向rdi指向的区域读入SigreturnFrame，然后再调用setcontext+53，即可进行SROP，劫持所有寄存器。如果是高版本libc，setcontext使用rdx作为参数，那么在gets(rdi)之后还需要一个能通过rdi设置rdx的gadget，再执行setcontext。同时关于劫持fini_array进行ROP似乎还有另外的手法，贴上博客：
通过利用fini_array部署并启动ROP攻击 | TaQini_fini.array 地址从哪里获取-CSDN博客

fini

DT_FINI的值为13

pwndbg> tele 0x555555557de8 2
00:0000│  0x555555557de8 (_DYNAMIC+32) ◂— 0xd /* '\r' */
01:0008│  0x555555557df0 (_DYNAMIC+40) ◂— 0x115c
pwndbg> disass 0x555555554000+0x115c
Dump of assembler code for function _fini:
   0x000055555555515c <+0>:     endbr64
   0x0000555555555160 <+4>:     sub    rsp,0x8
   0x0000555555555164 <+8>:     add    rsp,0x8
   0x0000555555555168 <+12>:    ret
End of assembler dump.

可以看到本来是执行_fini函数。我们修改link_map（即_rtld_global._dl_ns[0]._ns_loaded所指向的结点）中的l_info[0xd]为addrA，然后在addrA处伪造一个Elf64_Dyn项（d_tag为0xd，d_un处写入函数地址）即可。因为只能执行一个函数，所以一般都写one_gadget。但是注意，实际被调用的地址是l_addr加上d_un中的值，所以这要求程序未开启PIE，这样l_addr才是0，或者我们手动设置l_addr为0

addrA: flat(0xd, OGG)

pwndbg> p/x *(struct link_map *) 0x7ffff7ffe2e0
$2 = {
  l_addr = 0x0,
  l_name = 0x7ffff7ffe888,
  l_ld = 0x403e20,
  l_next = 0x7ffff7ffe890,
  l_prev = 0x0,
  l_real = 0x7ffff7ffe2e0,
  l_ns = 0x0,
  l_libname = 0x7ffff7ffe870,
  l_info = {0x0, 0x403e20, 0x403f00, 0x403ef0, 0x0, 0x403ea0, 0x403eb0, 0x403f30, 0x403f40, 0x403f50, 0x403ec0, 0x403ed0, 0x403e30, 0x403e40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x403f10, 0x403ee0, 0x0, 0x403f20, 0x0, 0x403e50, 0x403e70, 0x403e60, 0x403e80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x403f70, 0x403f60, 0x0 <repeats 13 times>, 0x403f80, 0x0 <repeats 25 times>, 0x403e90},
  ...
  ...
}

可以看到关闭pie后，程序的l_addr为0

0x05 __libc_atexit / _IO_cleanup

可以看到exit()的执行流程中存在__libc_atexit这个钩子

if (run_list_atexit)
    RUN_HOOK (__libc_atexit, ());

  _exit (status);

在 __run_exit_handlers的最后，执行这个指针指向的函数，直接修改即可getshell
需要注意的是，在glibc2.34+，这个钩子已经不可写
为什么呢，我们注意到在glibc2.35

text_set_element(__libc_atexit, _IO_cleanup);

正是因为这里，__libc_atexit已经不可写了，并且默认是_IO_cleanup函数

int
_IO_cleanup (void)
{
  /* We do *not* want locking.  Some threads might use streams but
     that is their problem, we flush them underneath them.  */
  int result = _IO_flush_all_lockp (0);

  /* We currently don't have a reliable mechanism for making sure that
     C++ static destructors are executed in the correct order.
     So it is possible that other static destructors might want to
     write to cout - and they're supposed to be able to do so.

     The following will make the standard streambufs be unbuffered,
     which forces any output from late destructors to be written out. */
  _IO_unbuffer_all ();

  return result;
}

这正是_IO_FILE攻击的一大攻击点