Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

full dynamic tracing for ARM/Thumb #14

Open
honggyukim opened this issue Nov 28, 2019 · 9 comments
Open

full dynamic tracing for ARM/Thumb #14

honggyukim opened this issue Nov 28, 2019 · 9 comments

Comments

@honggyukim
Copy link
Owner

For thumb functions, we have to deal with a tricky problem.

Function prologues may mix 16-bit Thumb and 32-bit Thumb-2 instructions in no fixed pattern.

00024158 <command_replay>:
   24158:       f248 6334       movw    r3, #34356      ; 0x8634
   2415c:       f2c0 0306       movt    r3, #6
   24160:       e92d 4ff0       stmdb   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
   24164:       2102            movs    r1, #2
...
00024f0c <delete_session_map>:
   24f0c:       b538            push    {r3, r4, r5, lr}
   24f0e:       4605            mov     r5, r0
   24f10:       69c0            ldr     r0, [r0, #28]
...
00024f28 <create_session>:
   24f28:       e92d 4ff0       stmdb   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
   24f2c:       b089            sub     sp, #36 ; 0x24
   24f2e:       4680            mov     r8, r0
...

It's very difficult to patch in the trampoline code and save the original instructions, because we cannot know the boundaries of the original instructions without disassembling them correctly.

@honggyukim
Copy link
Owner Author

honggyukim commented Nov 28, 2019

@honggyukim
Copy link
Owner Author

https://github.com/qemu/qemu/blob/3979fca4b69fc31c372687cd0bb6950592f248bd/disas/arm.c#L3867-L3965

$ cat qemu/disas/arm.c
    ...
/* NOTE: There are no checks in these routines that
   the relevant number of data bytes exist.  */

int
print_insn_arm (bfd_vma pc, struct disassemble_info *info)
{
  unsigned char b[4];
  long          given;
  int           status;
  int           is_thumb = false;
  int           is_data = false;
  unsigned int  size = 4;
  void          (*printer) (bfd_vma, struct disassemble_info *, long);
  int little;

  little = (info->endian == BFD_ENDIAN_LITTLE);
  is_thumb |= (pc & 1);
  pc &= ~(bfd_vma)1;

  if (force_thumb)
    is_thumb = true;

  info->bytes_per_line = 4;

  if (is_data)
    {
      int i;

      /* size was already set above.  */
      info->bytes_per_chunk = size;
      printer = print_insn_data;

      status = info->read_memory_func (pc, (bfd_byte *)b, size, info);
      given = 0;
      if (little)
        for (i = size - 1; i >= 0; i--)
          given = b[i] | (given << 8);
      else
        for (i = 0; i < (int) size; i++)
          given = b[i] | (given << 8);
    }
  else if (!is_thumb)
    {
      /* In ARM mode endianness is a straightforward issue: the instruction
         is four bytes long and is either ordered 0123 or 3210.  */
      printer = print_insn_arm_internal;
      info->bytes_per_chunk = 4;
      size = 4;

      status = info->read_memory_func (pc, (bfd_byte *)b, 4, info);
      if (little)
        given = (b[0]) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
      else
        given = (b[3]) | (b[2] << 8) | (b[1] << 16) | (b[0] << 24);
    }
  else
    {
      /* In Thumb mode we have the additional wrinkle of two
         instruction lengths.  Fortunately, the bits that determine
         the length of the current instruction are always to be found
         in the first two bytes.  */
      printer = print_insn_thumb16;
      info->bytes_per_chunk = 2;
      size = 2;

      status = info->read_memory_func (pc, (bfd_byte *)b, 2, info);
      if (little)
        given = (b[0]) | (b[1] << 8);
      else
        given = (b[1]) | (b[0] << 8);

      if (!status)
        {
          /* These bit patterns signal a four-byte Thumb
             instruction.  */
          if ((given & 0xF800) == 0xF800
              || (given & 0xF800) == 0xF000
              || (given & 0xF800) == 0xE800)
            {
              status = info->read_memory_func (pc + 2, (bfd_byte *)b, 2, info);
              if (little)
                given = (b[0]) | (b[1] << 8) | (given << 16);
              else
                given = (b[1]) | (b[0] << 8) | (given << 16);

              printer = print_insn_thumb32;
              size = 4;
            }
        }

      if (ifthen_address != pc)
        find_ifthen_state(pc, info, little);

      if (ifthen_state)
        {
          if ((ifthen_state & 0xf) == 0x8)
            ifthen_next_state = 0;
          else
            ifthen_next_state = (ifthen_state & 0xe0)
                                | ((ifthen_state & 0xf) << 1);
        }
    }
    ...

@honggyukim
Copy link
Owner Author

$ objdump -d uftrace | grep "[0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]"

$ objdump -d uftrace | grep "[0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]" | sed 's/.*\([0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/g'

@honggyukim
Copy link
Owner Author

The following may be a working function for our purpose.

/*
 * Report whether a Thumb halfword starts a 32-bit (Thumb32) instruction.
 *
 * Only bits [15:11] of @given are examined.  The prefixes 0b11101,
 * 0b11110 and 0b11111 are treated as Thumb32; every other prefix
 * yields false.
 */
static bool is_thumb32(unsigned long given)
{
        /* isolate the 5-bit prefix held in bits [15:11] */
        unsigned long prefix = (given >> 11) & 0x1f;

        /* 0x1d, 0x1e and 0x1f are exactly the prefixes at or above 0x1d */
        return prefix >= 0x1d;
}

@honggyukim
Copy link
Owner Author

honggyukim commented Dec 6, 2019

image

image

@honggyukim
Copy link
Owner Author

image

@honggyukim
Copy link
Owner Author

$ ./cstool -d thumb 0x00b5
 0  00 b5  push {lr}
        op_count: 1
                operands[0].type: REG = lr
                operands[0].access: READ
        Registers read: sp lr
        Registers modified: sp
        Groups: thumb thumb1only

$ ./cstool -d thumb 0x48f23463
 0  48 f2 34 63  movw   r3, #0x8634
        op_count: 2
                operands[0].type: REG = r3
                operands[0].access: WRITE
                operands[1].type: IMM = 0x8634
        Registers modified: r3
        Groups: thumb2
141b2:       e92d 47f0       stmdb   sp!, {r4, r5, r6, r7, r8, r9, sl, lr}

$ ./cstool -d thumb 0x2de9f047
 0  2d e9 f0 47  push.w {r4, r5, r6, r7, r8, sb, sl, lr}
        op_count: 8
                operands[0].type: REG = r4
                operands[0].access: READ | WRITE
                operands[1].type: REG = r5
                operands[1].access: READ | WRITE
                operands[2].type: REG = r6
                operands[2].access: READ | WRITE
                operands[3].type: REG = r7
                operands[3].access: READ | WRITE
                operands[4].type: REG = r8
                operands[4].access: READ | WRITE
                operands[5].type: REG = sb
                operands[5].access: READ | WRITE
                operands[6].type: REG = sl
                operands[6].access: READ | WRITE
                operands[7].type: REG = lr
                operands[7].access: READ | WRITE
        Registers read: sp r4 r5 r6 r7 r8 sb sl lr
        Registers modified: sp r4 r5 r6 r7 r8 sb sl lr
        Groups: thumb2

@honggyukim
Copy link
Owner Author

honggyukim commented Dec 13, 2019

The following example jumps back into the prologue, which makes the program crash. This case has to be detected in advance.

0003356c <uftrace_match_filter>:
   3356c:       e92d 41f0       stmdb   sp!, {r4, r5, r6, r7, r8, lr}
   33570:       461d            mov     r5, r3
   33572:       6814            ldr     r4, [r2, #0]        # Here is the branch target
   33574:       b1e4            cbz     r4, 335b0 <uftrace_match_filter+0x44>
   33576:       6926            ldr     r6, [r4, #16]
   33578:       42b0            cmp     r0, r6
...
   335bc:       42b0            cmpeq   r0, r6
   335be:       f104 0208       add.w   r2, r4, #8
   335c2:       d3d6            bcc.n   33572 <uftrace_match_filter+0x6>    # jumps back to prologue
   335c4:       1d22            adds    r2, r4, #4
   335c6:       e7d4            b.n     33572 <uftrace_match_filter+0x6>    # jumps back to prologue
   335c8:       f641 40c0       movw    r0, #7360       ; 0x1cc0
   335cc:       68e1            ldr     r1, [r4, #12]
   335ce:       f2c0 0005       movt    r0, #5
   335d2:       f000 fead       bl      34330 <__pr_dbg>
   335d6:       f8d8 300c       ldr.w   r3, [r8, #12]
   335da:       2b02            cmp     r3, #2
   335dc:       dde8            ble.n   335b0 <uftrace_match_filter+0x44>
   335de:       4628            mov     r0, r5
   335e0:       f7ff fda4       bl      3312c <print_trigger>
   335e4:       4620            mov     r0, r4
   335e6:       e8bd 81f0       ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
   335ea:       bf00            nop

@honggyukim
Copy link
Owner Author

Code snippet to detect whether the PC register is used.

        /* register lists and their lengths, passed to cs_regs_access() below */
        cs_regs regs_read, regs_write;
        uint8_t regs_read_count, regs_write_count;

        /*
         * Check if the instruction uses the PC register.
         * A non-zero result from cs_regs_access() is treated as a failure
         * and reported to the caller as -1.
         */
        if (cs_regs_access(disasm->engine, insn, regs_read, &regs_read_count,
                           regs_write, &regs_write_count)) {
                return -1;
        }

        /* reject the instruction if PC is among the registers it reads */
        for (i = 0; i < regs_read_count; i++) {
                if (regs_read[i] == ARM_REG_PC) {
                        fprintf(stderr, "read PC reg\n");
                        return -1;
                }
        }
        /* likewise reject it if PC is among the registers it writes */
        for (i = 0; i < regs_write_count; i++) {
                if (regs_write[i] == ARM_REG_PC) {
                        fprintf(stderr, "write PC reg\n");
                        return -1;
                }
        }

honggyukim added a commit that referenced this issue Jul 11, 2021
Sometimes it's useful to print backtrace or stacktrace by explicitly
write a request inside source code.

This patch implements stacktrace() function for this.  It's implemented
in two different versions.  In debug mode, it uses an external libunwind
library and it's output looks as follows.

  Stack trace:
    #1  0x000000449ef9 parse_argspec + 0x5d6
    #2  0x00000044b76c parse_argument_spec + 0x70
    #3  0x00000044bf58 setup_trigger_action + 0x146
    #4  0x0000004329e5 find_dwarf_argspec + 0x153
    #5  0x000000432b18 find_auto_argspec + 0x4c
    #6  0x00000044b102 add_filter + 0x87
    #7  0x00000044c1bf add_trigger_entry + 0x15c
    #8  0x00000044c61f setup_trigger + 0x413
    #9  0x00000044c85b uftrace_setup_argument + 0x59
    #10 0x00000044e053 build_arg_spec + 0x4f
    #11 0x00000042a6fe walk_sessions + 0x4a
    #12 0x00000044e12d setup_fstack_args + 0x7d
    #13 0x00000043a353 open_data_file + 0x3ff
    #14 0x0000004171a5 command_replay + 0x7a
    #15 0x000000407ba9 main + 0x4c3
    #16 0x7f4e1df33840 __libc_start_main + 0xf0
    #17 0x000000405499 _start + 0x29

In release mode, it's better not to rely on an external library, so it
just uses builtin backtrace() function, but it can't print static
functions.  The example output looks as follows.

  Stack trace:
    #1  uftrace(parse_argspec+0x4d1) [0x43dd01]
    #2  uftrace() [0x43e44c]
    #3  uftrace(setup_trigger_action+0xb0) [0x43f4e0]
    #4  uftrace() [0x42a933]
    #5  uftrace(find_auto_argspec+0x29) [0x42aff9]
    #6  uftrace() [0x43f2f7]
    #7  uftrace() [0x43fa20]
    #8  uftrace() [0x440140]
    #9  uftrace(walk_sessions+0x39) [0x4248a9]
    #10 uftrace(setup_fstack_args+0xa6) [0x440bb6]
    #11 uftrace(open_data_file+0x16f) [0x43012f]
    #12 uftrace(command_replay+0x59) [0x415ee9]
    #13 uftrace(main+0x544) [0x40adc4]
    #14 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0) [0x7f8e3d944840]
    #15 uftrace(_start+0x29) [0x40aef9]

Signed-off-by: Honggyu Kim <[email protected]>
honggyukim added a commit that referenced this issue Jul 11, 2021
Sometimes it's useful to print backtrace or stacktrace by explicitly
write a request inside source code.

This patch implements stacktrace() function for this.  It's implemented
in two different versions.  In debug mode, it uses an external libunwind
library and it's output looks as follows.

  Stack trace:
    #1  0x000000449ef9 parse_argspec + 0x5d6
    #2  0x00000044b76c parse_argument_spec + 0x70
    #3  0x00000044bf58 setup_trigger_action + 0x146
    #4  0x0000004329e5 find_dwarf_argspec + 0x153
    #5  0x000000432b18 find_auto_argspec + 0x4c
    #6  0x00000044b102 add_filter + 0x87
    #7  0x00000044c1bf add_trigger_entry + 0x15c
    #8  0x00000044c61f setup_trigger + 0x413
    #9  0x00000044c85b uftrace_setup_argument + 0x59
    #10 0x00000044e053 build_arg_spec + 0x4f
    #11 0x00000042a6fe walk_sessions + 0x4a
    #12 0x00000044e12d setup_fstack_args + 0x7d
    #13 0x00000043a353 open_data_file + 0x3ff
    #14 0x0000004171a5 command_replay + 0x7a
    #15 0x000000407ba9 main + 0x4c3
    #16 0x7f4e1df33840 __libc_start_main + 0xf0
    #17 0x000000405499 _start + 0x29

In release mode, it's better not to rely on an external library, so it
just uses builtin backtrace() function, but it can't print static
functions.  The example output looks as follows.

  Stack trace:
    #1  uftrace(parse_argspec+0x4d1) [0x43dd01]
    #2  uftrace() [0x43e44c]
    #3  uftrace(setup_trigger_action+0xb0) [0x43f4e0]
    #4  uftrace() [0x42a933]
    #5  uftrace(find_auto_argspec+0x29) [0x42aff9]
    #6  uftrace() [0x43f2f7]
    #7  uftrace() [0x43fa20]
    #8  uftrace() [0x440140]
    #9  uftrace(walk_sessions+0x39) [0x4248a9]
    #10 uftrace(setup_fstack_args+0xa6) [0x440bb6]
    #11 uftrace(open_data_file+0x16f) [0x43012f]
    #12 uftrace(command_replay+0x59) [0x415ee9]
    #13 uftrace(main+0x544) [0x40adc4]
    #14 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0) [0x7f8e3d944840]
    #15 uftrace(_start+0x29) [0x40aef9]

Signed-off-by: Honggyu Kim <[email protected]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant