VM-RE系列2之angr符号执行


[TOC]

angr 知识参考 Angr大法好 一文

一个例题来感受下angr的强大吧(对于简单的程序)

[UNCTF2019]easyvm

angr解法

废话不多说,找到成功标志的字符串地址一把梭,直接上exp:5分钟左右就可以跑出来,当然大家可以通过剪枝去加快速度

import angr

# Load the challenge binary without loading its shared libraries.
proj = angr.Project('./2019UNCTF-easyvm', auto_load_libs=False)

# Start symbolic execution from the program entry point.
init_state = proj.factory.entry_state()
sim = proj.factory.simulation_manager(init_state)

# 0x400BDA is the success target picked in the write-up (the place where the
# success message is reached).
sim.explore(find=0x400BDA)

found_states = sim.found
if found_states:
    # Dump the bytes the first matching state supplied on fd 0 (stdin).
    ans = found_states[0].posix.dumps(0)
    print(ans)
else:
    print("Can't find solution!")
# Reported result: 942a4115be2359ffd675fa6338ba23b6

普通解法

main函数如下:sub_400C1E对v3进行了初始化

_int64 __fastcall main(int a1, char **a2, char **a3)
{
  unsigned int (__fastcall ***v3)(_QWORD, void *, void *, char *); // rbx
  char s[96]; // [rsp+10h] [rbp-80h] BYREF
  int v6; // [rsp+70h] [rbp-20h]
  unsigned __int64 v7; // [rsp+78h] [rbp-18h]

  v7 = __readfsqword(0x28u);
  memset(s, 0, sizeof(s));
  v6 = 0;
  v3 = (unsigned int (__fastcall ***)(_QWORD, void *, void *, char *))operator new(0x28uLL);
  sub_400C1E((__int64)v3);                      // 对v3进行相关初始化
  puts("please input your flag:");
  scanf("%s", s);                               // 输入flag
  if ( strlen(s) != 32 )                        // 输入flag长度为32
  {
    puts("The length of flag is wrong!");
    puts("Please try it again!");
  }
  if ( (**v3)(v3, &unk_602080, &byte_6020A0, s) )// dispatcher
  {
    puts("Congratulations!");
    printf("The flag is UNCTF{%s}", s);
  }
  return 1LL;
}

sub_400C1E函数如下:注意这里的off_4010A8参数,跟进看看

// Initialises the VM object at a1: installs the function-pointer table
// off_4010A8 at offset 0 and zeroes every other field. Returns a1.
__int64 __fastcall sub_400C1E(__int64 a1)
{
  __int64 result; // rax

  *(_QWORD *)a1 = off_4010A8;                   // +0:  pointer to the handler table
  *(_QWORD *)(a1 + 8) = 0LL;                    // +8:  qword, later set by the dispatcher to a2 + 9
  *(_BYTE *)(a1 + 16) = 0;                      // +16: byte field
  *(_BYTE *)(a1 + 17) = 0;                      // +17: byte field
  *(_BYTE *)(a1 + 18) = 0;                      // +18: byte field
  *(_DWORD *)(a1 + 20) = 0;                     // +20: dword tested by opcodes 0xAE / 0xAF
  *(_QWORD *)(a1 + 24) = 0LL;                   // +24: qword, later set by the dispatcher to a3
  result = a1;
  *(_QWORD *)(a1 + 32) = 0LL;                   // +32: qword, later set by the dispatcher to a4
  return result;
}

off_4010A8位置处:全是函数偏移,我们跟进第一个函数,也就是main函数v3调用的函数

.rodata:00000000004010A8 06 08 40 00 00 00 00 00       off_4010A8 dq offset sub_400806 ; +0: dispatcher, called from main via (**v3)(...)
.rodata:00000000004010A8                                                             ; DATA XREF: sub_400C1E+8↑o
.rodata:00000000004010B0 7C 0C 40 00 00 00 00 00       dq offset sub_400C7C ; +8:   called for opcode 0xA0
.rodata:00000000004010B8 9A 0C 40 00 00 00 00 00       dq offset sub_400C9A ; +16:  called for opcode 0xA1
.rodata:00000000004010C0 B8 0C 40 00 00 00 00 00       dq offset sub_400CB8 ; +24:  called for opcode 0xA2
.rodata:00000000004010C8 D6 0C 40 00 00 00 00 00       dq offset sub_400CD6 ; +32:  called for opcode 0xA3
.rodata:00000000004010D0 FA 0C 40 00 00 00 00 00       dq offset sub_400CFA ; +40:  called for opcode 0xA4
.rodata:00000000004010D8 1E 0D 40 00 00 00 00 00       dq offset sub_400D1E ; +48:  called for opcode 0xA5
.rodata:00000000004010E0 42 0D 40 00 00 00 00 00       dq offset sub_400D42 ; +56:  called for opcode 0xA6
.rodata:00000000004010E8 56 0D 40 00 00 00 00 00       dq offset sub_400D56 ; +64:  called for opcode 0xA7
.rodata:00000000004010F0 70 0D 40 00 00 00 00 00       dq offset sub_400D70 ; +72:  called for opcode 0xA8
.rodata:00000000004010F8 84 0D 40 00 00 00 00 00       dq offset sub_400D84 ; +80:  called for opcode 0xA9
.rodata:0000000000401100 B0 0D 40 00 00 00 00 00       dq offset sub_400DB0 ; +88:  called for opcode 0xAA
.rodata:0000000000401108 DC 0D 40 00 00 00 00 00       dq offset sub_400DDC ; +96:  called for opcode 0xAB
.rodata:0000000000401110 56 0E 40 00 00 00 00 00       dq offset sub_400E56 ; +104: called for opcode 0xAC
.rodata:0000000000401118 D0 0E 40 00 00 00 00 00       dq offset sub_400ED0 ; +112: called for opcode 0xAD

sub_400806(dispatcher):这里其实就非常明显了其中a1就是main传的v3,而我们知道v3存储的是上面函数偏移数组的首地址,那么+8,+16,+24…就可以去找到其他函数的偏移了,最后把相关变量的逻辑捋清楚就行了;这里需要注意的是,这里不像之前都有opcodes数组,这里的算法是对我们的输入一位一位进行相似的处理,所以其实就是一个循环。那么opcodes就是重复的,所以它直接蕴含在了vm_ip的变化之中

// VM dispatcher (slot 0 of the table stored at *a1). It saves its three
// arguments inside the VM object, then loops: read the byte the qword at
// a1 + 8 points to (the VM instruction pointer, "vm_ip"), call the matching
// handler through the table at *a1, and adjust vm_ip.
//
//   a1: VM object (main passes v3)
//   a2: base used for the initial vm_ip (main passes &unk_602080)
//   a3: stored at a1 + 24 (main passes &byte_6020A0)
//   a4: stored at a1 + 32 (main passes the input buffer s)
//
// Returns 1 from opcode 0xAF when the dword at a1 + 20 equals 1; returns 0
// from opcode 0xAE when that dword is non-zero, or on an unknown opcode.
__int64 __fastcall sub_400806(__int64 a1, __int64 a2, __int64 a3, __int64 a4)
{
  *(_QWORD *)(a1 + 8) = a2 + 9; // initial vm_ip: the opcode of the first handler to execute
  *(_QWORD *)(a1 + 24) = a3;
  *(_QWORD *)(a1 + 32) = a4;
  while ( 2 )
  {
    // Dispatch on the opcode byte that vm_ip currently points to.
    switch ( **(_BYTE **)(a1 + 8) )
    {
      case 0xA0:
        // Table slot +8. vm_ip is not adjusted here; presumably the handler
        // moves it itself -- TODO confirm in sub_400C7C.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 8LL))(a1);
        continue;
      case 0xA1:
        // Table slot +16; no vm_ip adjustment in the dispatcher.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 16LL))(a1);
        continue;
      case 0xA2:
        // Table slot +24, then vm_ip += 11.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 24LL))(a1);
        *(_QWORD *)(a1 + 8) += 11LL;
        continue;
      case 0xA3:
        // Table slot +32, then vm_ip += 2.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 32LL))(a1);
        *(_QWORD *)(a1 + 8) += 2LL;
        continue;
      case 0xA4:
        // Table slot +40, then vm_ip += 7.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 40LL))(a1);
        *(_QWORD *)(a1 + 8) += 7LL;
        continue;
      case 0xA5:
        // Table slot +48, then vm_ip += 1.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 48LL))(a1);
        ++*(_QWORD *)(a1 + 8);
        continue;
      case 0xA6:
        // Table slot +56, then vm_ip -= 2.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 56LL))(a1);
        *(_QWORD *)(a1 + 8) -= 2LL;
        continue;
      case 0xA7:
        // Table slot +64, then vm_ip += 7.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 64LL))(a1);
        *(_QWORD *)(a1 + 8) += 7LL;
        continue;
      case 0xA8:
        // Table slot +72; no vm_ip adjustment in the dispatcher.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 72LL))(a1);
        continue;
      case 0xA9:
        // Table slot +80, then vm_ip -= 6.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 80LL))(a1);
        *(_QWORD *)(a1 + 8) -= 6LL;
        continue;
      case 0xAA:
        // Table slot +88; no vm_ip adjustment in the dispatcher.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 88LL))(a1);
        continue;
      case 0xAB:
        // Table slot +96, then vm_ip -= 4.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 96LL))(a1);
        *(_QWORD *)(a1 + 8) -= 4LL;
        continue;
      case 0xAC:
        // Table slot +104; no vm_ip adjustment in the dispatcher.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 104LL))(a1);
        continue;
      case 0xAD:
        // Table slot +112, then vm_ip += 2.
        (*(void (__fastcall **)(__int64))(*(_QWORD *)a1 + 112LL))(a1);
        *(_QWORD *)(a1 + 8) += 2LL;
        continue;
      case 0xAE:
        // No handler call: a non-zero dword at a1 + 20 ends the run with 0
        // (failure); otherwise vm_ip -= 12.
        if ( *(_DWORD *)(a1 + 20) )
          return 0LL;
        *(_QWORD *)(a1 + 8) -= 12LL;
        continue;
      case 0xAF:
        // No handler call: unless the dword at a1 + 20 equals 1, step vm_ip
        // back by 6 and keep running; when it equals 1, return 1 (success).
        if ( *(_DWORD *)(a1 + 20) != 1 )
        {
          *(_QWORD *)(a1 + 8) -= 6LL;
          continue;
        }
        return 1LL;
      default:
        // Any byte outside 0xA0..0xAF aborts the run.
        puts("cmd execute error");
        return 0LL;
    }
  }
}

分析知:相关变量对应的含义,以及相关操作码对应函数的功能如下

"""
(**a1)          == dispatcher
(**(a1+8))      == vm_ip
(*(*a1+8*k))    == handler_k
(*(a1+8))       == &vm_ip
(*(a1+16))      == reg0  (char)
(*(a1+17))      == reg1
(*(a1+18))      == reg2
(*(a1+20))      == eflag
(*(a1+24))      == enc_flag
"""
# Opcode -> one-line description of what the matching VM handler does.
# The opcodes are contiguous, so the descriptions are listed in opcode order
# and numbered from 0xA0.
handlers = dict(enumerate((
    'reg0++',                                                # 0xA0
    'reg1++',                                                # 0xA1
    'reg2++',                                                # 0xA2
    'reg0 -= reg2',                                          # 0xA3
    'reg0 ^= reg1',                                          # 0xA4
    'reg1 ^= reg0',                                          # 0xA5
    'reg0 = -51',                                            # 0xA6
    'reg1 = reg0',                                           # 0xA7
    'reg2 = -51',                                            # 0xA8
    'reg0 = input[reg2]',                                    # 0xA9
    'reg1 = input[reg2]',                                    # 0xAA
    'if reg0 =<> enc_flag[reg2]:eflag=; ==,0; >,1; <,-1;',   # 0xAB
    'if reg1 =<> enc_flag[reg2]:eflag=; ==,0; >,1; <,-1;',   # 0xAC
    'eflag = (reg2>0x1F)',                                   # 0xAD
    'if eflag != 0:fail',                                    # 0xAE
    'if eflag == 1:win',                                     # 0xAF
), start=0xA0))

通过分析可知,程序循环执行以下操作码对应的函数

0xA9, 0xA3, 0xA5, 0xA6, 0xA4, 0xAB, 0xA7, 0xAE==check1, 0xA2, 0xAD, 0xAF==check2, ... 循环

我们看下这段操作码具体的功能

# Opcodes executed, in order, for every input character.
opcodes = [0xA9, 0xA3, 0xA5, 0xA6, 0xA4, 0xAB, 0xA7, 0xAE, 0xA2, 0xAD, 0xAF]
# The 32 expected bytes that the VM compares each transformed character with.
enc_flag = [
    0xF4, 0x0A, 0xF7, 0x64, 0x99, 0x78, 0x9E, 0x7D, 0xEA, 0x7B,
    0x9E, 0x7B, 0x9F, 0x7E, 0xEB, 0x71, 0xE8, 0x00, 0xE8, 0x07,
    0x98, 0x19, 0xF4, 0x25, 0xF3, 0x21, 0xA4, 0x2F, 0xF4, 0x2F,
    0xA6, 0x7C,
]
# Print a numbered description of each opcode in the loop body.
for count, i in enumerate(opcodes, 1):
    print(f"[{count}]:{handlers[i]}")
# The trace shows that every input character goes through the same steps:
# take input[reg2], subtract the index reg2, XOR with the result kept from the
# previous character (0 for the first one), XOR with 0xcd (-51 as an unsigned
# byte), then compare the result with enc_flag[reg2].
#
# Printed output:
# [1]:reg0 = input[reg2]
# [2]:reg0 -= reg2
# [3]:reg1 ^= reg0
# [4]:reg0 = -51
# [5]:reg0 ^= reg1
# [6]:if reg0 =<> enc_flag[reg2]:eflag=; ==,0; >,1; <,-1;   <- the check
# [7]:reg1 = reg0
# [8]:if eflag != 0:fail
# [9]:reg2++
# [10]:eflag = (reg2>0x1F)   <- success once all 32 characters are processed
# [11]:if eflag == 1:win

这里我直接爆破(这里其实可以不用爆破):这里要注意C与python中数据的表示,-51应该是无符号数,所以得改成0xcd

def encrypt(value, index, reg1):
    """Apply the VM's per-character transform to one candidate byte.

    Mirrors the traced opcode sequence: subtract the position from the
    candidate, XOR with the carried-over result, XOR with 0xCD, and compare
    with ``enc_flag[index]``.

    Args:
        value: Candidate input byte.
        index: Position of the byte in the flag (the VM's reg2).
        reg1: Result carried over from the previous position (0 for the
            first position).

    Returns:
        ``(new_reg1, value)`` when the transformed byte equals
        ``enc_flag[index]``; ``(-1, -1)`` otherwise.
    """
    # The VM registers are single bytes, so wrap every result to 8 bits the
    # way the C code does instead of letting Python ints go negative.
    reg0 = (value - index) & 0xFF
    reg1 = (reg1 ^ reg0) & 0xFF
    # -51 stored in a byte is 0xCD.
    reg0 = 0xCD ^ reg1
    if reg0 != enc_flag[index]:
        return -1, -1
    # The matched byte becomes the carried-over value for the next position.
    return reg0, value

# Recover the flag one position at a time: for each of the 32 positions try
# every byte in 32..127 and keep the first one that passes the check.
reg1 = 0
for count in range(32):
    for i in range(32, 128):
        reg, value = encrypt(i, count, reg1)
        # encrypt() returns (-1, -1) on a mismatch; on a match reg is a byte.
        if reg != -1:
            reg1 = reg
            print(chr(value), end='')
            break
    else:
        # Without a match the carried-over state would be stale and every
        # later character wrong, so stop instead of continuing silently.
        raise ValueError(f"no candidate byte satisfies position {count}")
print()
# Recovered flag: 942a4115be2359ffd675fa6338ba23b6

总结

可以看出,angr 还是比较简单的,找到成功标志的字符串地址就可以一把梭了,但是 angr 对于一些带有反调试,异常等题还是跑不出来,所以手撕才是嗯(四声)道理。


文章作者: XiaozaYa
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 XiaozaYa !
  目录