linux-backtrace-to-handle-segment-fault-signal

参考: jxgz_leo, blog.csdn.net/jxgz_leo/article/details/53458366
参考: iEearth, blog.csdn.net/iEearth/article/details/49763481

段错误信号

当程序出现异常时, 内核会发来异常信号, 然后才退出, 段错误的信号是SIGSEGV. 所以可以捕获该信号, 调用backtrace()相关函数打印调用栈, 协助定位问题.

Demo

demo.cpp

#include <iostream>
#include <string>

#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/**
 * SIGSEGV handler: prints the process memory map and the current call
 * stack, then terminates the process with exit status -1.
 *
 * The memory map is printed so that the runtime addresses shown in the
 * backtrace can later be converted into offsets inside the executable.
 *
 * @param sig_num  Signal number passed to the handler. Any value other than
 *                 SIGSEGV is ignored and the function returns immediately.
 *
 * NOTE: fprintf(), system(), backtrace_symbols() and exit() are not
 * async-signal-safe. That is tolerable for a diagnostic demo that is about
 * to terminate anyway, but a production handler should restrict itself to
 * backtrace_symbols_fd(), write() and _exit().
 */
void ShowBacktrace(int sig_num)
{
    if (sig_num != SIGSEGV) {
        return;
    }

    // Restore the default action so that a second fault raised while this
    // handler runs kills the process instead of re-entering the handler.
    signal(sig_num, SIG_DFL);

    // Print the current process's memory maps.
    fprintf(stderr, "====================== maps ======================\n");
    char display_map[64];
    snprintf(display_map, sizeof(display_map), "cat /proc/%d/maps", (int)getpid());
    if (system(display_map) == -1) {
        // Best effort only: the backtrace below is still worth printing.
        fprintf(stderr, "failed to run: %s\n", display_map);
    }
    fprintf(stderr, "==================================================\n");

    void *buf[10];
    const int max_frames = (int)(sizeof(buf) / sizeof(buf[0]));

    // backtrace() returns how many entries of buf it actually filled in.
    const int frame_num = backtrace(buf, max_frames);

    // Resolve only the filled-in entries; passing the buffer capacity here
    // would make backtrace_symbols() read uninitialised pointers whenever
    // the stack is shallower than the buffer.
    char **str = backtrace_symbols(buf, frame_num);
    if (!str) {
        fprintf(stderr, "backtrace_symbols fail\n");
        exit(-1);
    }

    fprintf(stderr, "SegmentFault backtrace info:\n");
    for (int i = 0; i < frame_num; ++i) {
        fprintf(stderr, "%d %s\n", i, str[i]);
    }
    // backtrace_symbols() returns one malloc'ed block; a single free() releases it.
    free(str);

    exit(-1);
}

// Deliberately provokes a crash for the demo by releasing the same
// 256 MiB heap allocation twice (a double free, which is undefined behaviour).
// In the sample run shown in this article the second free() raises SIGSEGV.
// NOTE(review): presumably the request is large enough for glibc to serve it
// with mmap, so the first free() unmaps the memory and the second one touches
// an unmapped address -- confirm before changing the allocation size.
void GenerateError()
{
void *p = malloc(1024*1024*256);
free(p);
free(p);// intentional double free: this is the call that crashes
}

/**
 * Demo entry point: registers the SIGSEGV backtrace handler and then
 * triggers the deliberate fault.
 *
 * @param argc  Number of command-line arguments (unused).
 * @param argv  Command-line arguments (unused).
 * @return 0 if GenerateError() returns.
 */
int main(int argc, char *argv[])
{
    // The demo takes no command-line options.
    (void)argc;
    (void)argv;

    // Install the handler first so the fault provoked below is reported.
    void (*on_segv)(int) = ShowBacktrace;
    signal(SIGSEGV, on_segv);

    GenerateError();

    return 0;
}

编译

g++ -g -rdynamic -o demo demo.cpp -Wl,-Map,demo.map

参数说明:
-g: 生成调试信息. 如果少了该参数, 运行报错时仍能显示函数名, 但addr2line看不到具体行号
-rdynamic: 把所有符号导出到动态符号表, backtrace_symbols()依赖它才能把地址解析成函数名
-Wl,-Map,demo.map: 在当前目录生成demo.map, 该文件包含一些地址信息, 方便寻找函数首地址

生成的程序demo经过strip之后, 段错误出现时同样能显示函数名, 但addr2line看不到具体行号

运行

~ # ./demo
====================== maps ======================
55b66341e000-55b66341f000 r--p 00000000 08:05 3022387 XXX/demo
55b66341f000-55b663420000 r-xp 00001000 08:05 3022387 XXX/demo
55b663420000-55b663421000 r--p 00002000 08:05 3022387 XXX/demo
55b663421000-55b663422000 r--p 00002000 08:05 3022387 XXX/demo
55b663422000-55b663423000 rw-p 00003000 08:05 3022387 XXX/demo
55b664eb3000-55b664ed4000 rw-p 00000000 00:00 0 [heap]
7eff9d8bc000-7eff9d8c0000 rw-p 00000000 00:00 0
7eff9d8c0000-7eff9d8c3000 r--p 00000000 08:05 1312877 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
7eff9d8c3000-7eff9d8d5000 r-xp 00003000 08:05 1312877 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
7eff9d8d5000-7eff9d8d9000 r--p 00015000 08:05 1312877 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
7eff9d8d9000-7eff9d8da000 r--p 00018000 08:05 1312877 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
7eff9d8da000-7eff9d8db000 rw-p 00019000 08:05 1312877 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
7eff9d8db000-7eff9d8ea000 r--p 00000000 08:05 1312888 /usr/lib/x86_64-linux-gnu/libm-2.31.so
7eff9d8ea000-7eff9d991000 r-xp 0000f000 08:05 1312888 /usr/lib/x86_64-linux-gnu/libm-2.31.so
7eff9d991000-7eff9da28000 r--p 000b6000 08:05 1312888 /usr/lib/x86_64-linux-gnu/libm-2.31.so
7eff9da28000-7eff9da29000 r--p 0014c000 08:05 1312888 /usr/lib/x86_64-linux-gnu/libm-2.31.so
7eff9da29000-7eff9da2a000 rw-p 0014d000 08:05 1312888 /usr/lib/x86_64-linux-gnu/libm-2.31.so
7eff9da2a000-7eff9da4f000 r--p 00000000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9da4f000-7eff9dbc7000 r-xp 00025000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9dbc7000-7eff9dc11000 r--p 0019d000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9dc11000-7eff9dc12000 ---p 001e7000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9dc12000-7eff9dc15000 r--p 001e7000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9dc15000-7eff9dc18000 rw-p 001ea000 08:05 1312886 /usr/lib/x86_64-linux-gnu/libc-2.31.so
7eff9dc18000-7eff9dc1c000 rw-p 00000000 00:00 0
7eff9dc1c000-7eff9dcb2000 r--p 00000000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9dcb2000-7eff9dda2000 r-xp 00096000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9dda2000-7eff9ddeb000 r--p 00186000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9ddeb000-7eff9ddec000 ---p 001cf000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9ddec000-7eff9ddf7000 r--p 001cf000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9ddf7000-7eff9ddfa000 rw-p 001da000 08:05 1312880 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28
7eff9ddfa000-7eff9ddff000 rw-p 00000000 00:00 0
7eff9de0f000-7eff9de10000 r--p 00000000 08:05 1312882 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7eff9de10000-7eff9de33000 r-xp 00001000 08:05 1312882 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7eff9de33000-7eff9de3b000 r--p 00024000 08:05 1312882 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7eff9de3c000-7eff9de3d000 r--p 0002c000 08:05 1312882 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7eff9de3d000-7eff9de3e000 rw-p 0002d000 08:05 1312882 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7eff9de3e000-7eff9de3f000 rw-p 00000000 00:00 0
7ffdc9b4c000-7ffdc9b6f000 rw-p 00000000 00:00 0 [stack]
7ffdc9b9c000-7ffdc9b9f000 r--p 00000000 00:00 0 [vvar]
7ffdc9b9f000-7ffdc9ba0000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]
==================================================
SegmentFault backtrace info:
0 ./demo(_Z13ShowBacktracei+0xc9) [0x55b66341f3b2]
1 /lib/x86_64-linux-gnu/libc.so.6(+0x46210) [0x7eff9da70210]
2 /lib/x86_64-linux-gnu/libc.so.6(cfree+0x20) [0x7eff9dac7870]
3 ./demo(_Z13GenerateErrorv+0x32) [0x55b66341f4ef]
4 ./demo(main+0x29) [0x55b66341f51b]
5 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0x7eff9da510b3]
6 ./demo(_start+0x2e) [0x55b66341f22e]

可以看到出错位置: 3 ./demo(_Z13GenerateErrorv+0x32) [0x55b66341f4ef], 具体意义如下

Key Value
_Z 是函数名开始标识符
13 表示函数名长度
GenerateError 函数名
v 表示参数类型为void
+0x32 表示偏移地址, 下文用到
0x55b66341f4ef 表示对应的出错地址, 不过这是进程运行时的虚拟地址(每次运行的加载基址可能不同), 需要换算成它在可执行文件demo中的偏移地址

查找出错地址

从上文的maps部分摘取关键打印如下

55b66341f000-55b663420000 r-xp 00001000 08:05 3022387    XXX/demo
│ │ │ │ │ │ └─────── 映射文件名
│ │ │ │ │ └────────────────── 映射文件的inode节点号, 0表示匿名映射
│ │ │ │ └──────────────────────── 映射文件所属设备号(主设备号:次设备号), 此行表示有名映射, 00:00表示匿名映射
│ │ │ └───────────────────────────────── 该映射在文件中的地址偏移量
│ │ └────────────────────────────────────── r=可读/w=可写/x=可执行/p=私有/s=共享, p与s互斥
│ └─────────────────────────────────────────────────── vm_end
└──────────────────────────────────────────────────────────────── vm_start
参考文档blog.csdn.net/lijzheng/article/details/23618365

可以看到0x55b66341f4ef刚好位于此区间
所以该地址(0x55b66341f4ef) - 此区间首地址(0x55b66341f000) + 地址偏移量(0x00001000) = 0x14ef

上文编译时生成了demo.map文件, 该文件包含很多信息, 直接搜索GenerateError得到如下信息

......
0x00000000000012e9 ShowBacktrace(int)
0x00000000000014bd GenerateError()
0x00000000000014f2 main
......

可知GenerateError的地址为0x14bd, 再加上上面的偏移地址0x32, 得到0x14bd + 0x32 = 0x14ef

addr2line显示出错行号

~ # addr2line -e demo 0x14ef
XXXXXX/demo.cpp:50

由此可得, 错误是由demo.cpp第50行处的代码导致的.
如果编译时的参数没有-g或者生成的二进制strip了, 则这里是不会显示行号的.

交叉编译

交叉编译工具链也有对应的addr2line工具, 加上对应的前缀即可, 步骤跟上文一致.