前言
简单地说,出现内核崩溃时,我们可以利用串口日志提供的信息:先用gdb打开出问题函数所在的目标文件,再使用“disassemble /m 函数名”查看该函数的源码及其对应的汇编码,这样能直观地看到每行源代码对应的汇编码干了什么;再根据串口日志中的寄存器信息查看当时各寄存器的值,就能对上当时的参数和变量的值
示例1
内核oops信息
这是个wifi驱动的空指针问题,问题出在memcmp函数
<1> [101063.001918] Unable to handle kernel NULL pointer dereference at virtual address 0000012a
<1> [101063.010245] pgd = c0004000
<1> [101063.013081] [0000012a] *pgd=00000000
<0> [101063.016863] Internal error: Oops: 17 [#1] PREEMPT THUMB2
<4> [101063.140418] CPU: 0 PID: 146 Comm: ssv6xxx_hci_rx_ Not tainted 3.10.33 #1
<4> [101063.147250] task: c16f9500 ti: c16f0000 task.ti: c16f0000
<4> [101063.152771] PC is at memcmp+0xa/0x1a
<4> [101063.156553] LR is at ssv_start_xmit+0x1dc/0x410 [ssv6x5x]
<4> [101063.162073] pc : [<c00fb848>] lr : [<bf815309>] psr: 80000133
sp : c16f1be8 ip : 00000088 fp : 00000000
<4> [101063.173755] r10: bf8a5498 r9 : c10a4440 r8 : c126c8a4
<4> [101063.179123] r7 : c05f4258 r6 : 0000012a r5 : 00000100 r4 : c16d0f00
<4> [101063.185741] r3 : 00000000 r2 : 00000006 r1 : c05f4258 r0 : 0000012a
<4> [101063.192390] Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment kernel
<4> [101063.199984] Control: 50c53c7d Table: 01eb4059 DAC: 00000015
反汇编
根据“LR is at ssv_start_xmit+0x1dc/0x410 [ssv6x5x]”,反汇编该函数所在的.o文件看下
./owtoolchain/linux64/bin/arm-openwrt-linux-uclibcgnueabi-gdb ./build_dir/target-arm_cortex-a7+neon-vfpv4_uClibc-1.0.25_eabi/sv6318pre/src/ssv6x5x.o
可以根据0x1dc定位到保存的返回地址
根据“PC is at memcmp+0xa/0x1a”,反汇编该函数所在的.o文件看下
./owtoolchain/linux64/bin/arm-openwrt-linux-uclibcgnueabi-gdb ./marvell/linux/arch/arm/boot/compressed/string.o
memcmp+0xa处的指令就是从第一个参数(r0,在函数入口被保存到r3;根据上图,就是cur->mac_addr)中逐字节取出数据,与第二个参数(r1)逐字节比较;但串口日志中出错的虚拟地址是0000012a,与r0的值一致,这是一个落在零页内的非法地址(相当于空指针加上一个小偏移),所以触发异常
(gdb) disassemble /m memcmp
Dump of assembler code for function memcmp:
69 {
   0x000000e4 <+2>:  mov r3, r0 //将第一参数r0,保存在r3中
   0x000000e6 <+4>:  push {r4, lr}

70 const unsigned char *su1 = cs, *su2 = ct, *end = su1 + count;
   0x000000e2 <+0>:  add r2, r0

71 int res = 0;
72
73 while (su1 < end) {
   0x000000e8 <+6>:  cmp r3, r2
   0x000000ea <+8>:  bcs.n 0xfa <memcmp+24>

74 res = *su1++ - *su2++;
   0x000000ec <+10>: ldrb.w r4, [r3], #1 //取出r3的数据放到R4中,r3地址加1
   0x000000f0 <+14>: ldrb.w r0, [r1], #1

75 if (res)
   0x000000f4 <+18>: subs r0, r4, r0
   0x000000f6 <+20>: beq.n 0xe8 <memcmp+6>
   0x000000f8 <+22>: b.n 0xfc <memcmp+26>
   0x000000fa <+24>: movs r0, #0

76 break;
77 }
78 return res;
79 }
   0x000000fc <+26>: pop {r4, pc}

End of assembler dump.
示例2
内核oops信息
提示:r1寄存器中的地址7265705f没有对应的有效页表项(*pgd=00000000);另外,7265705f按小端字节序解读正好是ASCII字符“_per”,很可能是一段字符串数据被当成了指针
<1> [36592.660565] Unable to handle kernel paging request at virtual address 7265705f
<1> [36592.668068] pgd = c0004000
<1> [36592.671362] [7265705f] *pgd=00000000
<0> [36592.680085] Internal error: Oops: 5 [#1] PREEMPT THUMB2
<4> [36592.685301] Modules linked in: mfp iptable_nat nf_nat_ipv4 nf_conntrack_netlink nf_conntrack_ipv4 ebtable_nat ebtable_filter ebtable_broute xt_time xt_tcpudp xt_tcpmss xt_statistic xt_state xt_quota xt_pkttype xt_physdev xt_owner xt_nat xt_multiport xt_mark xt_mac xt_limit xt_length xt_id xt_http xt_hl xt_ecn xt_dscp xt_conntrack xt_comment xt_addrtype xt_TCPMSS xt_REDIRECT xt_LOG xt_HL xt_DSCP xt_CT xt_CLASSIFY nfnetlink nf_nat_tftp nf_nat_irc nf_nat_ftp nf_defrag_ipv4 nf_conntrack_tftp nf_conntrack_irc nf_conntrack_ftp iptable_raw iptable_mangle iptable_filter ipt_REJECT ipt_MASQUERADE ipt_ECN ip_tables ebtables ebt_vlan ebt_stp ebt_snat ebt_redirect ebt_pkttype ebt_mark_m ebt_mark ebt_limit ebt_ip6 ebt_ip ebt_dnat ebt_arpreply ebt_arp ebt_among ebt_802_3 crc_ccitt ip6t_NPT ip6t_MASQUERADE ip6table_nat
<4> [36592.756335] nf_nat_ipv6 nf_nat ip6t_rt ip6t_frag ip6t_hbh ip6t_eui64 ip6t_mh ip6t_ah ip6t_ipv6header ip6t_REJECT ip6table_raw ip6table_mangle ip6table_filter ip6_tables x_tables nf_conntrack_ipv6 nf_conntrack nf_defrag_ipv6 ipcomp6 xfrm6_tunnel xfrm6_mode_tunnel xfrm6_mode_transport xfrm6_mode_beet esp6 ah6 ipcomp xfrm4_tunnel xfrm4_mode_tunnel xfrm4_mode_transport xfrm4_mode_beet esp4 ah4 ipip ip6_tunnel tunnel6 tunnel4 ip_tunnel af_key xfrm_user xfrm_ipcomp xfrm_algo zram zsmalloc lz4_decompress lz4_compress ssv6x5x button_hotplug
<4> [36592.802268] CPU: 0 PID: 143 Comm: ssv6xxx_hci_rx_ Not tainted 3.10.33 #1
<4> [36592.808947] task: c15e5500 ti: c17d4000 task.ti: c17d4000
<4> [36592.814346] PC is at strcmp+0x4/0x20
<4> [36592.817975] LR is at ssv_rx_handle_msg+0x32/0x88 [ssv6x5x]
<4> [36592.823465] pc : [<c00fb676>] lr : [<bf812e37>] psr: 20000133
sp : c17d5e88 ip : 00000000 fp : 00000005
<4> [36592.834933] r10: c16b8000 r9 : c108f446 r8 : c1758440
<4> [36592.840149] r7 : 00000000 r6 : c108f4b2 r5 : c1d54000 r4 : 00000156
<4> [36592.846676] r3 : 00000075 r2 : 00000156 r1 : 7265705f r0 : bf861f2a
<4> [36592.853202] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment kernel
<4> [36592.860675] Control: 50c53c7d Table: 01228059 DAC: 00000015
反汇编
根据“PC is at strcmp+0x4/0x20”,可以看到出错的是第二个参数r1,它指向的地址的页表项是0;strcmp+0x4处的指令做的事就是取出r1指向的数据(第二个参数)放到R0中,r1地址加1,即逐字符遍历比较两个字符串(在下面的反汇编中,对应的是<+6>处的ldrb.w指令)
(gdb) disassemble /m strcmp
Dump of assembler code for function strcmp:
82 {
   0x000000fe <+0>:  mov r2, r0

83 unsigned char c1, c2;
84 int res = 0;
85
86 do {
87 c1 = *cs++;
   0x00000100 <+2>:  ldrb.w r3, [r2], #1

88 c2 = *ct++;
89 res = c1 - c2;
   0x00000104 <+6>:  ldrb.w r0, [r1], #1 //取出r1的数据(第二个参数)放到R0中,r1地址加1

90 if (res)
   0x00000108 <+10>: subs r0, r3, r0
   0x0000010a <+12>: bne.n 0x110 <strcmp+18>

91 break;
92 } while (c1);
   0x0000010c <+14>: cmp r3, #0
   0x0000010e <+16>: bne.n 0x100 <strcmp+2>

93 return res;
94 }
   0x00000110 <+18>: bx lr

End of assembler dump.
根据“LR is at ssv_rx_handle_msg+0x32/0x88 [ssv6x5x]”,可以看到strcmp的第二个参数就是SSV_ID2STR(msg->id);这说明代码没有对该参数的有效性做判断就直接使用,从而导致了异常
(gdb) disassemble /m ssv_rx_handle_msg
Dump of assembler code for function ssv_rx_handle_msg:
1050 if (strcmp("unknown", SSV_ID2STR(msg->id)) == 0)
   0x0001052e <+6>:  ldrh r4, [r1, #0]
   0x00010532 <+10>: lsrs r7, r4, #10
   0x00010534 <+12>: cmp r7, #12
   0x00010536 <+14>: bhi.n 0x10552 <ssv_rx_handle_msg+42>
   0x00010538 <+16>: ldr r3, [pc, #96] ; (0x1059c <ssv_rx_handle_msg+116>)
   0x0001053a <+18>: ldr.w r3, [r3, r7, lsl #2]
   0x0001053e <+22>: cbz r3, 0x10552 <ssv_rx_handle_msg+42>
   0x00010540 <+24>: ubfx r2, r4, #0, #10
   0x00010544 <+28>: ldr.w r1, [r3, r2, lsl #2]
   0x00010548 <+32>: ldr r3, [pc, #84] ; (0x105a0 <ssv_rx_handle_msg+120>)
   0x0001054a <+34>: cmp r1, #0
   0x0001054c <+36>: it eq
   0x0001054e <+38>: moveq r1, r3
   0x00010550 <+40>: b.n 0x10554 <ssv_rx_handle_msg+44>
   0x00010552 <+42>: ldr r1, [pc, #76] ; (0x105a0 <ssv_rx_handle_msg+120>)
   0x00010554 <+44>: ldr r0, [pc, #72] ; (0x105a0 <ssv_rx_handle_msg+120>)
   0x00010556 <+46>: bl 0x10556 <ssv_rx_handle_msg+46>
   0x0001055a <+50>: mov r5, r0
   0x0001055c <+52>: cbnz r0, 0x10570 <ssv_rx_handle_msg+72>