Difference between revisions of "Panic"
m (→Best effort for M1 (arm64)) |
|||
(13 intermediate revisions by 3 users not shown) | |||
Line 40: | Line 40: | ||
Which can be repeated for spl, and spl load address as well, if needed. | Which can be repeated for spl, and spl load address as well, if needed. | ||
+ | And for kernel addresses, look for "kernel slide:" value, I assumed 0 in this example | ||
+ | xcrun atos -arch x86_64 -d -o /Volumes/KernelDebugKit/mach_kernel -s 0 0xffffff8000222f79 0xffffff80002dc24e 0xffffff80002f3746 | ||
− | If you are not | + | |
+ | |||
+ | If you are not panicking, but would like to print the stack at a certain point in the kext, you can use | ||
OSReportWithBacktrace("I am here: vp %p\n", vp); | OSReportWithBacktrace("I am here: vp %p\n", vp); | ||
Line 47: | Line 51: | ||
in `printf` style notation. | in `printf` style notation. | ||
+ | |||
+ | === Best effort for M1 (arm64) === | ||
+ | |||
+ | |||
+ | # xcrun llvm-objdump --disassemble --arch arm64e /Library/Extensions/zfs.kext/Contents/MacOS/zfs|head | ||
+ | |||
+ | /Library/Extensions/zfs.kext/Contents/MacOS/zfs: | ||
+ | (__TEXT_EXEC,__text) section | ||
+ | _atomic_cas_ptr: | ||
+ | 94000: 02 fc e1 c8 casal x1, x2, [x0] | ||
+ | |||
+ | org.openzfsonosx.zfs(2.1)[0BF8CB05-9B3B-3182-8DE6-AF14261D75B8]@ 0xfffffe0022410000->0xfffffe00226fffff | ||
+ | 0xfffffe0022410000 - 0x94000 = 0xFFFFFE0022388000 | ||
+ | |||
+ | lr: 0xfffffe002348abe4 fp: 0xfffffe3feaf526f0 | ||
+ | lr: 0xfffffe002348a9c8 fp: 0xfffffe3feaf52760 | ||
+ | |||
+ | # atos -o module/os/macos/zfs/zfs -arch arm64e -l 0xFFFFFE0022388000 0xfffffe002348abe4 0xfffffe002348a9c8 .... | ||
=== Alternate symbol lookup with lldb === | === Alternate symbol lookup with lldb === | ||
Line 79: | Line 101: | ||
err = dmu_tx_assign(tx, TXG_NOWAIT); | err = dmu_tx_assign(tx, TXG_NOWAIT); | ||
+ | Or just for the kernel | ||
+ | |||
+ | (lldb) target create --no-dependents --arch x86_64 mach_kernel | ||
+ | (lldb) target modules load --file mach_kernel --slide 0x000000000b600000 | ||
+ | (lldb) image lookup -a 0xffffff800b8d6aa7 | ||
+ | |||
+ | === Kernel to Userland === | ||
+ | |||
+ | Connect to crashed kernel: | ||
+ | |||
+ | # Load the compiled binary, just makes it easier for lldb to find it when connecting | ||
+ | (lldb) target create --no-dependents --arch x86_64 module/os/macos/zfs/zfs.kext/Contents/MacOS/zfs | ||
+ | Current executable set to 'module/os/macos/zfs/zfs.kext/Contents/MacOS/zfs' (x86_64). | ||
+ | # Connect to crashed VM | ||
+ | (lldb) kdp-remote 172.16.248.128 | ||
+ | Version: Darwin Kernel Version 18.6.0: Thu Apr 25 23:16:27 PDT 2019; root:xnu-4903.261.4~2/DEVELOPMENT_X86_64; UUID=12647AE2-57FB-35DB-AF57-4B25D060D845; stext=0xffffff802f200000 | ||
+ | |||
+ | In this case we have died when trying to copyinstr() from userland: | ||
+ | |||
+ | (lldb) up | ||
+ | frame #1: 0xffffff7f8db91238 zfs`history_str_get(zc=0xffffff90a8dec000) at zfs_ioctl.c:295:6 | ||
+ | 292 return (NULL); | ||
+ | 293 | ||
+ | 294 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); | ||
+ | -> 295 if (copyinstr((void *)(uintptr_t)zc->zc_history, | ||
+ | 296 buf, HIS_MAX_RECORD_LEN, NULL) != 0) { | ||
+ | 297 history_str_free(buf); | ||
+ | 298 return (NULL); | ||
+ | (lldb) p zc->zc_history | ||
+ | (uint64_t) $1 = 4413168320 | ||
+ | |||
+ | Find the task ptr for userprocess, a few ways; | ||
+ | |||
+ | (lldb) showproc 0xffffff804afb3700 | ||
+ | task vm_map ipc_space #acts flags pid process io_policy wq_state command | ||
+ | 0xffffff803bb76498 0xffffff804056cf00 0xffffff803bbd4200 2 D 687 0xffffff804afb3700 1 1 0 zpool | ||
+ | |||
+ | (lldb) showpid 687 | ||
+ | task vm_map ipc_space #acts flags pid process io_policy wq_state command | ||
+ | 0xffffff803bb76498 0xffffff804056cf00 0xffffff803bbd4200 2 D 687 0xffffff804afb3700 1 1 0 zpool | ||
+ | |||
+ | (lldb) showprocinfo 0xffffff804afb3700 | ||
+ | Process 0xffffff804afb3700 | ||
+ | name zpool | ||
+ | pid:687 task:0xffffff803bb76498 p_stat:2 parent pid: 686 | ||
+ | Cred: euid 0 ruid 0 svuid 0 | ||
+ | Flags: 0x4006 | ||
+ | 0x00000002 - has a controlling tty | ||
+ | 0x00000004 - process is 64 bit | ||
+ | 0x00004000 - process has called exec | ||
+ | |||
+ | You can view user process threads: | ||
+ | |||
+ | (lldb) showtaskuserstacks 0xffffff803bb76498 | ||
+ | Process: zpool [687] | ||
+ | Path: zpool | ||
+ | Identifier: zpool | ||
+ | Version: ??? (???) | ||
+ | Code Type: X86-64 (Native) | ||
+ | Parent Process: bash [686] | ||
+ | |||
+ | Date/Time: 2020-05-08 09:20:50.000 -0800 | ||
+ | OS Version: Mac OS X 10.8 (18G103) | ||
+ | Report Version: 8 | ||
+ | |||
+ | Exception Type: n/a | ||
+ | Exception Codes: n/a | ||
+ | Crashed Thread: 0 | ||
+ | |||
+ | Application Specific Information: | ||
+ | Synthetic crash log generated from Kernel userstacks | ||
+ | |||
+ | Binary Images: | ||
+ | Unknown dyld all_image_infos version number 15 | ||
+ | 0x10efd9000 - 0x10f018fff zpool | ||
+ | (??? - ???) <7234AC69-6CE8-321D-9ACC-4F73BB964650> /Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool | ||
+ | |||
+ | [snip] | ||
+ | |||
+ | Create a gdb-server to debug zpool process: | ||
+ | |||
+ | (lldb) beginusertaskdebugging 0xffffff803bb76498 | ||
+ | Starting debug session for zpool at localhost:7936. | ||
+ | |||
+ | From another shell, start to debug the zpool process, | ||
+ | using the convenience wrapper script: | ||
+ | All it does is run "lldb zpool" after setting DYLD_LIBRARY_PATH so you can | ||
+ | run it in the build directory. | ||
+ | |||
+ | ± ./scripts/debug-macos.sh zpool | ||
+ | (lldb) target create "/Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool" | ||
+ | Current executable set to '/Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool' (x86_64). | ||
+ | (lldb) gdb-remote localhost:7936 | ||
+ | Process 687 stopped | ||
+ | * thread #1, stop reason = signal SIGINT | ||
+ | frame #0: 0x00007fff63fbfb5a libsystem_kernel.dylib | ||
+ | -> 0x7fff63fbfb5a: jae 0x7fff63fbfb64 | ||
+ | 0x7fff63fbfb5c: movq %rax, %rdi | ||
+ | 0x7fff63fbfb5f: jmp 0x7fff63fbf421 | ||
+ | 0x7fff63fbfb64: retq | ||
+ | |||
+ | (lldb) up | ||
+ | frame #6: 0x000000010efdc434 zpool`zpool_export_one(zhp=0x00007ff51440b750, data=0x00007ffee0c25718) at zpool_main.c:1706:13 | ||
+ | 1703 if (cb->hardforce) { | ||
+ | 1704 if (zpool_export_force(zhp, history_str) != 0) | ||
+ | 1705 return (1); | ||
+ | -> 1706 } else if (zpool_export(zhp, cb->force, history_str) != 0) { | ||
+ | 1707 return (1); | ||
+ | 1708 } | ||
+ | 1709 | ||
+ | (lldb) p history_str | ||
+ | (char [2049]) $0 = "zpool export BOOM" | ||
=== Links === | === Links === |
Latest revision as of 05:29, 30 May 2023
Contents
Kernel panics[edit]
One of the most useful settings to assist with debugging is telling Darwin kernel to keep the symbols from kexts. This can be set using the nvram command, and requires a reboot.
First check to see if you have any special boot-args set and add the new keepsyms instruction.
# nvram boot-args="keepsyms=y debug=0x144"
and reboot the machine for it to take effect.
[Table 20-1] in Apple's Kernel Programming Guide has a summary of the meaning of the debug options.
Panic decoding[edit]
If you get a panic but you do not have symbols enabled, it can be decoded using the atos command.
In a standard panic log, you will see something like:
# cd /Library/Logs/DiagnosticReports/ # less Kernel_2014-03-13-093629_OSX109.panic Backtrace (CPU 0), Frame : Return Address 0xffffff8088843b10 : 0xffffff7f85e25759 : 0xffffff7f80dcf760 0xffffff8088843b40 : 0xffffff7f85e25560 : 0xffffff7f80dcf423 0xffffff8088843be0 : 0xffffff7f85e08f27 : 0xffffff7f80dc491a Kernel Extensions in backtrace: net.lundman.spl(1.0)[7F69C13B-C730-3475-99E9-53861AC6C54E]@0xffffff7f85d2a000->0xffffff7f85d36fff net.lundman.zfs(1.0)[5637421D-EE17-33F1-ACB2-8FA38BC5A5A6]@0xffffff7f80d54000->0xffffff7f85f38fff
We can then run the command
# xcrun atos -arch x86_64 -l 0xffffff7f80d54000 -o ../zfs/module/zfs/zfs.kext/Contents/MacOS/zfs 0xffffff7f80dcf760 0xffffff7f80dcf423 0xffffff7f80dc491a got symbolicator for ../zfs/module/zfs/zfs.kext/Contents/MacOS/zfs, base address 0 spa_taskqs_init (in zfs) (spa.c:888) spa_create_zio_taskqs (in zfs) (spa.c:972) spa_activate (in zfs) (spa.c:1094)
Which can be repeated for spl, and spl load address as well, if needed.
And for kernel addresses, look for "kernel slide:" value, I assumed 0 in this example
xcrun atos -arch x86_64 -d -o /Volumes/KernelDebugKit/mach_kernel -s 0 0xffffff8000222f79 0xffffff80002dc24e 0xffffff80002f3746
If you are not panicking, but would like to print the stack at a certain point in the kext, you can use
OSReportWithBacktrace("I am here: vp %p\n", vp);
in `printf` style notation.
Best effort for M1 (arm64)[edit]
# xcrun llvm-objdump --disassemble --arch arm64e /Library/Extensions/zfs.kext/Contents/MacOS/zfs|head /Library/Extensions/zfs.kext/Contents/MacOS/zfs: (__TEXT_EXEC,__text) section _atomic_cas_ptr: 94000: 02 fc e1 c8 casal x1, x2, [x0]
org.openzfsonosx.zfs(2.1)[0BF8CB05-9B3B-3182-8DE6-AF14261D75B8]@ 0xfffffe0022410000->0xfffffe00226fffff 0xfffffe0022410000 - 0x94000 = 0xFFFFFE0022388000 lr: 0xfffffe002348abe4 fp: 0xfffffe3feaf526f0 lr: 0xfffffe002348a9c8 fp: 0xfffffe3feaf52760 # atos -o module/os/macos/zfs/zfs -arch arm64e -l 0xFFFFFE0022388000 0xfffffe002348abe4 0xfffffe002348a9c8 ....
Alternate symbol lookup with lldb[edit]
Panic:
panic(cpu 5 caller 0xffffff80088d1066): trying to interlock destroyed mutex (0xffffff8029196000) Backtrace (CPU 5), Frame : Return Address 0xffffff81f49fba80 : 0xffffff8008822fa9 0xffffff81f49fbb00 : 0xffffff80088d1066 0xffffff81f49fbb10 : 0xffffff800889c75e 0xffffff81f49fbbe0 : 0xffffff80088ae60c 0xffffff81f49fbc00 : 0xffffff7f8a4252e0 0xffffff81f49fbdf0 : 0xffffff80089ffea9 net.lundman.zfs(1.0)[0EC79B06-3C9F-3529-8450-42222507F310]@0xffffff7f8a33c000->0xffffff7f8a545fff
Assuming you have the same build as panic report, in this case 1.2.7
# lldb (lldb) target create --no-dependents --arch x86_64 module/zfs/zfs #Binary before moved into zfs.kext (lldb) target modules load --file zfs __TEXT 0xffffff7f8a33c000 (lldb) image lookup --verbose --address 0xffffff7f8a4252e0 Address: zfs[0x00000000000e92e0] (zfs.__TEXT.__text + 950160) Summary: zfs`zfs_vnop_pageout + 1264 at zfs_vnops_osx.c:1236 Module: file = "/Users/lundman/x/zfs/module/zfs/zfs", arch = "x86_64" CompileUnit: id = {0x00000000}, file = "/Users/lundman/x/zfs/module/zfs/zfs_vnops_osx.c", language = "c89" LineEntry: [0xffffff7f8a4252da-0xffffff7f8a4252f0): /Users/lundman/x/zfs/module/zfs/zfs_vnops_osx.c:1236
zfs_vnops_osx.c:1236
tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_write(tx, zp->z_id, off, len); dmu_tx_hold_bonus(tx, zp->z_id); err = dmu_tx_assign(tx, TXG_NOWAIT);
Or just for the kernel
(lldb) target create --no-dependents --arch x86_64 mach_kernel (lldb) target modules load --file mach_kernel --slide 0x000000000b600000 (lldb) image lookup -a 0xffffff800b8d6aa7
Kernel to Userland[edit]
Connect to crashed kernel:
# Load the compiled binary, just makes it easier for lldb to find it when connecting (lldb) target create --no-dependents --arch x86_64 module/os/macos/zfs/zfs.kext/Contents/MacOS/zfs Current executable set to 'module/os/macos/zfs/zfs.kext/Contents/MacOS/zfs' (x86_64). # Connect to crashed VM (lldb) kdp-remote 172.16.248.128 Version: Darwin Kernel Version 18.6.0: Thu Apr 25 23:16:27 PDT 2019; root:xnu-4903.261.4~2/DEVELOPMENT_X86_64; UUID=12647AE2-57FB-35DB-AF57-4B25D060D845; stext=0xffffff802f200000
In this case we have died when trying to copyinstr() from userland:
(lldb) up frame #1: 0xffffff7f8db91238 zfs`history_str_get(zc=0xffffff90a8dec000) at zfs_ioctl.c:295:6 292 return (NULL); 293 294 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); -> 295 if (copyinstr((void *)(uintptr_t)zc->zc_history, 296 buf, HIS_MAX_RECORD_LEN, NULL) != 0) { 297 history_str_free(buf); 298 return (NULL); (lldb) p zc->zc_history (uint64_t) $1 = 4413168320
Find the task ptr for userprocess, a few ways;
(lldb) showproc 0xffffff804afb3700 task vm_map ipc_space #acts flags pid process io_policy wq_state command 0xffffff803bb76498 0xffffff804056cf00 0xffffff803bbd4200 2 D 687 0xffffff804afb3700 1 1 0 zpool (lldb) showpid 687 task vm_map ipc_space #acts flags pid process io_policy wq_state command 0xffffff803bb76498 0xffffff804056cf00 0xffffff803bbd4200 2 D 687 0xffffff804afb3700 1 1 0 zpool (lldb) showprocinfo 0xffffff804afb3700 Process 0xffffff804afb3700 name zpool pid:687 task:0xffffff803bb76498 p_stat:2 parent pid: 686 Cred: euid 0 ruid 0 svuid 0 Flags: 0x4006 0x00000002 - has a controlling tty 0x00000004 - process is 64 bit 0x00004000 - process has called exec
You can view user process threads:
(lldb) showtaskuserstacks 0xffffff803bb76498 Process: zpool [687] Path: zpool Identifier: zpool Version: ??? (???) Code Type: X86-64 (Native) Parent Process: bash [686] Date/Time: 2020-05-08 09:20:50.000 -0800 OS Version: Mac OS X 10.8 (18G103) Report Version: 8 Exception Type: n/a Exception Codes: n/a Crashed Thread: 0 Application Specific Information: Synthetic crash log generated from Kernel userstacks Binary Images: Unknown dyld all_image_infos version number 15 0x10efd9000 - 0x10f018fff zpool (??? - ???) <7234AC69-6CE8-321D-9ACC-4F73BB964650> /Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool [snip]
Create a gdb-server to debug zpool process:
(lldb) beginusertaskdebugging 0xffffff803bb76498 Starting debug session for zpool at localhost:7936.
From another shell, start to debug the zpool process, using the convenience wrapper script: All it does is run "lldb zpool" after setting DYLD_LIBRARY_PATH so you can run it in the build directory.
± ./scripts/debug-macos.sh zpool (lldb) target create "/Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool" Current executable set to '/Users/lundman/src/zfs/openzfs/scripts/..//cmd/zpool/.libs/zpool' (x86_64). (lldb) gdb-remote localhost:7936 Process 687 stopped * thread #1, stop reason = signal SIGINT frame #0: 0x00007fff63fbfb5a libsystem_kernel.dylib -> 0x7fff63fbfb5a: jae 0x7fff63fbfb64 0x7fff63fbfb5c: movq %rax, %rdi 0x7fff63fbfb5f: jmp 0x7fff63fbf421 0x7fff63fbfb64: retq (lldb) up frame #6: 0x000000010efdc434 zpool`zpool_export_one(zhp=0x00007ff51440b750, data=0x00007ffee0c25718) at zpool_main.c:1706:13 1703 if (cb->hardforce) { 1704 if (zpool_export_force(zhp, history_str) != 0) 1705 return (1); -> 1706 } else if (zpool_export(zhp, cb->force, history_str) != 0) { 1707 return (1); 1708 } 1709 (lldb) p history_str (char [2049]) $0 = "zpool export BOOM"
Links[edit]
https://developer.apple.com/library/mac/qa/qa1264/_index.html