Random crashes with nRF5340 and OpenThread

We have a product based on the nRF5340. The devices are connected in mesh networks of around 20 devices each, which reach a border router via OpenThread. We are on the latest nRF Connect SDK version (v2.5.0).

Our devices are rebooting at random: sometimes after days, sometimes after weeks. Sometimes a device is stable for days and then gets into an endless crash loop, with only hours or minutes of uptime between reboots. This causes quite a bit of instability in the network and increases packet loss while the network re-forms.

We have devices with weeks of uptime, while other devices running the exact same firmware reboot every 1-2 hours (due to the kernel panic).

Here are a couple log extracts / kernel panics:

rtt:~$ [00:24:57.743,652] <err> net_otPlat_radio: Error while appending to otMessage
rtt:~$ [00:25:07.755,462] <err> net_otPlat_radio: Error while appending to otMessage
rtt:~$ [00:25:17.757,110] <err> net_otPlat_radio: Error while appending to otMessage
rtt:~$ [00:25:17.771,240] <err> net_otPlat_radio: Error while appending to otMessage
rtt:~$ [00:25:17.772,796] <err> net_otPlat_radio: Error while appending to otMessage
rtt:~$ rtt:~$ [00:25:21.150,085] <err> spinel_ipc_backend_rsp_ntf: No response within timeout 500
rtt:~$ ASSERTION FAIL [0] @ WEST_TOPDIR/zephyr/drivers/ieee802154/ieee802154_nrf5.c:1153
rtt:~$ 	802.15.4 serialization error: -5
rtt:~$ [00:25:21.150,177] <err> os: r0/a1:  0x00000004  r1/a2:  0x00000481  r2/a3:  0x00000003
rtt:~$ [00:25:21.150,177] <err> os: r3/a4:  0x20004140 r12/ip:  0x0000000c r14/lr:  0x0003c8e3
rtt:~$ [00:25:21.150,207] <err> os:  xpsr:  0x41000000
rtt:~$ [00:25:21.150,207] <err> os: r4/v1:  0x200233ec  r5/v2:  0x000b9547  r6/v3:  0x2000d9d0
rtt:~$ [00:25:21.150,238] <err> os: r7/v4:  0x00000000  r8/v5:  0x00000000  r9/v6:  0x62b76bdb
rtt:~$ [00:25:21.150,238] <err> os: r10/v7: 0x00000000  r11/v8: 0x00000000    psp:  0x200233b8
rtt:~$ [00:25:21.150,268] <err> os: EXC_RETURN: 0x0
rtt:~$ [00:25:21.150,299] <err> os: Faulting instruction address (r15/pc): 0x000886a2
rtt:~$ [00:25:21.150,329] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
rtt:~$ [00:25:21.150,360] <err> os: Current thread: 0x20005e38 (openthread)
rtt:~$ [00:25:21.207,489] <err> os: Halting system


rtt:~$ rtt:~$ [00:00:53.192,596] <err> spinel_ipc_backend_rsp_ntf: No response within timeout 500
rtt:~$ ASSERTION FAIL [0] @ WEST_TOPDIR/zephyr/drivers/ieee802154/ieee802154_nrf5.c:1153
rtt:~$ 	802.15.4 serialization error: -5
rtt:~$ [00:00:53.192,687] <err> os: r0/a1:  0x00000004  r1/a2:  0x00000481  r2/a3:  0x00000003
rtt:~$ [00:00:53.192,687] <err> os: r3/a4:  0x20004140 r12/ip:  0x0000000c r14/lr:  0x0003c8e3
rtt:~$ [00:00:53.192,718] <err> os:  xpsr:  0x41000000
rtt:~$ [00:00:53.192,718] <err> os: r4/v1:  0x20024454  r5/v2:  0x000b9547  r6/v3:  0x200157ec
rtt:~$ [00:00:53.192,749] <err> os: r7/v4:  0x20006790  r8/v5:  0x00000000  r9/v6:  0x0001c379
rtt:~$ [00:00:53.192,749] <err> os: r10/v7: 0x00000000  r11/v8: 0x00000000    psp:  0x20024420
rtt:~$ [00:00:53.192,779] <err> os: EXC_RETURN: 0x0
rtt:~$ [00:00:53.192,810] <err> os: Faulting instruction address (r15/pc): 0x000886a2
rtt:~$ [00:00:53.192,840] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
rtt:~$ [00:00:53.192,871] <err> os: Current thread: 0x20006698 (ot_radio_workq)
rtt:~$ [00:00:53.248,443] <err> os: Halting system

I have found pretty much nothing about this issue anywhere, and since it is coming from the OpenThread stack, I'm also not quite sure where to get it addressed. Any ideas?

Thanks

Parents Reply Children
  • Hi Sigurd,

    these are from the application core.

    RTT should be enabled for the netcore, but I haven't been able to get it to work.
    When I select it in visual studio, I get no output:

    The cpunet.dts board file has RTT enabled, AFAIK:

    ```

    /*
     * Copyright (c) 2021 Nordic Semiconductor ASA
     *
     * SPDX-License-Identifier: Apache-2.0
     */
    
    /dts-v1/;
    #include <nordic/nrf5340_cpunet_qkaa.dtsi>
    #include "ivy_nrf5340_cpunet-pinctrl.dtsi"
    #include <zephyr/dt-bindings/input/input-event-codes.h>
    
    //TODO
    // Debugging LED?
    
    / {
    	model = "Nordic Ivy NRF5340 Network";
    	compatible = "nordic,ivy-nrf5340-cpunet";
    
    	chosen {
    		zephyr,console = &uart0;
    		zephyr,shell-uart = &uart0;
    		zephyr,uart-mcumgr = &uart0;
    		zephyr,bt-mon-uart = &uart0;
    		zephyr,bt-c2h-uart = &uart0;
    		zephyr,bt-hci-rpmsg-ipc = &ipc0;
    		nordic,802154-spinel-ipc = &ipc0;
    		zephyr,sram = &sram1;
    		zephyr,flash = &flash1;
    		zephyr,code-partition = &slot0_partition;
    		zephyr,ieee802154 = &ieee802154;
    	};
    
    	buttons {
    		compatible = "gpio-keys";
    		button0: button_0 {
    			gpios = <&gpio1 9 (GPIO_PULL_UP | GPIO_ACTIVE_LOW)>; //Z0
    			label = "User Button 1 (BTN1)";
    			// zephyr,code = <INPUT_KEY_0>;
    		};
    		button1: button_1 {
    			gpios = <&gpio1 10 (GPIO_PULL_UP | GPIO_ACTIVE_LOW)>; //Z1
    			label = "User Button 2 (BTN2)";
    			// zephyr,code = <INPUT_KEY_1>;
    		};
    	};
    
    	aliases {
    		sw0 = &button0;
    		sw1 = &button1;
    		watchdog0 = &wdt;
    	};
    };
    
    &gpiote {
    	status = "okay";
    };
    
    &gpio0 {
    	status = "okay";
    };
    
    &gpio1 {
    	status = "okay";
    };
    
    // Use RTT UART since we don't have UART exposed.
    &uart0 {
    	compatible = "segger,rtt-uart";
    	status = "okay";
    };
    
    &flash1 {
    
    	partitions {
    		compatible = "fixed-partitions";
    		#address-cells = <1>;
    		#size-cells = <1>;
    
    		boot_partition: partition@0 {
    			label = "mcuboot";
    			reg = <0x00000000 0xc000>;
    		};
    		slot0_partition: partition@c000 {
    			label = "image-0";
    			reg = <0x0000C000 0x12000>;
    		};
    		slot1_partition: partition@1e000 {
    			label = "image-1";
    			reg = <0x0001E000 0x12000>;
    		};
    		scratch_partition: partition@30000 {
    			label = "image-scratch";
    			reg = <0x00030000 0xa000>;
    		};
    		storage_partition: partition@3a000 {
    			label = "storage";
    			reg = <0x0003a000 0x6000>;
    		};
    	};
    };
    
    &ieee802154 {
    	status = "okay";
    };
    
    /* Include shared RAM configuration file */
    #include "ivy_nrf5340_shared_sram_planning_conf.dtsi"
    ```

    Is there something else I need to do to make it work?

  • I had to enable a couple more things in the multiprotocol_rpmsg.conf:

    # enable logging
    CONFIG_LOG=y
    CONFIG_LOG_DEFAULT_LEVEL=3

    # Enable console via j-link RTT
    CONFIG_USE_SEGGER_RTT=y

    CONFIG_CONSOLE=y
    CONFIG_UART_CONSOLE=n
    CONFIG_RTT_CONSOLE=y

    CONFIG_SHELL=n
    CONFIG_SHELL_BACKEND_SERIAL=n
    CONFIG_SHELL_BACKEND_RTT=n

    # disable GPIO, we need the space
    CONFIG_GPIO=n

    I will reply with the log once the crash happens.

  • Here is the log from the network core:

    rtt:~$ rtt:~$ ###RTT Client: ************************************************************ 
    ###RTT Client: *               SEGGER Microcontroller GmbH                * 
    ###RTT Client: *   Solutions for real time microcontroller applications   * 
    ###RTT Client: ************************************************************ 
    ###RTT Client: *                                                          * 
    ###RTT Client: *       (c) 2012 - 2016  SEGGER Microcontroller GmbH       * 
    ###RTT Client: *                                                          * 
    ###RTT Client: *     www.segger.com     Support: support@segger.com       * 
    ###RTT Client: *                                                          * 
    ###RTT Client: ************************************************************ 
    ###RTT Client: *                                                          * 
    ###RTT Client: * SEGGER J-Link RTT Client   Compiled Sep 27 2022 16:08:30 * 
    ###RTT Client: *                                                          * 
    ###RTT Client: ************************************************************ 
    
    ###RTT Client: -----------------------------------------------
    ###RTT Client: Connecting to J-Link RTT Server via localhost:19021 ...
    ###RTT Client: Connected.
    
    SEGGER J-Link V7.80c - Real time terminal output
    J-Link OB-nRF5340-NordicSemi compiled Nov  7 2022 16:22:01 V1.0, SN=1050095136
    Process: JLinkExe
    [00:00:00.001,495] <err> sync_rtc: Failed synchronized RTC setup (err: -12)
    *** Booting nRF Connect SDK v2.5.0 ***
    [00:00:00.001,831] <inf> bt_sdc_hci_driver: SoftDevice Controller build revision: 
                                                c5 93 ba a9 14 4d 8d 05  30 4e 9b 92 d7 71 1e e8 |.....M.. 0N...q..
                                                aa 02 50 3c                                      |..P<             
    [00:00:00.002,258] <inf> bt_hci_raw: Bluetooth enabled in RAW mode
    [00:00:00.002,319] <err> multiprotocol_rpmsg: IPC service instance initialization failed: -120
    
    [00:00:01.589,385] <inf> multiprotocol_rpmsg: Received message of 4 bytes.
    [00:00:01.590,057] <inf> multiprotocol_rpmsg: Received message of 4 bytes.
    [00:00:01.590,667] <inf> multiprotocol_rpmsg: Received message of 4 bytes.
    [00:00:01.591,247] <inf> multiprotocol_rpmsg: Received message of 4 bytes.
    [00:00:01.591,949] <inf> multiprotocol_rpmsg0m
    0m
    [17:50:54.756,805] <err> spinel_ipc_backend: No spinel buffer available to send a new packet
    ASSERTION FAIL [0] @ WEST_TOPDIR/nrf/samples/nrf5340/multiprotocol_rpmsg/src/main.c:267
    	802.15.4 serialization error
    [17:50:54.756,896] <err> os: r0/a1:  0x00000004  r1/a2:  0x0000010b  r2/a3:  0x00000003
    [17:50:54.756,927] <err> os: r3/a4:  0x21001cc0 r12/ip:  0x00000014 r14/lr:  0x01021a4b
    [17:50:54.756,927] <err> os:  xpsr:  0x41000024
    [17:50:54.756,958] <err> os: Faulting instruction address (r15/pc): 0x01036a00
    [17:50:54.756,988] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
    [17:50:54.757,019] <err> os: Fault during interrupt handling
    
    [17:50:54.757,049] <err> os: Current thread: 0x21004000 (unknown)
    [17:50:55.038,909] <err> fatal_error: Resetting system
    [00:00:01.001,220] <err> spinel_ipc_backend: IPC endpoint bind timed out
    ASSERTION FAIL [0] @ WEST_TOPDIR/nrf/samples/nrf5340/multiprotocol_rpmsg/src/main.c:267
    	802.15.4 serialization error
    [00:00:01.001,312] <err> os: r0/a1:  0x00000004  r1/a2:  0x0000010b  r2/a3:  0x00000003
    [00:00:01.001,342] <err> os: r3/a4:  0x21001cc0 r12/ip:  0x00000014 r14/lr:  0x01021a4b
    [00:00:01.001,373] <err> os:  xpsr:  0x49000000
    [00:00:01.001,373] <err> os: Faulting instruction address (r15/pc): 0x01036a00
    [00:00:01.001,403] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
    [00:00:01.001,464] <err> os: Current thread: 0x21004080 (unknown)
    [00:00:01.247,161] <err> fatal_error: Resetting system
    [00:00:01.001,190] <err> spinel_ipc_backend: IPC endpoint bind timed out
    ASSERTION FAIL [0] @ WEST_TOPDIR/nrf/samples/nrf5340/multiprotocol_rpmsg/src/main.c:267
    	802.15.4 serialization error
    [00:00:01.001,281] <err> os: r0/a1:  0x00000004  r1/a2:  0x0000010b  r2/a3:  0x00000003
    [00:00:01.001,312] <err> os: r3/a4:  0x21001cc0 r12/ip:  0x00000014 r14/lr:  0x01021a4b
    [00:00:01.001,342] <err> os:  xpsr:  0x49000000
    [00:00:01.001,342] <err> os: Faulting instruction address (r15/pc): 0x01036a00
    [00:00:01.001,373] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
    [00:00:01.001,434] <err> os: Current thread: 0x21004080 (unknown)
    [00:00:01.247,253] <err> fatal_error: Resetting system
    [00:00:01.001,190] <err> spinel_ipc_backend: IPC endpoint bind timed out
    ASSERTION FAIL [0] @ WEST_TOPDIR/nrf/samples/nrf5340/multiprotocol_rpmsg/src/main.c:267
    	802.15.4 serialization error
    [00:00:01.001,281] <err> os: r0/a1:  0x00000004  r1/a2:  0x0000010b  r2/a3:  0x00000003
    [00:00:01.001,312] <err> os: r3/a4:  0x21001cc0 r12/ip:  0x00000014 r14/lr:  0x01021a4b
    [00:00:01.001,342] <err> os:  xpsr:  0x49000000
    [00:00:01.001,342] <err> os: Faulting instruction address (r15/pc): 0x01036a00
    [00:00:01.001,373] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
    [00:00:01.001,434] <err> os: Current thread: 0x21004080 (unknown)
    [00:00:01.247,100] <err> fatal_error: Resetting system
    [00:00:01.001,190] <err> spinel_ipc_backend: IPC endpoint bind timed out
    ASSERTION FAIL [0] @ WEST_TOPDIR/nrf/samples/nrf5340/multiprotocol_rpmsg/src/main.c:267
    	802.15.4 serialization error
    [00:00:01.001,281] <err> os: r0/a1:  0x00000004  r1/a2:  0x0000010b  r2/a3:  0x00000003
    [00:00:01.001,312] <err> os: r3/a4:  0x21001cc0 r12/ip:  0x00000014 r14/lr:  0x01021a4b
    [00:00:01.001,342] <err> os:  xpsr:  0x49000000
    [00:00:01.001,342] <err> os: Faulting instruction address (r15/pc): 0x01036a00
    [00:00:01.001,373] <err> os: >>> ZEPHYR FATAL ERROR 4: Kernel panic on CPU 0
    [00:00:01.001,434] <err> os: Current thread: 0x21004080 (unknown)
    [00:00:01.247,161] <err> fatal_error: Resetting system
    [00:00:01.001,190] <err> spinel_ipc_backend: IPC endpoint bind timed out
    

  • Hi,

    Can you try to apply the changes from this pull request and see if that solves the issues you are seeing?

    Best regards,
    Jørgen

  • In which version of the SDK is the pull request mentioned above implemented?

    Waiting to upgrade...

    Mary

Related