This post is older than 2 years and might not be relevant anymore
More Info: Consider searching for newer posts

Secure DFU race condition

I have an iOS App which is just designed for updating our device firmware OTA. It uses the Nordic DFU library, updated to the latest cocoapod. It connects to our device, and then starts DFU with the library.

In the firmware I'm using S132 v7.0.1 SDK 16.0.0 on nRF52832.

Because the device has not connected before, when the library writes the enter bootloader command the device starts a new bond. This is written to flash on the device, but it seems that sometimes the buttonless DFU library attempts to set the bond info in the bootloader before the peer manager has finished writing to flash.

The log:

<info> nrf_ble_lesc: Calling sd_ble_gap_lesc_dhkey_reply on conn_handle: 0
<info> peer_manager_handler: Connection secured: role: Peripheral, conn_handle: 0, procedure: Bonding
<info> app: Paired
<info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Bonding data, action: Update
<info> app: New Bond
<info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Peer rank, action: Update
<info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Local database, action: Update
<info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Local database, action: Update
<info> NRF_DFU_BLE_BONDED: Writing peer data to the bootloader... 1
<error> NRF_DFU_BLE_BONDED: Failed to set peer data: 0011
<error> NRF_DFU_BLE_BONDED: Failed to enter bootloader: 0006
<error> app: Request to enter bootloader mode failed asynchroneously.
<info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Central address resolution, action: Update

I have added some additional logs here that are not in the buttonless DFU by default to print the relevant error codes. Stepping through what is happening:

  1. Bonding occurs
  2. The buttonless lib attempts to write peer data to bootloader (1 = svci state = initialized)
  3. As state is initialized, in nrf_dfu_svci_handler.c line 84 peer data write is attempted
  4. In nrf_dfu_settings_svci.c line 83 sd_flash_write returns NRF_ERROR_BUSY
  5. This is returned to buttonless lib (failed to set peer data error 0x11 == 17 == NRF_ERROR_BUSY)
  6. Peer manager handler indicates peer data write is complete (central address resolution)

Is there any way to prevent this? It seems like it is something that should be handled in the SDK.

  • Here is a patch for my workaround, which uses app_timer to delay by 10ms until peer data set does not return busy. I don't think there is a way to determine if all the peer manager operations are complete but if there is that would be preferable to this solution.

    diff -Naur -x '*.DS_Store' -x '*.elf' -x '*.hex' -x '*.map' -x '*.o' -x '*.bin' -x _build -x Output -x '*.emSession' -x micro-ecc -x dfu_public_key.c nRF5_SDK_16.0.0_98a08e2_vanilla/components/ble/ble_services/ble_dfu/ble_dfu.c nRF5_SDK_16.0.0_98a08e2/components/ble/ble_services/ble_dfu/ble_dfu.c
    --- nRF5_SDK_16.0.0_98a08e2_vanilla/components/ble/ble_services/ble_dfu/ble_dfu.c	2019-10-18 21:22:36.000000000 +0100
    +++ nRF5_SDK_16.0.0_98a08e2/components/ble/ble_services/ble_dfu/ble_dfu.c	2020-04-25 15:57:42.000000000 +0100
    @@ -50,7 +50,6 @@
     #include "nrf_nvic.h"
     #include "nrf_sdm.h"
     #include "nrf_soc.h"
    -#include "nrf_log.h"
     #include "nrf_dfu_ble_svci_bond_sharing.h"
     #include "nrf_bootloader_info.h"
     #include "nrf_svci_async_function.h"
    @@ -59,6 +58,17 @@
     #include "gatts_cache_manager.h"
     #include "peer_id.h"
     
    +#define NRF_LOG_MODULE_NAME NRF_DFU_BLE
    +#if NRF_DFU_BLE_CONFIG_LOG_ENABLED
    +#define NRF_LOG_LEVEL       NRF_DFU_BLE_CONFIG_LOG_LEVEL
    +#define NRF_LOG_INFO_COLOR  NRF_DFU_BLE_CONFIG_INFO_COLOR
    +#define NRF_LOG_DEBUG_COLOR NRF_DFU_BLE_CONFIG_DEBUG_COLOR
    +#else
    +#define NRF_LOG_LEVEL       0
    +#endif // NRF_DFU_BLE_CONFIG_LOG_ENABLED
    +#include "nrf_log.h"
    +NRF_LOG_MODULE_REGISTER();
    +
     #define MAX_CTRL_POINT_RESP_PARAM_LEN   3                           /**< Max length of the responses. */
     
     #define BLE_DFU_SERVICE_UUID            0xFE59                      /**< The 16-bit UUID of the Secure DFU Service. */
    @@ -191,6 +201,7 @@
                 err_code = ble_dfu_buttonless_bootloader_start_prepare();
                 if (err_code != NRF_SUCCESS)
                 {
    +                NRF_LOG_ERROR("Failed to enter bootloader: 0x%04X", err_code);
                     m_dfu.evt_handler(BLE_DFU_EVT_BOOTLOADER_ENTER_FAILED);
                 }
             }
    diff -Naur -x '*.DS_Store' -x '*.elf' -x '*.hex' -x '*.map' -x '*.o' -x '*.bin' -x _build -x Output -x '*.emSession' -x micro-ecc -x dfu_public_key.c nRF5_SDK_16.0.0_98a08e2_vanilla/components/ble/ble_services/ble_dfu/ble_dfu_bonded.c nRF5_SDK_16.0.0_98a08e2/components/ble/ble_services/ble_dfu/ble_dfu_bonded.c
    --- nRF5_SDK_16.0.0_98a08e2_vanilla/components/ble/ble_services/ble_dfu/ble_dfu_bonded.c	2019-10-18 21:22:36.000000000 +0100
    +++ nRF5_SDK_16.0.0_98a08e2/components/ble/ble_services/ble_dfu/ble_dfu_bonded.c	2020-04-25 15:43:34.000000000 +0100
    @@ -45,15 +45,27 @@
     #include "nordic_common.h"
     #include "nrf_error.h"
     #include "ble_dfu.h"
    -#include "nrf_log.h"
     #include "peer_manager.h"
     #include "gatts_cache_manager.h"
     #include "peer_id.h"
     #include "nrf_sdh_soc.h"
     #include "nrf_strerror.h"
    +#include "app_timer.h"
     
     #if (NRF_DFU_BLE_BUTTONLESS_SUPPORTS_BONDS)
     
    +#define NRF_LOG_MODULE_NAME NRF_DFU_BLE_BONDED
    +#if NRF_DFU_BLE_CONFIG_LOG_ENABLED
    +#define NRF_LOG_LEVEL       NRF_DFU_BLE_CONFIG_LOG_LEVEL
    +    #define NRF_LOG_INFO_COLOR  NRF_DFU_BLE_CONFIG_INFO_COLOR
    +    #define NRF_LOG_DEBUG_COLOR NRF_DFU_BLE_CONFIG_DEBUG_COLOR
    +#else
    +#define NRF_LOG_LEVEL       0
    +#endif // NRF_DFU_BLE_CONFIG_LOG_ENABLED
    +#include "nrf_log.h"
    +NRF_LOG_MODULE_REGISTER();
    +
    +APP_TIMER_DEF(peer_data_set_delay);
     
     void ble_dfu_buttonless_on_sys_evt(uint32_t, void * );
     uint32_t nrf_dfu_svci_vector_table_set(void);
    @@ -102,6 +114,7 @@
                         ret = ble_dfu_buttonless_bootloader_start_finalize();
                         if (ret != NRF_SUCCESS)
                         {
    +                        NRF_LOG_ERROR("Failed to start bootloader: 0x%04X", ret);
                             mp_dfu->evt_handler(BLE_DFU_EVT_BOOTLOADER_ENTER_FAILED);
                         }
                     }
    @@ -113,6 +126,7 @@
                 ret = ble_dfu_buttonless_bootloader_start_finalize();
                 if (ret != NRF_SUCCESS)
                 {
    +                NRF_LOG_ERROR("Failed to enter bootloader after peer data update fail: 0x%04X", ret);
                     mp_dfu->evt_handler(BLE_DFU_EVT_BOOTLOADER_ENTER_FAILED);
                 }
                 break;
    @@ -154,6 +168,46 @@
         return ret;
     }
     
    +static void send_response(ble_dfu_buttonless_op_code_t operation, ble_dfu_buttonless_rsp_code_t rsp_code) {
    +    uint32_t ret = ble_dfu_buttonless_resp_send(operation,
    +                                       rsp_code);
    +
    +    if (ret != NRF_SUCCESS)
    +    {
    +        NRF_LOG_ERROR("Failed to send DFU response: 0x%04X", ret);
    +        mp_dfu->evt_handler(BLE_DFU_EVT_RESPONSE_SEND_ERROR);
    +    }
    +
    +    if(rsp_code != DFU_RSP_SUCCESS) {
    +        // Report the error to the main application
    +        mp_dfu->evt_handler(BLE_DFU_EVT_BOOTLOADER_ENTER_FAILED);
    +    }
    +}
    +
    +static ble_dfu_buttonless_rsp_code_t convert_nrf_error_to_dfu_rsp(uint32_t nrf_error) {
    +    switch(nrf_error) {
    +        case NRF_SUCCESS: return DFU_RSP_SUCCESS;
    +        case NRF_ERROR_BUSY: return DFU_RSP_BUSY;
    +        case NRF_ERROR_FORBIDDEN: return DFU_RSP_NOT_BONDED;
    +        default: return DFU_RSP_OPERATION_FAILED;
    +    }
    +}
    +
    +static uint32_t peer_data_set_attempts_remaining;
    +static void peer_data_set_retry(void * p_context) {
    +    uint32_t ret = nrf_dfu_set_peer_data(&m_peer_data);
    +
    +    if(ret != NRF_ERROR_BUSY || --peer_data_set_attempts_remaining == 0) {
    +        // stop
    +        app_timer_stop(peer_data_set_delay);
    +        if(ret == NRF_SUCCESS) {
    +            NRF_LOG_DEBUG("Successfully initiated peer data set after retry");
    +        } else {
    +            NRF_LOG_ERROR("Failed to write peer data after retries: 0x%04X", ret);
    +            send_response(DFU_OP_ENTER_BOOTLOADER, convert_nrf_error_to_dfu_rsp(ret));
    +        }
    +    }
    +}
     
     /**@brief Function for entering the bootloader.
      *
    @@ -163,10 +217,11 @@
     {
         uint32_t ret;
     
    -    NRF_LOG_INFO("Writing peer data to the bootloader...");
    +    NRF_LOG_INFO("Writing peer data to the bootloader, svci state: %d", nrf_dfu_set_peer_data_svci_async_def.state);
     
         if (mp_dfu->is_waiting_for_svci)
         {
    +        NRF_LOG_WARNING("Already waiting for SVCI");
             return ble_dfu_buttonless_resp_send(DFU_OP_ENTER_BOOTLOADER, DFU_RSP_BUSY);
         }
     
    @@ -175,8 +230,10 @@
         VERIFY_SUCCESS(ret);
     
         ret = nrf_dfu_set_peer_data(&m_peer_data);
    +
         if (ret == NRF_SUCCESS)
         {
    +        NRF_LOG_DEBUG("Successfully initiated peer data set");
             // The request was accepted. Waiting for sys events to progress.
             mp_dfu->is_waiting_for_svci = true;
         }
    @@ -184,6 +241,12 @@
         {
             NRF_LOG_ERROR("The bootloader has write protected its settings page. This prohibits setting the peer data. "\
                           "The bootloader must be compiled with NRF_BL_SETTINGS_PAGE_PROTECT=0 to allow setting the peer data.");
    +    } else if(ret == NRF_ERROR_BUSY) {
    +        NRF_LOG_DEBUG("Set peer data failed due to busy, will retry");
    +        peer_data_set_attempts_remaining = 10;
    +        app_timer_start(peer_data_set_delay, APP_TIMER_TICKS(10), NULL);
    +    } else {
    +        NRF_LOG_ERROR("Failed to set peer data: 0x%04X", ret);
         }
     
         return ret;
    @@ -197,6 +260,8 @@
         // Set the memory used by the backend.
         mp_dfu = p_dfu;
     
    +    app_timer_create(&peer_data_set_delay, APP_TIMER_MODE_REPEATED, peer_data_set_retry);
    +
         // Initialize the Peer manager handler.
         return pm_register(pm_evt_handler);
     }
    @@ -260,6 +325,7 @@
         }
         else
         {
    +        NRF_LOG_ERROR("Failed to set peer data: 0x%04X", ret);
             // Failed to set peer data. Report this.
             mp_dfu->is_waiting_for_reset = false;
             mp_dfu->is_waiting_for_svci  = false;
    @@ -268,6 +334,7 @@
             // Report the failure to send the response to the client
             if (ret != NRF_SUCCESS)
             {
    +            NRF_LOG_ERROR("Failed to send DFU error response: 0x%04X", ret);
                 mp_dfu->evt_handler(BLE_DFU_EVT_RESPONSE_SEND_ERROR);
             }
     
    @@ -308,21 +375,15 @@
         {
             case DFU_OP_ENTER_BOOTLOADER:
                 ret = enter_bootloader();
    -            if (ret == NRF_SUCCESS)
    -            {
    -                rsp_code = DFU_RSP_SUCCESS;
    -            }
    -            else if (ret == NRF_ERROR_BUSY)
    -            {
    -                rsp_code = DFU_RSP_BUSY;
    -            }
    -            else if (ret == NRF_ERROR_FORBIDDEN)
    -            {
    -                rsp_code = DFU_RSP_NOT_BONDED;
    +            if(ret == NRF_ERROR_BUSY) {
    +                // will be retried
    +                return;
                 }
    +            rsp_code = convert_nrf_error_to_dfu_rsp(ret);
                 break;
     
             default:
    +            NRF_LOG_ERROR("Invalid DFU op code: %02X", p_evt_write->data[0]);
                 rsp_code = DFU_RSP_OP_CODE_NOT_SUPPORTED;
                 break;
         }
    @@ -330,16 +391,8 @@
         // Report back in case of error
         if (rsp_code != DFU_RSP_SUCCESS)
         {
    -        ret = ble_dfu_buttonless_resp_send((ble_dfu_buttonless_op_code_t)p_evt_write->data[0],
    -                                            rsp_code);
    -
    -        if (ret != NRF_SUCCESS)
    -        {
    -            mp_dfu->evt_handler(BLE_DFU_EVT_RESPONSE_SEND_ERROR);
    -        }
    -
    -        // Report the error to the main application
    -        mp_dfu->evt_handler(BLE_DFU_EVT_BOOTLOADER_ENTER_FAILED);
    +        NRF_LOG_ERROR("Failed to enter bootloader: 0x%04X", rsp_code);
    +        send_response((ble_dfu_buttonless_op_code_t)p_evt_write->data[0], rsp_code);
         }
     }
     

    The new log:

    <debug> app: PHY update request.
    <debug> nrf_ble_lesc: BLE_GAP_EVT_LESC_DHKEY_REQUEST
    <info> nrf_ble_lesc: Calling sd_ble_gap_lesc_dhkey_reply on conn_handle: 0
    <info> peer_manager_handler: Connection secured: role: Peripheral, conn_handle: 0, procedure: Bonding
    <info> app: Paired
    <info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Bonding data, action: Update
    <info> app: New Bond
    <info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Peer rank, action: Update
    <info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Local database, action: Update
    <info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Local database, action: Update
    <info> NRF_DFU_BLE_BONDED: Writing peer data to the bootloader, svci state: 1
    <debug> NRF_DFU_BLE_BONDED: system attribute table len: 8
    <debug> NRF_DFU_BLE_BONDED: Set peer data failed due to busy, will retry
    <info> peer_manager_handler: Peer data updated in flash: peer_id: 0, data_id: Central address resolution, action: Update
    <debug> NRF_DFU_BLE_BONDED: Successfully initiated peer data set after retry

  • Thanks for reporting and for providing a fix. I have reported this internally. It may be an idea to also call nrf_fstorage_is_busy() from the app timer callback to check that there are no pending flash operations.

Related