This post is older than 2 years and might not be relevant anymore
More Info: Consider searching for newer posts

MESH_CONFIG_STRATEGY_ON_POWER_DOWN files can deadlock mesh_stack_power_down() on load failure

With SDK for Mesh v5.0.0, the replay cache is saved to flash using MESH_CONFIG_STRATEGY_ON_POWER_DOWN. In mesh_config_load(), a check is made to ensure that there is enough space left to save the data on shutdown. If there is not, the file is cleared with mesh_config_backend_file_clean() and m_file_in_progress_cnt is incremented.

If there is an issue with the stored data on flash, mesh_stack_init() will call mesh_stack_config_clear(), which, among other things, ends up calling mesh_config_backend_file_clean() for the replay cache. However, this file has already been queued for cleaning, so the backend simply ignores the request. But mesh_stack_config_clear() will still increment m_file_in_progress_cnt.

Because the counter has now been incremented twice for a single clean operation, m_file_in_progress_cnt never reaches 0. As a result, NRF_MESH_EVT_CONFIG_STABLE is never emitted, causing things like mesh_stack_power_down() to deadlock.

The following patch makes mesh_config_backend_file_clean() report back to the caller when the backend is already busy cleaning the file, so that the caller can skip incrementing the counter in that case.

--- a/mesh/core/include/mesh_config_backend.h
+++ b/mesh/core/include/mesh_config_backend.h
@@ -190,9 +190,11 @@ uint32_t mesh_config_backend_power_down_time_get(void);
  *
  * It is possible to clean several files in parallel. The backend creates queue from requests.
  *
- * @param[in] p_file File to clean content.
+ * @param[in] p_file      File to clean content.
+ * @retval NRF_SUCCESS    The file is successfully queued for clean.
+ * @retval NRF_ERROR_BUSY The file is in the process of cleaning.
  */
-void mesh_config_backend_file_clean(mesh_config_backend_file_t * p_file);
+uint32_t mesh_config_backend_file_clean(mesh_config_backend_file_t * p_file);
 
 /**
  * Puts the backend in the power down mode.
diff --git a/mesh/core/src/mesh_config.c b/mesh/core/src/mesh_config.c
index a97644d..36d9188 100644
--- a/mesh/core/src/mesh_config.c
+++ b/mesh/core/src/mesh_config.c
@@ -478,8 +478,9 @@ void mesh_config_load(void)
                     }
                 }
 
-                m_file_in_progress_cnt++;
-                mesh_config_backend_file_clean(p_file->p_backend_data);
+                if (mesh_config_backend_file_clean(p_file->p_backend_data) == NRF_SUCCESS) {
+                    m_file_in_progress_cnt++;
+                }
             }
         }
     }
@@ -636,8 +637,9 @@ void mesh_config_file_clear(uint16_t file_id)
 #if PERSISTENT_STORAGE
     if (p_file->strategy != MESH_CONFIG_STRATEGY_NON_PERSISTENT)
     {
-        m_file_in_progress_cnt++;
-        mesh_config_backend_file_clean(p_file->p_backend_data);
+        if (mesh_config_backend_file_clean(p_file->p_backend_data) == NRF_SUCCESS) {
+            m_file_in_progress_cnt++;
+        }
     }
 #endif
 }
diff --git a/mesh/core/src/mesh_config_flashman_glue.c b/mesh/core/src/mesh_config_flashman_glue.c
index 9ad6c09..f442823 100644
--- a/mesh/core/src/mesh_config_flashman_glue.c
+++ b/mesh/core/src/mesh_config_flashman_glue.c
@@ -81,7 +81,7 @@ typedef struct
 static mesh_config_backend_evt_cb_t m_evt_cb;
 static uint8_t m_allocated_page_count;
 
-static void file_remove(mesh_config_backend_file_t * p_file);
+static uint32_t file_remove(mesh_config_backend_file_t * p_file);
 static void file_restore(mesh_config_backend_file_t * p_file);
 
 static const uint8_t * flash_area_end_get(void)
@@ -196,13 +196,13 @@ static void file_ready_listener(void * p_args)
     m_evt_cb(&event);
 }
 
-static void file_remove(mesh_config_backend_file_t * p_file)
+static uint32_t file_remove(mesh_config_backend_file_t * p_file)
 {
     flash_manager_t * p_manager = &p_file->glue_data.flash_manager;
 
     if (flash_manager_is_removing(p_manager))
     {
-        return;
+        return NRF_ERROR_BUSY;
     }
 
     uint32_t status = flash_manager_remove(p_manager);
@@ -226,6 +226,8 @@ static void file_remove(mesh_config_backend_file_t * p_file)
         p_file->glue_data.listener.p_args = p_file;
         flash_manager_mem_listener_register(&p_file->glue_data.listener);
     }
+
+    return NRF_SUCCESS;
 }
 
 static void file_restore(mesh_config_backend_file_t * p_file)
@@ -353,9 +355,9 @@ uint32_t mesh_config_backend_file_create(mesh_config_backend_file_t * p_file)
     return flash_manager_add(p_manager, &config);
 }
 
-void mesh_config_backend_file_clean(mesh_config_backend_file_t * p_file)
+uint32_t mesh_config_backend_file_clean(mesh_config_backend_file_t * p_file)
 {
-    file_remove(p_file);
+    return file_remove(p_file);
 }
 
 uint32_t mesh_config_backend_record_write(mesh_config_backend_file_t * p_file, const uint8_t * p_data, uint32_t length)

Thanks.

Related