Dear All,
I am trying to add the task watchdog to my application, but I am dealing with some interesting issues.
I am using nRF Connect SDK v1.6.1.
I have dealt with several issues, one of them being the issue resolved here:
https://github.com/zephyrproject-rtos/zephyr/issues/39523
After applying the above fix, my device no longer hard-faults, but another strange behaviour appears:
The debug messages that I print come out at a gradually slower pace until they almost stop, and the device then remains in that state.
This is my code.
First, my task_watchdog.c:
#include "task_watchdog.h"
#include "drivers/watchdog.h"
#include "sys/reboot.h"
#include "task_wdt/task_wdt.h"
/* Devicetree node of the hardware watchdog: a node with status "okay" that is
 * compatible with nordic_nrf_watchdog. Used as the task watchdog fallback. */
#define WDT_NODE DT_COMPAT_GET_ANY_STATUS_OKAY(nordic_nrf_watchdog)
/* Number of channels currently registered through task_watchdog_add();
 * adjusted by task_watchdog_add() and task_watchdog_delete(). */
static int wdt_count = 0;
/* Forward declaration of the timeout callback handed to task_wdt_add();
 * the definition is further down in this file. */
void task_wdt_callback(int channel_id, void* user_data);
/**
 * Initialize the task watchdog, using the hardware watchdog as fallback.
 *
 * The default fallback delay is set in prj.conf. If the hardware watchdog
 * device is not ready, the task watchdog is initialized with a NULL device.
 * This function has no return value, so problems are reported through printk
 * instead of being dropped silently.
 */
void task_watchdog_init(void)
{
    const struct device* hw_wdt_dev = DEVICE_DT_GET(WDT_NODE);

    if (!device_is_ready(hw_wdt_dev)) {
        printk("HW watchdog not ready, initializing task watchdog without it\r\n");
        hw_wdt_dev = NULL;
    }

    /* task_wdt_init() returns 0 on success and a negative errno otherwise. */
    int err = task_wdt_init(hw_wdt_dev);
    if (err != 0) {
        printk("task_wdt_init failed: %d\r\n", err);
    }
}
/**
 * Register a task watchdog channel for the given handle.
 *
 * handle->timeout is multiplied by 1000 before it is passed to task_wdt_add();
 * the handle itself is passed as the callback's user data, so it has to stay
 * valid for as long as the channel exists.
 *
 * On success the channel id is stored in handle->id and the channel counter
 * is incremented. On failure handle->id is set to -1.
 *
 * @param handle  Watchdog descriptor to register.
 * @return true if a channel was obtained, false otherwise.
 */
bool task_watchdog_add(task_wdt_t* handle)
{
    handle->id = task_wdt_add(handle->timeout * 1000, task_wdt_callback, handle);

    if (handle->id < 0) {
        /* Normalize every failure code to the "uninitialized" marker. */
        handle->id = -1;
        return false;
    }

    wdt_count += 1;
    return true;
}
/**
 * Remove the task watchdog channel referenced by the handle.
 *
 * task_wdt_delete() returns 0 on success and a negative errno on failure, so
 * success is detected by comparing against 0. On success handle->id is reset
 * to -1 (uninitialized) and the channel counter is decremented.
 *
 * @param handle  Watchdog descriptor whose channel should be deleted.
 * @return true if the channel was deleted, false otherwise.
 */
bool task_watchdog_delete(task_wdt_t* handle)
{
    if (task_wdt_delete(handle->id) != 0) {
        return false;
    }

    handle->id = -1; /* set to uninitialized */
    wdt_count -= 1;
    return true;
}
/**
 * Feed the task watchdog channel referenced by the handle.
 *
 * task_wdt_feed() returns 0 on success and a negative errno on failure, so
 * success is detected by comparing against 0.
 *
 * @param handle  Watchdog descriptor whose channel should be fed.
 * @return true if the channel was fed, false otherwise.
 */
bool task_watchdog_feed(task_wdt_t* handle)
{
    return task_wdt_feed(handle->id) == 0;
}
/**
 * Report how many watchdog channels are currently counted as registered.
 *
 * @return The file-scope channel counter, narrowed to uint8_t.
 */
uint8_t task_watchdog_get_count(void)
{
    const uint8_t registered = (uint8_t)wdt_count;

    return registered;
}
/**
 * Task watchdog timeout callback: report the offending channel and reboot.
 *
 * @param channel_id  Id of the channel that timed out.
 * @param user_data   The task_wdt_t handle that was passed to task_wdt_add().
 */
void task_wdt_callback(int channel_id, void* user_data)
{
    const task_wdt_t* handle = user_data;
    const char* name = "<unnamed>";

    /* Do not dereference a missing handle or name on the way to a reboot. */
    if (handle != NULL && handle->thread_name != NULL) {
        name = handle->thread_name;
    }

    /* The HW WDT fallback is pending, so there is no time to waste here.
     * channel_id is a signed int, hence %d. */
    printk("\r\n**** Timeout on ID %d with name %s ****\r\n\r\n",
           channel_id,
           name);

    /* Ideas: retry a few times using the id, keep a log of which modules
     * time out, or save the event in persistent memory. */

    /* The HW WDT fallback would reboot as well (its default delay is needed
     * to finish printing); reboot explicitly in case the fallback fails. */
    sys_reboot(SYS_REBOOT_COLD);
}
This is the thread where I initialize the task watchdog:
#include "watchdog.h"
#include "task_watchdog.h"
#include <device.h>
#include <drivers/watchdog.h>
#include "../MISC/log_defines.h"
#include "../MISC/reset_cause.h"
#include "logging/log.h"
/* Register this file as a logging module. NOTE(review): LOG_DEFAULT_LEVEL is
 * presumably a project macro from log_defines.h that expands to the module
 * name and level - confirm. */
LOG_MODULE_REGISTER(LOG_DEFAULT_LEVEL(WDT));
/* Priority of the watchdog feeding thread. NOTE(review): whether 14 really is
 * the lowest priority depends on the kernel configuration - confirm. */
#define WATCHDOG_THREAD_PRIORITY 14 // Lowest possible priority
/* Stack size in bytes requested for the watchdog thread. */
#define WATCHDOG_THREAD_STACK_SIZE 2048
/* Thread control block of the watchdog thread created in watchdog_initialize(). */
static struct k_thread watchdog_thread;
/* Stack memory of the watchdog thread. */
K_THREAD_STACK_DEFINE(watchdog_threadStack, WATCHDOG_THREAD_STACK_SIZE);
/** Print one reset-cause line, but only when its flag is set. */
static void print_cause_if(bool is_set, const char* line)
{
    if (is_set) {
        printk("%s", line);
    }
}

/**
 * Print every cause flagged in the given reset-cause record.
 *
 * A header line is always printed, followed by one tab-indented line per
 * flag that is set, in a fixed order.
 */
static void dump_reset_cause(sResetCause_t resetCause)
{
    printk("Reset cause(s):\n");

    print_cause_if(resetCause.ctrlap, "\tCTRLAP\n");
    print_cause_if(resetCause.debugInterface, "\tDIF\n");
    print_cause_if(resetCause.lockup, "\tLOCKUP\n");
    print_cause_if(resetCause.off, "\tOFF\n");
    print_cause_if(resetCause.resetPin, "\tRESETPIN\n");
    print_cause_if(resetCause.software, "\tSREQ\n");
    print_cause_if(resetCause.watchDog, "\tDOG\n");
}
/**
 * Entry point of the watchdog thread.
 *
 * Initializes the task watchdog, registers one channel with a 30 second
 * timeout for this thread, signals the optional semaphore and then feeds the
 * channel every 5 seconds.
 *
 * If the channel cannot be registered, the failure is reported, the semaphore
 * is still given so a waiting caller is not blocked, and the thread returns
 * instead of feeding a channel that does not exist.
 *
 * @param a  Optional struct k_sem* given once initialization is done; may be NULL.
 * @param b  Unused.
 * @param c  Unused.
 */
static void watchdog_entry(void* a, void* b, void* c)
{
    (void)b;
    (void)c;

    struct k_sem* sem = a;

    k_thread_name_set(NULL, "watchdog");

    task_watchdog_init();

    /* The handle is passed to the timeout callback as user data. It lives on
     * this thread's stack, which is fine because the thread never returns
     * while the channel is registered. */
    task_wdt_t thread_watchdog = { 0 };
    thread_watchdog.timeout = 30; /* timeout in seconds */
    thread_watchdog.thread_name = k_thread_name_get(k_current_get());

    const bool registered = task_watchdog_add(&thread_watchdog);

    printk("\r\nThread name: %s\r\n", thread_watchdog.thread_name);
    if (!registered) {
        printk("Failed to register task watchdog channel.\r\n");
    }

    if (sem) {
        k_sem_give(sem);
    }

    if (!registered) {
        return;
    }

    while (1) {
        k_sleep(K_SECONDS(5));
        printk("Feeding watchdog.\r\n");
        task_watchdog_feed(&thread_watchdog);
    }
}
/**
 * Print the last reset cause and spawn the watchdog thread.
 *
 * @param sem  Optional semaphore forwarded to the watchdog thread as its
 *             first argument; may be NULL.
 * @return 1 if a semaphore was supplied, 0 otherwise.
 */
uint8_t watchdog_initialize(struct k_sem* sem)
{
    dump_reset_cause(getResetCause());

    /* Use K_THREAD_STACK_SIZEOF() rather than sizeof(): sizeof() on a stack
     * object also counts space reserved by the kernel, which would overstate
     * the usable stack size. */
    k_thread_create(&watchdog_thread,
                    watchdog_threadStack,
                    K_THREAD_STACK_SIZEOF(watchdog_threadStack),
                    watchdog_entry,
                    sem, NULL, NULL,
                    WATCHDOG_THREAD_PRIORITY,
                    0,
                    K_NO_WAIT);

    return (sem != NULL) ? 1 : 0;
}
The result is that the debug messages get printed very slowly, character by character, at a gradually decreasing rate.
I am not sure what exactly the issue is, but when I set the CONFIG_TASK_WDT_HW_FALLBACK, the device behaves normally again.
The related prj.conf that I am using is:
# TASK WATCHDOG
CONFIG_TASK_WDT=y
CONFIG_TASK_WDT_MIN_TIMEOUT=1000
CONFIG_TASK_WDT_CHANNELS=3
CONFIG_TASK_WDT_HW_FALLBACK=y
CONFIG_TASK_WDT_HW_FALLBACK_DELAY=1000
# Watchdog
CONFIG_WATCHDOG=y
CONFIG_WDT_DISABLE_AT_BOOT=y