This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Task Watchdog is causing the device to slow down

Dear All,

I am trying to add the task watchdog to my application, but I am dealing with some interesting issues.

The NRF SDK version that I am using is v1.6.1.

I have dealt with several issues, one of them being the issue resolved here:
https://github.com/zephyrproject-rtos/zephyr/issues/39523

After applying the above fix, my device would not hard-fault, but another strange behaviour would appear:

The debug messages that I am using start to be printed at a gradually slower pace until they almost stop, and the device then just remains in that state.

This is my code:

This is my task_watchdog.c

#include "task_watchdog.h"

#include "drivers/watchdog.h"
#include "sys/reboot.h"
#include "task_wdt/task_wdt.h"

/* Devicetree node of the hardware watchdog used as the task watchdog fallback:
 * any enabled ("okay") node with the nordic,nrf-watchdog compatible. */
#define WDT_NODE DT_COMPAT_GET_ANY_STATUS_OKAY(nordic_nrf_watchdog)

/* Number of task watchdog channels currently registered through this module;
 * incremented by task_watchdog_add() and decremented by task_watchdog_delete(). */
static int wdt_count = 0;

// Callback invoked by the task watchdog when a channel times out.
// Declared here because task_watchdog_add() references it before its definition.
void task_wdt_callback(int channel_id, void* user_data);

/**
 * Initialise the task watchdog, using the hardware watchdog selected by
 * WDT_NODE as fallback when that device is ready.
 *
 * The fallback delay itself is configured in prj.conf.
 *
 * Failures are reported on the console instead of being silently dropped:
 * task_wdt_init() returns a negative errno when it cannot set up the
 * requested configuration, and in that case the task watchdog must not be
 * assumed to be running.
 */
void task_watchdog_init(void)
{
    const struct device* hw_wdt_dev = DEVICE_DT_GET(WDT_NODE);

    if (!device_is_ready(hw_wdt_dev)) {
        /* No usable HW watchdog: continue without a hardware fallback. */
        printk("Task WDT: hardware watchdog not ready, running without fallback\r\n");
        hw_wdt_dev = NULL;
    }

    int err = task_wdt_init(hw_wdt_dev);

    if (err != 0) {
        printk("Task WDT: task_wdt_init failed (%d)\r\n", err);
    }
}

/**
 * Register a new task watchdog channel for @p handle.
 *
 * handle->timeout is scaled by 1000 before being passed to task_wdt_add()
 * (the caller in this project sets it in seconds), and the handle itself is
 * passed as the callback's user data.
 *
 * @param handle  Descriptor to register; its id field receives the channel id
 *                on success and -1 on failure.
 * @return true when a channel was allocated, false otherwise.
 */
bool task_watchdog_add(task_wdt_t* handle)
{
    handle->id = task_wdt_add(handle->timeout * 1000, task_wdt_callback, handle);

    if (handle->id < 0) {
        /* Normalise any error code to the "uninitialised" marker. */
        handle->id = -1;
        return false;
    }

    wdt_count++; /* one more active channel */
    return true;
}

/**
 * Remove the task watchdog channel referenced by @p handle.
 *
 * task_wdt_delete() reports success as 0 and failure as a negative errno,
 * so success must be tested with "== 0" (a "> 0" test can never succeed and
 * would leave the handle and the channel counter stale).
 *
 * @param handle  Descriptor whose id identifies the channel to delete; on
 *                success its id is reset to -1.
 * @return true when the channel was deleted, false otherwise.
 */
bool task_watchdog_delete(task_wdt_t* handle)
{
    if (task_wdt_delete(handle->id) != 0) {
        return false;
    }

    handle->id = -1; /* mark as uninitialised */
    wdt_count -= 1;  /* one active channel fewer */
    return true;
}

/**
 * Feed (reset the timeout of) the task watchdog channel of @p handle.
 *
 * task_wdt_feed() returns 0 on success and a negative errno on failure,
 * so success is "== 0"; testing for "> 0" would always report failure.
 *
 * @param handle  Descriptor whose id identifies the channel to feed.
 * @return true when the channel was fed, false otherwise.
 */
bool task_watchdog_feed(task_wdt_t* handle)
{
    return task_wdt_feed(handle->id) == 0;
}

/**
 * Report how many task watchdog channels are currently registered through
 * this module.
 *
 * @return Current value of the module's channel counter, narrowed to uint8_t.
 */
uint8_t task_watchdog_get_count(void)
{
    const uint8_t active_channels = (uint8_t)wdt_count;

    return active_channels;
}

/**
 * Task watchdog timeout callback.
 *
 * Prints which channel expired (with the thread name stored in the handle
 * that was registered as user data) and then requests a cold reboot.
 *
 * @param channel_id  Id of the channel that timed out.
 * @param user_data   The task_wdt_t handle passed to task_wdt_add().
 */
void task_wdt_callback(int channel_id, void* user_data)
{
    task_wdt_t* handle = user_data; // Cast to struct

    /* We have WDT HW fallback to account for, no time to waste */
    /* channel_id is a signed int, so it is printed with %d. */
    printk("\r\n**** Timeout on ID %d with name %s ****\r\n\r\n",
           channel_id,
           handle->thread_name);

    /* Can retry a few times. because we have the ID... Keep a log of which modules timeout? */

    /* Can try to save in persistent memory */

    /* Reboots due to HW WDT Fallback, default delay needed to finish printing */

    /* In case of WDT HW fallback failure, still do a reboot */
    sys_reboot(SYS_REBOOT_COLD);
}


This is the thread where I initialize the task watchdog:

#include "watchdog.h"
#include "task_watchdog.h"

#include <device.h>
#include <drivers/watchdog.h>
#include "../MISC/log_defines.h"
#include "../MISC/reset_cause.h"
#include "logging/log.h"

/* Register this file as a logging module. */
LOG_MODULE_REGISTER(LOG_DEFAULT_LEVEL(WDT));

/* Scheduling priority and stack size (bytes) of the watchdog feeder thread
 * spawned by watchdog_initialize(). */
#define WATCHDOG_THREAD_PRIORITY 14 // Lowest possible priority
#define WATCHDOG_THREAD_STACK_SIZE 2048
/* Thread control block and stack memory for that thread. */
static struct k_thread watchdog_thread;
K_THREAD_STACK_DEFINE(watchdog_threadStack, WATCHDOG_THREAD_STACK_SIZE);

/**
 * Print the cause(s) of the last reset.
 *
 * A header line is always printed, followed by one tab-indented line for
 * every flag that is set in @p resetCause.
 */
static void dump_reset_cause(sResetCause_t resetCause)
{
    /* Flag/label pairs, listed in the order in which they are reported. */
    const struct {
        bool        is_set;
        const char* line;
    } causes[] = {
        { resetCause.ctrlap,         "\tCTRLAP\n" },
        { resetCause.debugInterface, "\tDIF\n" },
        { resetCause.lockup,         "\tLOCKUP\n" },
        { resetCause.off,            "\tOFF\n" },
        { resetCause.resetPin,       "\tRESETPIN\n" },
        { resetCause.software,       "\tSREQ\n" },
        { resetCause.watchDog,       "\tDOG\n" },
    };

    printk("Reset cause(s):\n");

    for (size_t idx = 0; idx < sizeof(causes) / sizeof(causes[0]); idx++) {
        if (causes[idx].is_set) {
            printk("%s", causes[idx].line);
        }
    }
}

/**
 * Entry point of the watchdog thread.
 *
 * Initialises the task watchdog, registers one channel for this thread
 * (30 s timeout), signals the optional semaphore, and then feeds the channel
 * every 5 s forever.
 *
 * @param a  Optional struct k_sem* given once set-up is done; may be NULL.
 * @param b  Unused.
 * @param c  Unused.
 */
static void watchdog_entry(void* a, void* b, void* c)
{
    (void)b;
    (void)c;

    k_thread_name_set(NULL, "watchdog");
    // Cast the first argument to struct k_sem*
    struct k_sem* sem = a;
    // Init task watchdog
    task_watchdog_init();

    /* The handle lives on this thread's stack; that is safe only because
     * this function never returns, so the pointer handed to the task
     * watchdog as user data stays valid. */
    task_wdt_t thread_watchdog = { 0 };
    thread_watchdog.timeout = 30; // This is the timeout in seconds.
    thread_watchdog.thread_name = k_thread_name_get(k_current_get());

    /* task_watchdog_add() returns true on success; report a failed
     * registration instead of silently feeding an invalid channel. */
    if (!task_watchdog_add(&thread_watchdog)) {
        printk("Failed to register task watchdog channel.\r\n");
    }

    printk("\r\nThread name: %s\r\n", thread_watchdog.thread_name);
    if (sem) {
        k_sem_give(sem);
    }
    while (1) {
        k_sleep(K_SECONDS(5));
        printk("Feeding watchdog.\r\n");
        task_watchdog_feed(&thread_watchdog);
    }
}

/**
 * Print the last reset cause and spawn the watchdog thread.
 *
 * @param sem  Optional semaphore forwarded to the thread, which gives it once
 *             the task watchdog has been set up; may be NULL.
 * @return 1 when a semaphore was supplied, 0 otherwise.
 */
uint8_t watchdog_initialize(struct k_sem* sem)
{
    // Read the reset cause
    dump_reset_cause(getResetCause());

    // Spawn the watchdog thread.
    // The usable stack size must come from K_THREAD_STACK_SIZEOF(): sizeof()
    // on a K_THREAD_STACK_DEFINE'd object may also count reserved/alignment
    // bytes that are not available to the thread.
    k_thread_create(&watchdog_thread,
                    watchdog_threadStack,
                    K_THREAD_STACK_SIZEOF(watchdog_threadStack),
                    watchdog_entry,
                    sem, NULL, NULL,
                    WATCHDOG_THREAD_PRIORITY,
                    0,
                    K_NO_WAIT);

    return sem ? 1 : 0;
}


The result is that the debug messages get printed very slowly, character by character, at a gradually decreasing rate.

I am not sure what exactly the issue is, but when I set the CONFIG_TASK_WDT_HW_FALLBACK option, the device behaves normally again.

The related prj.conf that I am using is:

# TASK WATCHDOG
CONFIG_TASK_WDT=y
CONFIG_TASK_WDT_MIN_TIMEOUT=1000
CONFIG_TASK_WDT_CHANNELS=3
CONFIG_TASK_WDT_HW_FALLBACK=y
CONFIG_TASK_WDT_HW_FALLBACK_DELAY=1000

# Watchdog
CONFIG_WATCHDOG=y
CONFIG_WDT_DISABLE_AT_BOOT=y

Parents Reply Children
Related