Central uart on thingy91:nrf52840 unstabile ble_data_received

Hi,

I am running central_uart example on thingy91:nrf52840 it is paired up with Thingy52 running peripheral_uart example.

everything seems to run fine i send and receive msg every 5 sec.
my problem is that the ble_data_received is toggling with triggered when buffer is full (40 bytes) and end of msg that is normaly 15-20 bytes.

how can i controll this? is there any time settings or end of msg character i can use

Rune

/*
 * Copyright (c) 2018 Nordic Semiconductor ASA
 *modifyed by rv
 * SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
 */

/** @file
 *  @brief Nordic UART Service Client sample
 */

#include <errno.h>
#include <zephyr/kernel.h>
#include <zephyr/device.h>
#include <zephyr/devicetree.h>
#include <zephyr/sys/byteorder.h>
#include <zephyr/sys/printk.h>

#include <zephyr/bluetooth/bluetooth.h>
#include <zephyr/bluetooth/hci.h>
#include <zephyr/bluetooth/conn.h>
#include <zephyr/bluetooth/uuid.h>
#include <zephyr/bluetooth/gatt.h>

#include <bluetooth/services/nus.h>
#include <bluetooth/services/nus_client.h>
#include <bluetooth/gatt_dm.h>
#include <bluetooth/scan.h>

#include <zephyr/settings/settings.h>

#include <zephyr/drivers/uart.h>

#include <zephyr/logging/log.h>


#define LOG_MODULE_NAME central_uart
LOG_MODULE_REGISTER(LOG_MODULE_NAME);

/* UART payload buffer element size. */
#define UART_BUF_SIZE 40

#define KEY_PASSKEY_ACCEPT DK_BTN1_MSK
#define KEY_PASSKEY_REJECT DK_BTN2_MSK

#define NUS_WRITE_TIMEOUT K_MSEC(150)
#define UART_WAIT_FOR_BUF_DELAY K_MSEC(50)
#define UART_RX_TIMEOUT 500

static const struct device *uart = DEVICE_DT_GET(DT_NODELABEL(uart1));
static struct k_work_delayable uart_work;

K_SEM_DEFINE(nus_write_sem, 0, 1);

struct uart_data_t {
	void *fifo_reserved;
	uint8_t  data[UART_BUF_SIZE];
	uint16_t len;
};



struct uart_data_t *uart_rx_buf;
uint8_t  uart_tx_buf[UART_BUF_SIZE];

static K_FIFO_DEFINE(fifo_uart_tx_data);
static K_FIFO_DEFINE(fifo_uart_rx_data);

static struct bt_conn *default_conn;
static struct bt_nus_client nus_client;

bool btcon=false;

static void ble_data_sent(struct bt_nus_client *nus, uint8_t err,
					const uint8_t *const data, uint16_t len)
{
	ARG_UNUSED(nus);
/*
	struct uart_data_t *buf;

	// Retrieve buffer context.
	buf = CONTAINER_OF(data, struct uart_data_t, data);
	k_free(buf);
*/
	k_sem_give(&nus_write_sem);

	if (err) {
		LOG_WRN("ATT error code: 0x%02X", err);
	}
	
}

static uint8_t ble_data_received(struct bt_nus_client *nus,
						const uint8_t *data, uint16_t len)
{
	ARG_UNUSED(nus);

		int err;
		LOG_INF("BLE data received!  %d,:%s",len,data);

		for (uint16_t x=0;x<len;x++){
			if ( data[x]=='\n' || x==len-1 ){
				uart_tx_buf[x]=data[x];
				uart_tx_buf[x+1]='\0';
				err = uart_tx(uart,uart_tx_buf, x+1, SYS_FOREVER_MS);
				if (err) {
				LOG_INF("Error Sending data over UART");
				}else{
					LOG_INF("Sending data over UART to nrf91! %d,%s",x,uart_tx_buf);
				}
				break;
			}
		uart_tx_buf[x]=data[x];		
		}
	
	return BT_GATT_ITER_CONTINUE;
}

static void uart_cb(const struct device *dev, struct uart_event *evt, void *user_data)
{
	ARG_UNUSED(dev);
	int err;


	switch (evt->type) {
	
	case UART_RX_RDY:
		uart_rx_buf = CONTAINER_OF(evt->data.rx.buf, struct uart_data_t, data);
		uart_rx_buf->len=evt->data.rx.len;
		if (evt->data.rx.buf[uart_rx_buf->len - 1] == '\n') { // removed or '\r'
			k_fifo_put(&fifo_uart_rx_data, uart_rx_buf);
			
			uart_rx_disable(uart);
		}


		break;

	case UART_RX_DISABLED:
		LOG_DBG("UART_RX_DISABLED");

		uart_rx_enable(uart, uart_rx_buf->data, sizeof(uart_rx_buf->data),
			       UART_RX_TIMEOUT);

		break;

	default:
		break;

	}
	
}

static int uart_init(void)
{
	
	int err;
	struct uart_data_t *rx;

	if (!device_is_ready(uart)) {
		LOG_ERR("UART device not ready");
		return -ENODEV;
	}

	rx = k_malloc(sizeof(*rx));
	if (rx) {
		rx->len = 0;
	} else {
		return -ENOMEM;
	}



	err = uart_callback_set(uart, uart_cb, NULL);
	if (err) {
		return err;
	}

	return uart_rx_enable(uart, rx->data, sizeof(rx->data),
			      UART_RX_TIMEOUT);
	
}


static void discovery_complete(struct bt_gatt_dm *dm,
			       void *context)
{
	struct bt_nus_client *nus = context;
	LOG_INF("Service discovery completed");

	bt_gatt_dm_data_print(dm);

	bt_nus_handles_assign(dm, nus);
	bt_nus_subscribe_receive(nus);

	bt_gatt_dm_data_release(dm);
}

static void discovery_service_not_found(struct bt_conn *conn,
					void *context)
{
	LOG_INF("Service not found");
}

static void discovery_error(struct bt_conn *conn,
			    int err,
			    void *context)
{
	LOG_WRN("Error while discovering GATT database: (%d)", err);
}

struct bt_gatt_dm_cb discovery_cb = {
	.completed         = discovery_complete,
	.service_not_found = discovery_service_not_found,
	.error_found       = discovery_error,
};

static void gatt_discover(struct bt_conn *conn)
{
	int err;

	if (conn != default_conn) {
		return;
	}

	err = bt_gatt_dm_start(conn,
			       BT_UUID_NUS_SERVICE,
			       &discovery_cb,
			       &nus_client);
	if (err) {
		LOG_ERR("could not start the discovery procedure, error "
			"code: %d", err);
	}
}

static void exchange_func(struct bt_conn *conn, uint8_t err, struct bt_gatt_exchange_params *params)
{
	if (!err) {
		LOG_INF("MTU exchange done");
	} else {
		LOG_WRN("MTU exchange failed (err %" PRIu8 ")", err);
	}
}

static void connected(struct bt_conn *conn, uint8_t conn_err)
{
	char addr[BT_ADDR_LE_STR_LEN];
	int err;

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (conn_err) {
		LOG_INF("Failed to connect to %s (%d)", log_strdup(addr),
			conn_err);

		if (default_conn == conn) {
			bt_conn_unref(default_conn);
			default_conn = NULL;

			err = bt_scan_start(BT_SCAN_TYPE_SCAN_ACTIVE);
			if (err) {
				LOG_ERR("Scanning failed to start (err %d)",
					err);
			}
		}

		return;
	}

	LOG_INF("Connected: %s", log_strdup(addr));

	static struct bt_gatt_exchange_params exchange_params;

	exchange_params.func = exchange_func;
	err = bt_gatt_exchange_mtu(conn, &exchange_params);
	if (err) {
		LOG_WRN("MTU exchange failed (err %d)", err);
	}

	err = bt_conn_set_security(conn, BT_SECURITY_L2);
	if (err) {
		LOG_WRN("Failed to set security: %d", err);

		gatt_discover(conn);
	}

	err = bt_scan_stop();
	if ((!err) && (err != -EALREADY)) {
		LOG_ERR("Stop LE scan failed (err %d)", err);
	}
}

static void disconnected(struct bt_conn *conn, uint8_t reason)
{
	char addr[BT_ADDR_LE_STR_LEN];
	int err;

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	LOG_INF("Disconnected: %s (reason %u)", log_strdup(addr),
		reason);
	btcon=false;
	if (default_conn != conn) {
		return;
	}

	bt_conn_unref(default_conn);
	default_conn = NULL;

	err = bt_scan_start(BT_SCAN_TYPE_SCAN_ACTIVE);
	if (err) {
		LOG_ERR("Scanning failed to start (err %d)",
			err);
	}
}

static void security_changed(struct bt_conn *conn, bt_security_t level,
			     enum bt_security_err err)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (!err) {
		LOG_INF("Security changed: %s level %u", log_strdup(addr),
			level);
	} else {
		LOG_WRN("Security failed: %s level %u err %d", log_strdup(addr),
			level, err);
	}

	gatt_discover(conn);
}

BT_CONN_CB_DEFINE(conn_callbacks) = {
	.connected = connected,
	.disconnected = disconnected,
	.security_changed = security_changed
};

static void scan_filter_match(struct bt_scan_device_info *device_info,
			      struct bt_scan_filter_match *filter_match,
			      bool connectable)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(device_info->recv_info->addr, addr, sizeof(addr));

	LOG_INF("Filters matched. Address: %s connectable: %d",
		log_strdup(addr), connectable);
	btcon=true;
}

static void scan_connecting_error(struct bt_scan_device_info *device_info)
{
	LOG_WRN("Connecting failed");
}

static void scan_connecting(struct bt_scan_device_info *device_info,
			    struct bt_conn *conn)
{
	default_conn = bt_conn_ref(conn);
}

static int nus_client_init(void)
{
	int err;
	struct bt_nus_client_init_param init = {
		.cb = {
			.received = ble_data_received,
			.sent = ble_data_sent,
		}
	};

	err = bt_nus_client_init(&nus_client, &init);
	if (err) {
		LOG_ERR("NUS Client initialization failed (err %d)", err);
		return err;
	}

	LOG_INF("NUS Client module initialized");
	return err;
}

BT_SCAN_CB_INIT(scan_cb, scan_filter_match, NULL,
		scan_connecting_error, scan_connecting);

static int scan_init(void)
{
	int err;
	struct bt_scan_init_param scan_init = {
		.connect_if_match = 1,
	};

	bt_scan_init(&scan_init);
	bt_scan_cb_register(&scan_cb);
	//static uint8_t cname[] = "CriticAlert";
	err = bt_scan_filter_add(BT_SCAN_FILTER_TYPE_UUID, BT_UUID_NUS_SERVICE);
	//err = bt_scan_filter_add(BT_SCAN_FILTER_TYPE_NAME,*cname);
	if (err) {
		LOG_ERR("Scanning filters cannot be set (err %d)", err);
		return err;
	}

	err = bt_scan_filter_enable(BT_SCAN_UUID_FILTER, false);
	//err = bt_scan_filter_enable(BT_SCAN_NAME_FILTER, false);
	if (err) {
		LOG_ERR("Filters cannot be turned on (err %d)", err);
		return err;
	}

	LOG_INF("Scan module initialized");
	return err;
}


static void auth_cancel(struct bt_conn *conn)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	LOG_INF("Pairing cancelled: %s", log_strdup(addr));
}


static void pairing_complete(struct bt_conn *conn, bool bonded)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	LOG_INF("Pairing completed: %s, bonded: %d", log_strdup(addr),
		bonded);
}


static void pairing_failed(struct bt_conn *conn, enum bt_security_err reason)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	LOG_WRN("Pairing failed conn: %s, reason %d", log_strdup(addr),
		reason);
}

static struct bt_conn_auth_cb conn_auth_callbacks = {
	.cancel = auth_cancel,
};

static struct bt_conn_auth_info_cb conn_auth_info_callbacks = {
	.pairing_complete = pairing_complete,
	.pairing_failed = pairing_failed
};

void main(void)
{
	int err;

	err = bt_conn_auth_cb_register(&conn_auth_callbacks);
	if (err) {
		LOG_ERR("Failed to register authorization callbacks.");
		return;
	}

	err = bt_conn_auth_info_cb_register(&conn_auth_info_callbacks);
	if (err) {
		printk("Failed to register authorization info callbacks.\n");
		return;
	}

	err = bt_enable(NULL);
	if (err) {
		LOG_ERR("Bluetooth init failed (err %d)", err);
		return;
	}
	LOG_INF("Bluetooth initialized");

	if (IS_ENABLED(CONFIG_SETTINGS)) {
		settings_load();
	}

	int (*module_init[])(void) = {uart_init, scan_init, nus_client_init};
	for (size_t i = 0; i < ARRAY_SIZE(module_init); i++) {
		err = (*module_init[i])();
		if (err) {
			return;
		}
	}

	printk("Starting Bluetooth Central UART example\n");


	err = bt_scan_start(BT_SCAN_TYPE_SCAN_ACTIVE);
	if (err) {
		LOG_ERR("Scanning failed to start (err %d)", err);
		return;
	}

	LOG_INF("Scanning successfully started");


	for (;;) {
		/* Wait indefinitely for data to be sent over Bluetooth */
		
		struct uart_data_t *nusbuf = k_fifo_get(&fifo_uart_rx_data,
						     K_FOREVER);
		
		err = bt_nus_client_send(&nus_client, nusbuf->data, nusbuf->len);
		if (err) {
			LOG_WRN("Failed to send data over BLE connection"
				"(err %d)", err);
		}
		else
		{
			LOG_INF("Sent data over BLE from nrf52: %d:%s", nusbuf->len,nusbuf->data);
		}

		err = k_sem_take(&nus_write_sem, NUS_WRITE_TIMEOUT);
		if (err) {
			LOG_WRN("NUS send timeout");
		}
	
		k_msleep(5);
	}
}

Parents
  • Hei Rune,

    The ble_data_received() callback should be invoked each time you receive a BLE notification packet from the peripheral regardless of what the packet size is. How do you determine that the last 15-20 byte message is not received, are you looking at the messages relayed to the uart interface, or at the debug log messages on RTT (those printed with the LOG_* macros)?

    Best regards,

    Vidar

  • Hi Vidar,

    I am Looking at the Log msg on the rtt. the peripheral reveives a msg on bt_nus from central unit and passes it thrue to the uart. the reply from uart is then sent to central uart thrue bt_nus. ( standard uart bridge ).
    I can see on the rtt that the peripheral unit is sending and receiving good. on the central unit the ble_data_received() trigger when buffer is full(40 byte) and the data contet is 2 or 3 mesgs.
    This is not constant, suddenly it can work as expected and trigger on one msg and then go backto trigger on buffer full.

    Br

    Rune

  • Hi Vidar,

    When i flash another device i got this error:

    [00:00:00.004,760] <dbg> peripheral_uart: uart_cb: UART_RX_BUF_REQUEST
    [00:00:00.008,056] <dbg> peripheral_uart: uart_cb: UART_TX_DONE
    [00:00:00.017,822] <err> os: ***** MPU FAULT *****
    [00:00:00.017,852] <err> os:   Stacking error (context area might be not valid)
    [00:00:00.017,852] <err> os:   Data Access Violation
    [00:00:00.017,883] <err> os:   MMFAR Address: 0x20006ef8
    [00:00:00.017,913] <err> os: r0/a1:  0x0002f736  r1/a2:  0x01000000  r2/a3:  0xcfbdb380
    [00:00:00.017,913] <err> os: r3/a4:  0x3e0aa44c r12/ip:  0x857e7f62 r14/lr:  0x86a02161
    [00:00:00.017,944] <err> os:  xpsr:  0x6a58c000
    [00:00:00.017,944] <err> os: Faulting instruction address (r15/pc): 0xe51528b3
    [00:00:00.018,005] <err> os: >>> ZEPHYR FATAL ERROR 2: Stack overflow on CPU 0
    [00:00:00.018,035] <err> os: Current thread: 0x200026b0 (main)
    [00:00:00.348,754] <err> fatal_error: Resetting system

    there is no code change from te first device.

    Please advice 

    Br

    Rune

  • Hi Rune,

    The log shows you have a stack overflow in your main thread. This can be fixed by increasing the stack size (CONFIG_MAIN_STACK_SIZE). But it does not make sense that you get this on one board and not the other if you are running the same FW build and the same boards. 

    I notice you have debug options enabled now; this will lead to some increase in stack usage. Do you also get a stack overflow if you uncheck the "enable debug options" checkbox in your project configuration?

    With regards to the original problem you reported, I am still waiting for the developers to have a look at it and find a root cause.

    Best regards,

    Vidar

  • Tanks Vidar,

    By disabling debug option made the error disappear.

    Regards

    Rune

  • Presently using nRF5340, NCS 2.2.0

    I am seeing this UART crash relating to uart_rx_disable() / uart_rx_enable(), even when using NCS 2.2.0

    This was mentioned above as a fix in NCS 2.1.3


    I have a shell running on our USB CDC.

    UART0 is enabled for async API, and UART is communicating to an external AT modem. (115K2, N, 8, 1, no flow control)

    In our application, when I perform a uart_rx_disable(), I will subsequently call uart_rx_enable() about 1.5 seconds later.

    After the uart_rx_enable(), the crash will occur only after some subsequent transmit / receive activity on UART0. I can send the AT commands via shell commands. It only takes 5-20 AT/OK commands for the spontaneous crash to occur.

    If I comment any code relating to uart_rx_disable() / uart_rx_enable(), then all UART0 AT communications remain flawless forever.


    Would be curious is you have any insights into `I suspect the issue may be related to the UART async adapter which is enabled when USB CDC is used`


    I suspect it is related to the USB CDC.

    If I disable the "debug option" during my build, this crash still occurs.

  • What I recall from debugging this is that I sometimes did not receive the UART_RX_BUF_RELEASED event from the async adapter. But I did not really debug it much further. Instead, I reported it as a bug internally to let the developers continue the investigation. 

    Below are the commits that fixed the issue mentioned in the v2.1.3 release notes (are also included in v.2.3.0)

    https://github.com/nrfconnect/sdk-zephyr/commit/595fbf9b975103eede31c2b5c68a392517f252f6 

    https://github.com/zephyrproject-rtos/zephyr/commit/c984a343afdb06abb7cf8cca03c00eacdf88675e 

    https://github.com/zephyrproject-rtos/zephyr/pull/52340/commits/4a9c18f806914a910163e65da417a34becb92531 

    And based on the commit messages, it does not sound like they are addressing the problems OP reported here.

    mtsunstrum said:

    I suspect it is related to the USB CDC.

    If I disable the "debug option" during my build, this crash still occurs.

    Is it an option to try v.2.3.0 to see if it fixes the problem you experienced?

    Thanks

    Vidar

Reply Children
  • Thanks  for the response.

    I won't yet try out NCS 2.3.0, but I found a workaround that may highlight where the issue may reside.

    In my application, I have an external AT modem where at times I need to reset it.

    This causes the AT modem to send a UART break signal to the nRF5340, which lasts about 600ms.

    Before resetting the AT modem, I would call uart_rx_disable() first, so as not to trigger UART framing/break errors. I then subsequently call uart_rx_enable() about 1500ms later, after the AT modem has stabilized.

    After uart_rx_enable() via USB shell interface, I would send simple "AT" messages, and get an "OK" back from the modem. But after about 2-8 attempts, I would get the random crash.


    To "workaround" the issue, what I did is not call uart_rx_enable()

    In this scenario, once we get the UART break signal from the UART modem, our uart callback handler would get about 4 UART_RX_STOPPED events with reason codes 4 and 8 ( framing error and break signal ), then it would be followed by a UART_RX_DISABLED event.

    Upon receiving above events, I would take no action. In this scenario, the uart library is taking care of disabling the uart receive.

    In my code, after resetting the AT modem, I would simply sleep for 1500ms, then I would simply call uart_rx_enable() appropriately.

    AT command operations were flawless after this.


    It gives me a suspicion that the programmatic uart_rx_disable() operation is suspect. If I just let the uart receive disable itself upon receiving a uart break condition as described above, it works.

  • It sounds like you have a good workaround in place. I still think what you describe may be a result of the UART driver bug I mentioned. The fix was included in v2.1.3 and 2.3.0, but not in v.2.2.0 which is the oldest release of the three.

    mtsunstrum said:
    This causes the AT modem to send a UART break signal to the nRF5340, which lasts about 600ms.

    Assuming the modem is not driving the signal low, it should be possible to avoid the break signal by enabling the internal pull-up on the RX line, as we are doing in our board files: https://github.com/nrfconnect/sdk-zephyr/blob/main/boards/arm/nrf52840dk_nrf52840/nrf52840dk_nrf52840-pinctrl.dtsi#L15

  • Hi   thanks for your reply.

    I thought I had it under control, and it worked most of the time ( maybe 80% fewer lockups), so likely I was on the right path. 

    Unfortunately, the only way out of the situation was for my firmware to do a cold reset, after a UART break had been detected. Fortunately, my application logic allowed me to use this "hacky" workaround.

    You mentioned the fix not being in v2.2.0, but it was in the earlier v2.1.3 and the later v2.3.0. Are you correct in saying v2.2.0 did not have the fix ?

    Yes, in my situation, the modem is driving the break signal, so usage of UART RX pullup would not help me out.

    Cheers

  • Hi,

    Yes, v2.2.0 does not have the fix which may seem a bit counterintuitive. The thing is that v2.1.3 was a minor release made after v2.2.0 had been tagged with fixes backported from v2.2.0 and the main branch. Ideally there should have been a 2.2.1 version to address this problem, but it was not prioritized.

    https://developer.nordicsemi.com/nRF_Connect_SDK/doc/latest/nrf/releases/release-notes-2.1.3.html 

    Cheers

    Vidar

  • Aaah .... made the move to NCS 2.3.0 and it all seems good now.

    Within about a millisecond or so, the 115K2 UART isr will have detected the break signal, and disable uart receive. When I re-enable uart receive later, it works properly.

Related