NCS Nordic UART service: BT RX HCI timeout

Hi!

I am working on a customer project where up to 20 BLE peripherals are connected to a single central device. The code of the central is based on the Multi-NUS sample from this Nordic-Blog-Post and has been updated to NCS Version v2.0.0.

The NUS client has been optimized for data throughput. Therefore, the following additions have been made to the nus_client.c file:

/*
 * TX context for one write-without-response: carries what the completion
 * callback needs in order to report the finished transfer to the application.
 */
struct tx_ctx_t {
    /* NUS client instance the write was issued on */
    struct bt_nus_client *nus_c;
    /* Caller's payload pointer, stored as-is (the bytes are not copied here) */
    const uint8_t *data;
    /* Payload length in bytes */
    uint32_t length;
};

/*
 * Heap from which the TX contexts are allocated; its size in bytes is
 * CONFIG_BT_MAX_CONN * CONFIG_BT_CONN_TX_MAX * sizeof(struct tx_ctx_t).
 * NOTE(review): the heap presumably spends part of this region on its own
 * bookkeeping, so fewer contexts may fit than the product suggests - confirm.
 */
K_HEAP_DEFINE(tx_ctx_heap, CONFIG_BT_MAX_CONN * CONFIG_BT_CONN_TX_MAX * sizeof(struct tx_ctx_t));

static void on_sent_wo_rsp(struct bt_conn *conn, void *user_data)
{
	// get tx context from user data
	struct tx_ctx_t *context = (struct tx_ctx_t*) user_data;

	// make a copy of volatile data that is required by the callback
	struct bt_nus_client *nus_c = context->nus_c;
	const void *data = context->data;
	uint16_t length = (uint16_t) context->length;

	// release allocated tx context
	k_heap_free(&tx_ctx_heap, user_data);

	if (nus_c->cb.sent) {
		nus_c->cb.sent(nus_c, 0, data, length);
	}
}

/*
 * Send data to the peer's NUS RX characteristic using a GATT write without
 * response.
 *
 * nus_c - NUS client instance; its conn and handles.rx members are used
 * data  - payload to write; the pointer is also kept in the TX context and
 *         handed back to the "sent" callback
 * len   - payload length in bytes
 *
 * Returns 0 on success, -ENOTCONN if the client has no connection, -ENOMEM if
 * no TX context could be allocated, or the error returned by
 * bt_gatt_write_without_response_cb().
 */
int bt_nus_client_send(struct bt_nus_client *nus_c, const uint8_t *data, uint16_t len)
{
	struct tx_ctx_t *tx;
	int rc;

	if (nus_c->conn == NULL) {
		return -ENOTCONN;
	}

	/* K_NO_WAIT: fail immediately instead of blocking when the heap is full. */
	tx = k_heap_alloc(&tx_ctx_heap, sizeof(*tx), K_NO_WAIT);
	if (!tx) {
		LOG_ERR("TX context allocation failed");
		return -ENOMEM;
	}

	/* Remember what the completion callback has to report. */
	tx->length = len;
	tx->data = data;
	tx->nus_c = nus_c;

	/* Unsigned (sign == false) write; on_sent_wo_rsp() releases the context. */
	rc = bt_gatt_write_without_response_cb(nus_c->conn, nus_c->handles.rx, data, len, false, on_sent_wo_rsp, tx);
	if (rc != 0) {
		/* The callback is not expected to run for a failed write, so free here. */
		k_heap_free(&tx_ctx_heap, tx);
	}

	return rc;
}

Here is my project Kconfig configuration file:

#
# Copyright (c) 2018 Nordic Semiconductor
#
# SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
#

# C++ Support
CONFIG_CPLUSPLUS=y
CONFIG_NEWLIB_LIBC=y
CONFIG_STD_CPP17=y
#CONFIG_LIB_CPLUSPLUS=y

# Configure Serial driver (additional configuration is done in overlay-<usb/uart>.conf)
CONFIG_STDOUT_CONSOLE=y
CONFIG_SERIAL=y
CONFIG_UART_INTERRUPT_DRIVEN=y
CONFIG_UART_LINE_CTRL=y

# Enable the BLE stack with GATT Client configuration
CONFIG_BT=y
CONFIG_BT_CENTRAL=y
CONFIG_BT_SMP=y
CONFIG_BT_GATT_CLIENT=y
CONFIG_BT_MAX_CONN=20
CONFIG_BT_MAX_PAIRED=20
CONFIG_BT_CONN_CTX=y

# Advanced BLE configuration
CONFIG_BT_CTLR_ADVANCED_FEATURES=y
CONFIG_BT_CTLR_CONN_RSSI=y
CONFIG_BT_CTLR_TX_PWR_DYNAMIC_CONTROL=y
CONFIG_BT_USER_PHY_UPDATE=y
CONFIG_BT_CTLR_PHY_CODED=y

# Enable the BLE modules from NCS
#CONFIG_BT_LOG_LEVEL_DBG=y
CONFIG_BT_NUS=y
CONFIG_BT_SCAN=y
CONFIG_BT_SCAN_FILTER_ENABLE=y
CONFIG_BT_SCAN_UUID_CNT=1
CONFIG_BT_GATT_DM=y
CONFIG_HEAP_MEM_POOL_SIZE=16384

# BLE Scanner configuration
CONFIG_BT_SCAN_ADDRESS_CNT=20
CONFIG_BT_SCAN_NAME_CNT=1

# MTU configuration taken from throughput example (adapted)
CONFIG_BT_BUF_ACL_RX_SIZE=251
CONFIG_BT_BUF_ACL_TX_SIZE=251
CONFIG_BT_BUF_ACL_TX_COUNT=40
CONFIG_BT_L2CAP_TX_BUF_COUNT=40
CONFIG_BT_ATT_PREPARE_COUNT=2
CONFIG_BT_L2CAP_TX_MTU=247
CONFIG_BT_CTLR_RX_BUFFERS=2
CONFIG_BT_CTLR_DATA_LENGTH_MAX=251

# This example requires more workqueue stack
CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=4096
CONFIG_BT_RX_STACK_SIZE=4096

# Enable bonding
CONFIG_BT_SETTINGS=y
CONFIG_FLASH=y
CONFIG_FLASH_PAGE_LAYOUT=y
CONFIG_FLASH_MAP=y
CONFIG_NVS=y
CONFIG_SETTINGS=y

# Config logger
CONFIG_LOG=y
CONFIG_USE_SEGGER_RTT=y
CONFIG_LOG_BACKEND_RTT=y
CONFIG_LOG_BACKEND_UART=n

# App specific config
CONFIG_BASE64=y

# Bootloader
CONFIG_BOOTLOADER_MCUBOOT=y


The central throws a Zephyr kernel oops when data is sent to, and responses are received from, more than 10 peripherals.

[00:02:10.709,259] <err> os: r0/a1:  0x00000003  r1/a2:  0x00000000  r2/a3:  0x00000000
[00:02:10.709,289] <err> os: r3/a4:  0x20019646 r12/ip:  0x00000000 r14/lr:  0x000351bf
[00:02:10.709,320] <err> os:  xpsr:  0x61000000
[00:02:10.709,320] <err> os: Faulting instruction address (r15/pc): 0x000351ca
[00:02:10.709,350] <err> os: >>> ZEPHYR FATAL ERROR 3: Kernel oops on CPU 0
[00:02:10.709,381] <err> os: Current thread: 0x20002c98 (BT RX)
[00:02:10.906,616] <err> fatal_error: Resetting system

Resolving the PC shows that the fault is the assertion that follows the k_sem_take() call with HCI_CMD_TIMEOUT in the function bt_hci_cmd_send_sync() of hci_core.c, i.e. the HCI command timed out:

  err = k_sem_take(&sync_sem, HCI_CMD_TIMEOUT);
  BT_ASSERT_MSG(err == 0, "k_sem_take failed with err %d", err);


I think the problem is related to a deadlock of the BLE driver. Is there something wrong with my code or configuration?


Best regards,

Thomas

Related