BT_HCI_ERR_CONN_FAIL_TO_ESTAB every time on re-connection

I am working on a peripheral / receiver project based on the nRF5340 audio application for SDK 2.5.1. 

I have added the ability for the receiver to advertise and accept a connection from a central device. 

This all works fine up to a certain point. After some time, the connection gets terminated by the receiver. I believe it's a known issue with controllers when both syncing to a broadcast stream and having an ACL connection at the same time. 

The disconnecting issue aside, my central device cannot re-connect to the peripheral again after the disconnect, unless the receiver gets a reset. Here is what happens in my logs :

HL [00:01:09.882,537] <inf> bt_mgmt: Disconnected in bt_mgmt : 7F:A0:94:3D:DF:2E (random) : (reason 0x08)
HL [00:01:09.882,568] <wrn> streamctrl_broadcast_sink: Unexpected/unhandled bt_mgmt event: 6
HL [00:01:09.883,666] <inf> bt_mgmt_adv: connectable extended advertising set created
HL [00:01:09.883,758] <inf> broadcast_sink: Application disconnected: 7F:A0:94:3D:DF:2E (random) (reason 0x08)
HL [00:01:09.883,758] <inf> broadcast_sink: Restarting advertising..
HL [00:01:09.884,674] <inf> bt_mgmt_adv: Advertising successfully started
HL [00:01:10.136,108] <inf> bt_mgmt: Connected in bt_mgmt
HL [00:01:10.136,199] <inf> bt_mgmt: Connected: 5F:B4:9C:F0:98:9D (random)
HL [00:01:10.136,413] <wrn> streamctrl_broadcast_sink: Unexpected/unhandled bt_mgmt event: 2
HL [00:01:10.136,505] <inf> broadcast_sink: Application connected: 5F:B4:9C:F0:98:9D (random)

HL [00:01:10.332,550] <inf> bt_mgmt: Disconnected in bt_mgmt : 5F:B4:9C:F0:98:9D (random) : (reason 0x3e)

This happens again and again when the central tris to re-connect after the receiver disconnects. It fails with reason 0x3eBT_HCI_ERR_CONN_FAIL_TO_ESTAB

Here are my connected and disconnected callback functions which are implemented in broadcast_sink.c :  

static void connected(struct bt_conn *conn, uint8_t err)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (err != 0U)
	{
		LOG_INF("Failed to connect to application %s (%u)", addr, err);
		app_conn = NULL;
		return;
	}

	app_conn = bt_conn_ref(conn);

	if (app_conn == NULL)
	{
		LOG_ERR("connected - app_conn is NULL");
	}

	LOG_INF("Application connected: %s\n", addr);

}

static void disconnected(struct bt_conn *conn, uint8_t reason)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (app_conn)
	{
		bt_conn_unref(app_conn);
		app_conn = NULL;
	}


	LOG_INF("Application disconnected: %s (reason 0x%02x)", addr, reason);

	LOG_INF("Restarting advertising..");

}

Also within the SDK , in bluetooth/bt_management/bt_mgmt.c, there are connected and disconnected callbacks which also get invoked before my own callbacks. 

static void connected_cb(struct bt_conn *conn, uint8_t err)
{
	LOG_INF("Connected in bt_mgmt");

	int ret;
	char addr[BT_ADDR_LE_STR_LEN] = {0};
	uint8_t num_conn = 0;
	uint16_t conn_handle;
	enum ble_hci_vs_tx_power conn_tx_pwr;
	struct bt_mgmt_msg msg;

	(void)bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (err) {
		LOG_ERR("ACL connection in bt_mgmt to addr: %s, conn: %p, failed, error %d", addr,
			(void *)conn, err);

		bt_conn_unref(conn);

		if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
			ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
			if (ret) {
				LOG_ERR("Failed to restart scanning: %d", ret);
			}
		}

		if (IS_ENABLED(CONFIG_BT_PERIPHERAL)) {
			//ret = bt_mgmt_adv_start(NULL, 0, NULL, 0, true);
			ret = bt_mgmt_adv_start(ext_adv_buf, ext_adv_buf_cnt, NULL, 0, true);
			if (ret) {
				LOG_ERR("Failed to restart advertising: %d", ret);
			}
		}

		return;
	}

	bt_conn_foreach(BT_CONN_TYPE_LE, conn_state_connected_check, (void *)&num_conn);

	/* ACL connection established */
	/* NOTE: The string below is used by the Nordic CI system */
	LOG_INF("Connected: %s", addr);

	if (IS_ENABLED(CONFIG_BT_CENTRAL) && (num_conn < MAX_CONN_NUM)) {
		/* Room for more connections, start scanning again */
		ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
		if (ret) {
			LOG_ERR("Failed to resume scanning: %d", ret);
		}
	}

	ret = bt_hci_get_conn_handle(conn, &conn_handle);
	if (ret) {
		LOG_ERR("Unable to get conn handle");
	} else {
		if (IS_ENABLED(CONFIG_NRF_21540_ACTIVE)) {
			conn_tx_pwr = CONFIG_NRF_21540_MAIN_DBM;
		} else {
			conn_tx_pwr = CONFIG_BLE_CONN_TX_POWER_DBM;
		}

		ret = ble_hci_vsc_conn_tx_pwr_set(conn_handle, conn_tx_pwr);
		if (ret) {
			LOG_ERR("Failed to set TX power for conn");
		} else {
			LOG_DBG("TX power set to %d dBm for connection %p", conn_tx_pwr,
				(void *)conn);
		}
	}

	msg.event = BT_MGMT_CONNECTED;
	msg.conn = conn;

	ret = zbus_chan_pub(&bt_mgmt_chan, &msg, K_NO_WAIT);
	ERR_CHK(ret);

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		ret = bt_conn_set_security(conn, BT_SECURITY_L2);
		if (ret) {
			LOG_ERR("Failed to set security to L2: %d", ret);
		}
	}
}

static void disconnected_cb(struct bt_conn *conn, uint8_t reason)
{
	int ret;
	char addr[BT_ADDR_LE_STR_LEN];
	struct bt_mgmt_msg msg;

	(void)bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	/* NOTE: The string below is used by the Nordic CI system */
	LOG_INF("Disconnected in bt_mgmt : %s : (reason 0x%02x)", addr, reason);

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		bt_conn_unref(conn);
	}

	/* Publish disconnected */
	msg.event = BT_MGMT_DISCONNECTED;
	msg.conn = conn;

	ret = zbus_chan_pub(&bt_mgmt_chan, &msg, K_NO_WAIT);
	ERR_CHK(ret);

	if (IS_ENABLED(CONFIG_BT_PERIPHERAL)) {
		//ret = bt_mgmt_adv_start(NULL, 0, NULL, 0, true);
		ret = bt_mgmt_adv_start(ext_adv_buf, ext_adv_buf_cnt, NULL, 0, true);
		ERR_CHK(ret);
	}

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
		if (ret) {
			LOG_ERR("Failed to restart scanning: %d", ret);
		}
	}
}

Any ideas as to why the central device can't reconnect here with error BT_HCI_ERR_CONN_FAIL_TO_ESTAB , until the receiver gets reset? The receiver and central device are right beside one another.

The receiver is clearly not in a right state to accept a new connection after it disconnects, but I'm not sure how to solve this. Thanks in advance for any help. 

Parents Reply Children
  • Hi  ,

    I tried NCS 2.6.0-rc1 now. 

    As soon as the receiver tries to sync to a broadcast, the following crash happens. Is it possible to determine what the problem is just from these error logs? Thanks : 

    HL [00:00:30.885,894] <inf> main: PA synced
    HL [00:00:30.885,894] <inf> broadcast_sink: broadcast_sink_pa_sync_set called with broadcast_id 10555112
    HL [00:00:31.833,587] <err> bt_mgmt_ctlr_cfg: No response from IPC or controller
    HL [00:00:31.833,587] <err> bt_mgmt_ctlr_cfg: ERR_CHK Err_code: [-116] @ line: 164
    HL [00:00:31.833,618] <err> os: r0/a1:  0x00000003  r1/a2:  0x00000009  r2/a3:  0x00000008
    HL [00:00:31.833,618] <err> os: r3/a4:  0x000578a3 r12/ip:  0x20002b58 r14/lr:  0x0005792d
    HL [00:00:31.833,618] <err> os:  xpsr:  0x41000025
    HL [00:00:31.833,618] <err> os: s[ 0]:  0x000000a4  s[ 1]:  0x00000200  s[ 2]:  0x00000200  s[ 3]:  0x01000003
    HL [00:00:31.833,648] <err> os: s[ 4]:  0x00000000  s[ 5]:  0x20002ef0  s[ 6]:  0x00000000  s[ 7]:  0x00012f13
    HL [00:00:31.833,648] <err> os: s[ 8]:  0x00012ee1  s[ 9]:  0x0003c19b  s[10]:  0x20002ef0  s[11]:  0x00000000
    HL [00:00:31.833,648] <err> os: s[12]:  0x00000026  s[13]:  0x00000000  s[14]:  0x000feab3  s[15]:  0x0003bf13
    HL [00:00:31.833,679] <err> os: fpscr:  0x00000000
    HL [00:00:31.833,679] <err> os: Faulting instruction address (r15/pc): 0x00006efe
    HL [00:00:31.833,679] <err> os: >>> ZEPHYR FATAL ERROR 3: Kernel oops on CPU 0
    HL [00:00:31.833,679] <err> os: Fault during interrupt handling
    
    HL [00:00:31.833,709] <err> os: Current thread: 0x20003520 (BT RX)
    HL [00:00:31.833,709] <err> error_handler: Caught system error -- reason 3. Entering infinite loop
    
    

  • Hi.

    Amanda is currently out of office. I'll try to assist while she is away.

    muli said:
    I tried NCS 2.6.0-rc1 now. 

    NCS 2.6.0 is now released. Maybe you can perform some test in the latest release while I get up to speed and take a look at the information in your ticket.

    Br,
    Joakim

  •  I can confirm that with NCS 2.6.0, the connection no longer drops even. Very stable in comparison to before when using the PCFT controller. Thanks for all the support.

  • Thats great news.

    Thanks for the update!

    BR,
    Joakim

Related