BT_HCI_ERR_CONN_FAIL_TO_ESTAB every time on re-connection

I am working on a peripheral / receiver project based on the nRF5340 audio application for SDK 2.5.1. 

I have added the ability for the receiver to advertise and accept a connection from a central device. 

This all works fine up to a certain point. After some time, the connection gets terminated by the receiver. I believe it's a known issue with controllers when both syncing to a broadcast stream and having an ACL connection at the same time. 

The disconnecting issue aside, my central device cannot re-connect to the peripheral again after the disconnect, unless the receiver gets a reset. Here is what happens in my logs :

HL [00:01:09.882,537] <inf> bt_mgmt: Disconnected in bt_mgmt : 7F:A0:94:3D:DF:2E (random) : (reason 0x08)
HL [00:01:09.882,568] <wrn> streamctrl_broadcast_sink: Unexpected/unhandled bt_mgmt event: 6
HL [00:01:09.883,666] <inf> bt_mgmt_adv: connectable extended advertising set created
HL [00:01:09.883,758] <inf> broadcast_sink: Application disconnected: 7F:A0:94:3D:DF:2E (random) (reason 0x08)
HL [00:01:09.883,758] <inf> broadcast_sink: Restarting advertising..
HL [00:01:09.884,674] <inf> bt_mgmt_adv: Advertising successfully started
HL [00:01:10.136,108] <inf> bt_mgmt: Connected in bt_mgmt
HL [00:01:10.136,199] <inf> bt_mgmt: Connected: 5F:B4:9C:F0:98:9D (random)
HL [00:01:10.136,413] <wrn> streamctrl_broadcast_sink: Unexpected/unhandled bt_mgmt event: 2
HL [00:01:10.136,505] <inf> broadcast_sink: Application connected: 5F:B4:9C:F0:98:9D (random)

HL [00:01:10.332,550] <inf> bt_mgmt: Disconnected in bt_mgmt : 5F:B4:9C:F0:98:9D (random) : (reason 0x3e)

This happens again and again when the central tris to re-connect after the receiver disconnects. It fails with reason 0x3eBT_HCI_ERR_CONN_FAIL_TO_ESTAB

Here are my connected and disconnected callback functions which are implemented in broadcast_sink.c :  

static void connected(struct bt_conn *conn, uint8_t err)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (err != 0U)
	{
		LOG_INF("Failed to connect to application %s (%u)", addr, err);
		app_conn = NULL;
		return;
	}

	app_conn = bt_conn_ref(conn);

	if (app_conn == NULL)
	{
		LOG_ERR("connected - app_conn is NULL");
	}

	LOG_INF("Application connected: %s\n", addr);

}

static void disconnected(struct bt_conn *conn, uint8_t reason)
{
	char addr[BT_ADDR_LE_STR_LEN];

	bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (app_conn)
	{
		bt_conn_unref(app_conn);
		app_conn = NULL;
	}


	LOG_INF("Application disconnected: %s (reason 0x%02x)", addr, reason);

	LOG_INF("Restarting advertising..");

}

Also within the SDK , in bluetooth/bt_management/bt_mgmt.c, there are connected and disconnected callbacks which also get invoked before my own callbacks. 

static void connected_cb(struct bt_conn *conn, uint8_t err)
{
	LOG_INF("Connected in bt_mgmt");

	int ret;
	char addr[BT_ADDR_LE_STR_LEN] = {0};
	uint8_t num_conn = 0;
	uint16_t conn_handle;
	enum ble_hci_vs_tx_power conn_tx_pwr;
	struct bt_mgmt_msg msg;

	(void)bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	if (err) {
		LOG_ERR("ACL connection in bt_mgmt to addr: %s, conn: %p, failed, error %d", addr,
			(void *)conn, err);

		bt_conn_unref(conn);

		if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
			ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
			if (ret) {
				LOG_ERR("Failed to restart scanning: %d", ret);
			}
		}

		if (IS_ENABLED(CONFIG_BT_PERIPHERAL)) {
			//ret = bt_mgmt_adv_start(NULL, 0, NULL, 0, true);
			ret = bt_mgmt_adv_start(ext_adv_buf, ext_adv_buf_cnt, NULL, 0, true);
			if (ret) {
				LOG_ERR("Failed to restart advertising: %d", ret);
			}
		}

		return;
	}

	bt_conn_foreach(BT_CONN_TYPE_LE, conn_state_connected_check, (void *)&num_conn);

	/* ACL connection established */
	/* NOTE: The string below is used by the Nordic CI system */
	LOG_INF("Connected: %s", addr);

	if (IS_ENABLED(CONFIG_BT_CENTRAL) && (num_conn < MAX_CONN_NUM)) {
		/* Room for more connections, start scanning again */
		ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
		if (ret) {
			LOG_ERR("Failed to resume scanning: %d", ret);
		}
	}

	ret = bt_hci_get_conn_handle(conn, &conn_handle);
	if (ret) {
		LOG_ERR("Unable to get conn handle");
	} else {
		if (IS_ENABLED(CONFIG_NRF_21540_ACTIVE)) {
			conn_tx_pwr = CONFIG_NRF_21540_MAIN_DBM;
		} else {
			conn_tx_pwr = CONFIG_BLE_CONN_TX_POWER_DBM;
		}

		ret = ble_hci_vsc_conn_tx_pwr_set(conn_handle, conn_tx_pwr);
		if (ret) {
			LOG_ERR("Failed to set TX power for conn");
		} else {
			LOG_DBG("TX power set to %d dBm for connection %p", conn_tx_pwr,
				(void *)conn);
		}
	}

	msg.event = BT_MGMT_CONNECTED;
	msg.conn = conn;

	ret = zbus_chan_pub(&bt_mgmt_chan, &msg, K_NO_WAIT);
	ERR_CHK(ret);

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		ret = bt_conn_set_security(conn, BT_SECURITY_L2);
		if (ret) {
			LOG_ERR("Failed to set security to L2: %d", ret);
		}
	}
}

static void disconnected_cb(struct bt_conn *conn, uint8_t reason)
{
	int ret;
	char addr[BT_ADDR_LE_STR_LEN];
	struct bt_mgmt_msg msg;

	(void)bt_addr_le_to_str(bt_conn_get_dst(conn), addr, sizeof(addr));

	/* NOTE: The string below is used by the Nordic CI system */
	LOG_INF("Disconnected in bt_mgmt : %s : (reason 0x%02x)", addr, reason);

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		bt_conn_unref(conn);
	}

	/* Publish disconnected */
	msg.event = BT_MGMT_DISCONNECTED;
	msg.conn = conn;

	ret = zbus_chan_pub(&bt_mgmt_chan, &msg, K_NO_WAIT);
	ERR_CHK(ret);

	if (IS_ENABLED(CONFIG_BT_PERIPHERAL)) {
		//ret = bt_mgmt_adv_start(NULL, 0, NULL, 0, true);
		ret = bt_mgmt_adv_start(ext_adv_buf, ext_adv_buf_cnt, NULL, 0, true);
		ERR_CHK(ret);
	}

	if (IS_ENABLED(CONFIG_BT_CENTRAL)) {
		ret = bt_mgmt_scan_start(0, 0, BT_MGMT_SCAN_TYPE_CONN, NULL);
		if (ret) {
			LOG_ERR("Failed to restart scanning: %d", ret);
		}
	}
}

Any ideas as to why the central device can't reconnect here with error BT_HCI_ERR_CONN_FAIL_TO_ESTAB , until the receiver gets reset? The receiver and central device are right beside one another.

The receiver is clearly not in a right state to accept a new connection after it disconnects, but I'm not sure how to solve this. Thanks in advance for any help. 

Parents Reply Children
  • Hi, 

    May I know which central device you are using? We see the connection interval request from central is 30ms. Is it possible for the central device to use a wider connection interval for a trial? e.g, 40ms or 80ms.

    Which controller version are you using now?

    Does the peripheral still sync with the broadcaster and play the music when the ACL is disconnected and waiting for reconnecting?

    Would it help if you lower the bitrate from the broadcaster?

    If you are using the PCFT controller which always treats ISO packets as higher priority than ACL packets, that could be the reason why it is hard to re-connect during streaming.

    We are going to provide a new controller solution soon which would improve the situation.

    -Amanda H.

  • Hi  

    Thanks for your reply. Sorry for the delayed response as I was away. 

    For the central device we are using a customized Android application, which connects to the receiver and sends messages to it via the Nordic UART Service, to tell it which broadcast name to look for when scanning.

    This all works well, up until the point where the receiver decides to disconnect sometimes when the audio is syncing. The receiver never accepts a new connection then until it is reset.

    I have tried to change the connection intervals from the central app to something longer, but still the same issue.

    I am using PCFT controller version 3393.

    The peripheral / receiver still syncs and plays the broadcast audio when the ACL disconnects. 

    Do you know hen the new PCFT controller will be released?

    Regards,

    Paul

  • NCS v2.6.0 is coming soon, but I don't have the date. If you want, you can try NCS 2.6.0-rc1. 

  • Hi  ,

    I tried NCS 2.6.0-rc1 now. 

    As soon as the receiver tries to sync to a broadcast, the following crash happens. Is it possible to determine what the problem is just from these error logs? Thanks : 

    HL [00:00:30.885,894] <inf> main: PA synced
    HL [00:00:30.885,894] <inf> broadcast_sink: broadcast_sink_pa_sync_set called with broadcast_id 10555112
    HL [00:00:31.833,587] <err> bt_mgmt_ctlr_cfg: No response from IPC or controller
    HL [00:00:31.833,587] <err> bt_mgmt_ctlr_cfg: ERR_CHK Err_code: [-116] @ line: 164
    HL [00:00:31.833,618] <err> os: r0/a1:  0x00000003  r1/a2:  0x00000009  r2/a3:  0x00000008
    HL [00:00:31.833,618] <err> os: r3/a4:  0x000578a3 r12/ip:  0x20002b58 r14/lr:  0x0005792d
    HL [00:00:31.833,618] <err> os:  xpsr:  0x41000025
    HL [00:00:31.833,618] <err> os: s[ 0]:  0x000000a4  s[ 1]:  0x00000200  s[ 2]:  0x00000200  s[ 3]:  0x01000003
    HL [00:00:31.833,648] <err> os: s[ 4]:  0x00000000  s[ 5]:  0x20002ef0  s[ 6]:  0x00000000  s[ 7]:  0x00012f13
    HL [00:00:31.833,648] <err> os: s[ 8]:  0x00012ee1  s[ 9]:  0x0003c19b  s[10]:  0x20002ef0  s[11]:  0x00000000
    HL [00:00:31.833,648] <err> os: s[12]:  0x00000026  s[13]:  0x00000000  s[14]:  0x000feab3  s[15]:  0x0003bf13
    HL [00:00:31.833,679] <err> os: fpscr:  0x00000000
    HL [00:00:31.833,679] <err> os: Faulting instruction address (r15/pc): 0x00006efe
    HL [00:00:31.833,679] <err> os: >>> ZEPHYR FATAL ERROR 3: Kernel oops on CPU 0
    HL [00:00:31.833,679] <err> os: Fault during interrupt handling
    
    HL [00:00:31.833,709] <err> os: Current thread: 0x20003520 (BT RX)
    HL [00:00:31.833,709] <err> error_handler: Caught system error -- reason 3. Entering infinite loop
    
    

  • Hi.

    Amanda is currently out of office. I'll try to assist while she is away.

    muli said:
    I tried NCS 2.6.0-rc1 now. 

    NCS 2.6.0 is now released. Maybe you can perform some test in the latest release while I get up to speed and take a look at the information in your ticket.

    Br,
    Joakim

Related