Uart corrupted when scanning with many advertisers in range

Hello,

There seems to be a bug with scanning for many Bluetooth advertisements while simultaneously sending  data over uart in Nrf sdk v17. 

I have tried to work around this issue by using the nrfx_uart/uarte library, and slowing the scanning rate. Both of these changes did not fix the issue, just made it take longer to show up. Also originally I was using nrf sdk v16, upgrading to v17 made the issue better but still shows up.

At the bottom of this post is a reproducible project that shows this bug.

If the code is programmed and the python script is pointing to the com port of the device, you will see that thereis a wild inconsistency with the data recieved.

If lines 166 and 167 are commented out (the nrf_ble_scan_start) and start the script again, the script will show that every 100 bytes received are in the same pattern

Note: this only seems to happen when there are a lot of advertisers around.

Can you please help find a solution to this problem?

Thanks

#include "nrfx_log.h"
#include "nrf_log_ctrl.h"
#include "nrf_log_default_backends.h"
#include "nrf_pwr_mgmt.h"
#include "nrf_sdh.h"
#include "nrf_sdh_ble.h"
#include "nrf_ble_scan.h"
#include <nrfx.h>

#include "nrf_uarte.h"
#include "nrf_gpio.h"


#define APP_BLE_CONN_CFG_TAG 1

struct uart_config_t {
	uint32_t            pseltxd; ///< TXD pin number.
	uint32_t            pselrxd; ///< RXD pin number.
	uint32_t            pselcts; ///< CTS pin number.
	uint32_t            pselrts; ///< RTS pin number.
	void *              p_context; ///< Context passed to interrupt handler.
	nrf_uarte_hwfc_t     hwfc; ///< Flow control configuration.
	nrf_uarte_parity_t   parity; ///< Parity configuration.
	nrf_uarte_baudrate_t baudrate; ///< Baudrate.
	uint8_t             interrupt_priority; ///< Interrupt priority.
} ;

static nrf_ble_scan_t m_scan;
NRF_UARTE_Type * p_uart = NRF_UARTE0;
static void send_uart_payload();
static void ble_evt(ble_evt_t const * p_evt, void * p_context);
NRF_SDH_BLE_OBSERVER(m_ble_observer, 1, ble_evt, 0);
#define UART_PAYLOAD_SIZE 100
static uint8_t pl[UART_PAYLOAD_SIZE] = { 0 };

static void ble_evt(ble_evt_t const * p_evt, void * p_context) {
	if (p_evt->header.evt_id == BLE_GAP_EVT_ADV_REPORT) {
		ble_gap_evt_adv_report_t const * p_adv_report = &p_evt->evt.gap_evt.params.adv_report;
		NRFX_LOG_INFO("ADV");
	}
	send_uart_payload();
	nrf_ble_scan_on_ble_evt(p_evt, &m_scan);
	
}

static void init_softdevice() {
	ret_code_t err_code = nrf_sdh_enable_request();
	APP_ERROR_CHECK(err_code);
}

static void init_bluetooth() {
		
	uint32_t ram_start = 0;
	ret_code_t err_code = nrf_sdh_ble_default_cfg_set(APP_BLE_CONN_CFG_TAG, &ram_start);
	APP_ERROR_CHECK(err_code);
	
	err_code = nrf_sdh_ble_enable(&ram_start);
	APP_ERROR_CHECK(err_code);
}

static void send_uart_payload() {
	nrf_uarte_tx_buffer_set(p_uart, pl, UART_PAYLOAD_SIZE);
	nrf_uarte_task_trigger(p_uart, NRF_UARTE_TASK_STARTTX);
}

void nrfx_uarte_0_irq_handler(void) {
	if (nrf_uarte_int_enable_check(p_uart, NRF_UARTE_INT_ERROR_MASK) && nrf_uarte_event_check(p_uart, NRF_UARTE_EVENT_ERROR)) {
		nrf_uarte_event_clear(p_uart, NRF_UARTE_EVENT_ERROR);
		nrf_uarte_int_disable(p_uart, NRF_UARTE_INT_RXDRDY_MASK | NRF_UARTE_INT_ERROR_MASK);
		NRFX_LOG_ERROR("uart error!");
	}
	
	if (nrf_uarte_event_check(p_uart, NRF_UARTE_EVENT_ENDTX)) {
		nrf_uarte_event_clear(p_uart, NRF_UARTE_EVENT_ENDTX);
		nrf_uarte_event_clear(p_uart, NRF_UARTE_EVENT_TXSTOPPED);
		send_uart_payload();
	}
}
static void init_scanner() {
	nrf_ble_scan_init_t init_scan = { 
			.connect_if_match = false,
		.conn_cfg_tag = APP_BLE_CONN_CFG_TAG
	};

	ret_code_t err_code = nrf_ble_scan_init(&m_scan, &init_scan, 0);
	APP_ERROR_CHECK(err_code);
	
	m_scan.scan_params.interval =  MSEC_TO_UNITS(100, UNIT_0_625_MS);
	m_scan.scan_params.window =  MSEC_TO_UNITS(100, UNIT_0_625_MS);
	m_scan.scan_params.timeout =  MSEC_TO_UNITS(0, UNIT_10_MS);
}

static void apply_uart_config(const struct uart_config_t* p_config) {
	if (p_config->pseltxd != NRF_UARTE_PSEL_DISCONNECTED) {
		nrf_gpio_pin_set(p_config->pseltxd);
		nrf_gpio_cfg_output(p_config->pseltxd);
	}
	if (p_config->pselrxd != NRF_UARTE_PSEL_DISCONNECTED) {
		nrf_gpio_cfg_input(p_config->pselrxd, NRF_GPIO_PIN_NOPULL);
	}
	nrf_uarte_baudrate_set(p_uart, p_config->baudrate);
	nrf_uarte_configure(p_uart, p_config->parity, p_config->hwfc);
	nrf_uarte_txrx_pins_set(p_uart, p_config->pseltxd, p_config->pselrxd);
	if (p_config->hwfc == NRF_UARTE_HWFC_ENABLED) {
		if (p_config->pselcts != NRF_UARTE_PSEL_DISCONNECTED) {
			nrf_gpio_cfg_input(p_config->pselcts, NRF_GPIO_PIN_NOPULL);
		}
		if (p_config->pselrts != NRF_UARTE_PSEL_DISCONNECTED) {
			nrf_gpio_pin_set(p_config->pselrts);
			nrf_gpio_cfg_output(p_config->pselrts);
		}
		nrf_uarte_hwfc_pins_set(p_uart, p_config->pselrts, p_config->pselcts);
	}
}
static void init_uart() {
	struct uart_config_t config = { 
		.pseltxd = 6,
		.pselrxd = 5, 
		.pselcts = NRF_UARTE_PSEL_DISCONNECTED, 
		.pselrts = NRF_UARTE_PSEL_DISCONNECTED,
		.p_context = NULL,
		.hwfc = NRF_UARTE_HWFC_DISABLED,
		.parity = NRF_UARTE_PARITY_EXCLUDED,
		.baudrate = NRF_UARTE_BAUDRATE_9600,
		.interrupt_priority = NRFX_UART_DEFAULT_CONFIG_IRQ_PRIORITY, 
	};
	apply_uart_config(&config);
	
	nrf_uarte_event_clear(p_uart, NRF_UARTE_EVENT_TXDRDY);
	nrf_uarte_event_clear(p_uart, NRF_UARTE_EVENT_RXTO);
	nrf_uarte_int_enable(p_uart,
		NRF_UARTE_INT_TXDRDY_MASK |  NRF_UARTE_INT_RXTO_MASK | NRF_UARTE_INT_ERROR_MASK);
	NRFX_IRQ_PRIORITY_SET(nrfx_get_irq_number(p_uart),
		config.interrupt_priority);
	NRFX_IRQ_ENABLE(nrfx_get_irq_number(p_uart));
	
	nrf_uarte_enable(p_uart);
}

int main() {
	ret_code_t err_code = NRF_LOG_INIT(NULL);
	APP_ERROR_CHECK(err_code);

	NRF_LOG_DEFAULT_BACKENDS_INIT();
	
	err_code = nrf_pwr_mgmt_init();
	APP_ERROR_CHECK(err_code);
	
	NRFX_LOG_INFO("Test started.");
	
	memset(pl, 0xAB, UART_PAYLOAD_SIZE);
	for (uint16_t i = 0; i < UART_PAYLOAD_SIZE; i++) {
		if (i % 2 == 0) {
			pl[i] = 0xAB;
		} else {
			pl[i] = 0xCD;
		}
	}
	
	init_softdevice();
	init_bluetooth();
	init_scanner();
	init_uart();
	send_uart_payload();
	
	err_code = nrf_ble_scan_start(&m_scan);
	APP_ERROR_CHECK(err_code);
	
	for (;;) {
		if (!NRF_LOG_PROCESS()) {
			nrf_pwr_mgmt_run();
		}
	}
	return 0;
}

import serial

def main():
    with serial.Serial('COM17', baudrate=9600) as s:
        while True:
            p = []
            while len(p) < 100:
                p += s.read(100 - len(p))
            print(''.join(map('{:X}'.format, p)))


if __name__ == "__main__":
    main()

Parents
  • Bug is here:

    static void send_uart_payload() {
    	uint8_t pl[30] = { 0 };
    	nrfx_uart_tx(&uart, pl, 30);
    }
    

    The "pl" resides on the stack, and I am pretty sure nrf_uart_tx() is non-blocking. That allows other calls - like later advertisements - to overwrite the stack memory location of pl variable during UART transmission.

    First order fix is quite simple:

    static void send_uart_payload() {
    	static uint8_t pl[30] = { 0 }; /* <-- static here */
    	nrfx_uart_tx(&uart, pl, 30);
    }
    

    But in the long run you may need to switch to some kind of FIFO or a blocking UART function.

  • Sorry for the delayed response. I have been trying to come up with possible reasons for this behaviour, but I can't think of any. Nor have I been able to find any similiar reports in our ticket database.

    Can you try set the registers as below on startup to turn on the 64M clock domain as suggested in the errata 109 document ([109] DMA: DMA access transfers might be corrupted) to see if it makes any difference?

    /*Turn on the 64Mhz clock domain. Errata 109 WA */
    *(volatile uint32_t *)0x4006EC00 = 0x00009375; 
    *(volatile uint32_t *)0x4006ED08 = 0x00000003; 
    *(volatile uint32_t *)0x4006EC00 = 0x00009375;

  • this did not seem to fix the problem. I have also created a zephyr project to confirm it was not  an implementation issue but it still happens. Also it is not required that any Bluetooth activity to be happening for this bug to happen, it just makes it more likely.

    You can reproduce the issue with this zephyr project:

    #include <logging/log.h>
    #include <logging/log_ctrl.h>
    #include <drivers/uart.h>
    
    LOG_MODULE_REGISTER(main);
    
    
    struct k_sem tx_sem;
    const struct device *uart_inst;
    
    static void uart_cb(const struct device *dev, struct uart_event *evt, void *user_data) {
    	switch (evt->type) {
    	case UART_TX_DONE: {
    			LOG_INF("Sent %i bytes", evt->data.tx.len);
    			k_sem_give(&tx_sem);
    		}break;
    	default:break;
    	}
    }
    
    int main() {
    	LOG_INF("Starting test\n");
    	uart_inst =  device_get_binding(DT_LABEL(DT_NODELABEL(uart0)));
    	k_sem_init(&tx_sem, 0, 1);
    	
    	const struct uart_config cfg = { 
    		.baudrate = 115200,
    		.parity =	UART_CFG_PARITY_NONE,
    		.stop_bits = UART_CFG_STOP_BITS_1,
    		.data_bits = UART_CFG_DATA_BITS_8,
    		.flow_ctrl = UART_CFG_FLOW_CTRL_NONE
    	};
    	int err = uart_configure(uart_inst, &cfg);
    	__ASSERT(err == 0, "failed to configure uart");
    	
    	err = uart_callback_set(uart_inst, uart_cb, 0);
    	__ASSERT(err == 0, "failed to set uart callback");
    
    
    	k_sem_give(&tx_sem);
    	
    	return 0;
    }
    
    extern void uart_tx_thread(void *p1, void *p2, void *p3) {
    	const uint16_t DATA_SIZE = 100;
    	uint8_t* data = k_malloc(DATA_SIZE);
    	for (uint16_t i = 0; i < DATA_SIZE; i++) {
    		data[i] = ((i % 2 == 0) ? 0xAB : 0xCD);
    	}
    	for (;;) {
    		if (k_sem_take(&tx_sem, K_FOREVER) != 0) {
    			LOG_ERR("Failed to take sem lock");
    			break;
    		}
    	
    		LOG_INF("Sending...");
    		int err = uart_tx(uart_inst, data, DATA_SIZE, 0);
    		if (err) {
    			LOG_ERR("failed to write uart message. %i", err);
    			k_sem_give(&tx_sem);
    		}
    	}
    	k_free(data);
    }
    
    K_THREAD_DEFINE(my_tid,
    	1024,
    	uart_tx_thread,
    	NULL,
    	NULL,
    	NULL,
    	5,
    	0,
    	1000);

  • Thanks for the code. I will try to let it run overnight to see if I get the same problem here. How long time does it take for you to reproduce the problem with this new code?

  • between 10 - 30 minutes. It is reproducible on nrf52 dk, DVK-NINA-B3 and custom hardware using a FTDI chip.

    Here is a python script that will assert when the problem is found.

    import serial
    PAYLOAD_LENGTH = 100
    
    def read_exact(s: serial.Serial, n):
        p = []
        while len(p) < PAYLOAD_LENGTH:
            p += s.read(PAYLOAD_LENGTH - len(p))
        return p
    
    def main():
        with serial.Serial('COM17', baudrate=115200) as s:
            compare = read_exact(s, PAYLOAD_LENGTH)
            while True:
                p = read_exact(s, PAYLOAD_LENGTH)
                print(''.join(map('{:X}'.format, p)))
                assert compare==p
    
    
    if __name__ == "__main__":
        main()

  • Could it be an issue with the PC setup (USB hub,etc) or maybe an issue with PySerial? I used the RealTerm terminal here and stored all received data to a text file, then grepped through it to see if there were any corrupted data. Here's a dropbox link to the file if you are interested: https://www.dropbox.com/s/8nde44huwpa5ixw/uart_test.txt?dl=0

    Edit: Forgot to add that I ran the code on a 52 DK and used the on-board J-link as the serial bridge. Also, I left it running for about 4-5 hours.

Reply Children
  • I have tried with both SerialPortStream (C#) and pyserial and could replicate it with both libraries. Have tried it on a windows machine and linux machine with diffrent devboards plugged in, and have also done it on an upboard with a custom hat with 52s on it.

    I am going to try and create a project that just sends data between two devboards to eliminate it being anything to do with the computer.

Related