pm_device_action_run gives MPU FAULT if bme280 ID is bad (0x00)

In my project I have a BME280 that starts OFF, and I interface with SPI.

The system then turns on the power supply and the BME280 is powered.

At this stage the BME280 needs to be configured using the settings from the devicetree and the .conf file.

To do this, I call pm_device_action_run(dev_bme, PM_DEVICE_ACTION_RESUME) to run the static int bme280_chip_init(const struct device *dev) internally.

Before running PM_DEVICE_ACTION_RESUME I first run PM_DEVICE_ACTION_SUSPEND, so the sequence is SUSPEND followed by RESUME.

/* main.c - Application main entry point */

/*
 * Copyright (c) 2015-2016 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/types.h>
#include <stddef.h>
#include <zephyr/sys/printk.h>
#include <zephyr/sys/util.h>

#include <zephyr/drivers/gpio.h>
#include <zephyr/devicetree.h>
#include <zephyr/logging/log.h>

#include <zephyr/pm/device_runtime.h>

#include <zephyr/bluetooth/bluetooth.h>
#include <zephyr/bluetooth/hci.h>

#include <zephyr/device.h>
#include <zephyr/drivers/sensor.h>
#include <zephyr/pm/device.h>

#include <zephyr/drivers/adc.h>

#include <math.h>


LOG_MODULE_REGISTER(Log_Main, LOG_LEVEL_DBG);

#define DEVICE_NAME CONFIG_BT_DEVICE_NAME
#define DEVICE_NAME_LEN (sizeof(DEVICE_NAME) - 1)

#define BEACON_ALL_SENSOR_TESTING 1



/*
 * Expands to the ADC devicetree spec for element `idx` of `node_id`,
 * followed by a comma, so it can be used to build an initializer list.
 * `prop` is accepted but not used in the expansion.
 * NOTE(review): the (node_id, prop, idx) signature looks intended for
 * DT_FOREACH_PROP_ELEM(); nothing in the visible code uses this macro.
 */
#define DT_SPEC_AND_COMMA(node_id, prop, idx) \
	ADC_DT_SPEC_GET_BY_IDX(node_id, idx),
    





/*
 * Look up a device for a devicetree node with compatible "bosch,bme280"
 * (any one of them if several exist).
 *
 * Returns the device pointer, or NULL when no such node exists or the node
 * does not have status "okay".
 *
 * device_is_ready() is not checked here.
 * NOTE(review): presumably deliberate, because the sensor is unpowered when
 * the driver initializes at boot and is brought up later through device PM.
 */
static const struct device *get_bme280_device(void)
{
	const struct device *const bme = DEVICE_DT_GET_ANY(bosch_bme280);

	if (bme != NULL) {
		printk("Found device \"%s\", getting sensor data\n", bme->name);
		return bme;
	}

	printk("\nError: no device found.\n");
	return NULL;
}

/*
 * Look up a device for a devicetree node with compatible "st,lis2dh"
 * (any one of them if several exist) and verify that it is ready.
 *
 * Returns the device pointer, or NULL when no such node exists or the
 * device reports not ready.
 */
static const struct device *get_lis2_device(void)
{
	const struct device *const sensor = DEVICE_DT_GET_ANY(st_lis2dh);

	if (sensor == NULL) {
		/* printk() matches get_bme280_device(); <stdio.h> is not included. */
		printk("No device found\n");
		return NULL;
	}
	if (!device_is_ready(sensor)) {
		printk("Device %s is not ready\n", sensor->name);
		return NULL;
	}

	printk("Found device \"%s\", getting sensor data\n", sensor->name);
	return sensor;
}

/*
 * Advertising data passed to bt_le_adv_start() in bt_ready().
 *
 * Only the flags element (no BR/EDR) is active. The Eddystone-URL payload
 * below is commented out; it follows the Eddystone specification:
 * https://github.com/google/eddystone/blob/master/protocol-specification.md
 * https://github.com/google/eddystone/tree/master/eddystone-url
 */
static const struct bt_data ad[] = {
	BT_DATA_BYTES(BT_DATA_FLAGS, BT_LE_AD_NO_BREDR)
	// BT_DATA_BYTES(BT_DATA_UUID16_ALL, 0xaa, 0xfe), //, 0x6E, 0x2A, 0x6F, 0x2A, 0x6D, 0x2A),
	// BT_DATA_BYTES(BT_DATA_SVC_DATA16,
	// 	      0xaa, 0xfe, /* Eddystone UUID */
	// 	      0x10, /* Eddystone-URL frame type */
	// 	      0x00, /* Calibrated Tx power at 0m */
	// 	      0x00, /* URL Scheme Prefix http://www. */
	// 	      'z', 'e', 'p', 'h', 'y', 'r',
	// 	      'p', 'r', 'o', 'j', 'e', 'c', 't',
	// 	      0x08) /* .org */
};

/*
 * Scan response data passed to bt_le_adv_start() in bt_ready(): the complete
 * device name (DEVICE_NAME, without its terminating NUL).
 */
static struct bt_data sd[] = {
	BT_DATA(BT_DATA_NAME_COMPLETE, DEVICE_NAME, DEVICE_NAME_LEN),
};

static void bt_ready(int err)
{
	char addr_s[BT_ADDR_LE_STR_LEN];
	bt_addr_le_t addr = {0};
	size_t count = 1;

	if (err) {
		printk("Bluetooth init failed (err %d)\n", err);
		return;
	}

	printk("Bluetooth initialized\n");

	/* Start advertising */
	err = bt_le_adv_start(BT_LE_ADV_PARAM(BT_LE_ADV_OPT_USE_IDENTITY, 0x07D0, 0x07FD, NULL), ad, ARRAY_SIZE(ad),
			      sd, ARRAY_SIZE(sd));
	if (err) {
		printk("Advertising failed to start (err %d)\n", err);
		return;
	}


	/* For connectable advertising you would use
	 * bt_le_oob_get_local().  For non-connectable non-identity
	 * advertising an non-resolvable private address is used;
	 * there is no API to retrieve that.
	 */

	bt_id_get(&addr, &count);
	bt_addr_le_to_str(&addr, addr_s, sizeof(addr_s));

	printk("Beacon started, advertising as %s\n", addr_s);
}

/*
 * RTD measurement constants. None of them is referenced by the code shown
 * in this file.
 * NOTE(review): 1000 and 1385 look like PT1000 resistances in ohms at 0 and
 * 100 degrees C, and ResBridge like a bridge resistor value - confirm.
 */
static const uint16_t RTD_0 = 1000;
static const uint16_t RTD_100 = 1385;
static const uint16_t ResBridge = 845;

#if BEACON_ALL_SENSOR_TESTING
/*
 * Convert a sensor_value (integer part val1, fractional part val2 in
 * millionths) to a fixed-point integer in hundredths, e.g. 23.450000 -> 2345.
 */
static int32_t sensor_value_to_centi(const struct sensor_value *val)
{
	return val->val1 * 100 + val->val2 / 10000;
}

/*
 * Fetch one sample from `dev` and read `num` channels into `values`.
 *
 * Returns 0 on success, or the first non-zero error returned by the sensor
 * API. On error the contents of `values` must not be used.
 */
static int read_channels(const struct device *dev,
			 const enum sensor_channel *channels,
			 struct sensor_value *values, size_t num)
{
	int err = sensor_sample_fetch(dev);

	if (err != 0) {
		LOG_ERR("Sample fetch from \"%s\" failed: %d", dev->name, err);
		return err;
	}

	for (size_t i = 0; i < num; i++) {
		err = sensor_channel_get(dev, channels[i], &values[i]);
		if (err != 0) {
			LOG_ERR("Channel %d read from \"%s\" failed: %d",
				(int)channels[i], dev->name, err);
			return err;
		}
	}

	return 0;
}

/*
 * Run one measurement cycle: read the BME280 and the LIS2 accelerometer,
 * print the values and publish them in the advertising / scan response data.
 *
 * If any sensor read fails the cycle is abandoned and the previously
 * advertised data is left untouched, so stale-but-valid data is never
 * replaced by uninitialized values.
 *
 * count: cycle counter byte placed in the advertising data.
 */
static void sample_and_advertise(const struct device *dev_bme,
				 const struct device *dev_lis, uint8_t count)
{
	static const enum sensor_channel env_channels[3] = {
		SENSOR_CHAN_AMBIENT_TEMP,
		SENSOR_CHAN_PRESS,
		SENSOR_CHAN_HUMIDITY,
	};
	static const enum sensor_channel accel_channels[3] = {
		SENSOR_CHAN_ACCEL_X,
		SENSOR_CHAN_ACCEL_Y,
		SENSOR_CHAN_ACCEL_Z,
	};
	struct sensor_value env[3];   /* temperature, pressure, humidity */
	struct sensor_value accel[3]; /* x, y, z */
	int err;

	if (read_channels(dev_bme, env_channels, env, ARRAY_SIZE(env)) != 0) {
		return;
	}

	printk("temp: %d.%06d; press: %d.%06d; humidity: %d.%06d\n",
	       env[0].val1, env[0].val2, env[1].val1, env[1].val2,
	       env[2].val1, env[2].val2);

	if (read_channels(dev_lis, accel_channels, accel, ARRAY_SIZE(accel)) != 0) {
		return;
	}

	printk("accelx: %d.%06d; accely: %d.%06d; accelz: %d.%06d\n",
	       accel[0].val1, accel[0].val2, accel[1].val1, accel[1].val2,
	       accel[2].val1, accel[2].val2);

	const int32_t tempaux = sensor_value_to_centi(&env[0]);
	const int32_t pressaux = sensor_value_to_centi(&env[1]);
	const int32_t humaux = sensor_value_to_centi(&env[2]);
	const int32_t accelxaux = sensor_value_to_centi(&accel[0]);
	const int32_t accelyaux = sensor_value_to_centi(&accel[1]);
	const int32_t accelzaux = sensor_value_to_centi(&accel[2]);

	/* Placeholders: always advertised as zero in this version. */
	const uint8_t checkMem = 0;
	const int16_t RTDTempInt = 0;

	/* Each value is sent as its two low-order bytes, little-endian. */
	struct bt_data ad_aux[] = {
		BT_DATA_BYTES(BT_DATA_FLAGS, BT_LE_AD_NO_BREDR),
		BT_DATA_BYTES(BT_DATA_UUID16_ALL, 0xE5, 0xFE),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x6E, 0x2A, (uint8_t)(tempaux & 0xFF), (uint8_t)(tempaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x6E, 0x2A, (uint8_t)(RTDTempInt & 0xFF), (uint8_t)(RTDTempInt >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x6F, 0x2A, (uint8_t)(humaux & 0xFF), (uint8_t)(humaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0xEB, 0x2A, count)
	};

	struct bt_data sd_aux[] = {
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x6D, 0x2A, (uint8_t)(pressaux & 0xFF), (uint8_t)(pressaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x13, 0x27, (uint8_t)(accelxaux & 0xFF), (uint8_t)(accelxaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x13, 0x27, (uint8_t)(accelyaux & 0xFF), (uint8_t)(accelyaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0x13, 0x27, (uint8_t)(accelzaux & 0xFF), (uint8_t)(accelzaux >> 8)),
		BT_DATA_BYTES(BT_DATA_SVC_DATA16, 0xEB, 0x2A, checkMem)
	};

	err = bt_le_adv_update_data(ad_aux, ARRAY_SIZE(ad_aux),
				    sd_aux, ARRAY_SIZE(sd_aux));
	if (err != 0) {
		/* E.g. Bluetooth not ready yet: bt_enable() completes asynchronously. */
		LOG_ERR("Advertising data update failed: %d", err);
	}
}
#endif /* BEACON_ALL_SENSOR_TESTING */

/*
 * Application entry point.
 *
 * Starts Bluetooth (advertising begins in bt_ready()), switches on the sensor
 * supply through the `sens_power_mosfet` GPIO, brings the BME280 up through a
 * device-PM SUSPEND -> RESUME cycle and then, every 10 seconds, samples the
 * sensors and refreshes the advertising data.
 *
 * Returns the GPIO error code if the supply pin cannot be driven, 0 if a
 * sensor is missing or the PM transitions fail; otherwise it never returns.
 *
 * NOTE: the MPU fault reported with this code was traced (see the discussion
 * following this listing) to the nrfx SPI driver's interrupt handler;
 * switching the sensor bus to the SPIM driver made it go away.
 */
int main(void)
{
	const struct gpio_dt_spec mosfet =
		GPIO_DT_SPEC_GET(DT_NODELABEL(sens_power_mosfet), gpios);
	int err;

	LOG_INF("Starting Beacon Demo\n");

	/* Initialize the Bluetooth Subsystem; bt_ready() runs on completion. */
	err = bt_enable(bt_ready);
	if (err) {
		LOG_ERR("Bluetooth init failed (err %d)\n", err);
	}

	printk("DONE\n");

	/* Power the sensors: configure the supply pin, then drive it active. */
	err = gpio_pin_configure_dt(&mosfet, GPIO_OUTPUT_INACTIVE);
	if (err != 0) {
		LOG_ERR("Function 'gpio_pin_configure_dt' for 'mosfet' pin err code: %d", err);
		return err;
	}

	err = gpio_pin_set_dt(&mosfet, 1);
	if (err != 0) {
		LOG_ERR("Function 'gpio_pin_set_dt' for 'mosfet' pin err code: %d", err);
		return err;
	}

	/* Give the supply and the sensors time to start up. */
	k_sleep(K_SECONDS(1));

	const struct device *dev_bme = get_bme280_device();

	if (dev_bme == NULL) {
		LOG_ERR("BME280 device not found");
		return 0;
	}

	/*
	 * The RESUME action is what re-runs the driver's chip init now that the
	 * sensor has power. A SUSPEND is issued first because a RESUME on its
	 * own is rejected with -EALREADY.
	 */
	err = pm_device_action_run(dev_bme, PM_DEVICE_ACTION_SUSPEND);
	if (err) {
		LOG_ERR("BME device pm OFF failed: %d", err);
		return 0;
	}

	k_sleep(K_SECONDS(1));

	err = pm_device_action_run(dev_bme, PM_DEVICE_ACTION_RESUME);
	if (err) {
		LOG_ERR("BME device pm RESUME failed: %d", err);
		return 0;
	}

	k_sleep(K_SECONDS(1));

	const struct device *dev_lis = get_lis2_device();

	if (dev_lis == NULL) {
		LOG_ERR("LIS2 accelerometer not available");
		return 0;
	}

#if BEACON_ALL_SENSOR_TESTING
	uint32_t count = 0;
#endif

	while (1) {
#if BEACON_ALL_SENSOR_TESTING
		sample_and_advertise(dev_bme, dev_lis, (uint8_t)count);
		count++;
#endif /* BEACON_ALL_SENSOR_TESTING */

		k_sleep(K_SECONDS(10));
	}

	return 0;
}

This is my config:

CONFIG_PM_DEVICE=y
CONFIG_PM_DEVICE_RUNTIME=y

CONFIG_BT=y
CONFIG_BT_DEBUG_LOG=y
CONFIG_BT_DEVICE_NAME="Test beacon" #Test beacon


#CONFIG_BT_CTLR_TX_PWR_PLUS_4=y
# CONFIG_BT_CTLR_TX_PWR_PLUS_3=y
CONFIG_BT_CTLR_TX_PWR_0=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_4=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_8=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_12=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_16=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_20=y
#CONFIG_BT_CTLR_TX_PWR_MINUS_40=y

CONFIG_SENSOR_LOG_LEVEL_DBG=y

CONFIG_MAIN_STACK_SIZE=16384

CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=4096

CONFIG_ISR_STACK_SIZE=4096

#CONFIG_LOG_PROCESS_THREAD_STACK_SIZE=2048

This is the error:

*** Booting nRF Connect SDK v2.7.0-5cb85570ca43 ***
*** Using Zephyr OS v3.6.99-100befc70c74 ***
[00:00:00.004,272] <inf> Log_Main: main: Starting Beacon Demo

[00:00:00.004,394] <inf> bt_sdc_hci_driver: hci_driver_open: SoftDevice Controller build revision:
                                            d6 da c7 ae 08 db 72 6f  2a a3 26 49 2a 4d a8 b3 |......ro *.&I*M..
                                            98 0e 07 7f                                      |....
[00:00:00.006,286] <inf> bt_hci_core: hci_vs_init: HW Platform: Nordic Semiconductor (0x0002)
[00:00:00.006,317] <inf> bt_hci_core: hci_vs_init: HW Variant: nRF52x (0x0002)
[00:00:00.006,378] <inf> bt_hci_core: hci_vs_init: Firmware: Standard Bluetooth controller (0x00) Version 214.51162 Build 1926957230
[00:00:00.007,232] <inf> bt_hci_core: bt_dev_show_info: Identity: E9:04:F6:A4:59:5D (random)
[00:00:00.007,263] <inf> bt_hci_core: bt_dev_show_info: HCI: version 5.4 (0x0d) revision 0x11fb, manufacturer 0x0059
[00:00:00.007,293] <inf> bt_hci_core: bt_dev_show_info: LMP: version 5.4 (0x0d) subver 0x11fb
Bluetooth initialized
Beacon started, advertising as E9:04:F6:A4:59:5D (random)
DONE
Found device "bme280@0", getting sensor data
[00:00:02.008,636] <dbg> BME280: bme280_chip_init: bad chip id 0x0
[36:16:02.036,315] <err> os: mem_manage_fault: ***** MPU FAULT *****
[36:16:02.078,552] <err> os: mem_manage_fault:   Data Access Violation
[36:16:02.085,937] <err> os: mem_manage_fault:   MMFAR Address: 0x20002bc0
[36:16:02.093,688] <err> os: esf_dump: r0/a1:  0x40004000  r1/a2:  0x20002528  r2/a3:  0x200024ff
[36:16:02.103,485] <err> os: esf_dump: r3/a4:  0x000006c1 r12/ip:  0x00000000 r14/lr:  0x00017881
[36:16:02.113,281] <err> os: esf_dump:  xpsr:  0x81003814
[36:16:02.119,537] <err> os: esf_dump: Faulting instruction address (r15/pc): 0x00017820
[36:16:02.128,570] <err> os: z_fatal_error: >>> ZEPHYR FATAL ERROR 19: Unknown error on CPU 0
[36:16:02.138,000] <err> os: z_fatal_error: Fault during interrupt handling

[36:16:02.145,935] <err> os: z_fatal_error: Current thread: 0x200016e8 (unknown)
[36:16:02.154,205] <err> os: k_sys_fatal_error_handler: Halting system

My problem is that the MPU FAULT only happens when the while loop is run as is. If one of the sensor_channel_get(...) calls or the BLE call is commented out, the MPU FAULT no longer appears.

This led me to believe that the main stack was too small, but I have already increased it.

Parents
  • Hello,

    My problem is that the MPU FAULT only happens when the while is ran as is. If a sensor_channel_get(...) function or the ble function are commented the MPU FAULT no longer appears.

    If you comment out the sensor_channel_get(), do you still see this in the log?

    <dbg> BME280: bme280_chip_init: bad chip id 0x0

    And what happens if you do not suspend and resume the BME device? 

    And if you run the application as is one more time, take note of the r14/lr and r15/pc registers (in this log, they were 0x00017881 and 0x00017820, respectively). Then, without re-building the application, run the commands:

    arm-none-eabi-addr2line -e build\zephyr\zephyr.elf 0x00017881

    arm-none-eabi-addr2line -e build\zephyr\zephyr.elf 0x00017820

    (but with the addresses from the latest log). What does it say? You may need to adjust the path to the .elf file if you are using sysbuild, and you need to install armgcc to use arm-none-eabi-addr2line. Alternatively, if you have a terminal with the toolchain from NCS, you can use arm-zephyr-eabi-addr2line. If you can't get the addr2line tool working, you can send me your build folder, and the addresses from the log in that build. 

    I don't suppose you are able to replicate the same behavior without the actual sensor, just using a DK?

    Best regards,

    Edvin

  • Thank you for the reply.

    If you comment out the sensor_channel_get(), do you still see this in the log?

    Yes commenting any code after the pm RESUME still gives this error

    And what happens if you do not suspend and resume the BME device?

    Removing the suspend gives error -120 (EALREADY). The errno text for it is "socket already connected", but here it means the device is already in the requested state.

    arm-none-eabi-addr2line stuff

    This points to line 297 of the v2.7.0/modules/hal/nordic/nrfx/drivers/src/nrfx_spi.c in function transfer_byte and this function is called in the irq_handler (same file) line 455.

    I don't suppose you are able to replicate the same behavior without the actual sensor, just using a DK?

    No clue, will try tomorrow

  • Can you try to use the SPIM driver instead of the SPI, for debugging purposes?

    Also, note that P0.10 is the NFC pin. I don't understand why it would behave any differently. Do you happen to use P0.09 for anything?

  • Will try to use the SPIM.

    Also, my bad, I'm not using P0.10 to control the CS of the BME; it's P0.11.

    P0.10 and P0.09 are in use but for other stuff.

    Just tried the SPIM driver and it works, so it's an SPI driver problem.

    My guess, the transfer_next_chunk() > nrfx_spi_xfer() function in spi_nrfx_spi.c is finishing too quickly.

  • Hello,

    Sorry for the late reply.

    The SPI doesn't have DMA, which handles your queues/buffers without the need of the CPU. Therefore, in most cases, the SPIM driver is preferred. 

    If you say that the nrfx_spi_xfer() finishes too quickly, it may be that it is not blocking, and hence only queues the transaction. Then your application goes on to the next SPI xfer before the first one finishes, messing up the data flow. Although I can't find any documentation of it, I believe I have heard something like the SPI driver is deprecated, and you should use the SPIM driver instead, as it has no disadvantages compared to the SPI driver. 

    So I believe the case is that since the SPI driver is not blocking, other drivers may not wait for a transaction to finish before calling the next transaction, causing the next transaction to return an error. 

    TheLittleFish said:
    nrfx_spi_xfer() function in spi_nrfx_spi.c is finishing too quickly.

    Indeed. It will finish/return before the transaction is complete, looking at the source code:

        if (p_cb->transfer_in_progress)
        {
            err_code = NRFX_ERROR_BUSY;
            NRFX_LOG_WARNING("Function: %s, error code: %s.",
                             __func__,
                             NRFX_LOG_ERROR_STRING_GET(err_code));
            return err_code;
        }
        else
        {
            ...

    So if it is not a problem, I suggest you use the SPIM driver. Since this uses DMA, it is capable of handling the buffer, adding to it instead of trying to hijack it.

    Best regards,

    Edvin

Reply
  • Hello,

    Sorry for the late reply.

    The SPI doesn't have DMA, which handles your queues/buffers without the need of the CPU. Therefore, in most cases, the SPIM driver is preferred. 

    If you say that the nrfx_spi_xfer() finishes too quickly, it may be that it is not blocking, and hence only queues the transaction. Then your application goes on to the next SPI xfer before the first one finishes, messing up the data flow. Although I can't find any documentation of it, I believe I have heard something like the SPI driver is deprecated, and you should use the SPIM driver instead, as it has no disadvantages compared to the SPI driver. 

    So I believe the case is that since the SPI driver is not blocking, other drivers may not wait for a transaction to finish before calling the next transaction, causing the next transaction to return an error. 

    TheLittleFish said:
    nrfx_spi_xfer() function in spi_nrfx_spi.c is finishing too quickly.

    Indeed. It will finish/return before the transaction is complete, looking at the source code:

        if (p_cb->transfer_in_progress)
        {
            err_code = NRFX_ERROR_BUSY;
            NRFX_LOG_WARNING("Function: %s, error code: %s.",
                             __func__,
                             NRFX_LOG_ERROR_STRING_GET(err_code));
            return err_code;
        }
        else
        {
            ...

    So if it is not a problem, I suggest you use the SPIM driver. Since this uses DMA, it is capable of handling the buffer, adding to it instead of trying to hijack it.

    Best regards,

    Edvin

Children
Related