Debug a program crashing and resetting back to start? spi_transceive(spi_dev, &spi_cfg, &tx, &rx) just crashes and debugger starts back at beginning.

I'm trying to make some driver code to talk to a flash chip via SPI.  I'm using the nrf5340 and SDK 2.0.0.  I'm also doing this for the secure board environment the DK app, not the DK app_NS. 

The simple SPI transceive that seems to work is:

/* Private define ------------------------------------------------------------*/

#define FLASH_TEST_DATA_SIZE 1024

/* Private variables ---------------------------------------------------------*/
static const struct device * spi_dev;

static struct spi_cs_control spi_cs = {
    .gpio_dev = NULL,
    .gpio_pin = 18,
    .gpio_dt_flags = GPIO_ACTIVE_LOW,
    .delay = 0,
  };

static struct spi_config spi_cfg = {
    .operation = SPI_WORD_SET(8) | SPI_TRANSFER_MSB,
    .frequency = 8000000,
    .slave = 0,
    .cs = &spi_cs,
};

static const struct device *gpioPort0;

static uint8_t m_buffer_tx[FLASH_TEST_DATA_SIZE] = {0};
static uint8_t m_buffer_rx[FLASH_TEST_DATA_SIZE] = {0};

uint8_t buffer[256 * 3] = {0};

static bool flashSuccessfullyInitialized = false;

    static struct spi_buf tx_buf = {
        .buf = m_buffer_tx,
        .len = sizeof(m_buffer_tx)
    };
    static const struct spi_buf_set tx = {
        .buffers = &tx_buf,
        .count = 1
    };

    static struct spi_buf rx_buf = {
        .buf = m_buffer_rx,
        .len = sizeof(m_buffer_rx),
    };
    static const struct spi_buf_set rx = {
        .buffers = &rx_buf,
        .count = 1
    };

/* Functions -----------------------------------------------------------------*/
/**
 * Bring up the SPI link to the flash chip.
 *
 * Clears the global chip description, looks up the SPI controller and the
 * GPIO port used for chip select, stores the GPIO port in spi_cs, and then
 * issues a first full-buffer transfer.
 *
 * Returns early (leaving flashSuccessfullyInitialized false) when either
 * device binding cannot be obtained.
 *
 * NOTE(review): the listing ends right after the first spi_transceive()
 * call, so the handling of `err` and the end of the function are not
 * visible here.
 */
void FlashInit(void)
{
    /* One definition of the controller label, so the lookup and the error
     * message can never name different devices. */
    static const char spiLabel[] = "SPI_3";

    memset(&chip, 0, sizeof(chip));
    flashSuccessfullyInitialized = false;

    spi_dev = device_get_binding(spiLabel);
    if (spi_dev == NULL)
    {
        printk("Could not get %s device\n", spiLabel);
        return;
    }
    printk("Flash SPI binding good\n");

    gpioPort0 = device_get_binding("GPIO_0");
    if (gpioPort0 == NULL)
    {
        printk("Could not get gpio device\n");
        return;
    }
    printk("gpio0 device binding good\n");

    /* The chip-select descriptor was defined with a NULL port; complete it
     * now that the port handle is known. */
    spi_cs.gpio_dev = gpioPort0;

    int err = spi_transceive(spi_dev, &spi_cfg, &tx, &rx);

And if you spend the time to really look.

spi_cs.gpio_dev is deprecated but SPI doesn't work without it, so if anyone knows what to do about this, I'd love to know.  

The .len fields in both structs are set to the size of the arrays, so when I call spi_transceive I can see on the logic analyzer that 1024 bytes get clocked out.  That works fine. 

I'm trying to read 768 bytes out of my flash chip, so I call a read function. I wrapped the call in this loop to find out at which length it fails:

    for (int x = 510; x < (256 * 3); x++)
    {
        __NOP();
        __NOP();
        spi_nand_read_from_cache(0x01, 0, x);
    }

I have this struct and function

/*
 * One command to be sent to the flash by spi_nand_xfer().
 *
 * spi_nand_xfer() sends `cmd` first, then `n_addr` bytes from `addr`,
 * then any dummy bytes required by the command configuration, then
 * `n_tx` bytes from `tx_buf`; afterwards it copies `n_rx` received bytes
 * into `rx_buf`.
 */
struct spi_nand_cmd {
    uint8_t        cmd;                         /* Command byte (first byte on the wire) */
    uint8_t        n_addr;                      /* Number of address bytes used in addr[] */
    uint8_t        addr[SPINAND_MAX_ADDR_LEN];  /* Address / register offset bytes */
    uint32_t       n_tx;                        /* Number of bytes to transmit from tx_buf */
    const uint8_t *tx_buf;                      /* Data to transmit */
    uint32_t       n_rx;                        /* Number of bytes to receive into rx_buf */
    uint8_t       *rx_buf;                      /* Destination for received data */
};
/**
 * Read `len` bytes starting at `column` into the global `buffer`.
 *
 * Builds a command using the chip's read-from-cache opcode with a two-byte
 * column address (high byte first) and hands it to spi_nand_issue_cmd().
 *
 * @param page_addr  Not used by this command; kept so the signature stays
 *                   the same for existing callers.
 * @param column     Start offset; only the low 16 bits are sent.
 * @param len        Number of bytes to read; must fit in `buffer`.
 *
 * @return -EINVAL if `len` is larger than `buffer`, otherwise the result
 *         of spi_nand_issue_cmd().
 */
static int spi_nand_read_from_cache(uint32_t page_addr, uint32_t column, size_t len)
{
    struct spi_nand_cmd cmd;

    (void)page_addr;

    /* The received bytes are copied into `buffer`, so a longer request
     * would write past its end. */
    if (len > sizeof(buffer))
    {
        return -EINVAL;
    }

    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = chip.read_cache_op;
    cmd.n_addr = 2;
    cmd.addr[0] = (uint8_t)(column >> 8);
    cmd.addr[1] = (uint8_t)column;
    cmd.n_rx = (uint32_t)len;
    cmd.rx_buf = buffer;

    return spi_nand_issue_cmd(&cmd);
}
/**
 * Look up the configuration for a command and run the transfer.
 *
 * @param cmd  Command to send; cmd->cmd selects the entry in
 *             alliance_cmd_cfg_table.
 *
 * @return -EINVAL when the command has no entry in the table, otherwise
 *         whatever spi_nand_xfer() returns, so a failed transfer is
 *         reported to the caller instead of being dropped.
 */
int spi_nand_issue_cmd(struct spi_nand_cmd *cmd)
{
    struct spi_nand_cmd_cfg *cmd_cfg =
        spi_nand_lookup_cmd_cfg_table(cmd->cmd, alliance_cmd_cfg_table);

    if (cmd_cfg == NULL)
    {
        return -EINVAL;
    }

    return spi_nand_xfer(cmd, cmd_cfg);
}

int spi_nand_xfer(struct spi_nand_cmd *cmd, struct spi_nand_cmd_cfg *cmd_cfg)
{
    // clear out our buffer
    //memset(m_buffer_tx,0,sizeof(m_buffer_tx));
    memset(m_buffer_rx,0,sizeof(m_buffer_rx));

    size_t totalLength = 0;
    size_t beginningOfRxData = 0;

    // set the command
    m_buffer_tx[0] = cmd->cmd;
    totalLength = 1;  // cause that's what it has to be to issue a command

    if (cmd->n_addr > 0)
    {
        totalLength = totalLength + cmd->n_addr;

        for (int x = 0; x < cmd->n_addr; x++)
        {
            m_buffer_tx[x+1] = cmd->addr[x];
        }
    }

    if (cmd_cfg->dummy_bytes > 0)
    {
        for (int x = 0; x < cmd_cfg->dummy_bytes; x++)
        {
            // add and zero out the number of dummy bytes
            m_buffer_tx[totalLength+x] = 0;
            totalLength++;
            // should only be 1 in 1 case, but in case we need to add more.
        }
    }

    if (cmd->n_tx > 0)
    {
        for (int x = 0; x < cmd->n_tx; x++)
        {
            // if there is a number of things to TX, add them
            // to our transmit buffer here
            m_buffer_tx[totalLength+x] = cmd->tx_buf[x];
            totalLength++;
        }
    }

    if (cmd->n_rx > 0)
    {
        // mark where in the list our RX stuff should start
        beginningOfRxData = totalLength;

        for (int x = 0; x < cmd->n_rx; x++)
        {
            // if there is a number of things to RX, we just
            // need place holders to get the data
            m_buffer_tx[totalLength+x] = 0;
            totalLength++;
        }  
    }

    // this is where we tell our SPI how much stuff we tx/rx
    tx_buf.len = totalLength;
    rx_buf.len = totalLength;

    int err = spi_transceive(spi_dev, &spi_cfg, &tx, &rx);

    if (err) {
        printk("QSPI error: %d\n", err);
    }
    else
    {
        printk("QSPI success! Command 0x%02X, length: %d\n",cmd->cmd, totalLength);

        if (cmd->n_rx > 0)
        {
            // we were looking for some data back
            for (int x = 0; x < cmd->n_rx; x++)
            {
                cmd->rx_buf[x] = m_buffer_rx[beginningOfRxData+x];
            }  
        }

    }

    return 0;
}

I know that's a lot of code to look through.  But it's not really doing anything too crazy. 

The problem is that when I want to get 510 bytes from SPI (and it adds in 4 header bytes, 1 command and 3 address) for a total of 514 bytes of communication, it works fine.  But if I call:

spi_nand_read_from_cache(0x01, 0, x);

where x = 511, for a total of 515 bytes of SPI transmission.  The whole thing crashes.  I can debug through

spi_nand_xfer(cmd, cmd_cfg);

and all the way down to

int err = spi_transceive(spi_dev, &spi_cfg, &tx, &rx);

I can debug to here

Then if I step into the spi_transceive() function

it goes to here

Then here

and then one more step it jumps back out to the beginning of my init code, with no SPI output. 

How do I debug this?  I get no information about why it's moving to this point in the code.  It's basically the first breakpoint that I put in the code that gets hit when I was debugging.  It goes from spi_transceive right to the first breakpoint I set. I don't know if it's crashing or rebooting.  And it's not like there is any allocated memory.  If I make the loop go from 1 byte of data to 768 bytes, it will transmit over SPI 1 byte, then 2, then 3, ... etc. up to 510, and then when I hit 511 it crashes.  If I'm not debugging, my whole application reboots and starts over.  

I saw someone say to try putting this in my prj.conf file

CONFIG_REBOOT=n

but it still reboots. 

Anyone know how to debug some code that just reboots and doesn't give any idea as to why?

Parents
  • Hi,

    Do you have logging enabled in your application? The log will usually give you information about the error that caused the reset. Reset is a typical way to recover the application in case of an error/assert/hardfault.

    Do I understand you correctly that the reset occurs when you get to the memset in the last picture? What is this "chip" parameter? Could there be some buffer/stack overflow causing issues with larger transfers?

    If you can post your full application, it may be easier to see what is going on.

    Best regards,
    Jørgen

Reply
  • Hi,

    Do you have logging enabled in your application? The log will usually give you information about the error that caused the reset. Reset is a typical way to recover the application in case of an error/assert/hardfault.

    Do I understand you correctly that the reset occurs when you get to the memset in the last picture? What is this "chip" parameter? Could there be some buffer/stack overflow causing issues with larger transfers?

    If you can post your full application, it may be easier to see what is going on.

    Best regards,
    Jørgen

Children
  • Logging?  I don't think so.  How do I do that? 

    And the reset occurs when I call the

    spi_transceive(spi_dev, &spi_cfg, &tx, &rx);

    Not the memset.  And the memset is actually setting the rx buffer to all 0's, and it's a global array that's fixed.  And it doesn't grow or shrink and it's set every time even if I'm only expecting 1 byte of transfer (yes wasteful I know, but still trying to figure this out).

    This chip parameter is a struct which is also global, and it's a holdover from the driver code that I'm trying to use.  It's a way for the code to query the SPI flash chip that I'm talking to, figure out its information, and then use that info moving forward. 

    struct spi_nand_chip {
        char        *name;
        struct spi_slave    *spi;

        uint8_t     mfr_id;
        uint8_t     dev_id;
        uint8_t     read_cache_op;
        uint8_t     write_cache_op;
        uint8_t     write_cache_rdm_op;

        uint8_t     *oobbuf;
        uint64_t        size;
        uint32_t        block_size;
        uint16_t        page_size;
        uint16_t        oob_size;
        uint8_t     lun_shift;
        uint8_t     block_shift;
        uint8_t     page_shift;
        uint16_t        page_mask;
        uint32_t        options;
        uint32_t        ecc_strength;
        uint8_t     refresh_threshold;
        uint8_t     lun;
        struct nand_ecclayout *ecclayout;
        struct spi_nand_onfi_params  onfi_params;
    };

    When the code starts I blow that out and then set some parameters, what you saw in the code above was just a way to set the command byte I wanted to send.  And that's set right and there didn't seem to be any weird memory shenanigans going on with that. 

    Could there be buffer/stack overflow issues?  Yes anything is possible.  I believe all the buffers I use are fixed in size, larger than they need to be and global.

    /* Size in bytes of each SPI scratch buffer. */
    #define FLASH_TEST_DATA_SIZE 1024

    /* Fixed-size scratch buffers; static storage, so they start zero-filled. */
    static uint8_t m_buffer_tx[FLASH_TEST_DATA_SIZE];
    static uint8_t m_buffer_rx[FLASH_TEST_DATA_SIZE];

    /* 768-byte destination for data read back from the flash. */
    uint8_t buffer[3 * 256];

    They don't grow and shrink, and the crashing happens when I call spi_transceive.  If it failed when doing a memset or something, that would make sense.  But the act of calling spi_transceive just reboots. 

    I don't know if I'm doing this part right, but I don't know how to check stack usage, or heap usage in my application (I've asked about that in a different post) but in my prj.conf I did this

    CONFIG_MAIN_STACK_SIZE=16384
    CONFIG_HEAP_MEM_POOL_SIZE=8192

    Is that enough?  I would hope so.  But if anyone knows how to check memory usage, I'd be all ears as well.

    Post my full application?  Sure, I will try in another post below.

  • Attempt to share source code.  Hopefully this works.  Main() goes right to FlashInit() in flash.c (all the other stuff I left in for now but it's commented out).  FlashInit() does an spi_transceive with 1024 bytes and that works.  And the real crash comes in the for loop inside

    spi_nand_detect_onfi()

    Patch.zip

Related