cJSON + mqtt_publish on nRF9160

I've noticed that some of the samples/applications in NCS that use cJSON are:

  1. Generating a pointer to a string on the heap
  2. Then publishing that data (which ultimately at a lower level calls mqtt_publish)

So, for example, here's a snippet of code that doesn't quite work as expected:

                /* Encode */
                static struct app_cloud_message msg;
                err = app_codec_device_info_encode(&modem_info, &msg);
                if (err)
                {
                    LOG_ERR("Unable to encode boot time. Err: %i", err);
                    break;
                }

                LOG_INF("%s", msg.data);

                /* Publish */
                err = app_backend_publish(msg.data, msg.len);
                k_free(msg.data);

Which throws a nice hard fault when freeing that memory.

[00:00:06.054,901] <err> os: Exception occurred in Secure State
[00:00:06.061,431] <err> os: ***** HARD FAULT *****
[00:00:06.066,986] <err> os:   Fault escalation (see below)
[00:00:06.073,211] <err> os: ***** BUS FAULT *****
[00:00:06.078,674] <err> os:   Precise data bus error
[00:00:06.084,381] <err> os:   BFAR Address: 0x50008158
[00:00:06.090,270] <err> os: r0/a1:  0x00000010  r1/a2:  0x20025a6c  r2/a3:  0x20018f38
[00:00:06.098,937] <err> os: r3/a4:  0x00000020 r12/ip:  0x0000a000 r14/lr:  0x0003a3f7
[00:00:06.107,574] <err> os:  xpsr:  0x21000000
[00:00:06.112,792] <err> os: s[ 0]:  0x00000000  s[ 1]:  0x00000000  s[ 2]:  0x00000000  s[ 3]:  0x00000000
[00:00:06.123,168] <err> os: s[ 4]:  0x00000000  s[ 5]:  0x00000000  s[ 6]:  0x00000000  s[ 7]:  0x00000000
[00:00:06.133,575] <err> os: s[ 8]:  0x00000000  s[ 9]:  0x00000000  s[10]:  0x00000000  s[11]:  0x00000000
[00:00:06.143,951] <err> os: s[12]:  0x00000000  s[13]:  0x00000000  s[14]:  0x00000000  s[15]:  0x00000000
[00:00:06.154,357] <err> os: fpscr:  0x00026be8
[00:00:06.159,545] <err> os: Faulting instruction address (r15/pc): 0x0003a904
[00:00:06.167,419] <err> os: >>> ZEPHYR FATAL ERROR 0: CPU exception on CPU 0
[00:00:06.175,201] <err> os: Current thread: 0x20015578 (unknown)

That leads me to think that somewhere along the line the socket should be configured not to be async. (I'm assuming mqtt_publish is returning immediately, which causes k_free to be called on data that is still in use, hence the hard fault.)

It looks like it was done in both of the asset tracker samples but I'm not sure where.

Here's my mqtt configuration:

static int client_init(struct mqtt_client *client, char *p_client_id, size_t client_id_sz)
{

    int err;

    mqtt_client_init(client);

    err = broker_init();
    if (err != 0)
        return err;

    /* MQTT client configuration */
    client->broker = &broker;
    client->evt_cb = mqtt_evt_handler;
    client->client_id.utf8 = p_client_id;
    client->client_id.size = client_id_sz;
    client->protocol_version = MQTT_VERSION_3_1_1;

/* MQTT user name and password */
#ifdef CONFIG_BACKEND_PASSWORD
    mqtt_password.utf8 = CONFIG_BACKEND_PASSWORD;
    mqtt_password.size = strlen(CONFIG_BACKEND_PASSWORD);
    client->password = &mqtt_password;
#else
    client->password = NULL;
#endif

#ifdef CONFIG_BACKEND_USER_NAME
    mqtt_user_name.utf8 = CONFIG_BACKEND_USER_NAME;
    mqtt_user_name.size = strlen(CONFIG_BACKEND_USER_NAME);
    client->user_name = &mqtt_user_name;
#else
    client->user_name = NULL;
#endif

    /* MQTT buffers configuration */
    client->rx_buf = rx_buffer;
    client->rx_buf_size = sizeof(rx_buffer);
    client->tx_buf = tx_buffer;
    client->tx_buf_size = sizeof(tx_buffer);

    /* MQTT transport configuration */
    /* struct mqtt_sec_config *tls_config = &client->transport.tls.config; */

    client->transport.type = MQTT_TRANSPORT_NON_SECURE;

I also turned on timeouts for giggles to see if that did it.

void app_backend_poll(void)
{
    int err;

start:
    k_sem_take(&connection_poll_sem, K_FOREVER);
    atomic_set(&connection_poll_active, 1);

    /* MQTT client create */
    err = client_init(&client, m_client_id.str, m_client_id.len);
    if (err != 0)
    {
        LOG_ERR("client_init %d", err);

        /* Send to callback */
        if (err == -EAGAIN)
        {
            /* Wait and retry */
            k_sleep(K_MSEC(1000));
            k_sem_give(&connection_poll_sem);
            goto start;
        }
        else
        {
            /* Send error event */
            struct app_event app_evt = {
                .type = APP_EVENT_BACKEND_ERROR,
                .err = err,
            };
            app_event_manager_push(&app_evt);
        }

        goto reset;
    }

    /* Connect to MQTT */
    err = mqtt_connect(&client);
    if (err != 0)
    {
        LOG_ERR("mqtt_connect %d", err);

        /* Send error event */
        struct app_event app_evt = {
            .type = APP_EVENT_BACKEND_ERROR,
            .err = err,
        };
        app_event_manager_push(&app_evt);

        goto reset;
    }

    struct timeval timeout = {
        .tv_sec = 60
    };

    /* Set FDS info */
    fds.fd = client.transport.tcp.sock;
    fds.events = POLLIN;

    /* Set timeout for sending data */
    err = setsockopt(fds.fd, SOL_SOCKET, SO_SNDTIMEO,
                &timeout, sizeof(timeout));
    if (err == -1) {
        LOG_ERR("Failed to set timeout, errno: %d", errno);
    } else {
        LOG_INF("Using socket send timeout of %d seconds",
            60);
    }

But no dice. It's probably something silly, but I haven't quite figured it out yet. Any ideas are sincerely appreciated.

Jared

P.S. running B0 silicon, NCS 1.7.0 and MFW 1.3.0

P.P.S. I am running mqtt_publish from the main thread. The MQTT poll thread is separate and configured like so:

/* Stack size for the backend thread.
 * NOTE(review): KB(2) is presumably 2 KiB - confirm against the KB() macro.
 */
#define BACKEND_THREAD_STACK_SIZE KB(2)
/* Statically define the backend thread: app_backend_poll is the entry point,
 * all three entry arguments are NULL, and the priority is
 * K_LOWEST_APPLICATION_THREAD_PRIO.
 * NOTE(review): the trailing "0, 0" are presumably the thread options and the
 * start delay - confirm against the K_THREAD_DEFINE documentation.
 */
K_THREAD_DEFINE(app_backend_thread, BACKEND_THREAD_STACK_SIZE,
                app_backend_poll, NULL, NULL, NULL,
                K_LOWEST_APPLICATION_THREAD_PRIO, 0, 0);

Parents Reply Children
No Data
Related