This post is older than 2 years and might not be relevant anymore
More Info: Consider searching for newer posts

Mqtt_connect -60 error

Hello,

I am working on the MQTT sample from ncs\nrf\samples\nrf9160\mqtt_simple. Most of the time the nRF9160 connects to the MQTT server on the first attempt, but sometimes mqtt_connect returns error -60 (connection timed out). The code then tries to reconnect, but every subsequent attempt returns the same error.

The code:

/*
 * Copyright (c) 2018 Nordic Semiconductor ASA
 *
 * SPDX-License-Identifier: LicenseRef-BSD-5-Clause-Nordic
 */

#include <zephyr.h>
#include <stdio.h>
#include <uart.h>
#include <string.h>

#include <net/mqtt.h>
#include <net/socket.h>
#include <lte_lc.h>

#define APP_CONNECT_TRIES	10
#define APP_SLEEP_MSECS		500

/* Buffers for MQTT client. */
static u8_t rx_buffer[CONFIG_MQTT_MESSAGE_BUFFER_SIZE];
static u8_t tx_buffer[CONFIG_MQTT_MESSAGE_BUFFER_SIZE];
static u8_t payload_buf[CONFIG_MQTT_PAYLOAD_BUFFER_SIZE];

/* The mqtt client struct */
static struct mqtt_client client;

/* MQTT Broker details. */
static struct sockaddr_storage broker;

/* Connected flag */
static bool connected;

/* File descriptor */
static struct pollfd fds;

#if defined(CONFIG_BSD_LIBRARY)

/**@brief Recoverable BSD library error.
 *
 * Logs the error code and returns to the caller.
 *
 * @param err Error code handed to the handler.
 */
void bsd_recoverable_error_handler(uint32_t err)
{
	/* Cast so the argument matches %u however uint32_t is defined on the
	 * target, the same way the other %u prints in this file do.
	 */
	printk("bsdlib recoverable error: %u\n", (unsigned int)err);
}

/**@brief Irrecoverable BSD library error.
 *
 * Logs the error code and then trips an assertion.
 *
 * @param err Error code handed to the handler.
 */
void bsd_irrecoverable_error_handler(uint32_t err)
{
	/* Cast so the argument matches %u however uint32_t is defined on the
	 * target, the same way the other %u prints in this file do.
	 */
	printk("bsdlib irrecoverable error: %u\n", (unsigned int)err);

	__ASSERT_NO_MSG(false);
}

#endif /* defined(CONFIG_BSD_LIBRARY) */

/**@brief Print a prefix followed by a byte buffer that is not
 * null-terminated.
 *
 * @param prefix Null-terminated string printed first.
 * @param data   Bytes to print; need not be null-terminated.
 * @param len    Number of bytes of @p data to print.
 */
static void data_print(u8_t *prefix, u8_t *data, size_t len)
{
	/* One extra byte for the terminator written below. */
	char text[len + 1];

	text[len] = '\0';
	memcpy(text, data, len);

	printk("%s%s\n", prefix, text);
}

/**@brief Publish a payload on CONFIG_MQTT_PUB_TOPIC.
 *
 * @param c    MQTT client to publish with.
 * @param qos  Quality of service level for the message.
 * @param data Payload bytes.
 * @param len  Number of payload bytes.
 *
 * @return The value returned by mqtt_publish().
 */
static int data_publish(struct mqtt_client *c, enum mqtt_qos qos,
	u8_t *data, size_t len)
{
	const size_t topic_len = strlen(CONFIG_MQTT_PUB_TOPIC);
	struct mqtt_publish_param msg;

	/* Message identifier and flags. */
	msg.message_id = sys_rand32_get();
	msg.retain_flag = 0;
	msg.dup_flag = 0;

	/* Payload. */
	msg.message.payload.len = len;
	msg.message.payload.data = data;

	/* Destination topic. */
	msg.message.topic.topic.size = topic_len;
	msg.message.topic.topic.utf8 = CONFIG_MQTT_PUB_TOPIC;
	msg.message.topic.qos = qos;

	data_print("Publishing: ", data, len);
	printk("to topic: %s len: %u\n",
		CONFIG_MQTT_PUB_TOPIC,
		(unsigned int)topic_len);

	return mqtt_publish(c, &msg);
}

/**@brief Function to subscribe to the configured topic
 */
static int subscribe(void)
{
	struct mqtt_topic subscribe_topic = {
		.topic = {
			.utf8 = CONFIG_MQTT_SUB_TOPIC,
			.size = strlen(CONFIG_MQTT_SUB_TOPIC)
		},
		.qos = MQTT_QOS_1_AT_LEAST_ONCE
	};

	const struct mqtt_subscription_list subscription_list = {
		.list = &subscribe_topic,
		.list_count = 1,
		.message_id = 1234
	};

	printk("Subscribing to: %s len %u\n", CONFIG_MQTT_SUB_TOPIC,
		(unsigned int)strlen(CONFIG_MQTT_SUB_TOPIC));

	return mqtt_subscribe(&client, &subscription_list);
}

/**@brief Read the payload of a received publish message into payload_buf.
 *
 * Keeps reading until @p length bytes have been stored. When the read
 * reports -EAGAIN, waits on the socket with poll() and tries again.
 *
 * @param c      MQTT client the message was received on.
 * @param length Payload length in bytes.
 *
 * @retval 0         The whole payload was read.
 * @retval -EMSGSIZE @p length does not fit in payload_buf.
 * @retval -EIO      A read returned 0 bytes, or poll() did not report
 *                   readable data.
 * @return Any other negative value returned by
 *         mqtt_read_publish_payload().
 */
static int publish_get_payload(struct mqtt_client *c, size_t length)
{
	u8_t *buf;
	u8_t *end;

	/* Check the size before forming buf + length: a pointer past the end
	 * of payload_buf must not be computed at all.
	 */
	if (length > sizeof(payload_buf)) {
		return -EMSGSIZE;
	}

	buf = payload_buf;
	end = buf + length;

	while (buf < end) {
		int ret = mqtt_read_publish_payload(c, buf, end - buf);

		if (ret < 0) {
			int err;

			if (ret != -EAGAIN) {
				return ret;
			}

			printk("mqtt_read_publish_payload: EAGAIN\n");

			/* Wait for more data before retrying the read. */
			err = poll(&fds, 1, K_SECONDS(CONFIG_MQTT_KEEPALIVE));
			if (err > 0 && (fds.revents & POLLIN) == POLLIN) {
				continue;
			}

			return -EIO;
		}

		if (ret == 0) {
			return -EIO;
		}

		buf += ret;
	}

	return 0;
}

/**@brief MQTT client event handler.
 *
 * Registered as the client's event callback in client_init().
 *
 * - CONNACK: on success marks the client connected and subscribes.
 * - DISCONNECT: clears the connected flag.
 * - PUBLISH: reads the payload and echoes it back on the publish topic;
 *   disconnects the client if the payload cannot be read.
 * - PUBACK / SUBACK: logs the packet id or the reported error.
 *
 * @param c   Client the event belongs to.
 * @param evt Event to handle.
 */
void mqtt_evt_handler(struct mqtt_client *const c,
		      const struct mqtt_evt *evt)
{
	int err;

	switch (evt->type) {
	case MQTT_EVT_CONNACK:
		if (evt->result != 0) {
			printk("MQTT connect failed %d\n", evt->result);
			break;
		}

		connected = true;
		printk("[%s:%d] MQTT client connected!\n", __func__, __LINE__);

		/* Report a failed subscription instead of dropping it. */
		err = subscribe();
		if (err) {
			printk("subscribe failed: %d\n", err);
		}
		break;

	case MQTT_EVT_DISCONNECT:
		printk("[%s:%d] MQTT client disconnected %d\n", __func__,
		       __LINE__, evt->result);

		connected = false;
		break;

	case MQTT_EVT_PUBLISH: {
		const struct mqtt_publish_param *p = &evt->param.publish;

		printk("[%s:%d] MQTT PUBLISH result=%d len=%d\n", __func__,
		       __LINE__, evt->result, p->message.payload.len);
		err = publish_get_payload(c, p->message.payload.len);
		if (err >= 0) {
			data_print("Received: ", payload_buf,
				p->message.payload.len);
			/* Echo back received data */
			err = data_publish(&client, MQTT_QOS_1_AT_LEAST_ONCE,
				payload_buf, p->message.payload.len);
			if (err) {
				printk("data_publish failed: %d\n", err);
			}
		} else {
			printk("mqtt_read_publish_payload: Failed! %d\n", err);
			printk("Disconnecting MQTT client...\n");

			err = mqtt_disconnect(c);
			if (err) {
				printk("Could not disconnect: %d\n", err);
			}
		}
	} break;

	case MQTT_EVT_PUBACK:
		if (evt->result != 0) {
			printk("MQTT PUBACK error %d\n", evt->result);
			break;
		}

		printk("[%s:%d] PUBACK packet id: %u\n", __func__, __LINE__,
				evt->param.puback.message_id);
		break;

	case MQTT_EVT_SUBACK:
		if (evt->result != 0) {
			printk("MQTT SUBACK error %d\n", evt->result);
			break;
		}

		printk("[%s:%d] SUBACK packet id: %u\n", __func__, __LINE__,
				evt->param.suback.message_id);
		break;

	default:
		printk("[%s:%d] default: %d\n", __func__, __LINE__,
				evt->type);
		break;
	}
}

/**@brief Resolves the configured hostname and
 * initializes the MQTT broker structure
 *
 * Looks up CONFIG_MQTT_BROKER_HOSTNAME and copies the first IPv4 result,
 * together with CONFIG_MQTT_BROKER_PORT, into the file-level broker
 * address. Failures are only logged; broker is left untouched in that
 * case.
 */
static void broker_init(void)
{
	int err;
	bool found = false;
	struct addrinfo *result;
	struct addrinfo *addr;
	struct addrinfo hints = {
		.ai_family = AF_INET,
		.ai_socktype = SOCK_STREAM
	};

	err = getaddrinfo(CONFIG_MQTT_BROKER_HOSTNAME, NULL, &hints, &result);
	if (err) {
		printk("ERROR: getaddrinfo failed %d\n", err);

		return;
	}

	/* Look for address of the broker. Every entry of the result list is
	 * examined until an IPv4 address is found.
	 */
	for (addr = result; addr != NULL; addr = addr->ai_next) {
		/* IPv4 Address. */
		if (addr->ai_addrlen == sizeof(struct sockaddr_in)) {
			struct sockaddr_in *broker4 =
				((struct sockaddr_in *)&broker);
			char ipv4_addr[NET_IPV4_ADDR_LEN];

			broker4->sin_addr.s_addr =
				((struct sockaddr_in *)addr->ai_addr)
				->sin_addr.s_addr;
			broker4->sin_family = AF_INET;
			broker4->sin_port = htons(CONFIG_MQTT_BROKER_PORT);

			inet_ntop(AF_INET, &broker4->sin_addr.s_addr,
				  ipv4_addr, sizeof(ipv4_addr));
			printk("IPv4 Address found %s\n", ipv4_addr);

			found = true;
			break;
		}

		printk("ai_addrlen = %u should be %u or %u\n",
			(unsigned int)addr->ai_addrlen,
			(unsigned int)sizeof(struct sockaddr_in),
			(unsigned int)sizeof(struct sockaddr_in6));
	}

	if (!found) {
		printk("ERROR: no IPv4 address found for broker\n");
	}

	/* Free the address. */
	freeaddrinfo(result);
}

/**@brief Prepare an MQTT client structure for a new connection.
 *
 * Resets the client with mqtt_client_init(), resolves the broker address
 * through broker_init(), then fills in the identity, buffers and
 * transport type.
 *
 * @param client Client structure to set up.
 */
static void client_init(struct mqtt_client *client)
{
	mqtt_client_init(client);
	broker_init();

	/* Non-secure transport. */
	client->transport.type = MQTT_TRANSPORT_NON_SECURE;

	/* Receive and transmit buffers. */
	client->tx_buf_size = sizeof(tx_buffer);
	client->tx_buf = tx_buffer;
	client->rx_buf_size = sizeof(rx_buffer);
	client->rx_buf = rx_buffer;

	/* Protocol version and credentials; no user name or password. */
	client->protocol_version = MQTT_VERSION_3_1_1;
	client->user_name = NULL;
	client->password = NULL;

	/* Client id, event callback and broker address. */
	client->client_id.size = strlen(CONFIG_MQTT_CLIENT_ID);
	client->client_id.utf8 = (u8_t *)CONFIG_MQTT_CLIENT_ID;
	client->evt_cb = mqtt_evt_handler;
	client->broker = &broker;
}

/**@brief Point the file-level pollfd at the client's socket.
 *
 * Selects the TCP or TLS socket according to the transport type and
 * requests POLLIN events.
 *
 * @param c Client whose socket should be polled.
 *
 * @retval 0        fds was set up.
 * @retval -ENOTSUP The transport is not non-secure and
 *                  CONFIG_MQTT_LIB_TLS is not defined.
 */
static int fds_init(struct mqtt_client *c)
{
	if (c->transport.type != MQTT_TRANSPORT_NON_SECURE) {
#if defined(CONFIG_MQTT_LIB_TLS)
		fds.fd = c->transport.tls.sock;
#else
		return -ENOTSUP;
#endif
	} else {
		fds.fd = c->transport.tcp.sock;
	}

	fds.events = POLLIN;

	return 0;
}

/**@brief Configures modem to provide LTE link.
 *
 * Does nothing when CONFIG_LTE_AUTO_INIT_AND_CONNECT is enabled. Otherwise
 * calls lte_lc_init_and_connect() and logs whether the link came up. A
 * failure trips the assertion; if execution continues past it, the
 * failure is logged and the function returns without a link.
 */
static void modem_configure(void)
{
#if defined(CONFIG_LTE_LINK_CONTROL)
	if (IS_ENABLED(CONFIG_LTE_AUTO_INIT_AND_CONNECT)) {
		/* Do nothing, modem is already turned on
		 * and connected.
		 */
	} else {
		int err;

		printk("LTE Link Connecting ...\n");
		err = lte_lc_init_and_connect();
		__ASSERT(err == 0, "LTE link could not be established.");
		if (err) {
			printk("Could not establish link\n");
		} else {
			/* Only claim success when the call succeeded. */
			printk("LTE Link Connected!\n");
		}
	}
#endif
}


static int try_to_connect(struct mqtt_client *c)
{
	int rc, i = 0;

	while (i++ < APP_CONNECT_TRIES && !connected) {

		client_init(c);

		rc = mqtt_connect(c);
		if (rc != 0) {
			printk("mqtt_connect %d\n", rc);
			k_sleep(APP_SLEEP_MSECS);
			continue;
		}


		rc = fds_init(c);
		if (rc != 0) {
			printk("ERROR: fds_init %d\n", rc);
			return -EINVAL;
		}

		if (poll(&fds, 1, K_SECONDS(CONFIG_MQTT_KEEPALIVE)) < 0) {
			printk("poll error: %d\n", errno);
		}
		mqtt_input(c);

		if (!connected) {
			mqtt_abort(c);
		}
	}

	if (connected) {
		return 0;
	}

	return -EINVAL;
}

/**@brief Connect to the broker and service the connection until it ends.
 *
 * Returns immediately if no connection can be established. Otherwise
 * polls the socket, keeps the connection alive with mqtt_live() and
 * processes incoming data with mqtt_input() until the client disconnects
 * or an error occurs, then tears the connection down.
 */
static void mqtt_loop(void)
{
	int rc;

	printk("attempting to connect:\n ");

	rc = try_to_connect(&client);
	if (rc != 0) {
		return;
	}

	while (connected) {
		rc = poll(&fds, 1, K_SECONDS(CONFIG_MQTT_KEEPALIVE));
		if (rc < 0) {
			printk("ERROR: poll %d\n", errno);
			break;
		}

		rc = mqtt_live(&client);
		if (rc != 0) {
			printk("ERROR: mqtt_live %d\n", rc);
			break;
		}

		if ((fds.revents & POLLIN) == POLLIN) {
			rc = mqtt_input(&client);
			if (rc != 0) {
				printk("ERROR: mqtt_input %d\n", rc);
				break;
			}
		}

		if ((fds.revents & POLLERR) == POLLERR) {
			printk("POLLERR\n");
			break;
		}

		if ((fds.revents & POLLNVAL) == POLLNVAL) {
			printk("POLLNVAL\n");
			break;
		}
	}

	rc = mqtt_disconnect(&client);
	if (rc) {
		printk("Could not disconnect MQTT client. Error: %d\n", rc);
	} else {
		/* NOTE(review): mqtt_input() is run once after a successful
		 * mqtt_disconnect() so the library can finish the disconnect
		 * and deliver MQTT_EVT_DISCONNECT - confirm against the MQTT
		 * library version in use. Its result is deliberately ignored;
		 * the check below covers the failure case.
		 */
		(void)mqtt_input(&client);
	}

	if (connected) {
		/* No disconnect event arrived. Drop the connection and clear
		 * the flag so the next try_to_connect() really reconnects
		 * instead of returning success on stale state.
		 */
		mqtt_abort(&client);
		connected = false;
	}
}

/**@brief Application entry point.
 *
 * Brings up the modem once, then runs mqtt_loop() forever with a pause
 * between runs.
 */
void main(void)
{
	printk("The MQTT simple sample started\n");
	modem_configure();

	for (;;) {
		mqtt_loop();
		/* Pause before the next connection attempt. */
		k_sleep(5000);
	}
}

When the nRF9160 connects to the mqtt server, the output is:

LTE Link Connecting ...
LTE Link Connected!
The MQTT simple sample started
attempting to connect:
IPv4 Address found 3.XX.XXX.XX
[mqtt_evt_handler:684] MQTT client connected!
Subscribing to: my/subscribe/topic len 18
[mqtt_evt_handler:734] SUBACK packet id: 1234

When it does not connect, the output is:

LTE Link Connecting ...
LTE Link Connected!
The MQTT simple sample started
attempting to connect:
IPv4 Address found 3.XX.XXX.XX
mqtt_connect -60
IPv4 Address found 3.XX.XXX.XX
mqtt_connect -60
IPv4 Address found 3.XX.XXX.XX
mqtt_connect -60

The code is same but still sometimes it does not connect. 

I referred to this ticket for the code that reattempts the connection to the server: https://devzone.nordicsemi.com/f/nordic-q-a/49960/pollnval-simple-mqtt-re-connection-problem/215598#215598

How can I avoid this problem? 

Parents
  • Hi,

     

    There's a potential issue in the code you link to, which is that mqtt_input() isn't called after the mqtt_disconnect() is sent.

    After mqtt_disconnect() is called, you need to call mqtt_input() next to get the disconnected event. mqtt_disconnect() does not close the connection immediately, as it still needs to send the disconnect message first to the broker.

     

    The normal error in the above case is to get ENOMEM back, as you run out of sockets or RAM (unread messages received). Could you enter debug mode and see where inside mqtt_connect() it fails?

     

    Kind regards,

    Håkon

  • Hello Hakon,

    I read more about MQTT connection timeout on these links https://github.com/eclipse/paho.mqtt.python/issues/217 and https://xdk.bosch-connectivity.com/community/-/message_boards/message/349928

    After reading this, I understood that MQTT connection timeout means connection was sent successfully but response was not received and this error is triggered by networking error.

    In the above code, I try to reconnect but still there is always connection timed out error. In one of the links, they have used connect_async function if this error occur. Can we use that function here?

    In ncs/zephyr/include/net/net_offload.h file, there are functions net_offload_connect() and net_offload_accept. In net_offload_accept, they have used async, I guess. I did not understand it properly.

    So is it possible to use async here?

    These are the timeout values:

     * @param timeout    The timeout value for the connection. Possible values:
     *                   * K_NO_WAIT: this function will return immediately,
     *                   * K_FOREVER: this function will block until the
     *                                      connection is established,
     *                   * >0: this function will wait the specified ms.

    So for mqtt_connect, which timeout value is used among above? Where can I find it? If we change the timeout value, can this error be solved?

Reply
  • Hello Hakon,

    I read more about MQTT connection timeout on these links https://github.com/eclipse/paho.mqtt.python/issues/217 and https://xdk.bosch-connectivity.com/community/-/message_boards/message/349928

    After reading this, I understood that MQTT connection timeout means connection was sent successfully but response was not received and this error is triggered by networking error.

    In the above code, I try to reconnect but still there is always connection timed out error. In one of the links, they have used connect_async function if this error occur. Can we use that function here?

    In ncs/zephyr/include/net/net_offload.h file, there are functions net_offload_connect() and net_offload_accept. In net_offload_accept, they have used async, I guess. I did not understand it properly.

    So is it possible to use async here?

    These are the timeout values:

     * @param timeout    The timeout value for the connection. Possible values:
     *                   * K_NO_WAIT: this function will return immediately,
     *                   * K_FOREVER: this function will block until the
     *                                      connection is established,
     *                   * >0: this function will wait the specified ms.

    So for mqtt_connect, which timeout value is used among above? Where can I find it? If we change the timeout value, can this error be solved?

Children
  • Hi,

     

    Jagruti said:
    In the above code, I try to reconnect but still there is always connection timed out error. In one of the links, they have used connect_async function if this error occur. Can we use that function here?

     connect_async() isn't available for the nRF9160.

    You can see the socket offloading for the nrf9160 details here: https://github.com/nrfconnect/sdk-nrf/blob/master/lib/bsdlib/nrf91_sockets.c

    Jagruti said:
    In ncs/zephyr/include/net/net_offload.h file, there are functions net_offload_connect() and net_offload_accept. In net_offload_accept, they have used async, I guess. I did not understand it properly.

     That is the "Zephyr API" for socket operations. Each vendor that supports it has its own implementation conforming to this API, as shown in the above nrf91_sockets.c file.

    For mqtt, the transport layer is mqtt_transport_socket_tcp.c or mqtt_transport_socket_tls.c, depending on if you use secure or non-secure connection.

    Are you using TLS?

    Do you see the event "MQTT_EVT_DISCONNECT" coming in your application?

     

    Kind regards,

    Håkon

    OK, thank you for the explanation.

    No I am not using TLS.

    No I have never seen  "MQTT_EVT_DISCONNECT" in my application. The nRF9160 does not connect in the first attempt, I think that's why there is no  "MQTT_EVT_DISCONNECT". 

    The output is like this when nRF91 does not connect to the server:

    LTE Link Connecting ...
    LTE Link Connected!
    The MQTT simple sample started
    attempting to connect:
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60

    I am trying to reconnect still it does not connect after first failure. I don't know what else to try.

    I tested it for 40 times continuously. So among 40, 5 times it did not connect to the server. The tests were done using same sim card and same broker address.

  • Hi,

     

    Jagruti said:
    No I have never seen  "MQTT_EVT_DISCONNECT" in my application. The nRF9160 does not connect in the first attempt, I think that's why there is no  "MQTT_EVT_DISCONNECT". 
    Jagruti said:
    I tested it for 40 times continuously. So among 40, 5 times it did not connect to the server. The tests were done using same sim card and same broker address.

    But, if you disconnect and connect in a loop, you should see the MQTT_EVT_DISCONNECT. If you do not, can you please try to wait for it before connecting again? 

     

    Kind regards,

    Håkon

  • No, I mean I turn off the DK. Once it connects to the mqtt and again if it disconnects, then it can connect properly again. That is not the problem. The program works properly in loop if there is mqtt connection.

    The problem is if mqtt does not connect the very first time, then it never connects to the mqtt. I waited for 10 minutes, still it did not connect.

    If the DK does not connect at the very first attempt, I can not see the disconnect event.

    The 40 tests means I turn on and turn off the DK for 40 times. Among those tests, DK connected to the mqtt server at the very first attempt and it worked properly for 35 tests. But for 5 tests, it did not connect to the mqtt server in the first attempt and then it never connected even if I wait for 10 minutes.

    For 35 tests, the output is:

    LTE Link Connecting ...
    LTE Link Connected!
    The MQTT simple sample started
    attempting to connect:
    IPv4 Address found 3.XX.XXX.XX
    [mqtt_evt_handler:684] MQTT client connected!
    Subscribing to: my/subscribe/topic len 18
    [mqtt_evt_handler:734] SUBACK packet id: 1234

    For 5 tests, the output is:

    LTE Link Connecting ...
    LTE Link Connected!
    The MQTT simple sample started
    attempting to connect:
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60
    IPv4 Address found 3.XX.XXX.XX
    mqtt_connect -60

    For all the 40 tests, the code, sim card and mqtt server is same. 

    But still for 5 tests I received the connection timed out error and it never connects.

  • Can you give some more detailed log output? Just set these options in prj.conf and run the tests again

    CONFIG_NET_LOG=y
    CONFIG_MQTT_LOG_LEVEL_DBG=y

Related