LOG_BACKEND_FS on SD card crashes when accessing SD card from own code

Hello,

we are using NRF SDK 2.3.0 and a NRF9160 to do some sensor logging on a SD card connected with SPI.

To do so, we are using the fs.h libary for mounting and writing on the SD card. At the same time we have the LOG_BACKEND_FS activated to log some information about system state on the SD card.

But it seems like the fs logging backend crashes and never comes back, if a simultaneos access to SD card happens (It happens by chance sometimes).

I was able to write a code that reproduces that case: It constantly writes LOG messages and reads the content of the card each 20 log messages. In theory the size of the log files should increase, no matter what.

main.c:

#include <zephyr/kernel.h>
#include <zephyr/storage/disk_access.h>
#include <ff.h>
#include <zephyr/fs/fs.h>
#include <zephyr/logging/log.h>
#include <zephyr/logging/log_ctrl.h>
LOG_MODULE_REGISTER(main);

struct fs_file_t filep;

static FATFS sfat_fs;
/* mounting info */
static struct fs_mount_t mp = {
	.type = FS_FATFS,
	.fs_data = &sfat_fs,
};
/*
*  Note the fatfs library is able to mount only strings inside _VOLUME_STRS
*  in ffconf.h
*/
static const char *disk_mount_pt = "/SD:";

int SDMount(){
    /* raw disk i/o */
	do {
		static const char *disk_pdrv = "SD";
		uint64_t memory_size_mb;
		uint32_t block_count;
		uint32_t block_size;

		if (disk_access_init(disk_pdrv) != 0) {
			LOG_ERR("Storage init Error!%s","");
			break;
		}

		if (disk_access_ioctl(disk_pdrv,
				DISK_IOCTL_GET_SECTOR_COUNT, &block_count)) {
			LOG_ERR("Unable to get sector count%s","");
			break;
		}
		//LOG_INF("Block count %u", block_count);

		if (disk_access_ioctl(disk_pdrv,
				DISK_IOCTL_GET_SECTOR_SIZE, &block_size)) {
			LOG_ERR("Unable to get sector size%s","");
			break;
		}
		//printk("Sector size %u\n", block_size);

		memory_size_mb = (uint64_t)block_count * block_size;
		//printk("Memory Size(MB) %u\n", (uint32_t)(memory_size_mb >> 20));
	} while (0);

	mp.mnt_point = disk_mount_pt;

	int res = fs_mount(&mp);
	

	if (res == FR_OK) {
		LOG_INF("Disk mounted.\n");
		return 0;
	} else {
		LOG_ERR("Error mounting disk (E: %i)\n", res);
		return -1;
	}
}

int lsdir(const char *path)
{
	int res;
	struct fs_dir_t dirp;
	static struct fs_dirent entry;

	fs_dir_t_init(&dirp);

	/* Verify fs_opendir() */
	res = fs_opendir(&dirp, path);
	if (res) {
		LOG_ERR("Error opening dir %s [%d]\n", path, res);
		return res;
	}

	LOG_INF("\nListing dir %s ...\n", path);
	for (;;) {
		/* Verify fs_readdir() */
		res = fs_readdir(&dirp, &entry);

		/* entry.name[0] == 0 means end-of-dir */
		if (res || entry.name[0] == 0) {
			break;
		}

		if (entry.type == FS_DIR_ENTRY_DIR) {
			LOG_INF("[DIR ] %s\n", entry.name);
		} else {
			LOG_INF("[FILE] %s (size = %zu)\n",
				entry.name, entry.size);
		}
	}

	/* Verify fs_closedir() */
	fs_closedir(&dirp);

	return res;
}

void main(void)
{
	printk("Hello World! %s\n", CONFIG_BOARD);

	int err = SDMount();
	if (err != 0){
		LOG_ERR("Error mounting SD card: E:%i", err);
	}
	lsdir("/SD:");
	int counter = 0;
	while (1)
	{
		LOG_INF("%i", counter);
		counter++;
		k_sleep(K_MSEC(300));
		if (counter == 20){
			lsdir("/SD:");
			counter = 0;
		}
	}
}
And proj.conf

#Boot
CONFIG_BOOTLOADER_MCUBOOT=y

CONFIG_LOG=y
CONFIG_PRINTK=y
CONFIG_MAIN_STACK_SIZE=2048

#SD card & SPI 
#####################################################################
CONFIG_DISK_ACCESS=y
CONFIG_FILE_SYSTEM=y
CONFIG_FAT_FILESYSTEM_ELM=y


#Log
CONFIG_LOG_BACKEND_FS=y
CONFIG_LOG_BACKEND_FS_OUTPUT_TEXT=n
CONFIG_LOG_BACKEND_FS_DIR="/SD:"
CONFIG_LOG_BACKEND_FS_FILE_PREFIX="log"
CONFIG_LOG_BACKEND_FS_FILES_LIMIT=5
CONFIG_LOG_BACKEND_FS_FILE_SIZE=4096
CONFIG_LOG_BACKEND_FS_OVERWRITE=y


# Need this when storage is on flash
CONFIG_MPU_ALLOW_FLASH_WRITE=y

This is the log from the console:

*** Booting Zephyr OS build v3.2.99-ncs2 ***
Hello World! actinius_icarus
[00:00:02.488,403] <inf> sd: Detected SD card
[00:00:02.507,934] <inf> sd: Maximum SD clock is under 25MHz, using clock of 24000000Hz
[00:00:02.513,549] <inf> main: Disk mounted.

[00:00:02.513,641] <inf> main: 
Listing dir /SD: ...

[00:00:02.515,441] <inf> main: [FILE] LOG0008 (size = 4060)

[00:00:02.515,502] <inf> main: [FILE] LOG0009 (size = 4060)

[00:00:02.515,563] <inf> main: [FILE] LOG0010 (size = 129)

[00:00:02.519,165] <inf> main: [FILE] DEVICEID.TXT (size = 5)

[00:00:02.531,677] <inf> main: [FILE] HOSTNAME.TXT (size = 28)

[00:00:02.531,738] <inf> main: [DIR ] SYSTEM~1

[00:00:02.533,569] <inf> main: 0
[00:00:02.833,648] <inf> main: 1
[00:00:03.133,789] <inf> main: 2
[00:00:03.433,898] <inf> main: 3
[00:00:03.734,008] <inf> main: 4
[00:00:04.034,210] <inf> main: 5
[00:00:04.334,320] <inf> main: 6
[00:00:04.634,429] <inf> main: 7
[00:00:04.934,539] <inf> main: 8
[00:00:05.234,649] <inf> main: 9
[00:00:05.534,759] <inf> main: 10
[00:00:05.834,869] <inf> main: 11
[00:00:06.134,979] <inf> main: 12
[00:00:06.435,089] <inf> main: 13
[00:00:06.735,198] <inf> main: 14
[00:00:07.035,308] <inf> main: 15
[00:00:07.335,418] <inf> main: 16
[00:00:07.635,528] <inf> main: 17
[00:00:07.935,638] <inf> main: 18
[00:00:08.235,748] <inf> main: 19
[00:00:08.535,919] <inf> main: 
Listing dir /SD: ...

[00:00:08.535,980] <inf> main: [FILE] LOG0008 (size = 4060)

[00:00:08.536,041] <inf> main: [FILE] LOG0009 (size = 4060)

[00:00:08.536,071] <inf> main: [FILE] LOG0010 (size = 129)

[00:00:08.536,132] <inf> main: [FILE] LOG0000 (size = 1402)

[00:00:08.539,733] <inf> main: [FILE] DEVICEID.TXT (size = 5)

[00:00:08.552,062] <inf> main: [FILE] HOSTNAME.TXT (size = 28)

[00:00:08.552,124] <inf> main: [DIR ] SYSTEM~1

[00:00:08.553,161] <wrn> sd: Could not get SD card mutex1
[00:00:08.553,192] <err> fs: file write error (-5)
[00:00:08.553,375] <wrn> sd: Could not get SD card mutex1
[00:00:08.553,436] <err> fs: directory read error (-5)
backend corrupted
[00:00:08.553,833] <inf> main: 0
[00:00:08.853,912] <inf> main: 1
[00:00:09.154,022] <inf> main: 2
[00:00:09.454,132] <inf> main: 3
[00:00:09.754,241] <inf> main: 4
[00:00:10.054,351] <inf> main: 5
[00:00:10.354,492] <inf> main: 6
[00:00:10.654,602] <inf> main: 7
[00:00:10.954,711] <inf> main: 8
[00:00:11.254,821] <inf> main: 9
[00:00:11.554,931] <inf> main: 10
[00:00:11.855,041] <inf> main: 11
[00:00:12.155,151] <inf> main: 12
[00:00:12.455,261] <inf> main: 13
[00:00:12.755,371] <inf> main: 14
[00:00:13.055,480] <inf> main: 15
[00:00:13.355,590] <inf> main: 16
[00:00:13.655,700] <inf> main: 17
[00:00:13.955,810] <inf> main: 18
[00:00:14.255,920] <inf> main: 19
[00:00:14.556,091] <inf> main: 
Listing dir /SD: ...

[00:00:14.557,922] <inf> main: [FILE] LOG0008 (size = 4060)

[00:00:14.557,952] <inf> main: [FILE] LOG0009 (size = 4060)

[00:00:14.558,013] <inf> main: [FILE] LOG0010 (size = 129)

[00:00:14.558,074] <inf> main: [FILE] LOG0000 (size = 1402)

[00:00:14.561,645] <inf> main: [FILE] DEVICEID.TXT (size = 5)

[00:00:14.573,974] <inf> main: [FILE] HOSTNAME.TXT (size = 28)

[00:00:14.574,035] <inf> main: [DIR ] SYSTEM~1

[00:00:14.575,714] <inf> main: 0
[00:00:14.875,823] <inf> main: 1
[00:00:15.175,933] <inf> main: 2
[00:00:15.476,043] <inf> main: 3
[00:00:15.776,153] <inf> main: 4
[00:00:16.076,263] <inf> main: 5
[00:00:16.376,403] <inf> main: 6
[00:00:16.676,513] <inf> main: 7
[00:00:16.976,623] <inf> main: 8
[00:00:17.276,733] <inf> main: 9
[00:00:17.576,843] <inf> main: 10
[00:00:17.876,953] <inf> main: 11
[00:00:18.177,062] <inf> main: 12
[00:00:18.477,172] <inf> main: 13
[00:00:18.777,313] <inf> main: 14
[00:00:19.077,423] <inf> main: 15
[00:00:19.377,532] <inf> main: 16
[00:00:19.677,642] <inf> main: 17
[00:00:19.977,752] <inf> main: 18
[00:00:20.277,862] <inf> main: 19
[00:00:20.578,033] <inf> main: 
Listing dir /SD: ...

[00:00:20.579,833] <inf> main: [FILE] LOG0008 (size = 4060)

[00:00:20.579,895] <inf> main: [FILE] LOG0009 (size = 4060)

[00:00:20.579,956] <inf> main: [FILE] LOG0010 (size = 129)

[00:00:20.580,017] <inf> main: [FILE] LOG0000 (size = 1402)

[00:00:20.583,557] <inf> main: [FILE] DEVICEID.TXT (size = 5)

[00:00:20.595,886] <inf> main: [FILE] HOSTNAME.TXT (size = 28)

[00:00:20.595,947] <inf> main: [DIR ] SYSTEM~1

You can see that it first log to the file "LOG0000". Then tries to read the dir and the fs backend crashes. After that the size of LOG0000 doesn't increase, so the backend stopped working, which is not good.

When tracing it back, I found the critical part in zephyr/subsys/sd/sdmmc.c:

/* Reads data from SD card memory card */
int sdmmc_read_blocks(struct sd_card *card, uint8_t *rbuf,
	uint32_t start_block, uint32_t num_blocks)
{
	.
	.
	.
	
	ret = k_mutex_lock(&card->lock, K_NO_WAIT);
	if (ret) {
		LOG_WRN("Could not get SD card mutex1");
		return -EBUSY;
	}

The code doesn't "wait" for a mutex to come back, because it has K_NO_WAIT. When changing it to 500msec wait it works without any problems. (Same thing in sdmmc_write_blocks())

But changing the zephyr code is not a good option for us. So we were wondering if there is a thing we missed (maybe a mutex that can be accessed from outside) or a good workaround for that problem.

Again thank you very much for your help!

Best regards, Moritz

  • Hi Einar,

    yes just found a solution that seems to work!

    The trick is to deactivate the FS_Backend before every SD card operation and wait 50ms. After the SD card operation turn the backend on again. The good thing is, no Log message will be dropped, even during deactived backend it seems to buffer all the messages and writes them as soon it is activated again.

    We are using mutexes for our SD card operations. When taking a mutex, it automatically deactivates the fs backend now. After unlocking the mutex it activates the backend again.

    #define BACKEND_IDENTIFIER_FS "fs"
    /// @brief De/Activate FS Backend. Prevents FS Backend from writting on SD card. Method wont drop LOG messages (by experience)
    /// @param desired_state True to turn backend on, False to turn backend of
    void fs_backend_activate(bool desired_state){
    	int backendslength = log_backend_count_get();
    	for (int i = 0; i <backendslength; i++){
    		const struct log_backend *backend = log_backend_get(i);
    		if (strstr(backend->name, BACKEND_IDENTIFIER_FS)){	
    			if (desired_state == true){
    				log_backend_activate(backend, backend->cb->ctx);
    				LOG_INF("Fs active");
    			} else {
    				log_backend_deactivate(backend);
    				LOG_INF("Fs inactive");
    				k_msleep(50);
    			}
    		}
    	}
    }

    int SD_lock_wait(){
    	LOG_DBG("SD mutex take");
    	fs_backend_activate(false);
    	int ret = k_mutex_lock(&SD_mutex, K_FOREVER);
    	if(ret){
    		LOG_ERR("Error taking SD mutex: %d", ret);
    	}
    	return ret;
    }
    
    int SD_unlock(){
    	LOG_DBG("SD mutex give");
    	int ret = k_mutex_unlock(&SD_mutex);
    	if(ret){
    		LOG_ERR("Error giving SD mutex: %d", ret);
    	}
    	fs_backend_activate(true);
    	return ret;
    }

    Thanks for checking this case with us, Einar

Related