Hello,
we are using NRF SDK 2.3.0 and a NRF9160 to do some sensor logging on a SD card connected with SPI.
To do so, we are using the fs.h libary for mounting and writing on the SD card. At the same time we have the LOG_BACKEND_FS activated to log some information about system state on the SD card.
But it seems like the fs logging backend crashes and never comes back, if a simultaneos access to SD card happens (It happens by chance sometimes).
I was able to write a code that reproduces that case: It constantly writes LOG messages and reads the content of the card each 20 log messages. In theory the size of the log files should increase, no matter what.
main.c:
#include <zephyr/kernel.h> #include <zephyr/storage/disk_access.h> #include <ff.h> #include <zephyr/fs/fs.h> #include <zephyr/logging/log.h> #include <zephyr/logging/log_ctrl.h> LOG_MODULE_REGISTER(main); struct fs_file_t filep; static FATFS sfat_fs; /* mounting info */ static struct fs_mount_t mp = { .type = FS_FATFS, .fs_data = &sfat_fs, }; /* * Note the fatfs library is able to mount only strings inside _VOLUME_STRS * in ffconf.h */ static const char *disk_mount_pt = "/SD:"; int SDMount(){ /* raw disk i/o */ do { static const char *disk_pdrv = "SD"; uint64_t memory_size_mb; uint32_t block_count; uint32_t block_size; if (disk_access_init(disk_pdrv) != 0) { LOG_ERR("Storage init Error!%s",""); break; } if (disk_access_ioctl(disk_pdrv, DISK_IOCTL_GET_SECTOR_COUNT, &block_count)) { LOG_ERR("Unable to get sector count%s",""); break; } //LOG_INF("Block count %u", block_count); if (disk_access_ioctl(disk_pdrv, DISK_IOCTL_GET_SECTOR_SIZE, &block_size)) { LOG_ERR("Unable to get sector size%s",""); break; } //printk("Sector size %u\n", block_size); memory_size_mb = (uint64_t)block_count * block_size; //printk("Memory Size(MB) %u\n", (uint32_t)(memory_size_mb >> 20)); } while (0); mp.mnt_point = disk_mount_pt; int res = fs_mount(&mp); if (res == FR_OK) { LOG_INF("Disk mounted.\n"); return 0; } else { LOG_ERR("Error mounting disk (E: %i)\n", res); return -1; } } int lsdir(const char *path) { int res; struct fs_dir_t dirp; static struct fs_dirent entry; fs_dir_t_init(&dirp); /* Verify fs_opendir() */ res = fs_opendir(&dirp, path); if (res) { LOG_ERR("Error opening dir %s [%d]\n", path, res); return res; } LOG_INF("\nListing dir %s ...\n", path); for (;;) { /* Verify fs_readdir() */ res = fs_readdir(&dirp, &entry); /* entry.name[0] == 0 means end-of-dir */ if (res || entry.name[0] == 0) { break; } if (entry.type == FS_DIR_ENTRY_DIR) { LOG_INF("[DIR ] %s\n", entry.name); } else { LOG_INF("[FILE] %s (size = %zu)\n", entry.name, entry.size); } } /* Verify fs_closedir() */ fs_closedir(&dirp); return res; } void main(void) { printk("Hello World! %s\n", CONFIG_BOARD); int err = SDMount(); if (err != 0){ LOG_ERR("Error mounting SD card: E:%i", err); } lsdir("/SD:"); int counter = 0; while (1) { LOG_INF("%i", counter); counter++; k_sleep(K_MSEC(300)); if (counter == 20){ lsdir("/SD:"); counter = 0; } } }And proj.conf
#Boot CONFIG_BOOTLOADER_MCUBOOT=y CONFIG_LOG=y CONFIG_PRINTK=y CONFIG_MAIN_STACK_SIZE=2048 #SD card & SPI ##################################################################### CONFIG_DISK_ACCESS=y CONFIG_FILE_SYSTEM=y CONFIG_FAT_FILESYSTEM_ELM=y #Log CONFIG_LOG_BACKEND_FS=y CONFIG_LOG_BACKEND_FS_OUTPUT_TEXT=n CONFIG_LOG_BACKEND_FS_DIR="/SD:" CONFIG_LOG_BACKEND_FS_FILE_PREFIX="log" CONFIG_LOG_BACKEND_FS_FILES_LIMIT=5 CONFIG_LOG_BACKEND_FS_FILE_SIZE=4096 CONFIG_LOG_BACKEND_FS_OVERWRITE=y # Need this when storage is on flash CONFIG_MPU_ALLOW_FLASH_WRITE=y
This is the log from the console:
*** Booting Zephyr OS build v3.2.99-ncs2 *** Hello World! actinius_icarus [00:00:02.488,403] <inf> sd: Detected SD card [00:00:02.507,934] <inf> sd: Maximum SD clock is under 25MHz, using clock of 24000000Hz [00:00:02.513,549] <inf> main: Disk mounted. [00:00:02.513,641] <inf> main: Listing dir /SD: ... [00:00:02.515,441] <inf> main: [FILE] LOG0008 (size = 4060) [00:00:02.515,502] <inf> main: [FILE] LOG0009 (size = 4060) [00:00:02.515,563] <inf> main: [FILE] LOG0010 (size = 129) [00:00:02.519,165] <inf> main: [FILE] DEVICEID.TXT (size = 5) [00:00:02.531,677] <inf> main: [FILE] HOSTNAME.TXT (size = 28) [00:00:02.531,738] <inf> main: [DIR ] SYSTEM~1 [00:00:02.533,569] <inf> main: 0 [00:00:02.833,648] <inf> main: 1 [00:00:03.133,789] <inf> main: 2 [00:00:03.433,898] <inf> main: 3 [00:00:03.734,008] <inf> main: 4 [00:00:04.034,210] <inf> main: 5 [00:00:04.334,320] <inf> main: 6 [00:00:04.634,429] <inf> main: 7 [00:00:04.934,539] <inf> main: 8 [00:00:05.234,649] <inf> main: 9 [00:00:05.534,759] <inf> main: 10 [00:00:05.834,869] <inf> main: 11 [00:00:06.134,979] <inf> main: 12 [00:00:06.435,089] <inf> main: 13 [00:00:06.735,198] <inf> main: 14 [00:00:07.035,308] <inf> main: 15 [00:00:07.335,418] <inf> main: 16 [00:00:07.635,528] <inf> main: 17 [00:00:07.935,638] <inf> main: 18 [00:00:08.235,748] <inf> main: 19 [00:00:08.535,919] <inf> main: Listing dir /SD: ... [00:00:08.535,980] <inf> main: [FILE] LOG0008 (size = 4060) [00:00:08.536,041] <inf> main: [FILE] LOG0009 (size = 4060) [00:00:08.536,071] <inf> main: [FILE] LOG0010 (size = 129) [00:00:08.536,132] <inf> main: [FILE] LOG0000 (size = 1402) [00:00:08.539,733] <inf> main: [FILE] DEVICEID.TXT (size = 5) [00:00:08.552,062] <inf> main: [FILE] HOSTNAME.TXT (size = 28) [00:00:08.552,124] <inf> main: [DIR ] SYSTEM~1 [00:00:08.553,161] <wrn> sd: Could not get SD card mutex1 [00:00:08.553,192] <err> fs: file write error (-5) [00:00:08.553,375] <wrn> sd: Could not get SD card mutex1 [00:00:08.553,436] <err> fs: directory read error (-5) backend corrupted [00:00:08.553,833] <inf> main: 0 [00:00:08.853,912] <inf> main: 1 [00:00:09.154,022] <inf> main: 2 [00:00:09.454,132] <inf> main: 3 [00:00:09.754,241] <inf> main: 4 [00:00:10.054,351] <inf> main: 5 [00:00:10.354,492] <inf> main: 6 [00:00:10.654,602] <inf> main: 7 [00:00:10.954,711] <inf> main: 8 [00:00:11.254,821] <inf> main: 9 [00:00:11.554,931] <inf> main: 10 [00:00:11.855,041] <inf> main: 11 [00:00:12.155,151] <inf> main: 12 [00:00:12.455,261] <inf> main: 13 [00:00:12.755,371] <inf> main: 14 [00:00:13.055,480] <inf> main: 15 [00:00:13.355,590] <inf> main: 16 [00:00:13.655,700] <inf> main: 17 [00:00:13.955,810] <inf> main: 18 [00:00:14.255,920] <inf> main: 19 [00:00:14.556,091] <inf> main: Listing dir /SD: ... [00:00:14.557,922] <inf> main: [FILE] LOG0008 (size = 4060) [00:00:14.557,952] <inf> main: [FILE] LOG0009 (size = 4060) [00:00:14.558,013] <inf> main: [FILE] LOG0010 (size = 129) [00:00:14.558,074] <inf> main: [FILE] LOG0000 (size = 1402) [00:00:14.561,645] <inf> main: [FILE] DEVICEID.TXT (size = 5) [00:00:14.573,974] <inf> main: [FILE] HOSTNAME.TXT (size = 28) [00:00:14.574,035] <inf> main: [DIR ] SYSTEM~1 [00:00:14.575,714] <inf> main: 0 [00:00:14.875,823] <inf> main: 1 [00:00:15.175,933] <inf> main: 2 [00:00:15.476,043] <inf> main: 3 [00:00:15.776,153] <inf> main: 4 [00:00:16.076,263] <inf> main: 5 [00:00:16.376,403] <inf> main: 6 [00:00:16.676,513] <inf> main: 7 [00:00:16.976,623] <inf> main: 8 [00:00:17.276,733] <inf> main: 9 [00:00:17.576,843] <inf> main: 10 [00:00:17.876,953] <inf> main: 11 [00:00:18.177,062] <inf> main: 12 [00:00:18.477,172] <inf> main: 13 [00:00:18.777,313] <inf> main: 14 [00:00:19.077,423] <inf> main: 15 [00:00:19.377,532] <inf> main: 16 [00:00:19.677,642] <inf> main: 17 [00:00:19.977,752] <inf> main: 18 [00:00:20.277,862] <inf> main: 19 [00:00:20.578,033] <inf> main: Listing dir /SD: ... [00:00:20.579,833] <inf> main: [FILE] LOG0008 (size = 4060) [00:00:20.579,895] <inf> main: [FILE] LOG0009 (size = 4060) [00:00:20.579,956] <inf> main: [FILE] LOG0010 (size = 129) [00:00:20.580,017] <inf> main: [FILE] LOG0000 (size = 1402) [00:00:20.583,557] <inf> main: [FILE] DEVICEID.TXT (size = 5) [00:00:20.595,886] <inf> main: [FILE] HOSTNAME.TXT (size = 28) [00:00:20.595,947] <inf> main: [DIR ] SYSTEM~1
You can see that it first log to the file "LOG0000". Then tries to read the dir and the fs backend crashes. After that the size of LOG0000 doesn't increase, so the backend stopped working, which is not good.
When tracing it back, I found the critical part in zephyr/subsys/sd/sdmmc.c:
/* Reads data from SD card memory card */ int sdmmc_read_blocks(struct sd_card *card, uint8_t *rbuf, uint32_t start_block, uint32_t num_blocks) { . . . ret = k_mutex_lock(&card->lock, K_NO_WAIT); if (ret) { LOG_WRN("Could not get SD card mutex1"); return -EBUSY; }
The code doesn't "wait" for a mutex to come back, because it has K_NO_WAIT. When changing it to 500msec wait it works without any problems. (Same thing in sdmmc_write_blocks())
But changing the zephyr code is not a good option for us. So we were wondering if there is a thing we missed (maybe a mutex that can be accessed from outside) or a good workaround for that problem.
Again thank you very much for your help!
Best regards, Moritz