I tested various copy methods in terms of clock cycles, which I measured with the DWT cycle counter.
For that, I copied a uint8_t array of size 70 using:
- for Loop --> 63 Cycles
- memcpy --> 514 Cycles
- std::array copy --> 514 Cycles
Are these results valid? I always thought that memcpy is highly optimized and would take fewer cycles than copying by hand with a for loop.
My Setup:
- gcc-arm-none-eabi-7-2018-q2-update
- nrf5_SDK_17.0.2_d674dde
- nrf52840
I used the following code to determine the clock cycles:
// Compiler-only barrier used to fence the timed regions in copyTests().
// Passing both buffers as asm inputs makes their addresses escape, and the
// "memory" clobber tells the compiler the buffers may be read or written here.
// Together this keeps a copy from being discarded as a dead store or moved
// across the volatile cycle-counter accesses. The asm body itself is empty.
static inline void copyTestsBarrier(const void* first, const void* second) {
    __asm__ __volatile__("" : : "r"(first), "r"(second) : "memory");
}

// Measures how many DWT cycle-counter ticks four ways of copying a 70-byte
// buffer take (hand-written loop, memcpy, std::array assignment, std::copy)
// and logs each result through NRF_LOG_INFO.
//
// dummyVal: fill value for the source buffers; the raw destination buffer is
//           pre-filled with dummyVal + 1 so that it differs before the copy.
// Returns:  always 0.
uint8_t copyTests(uint8_t dummyVal) {
    // Enable the use of the DWT unit (sets bit 24 of DEMCR).
    CoreDebug->DEMCR |= 0x01000000;
    NRF_LOG_INFO("Starting DWT_CYCCNT");

    volatile uint32_t cycleCount = 0xffffffff;
    static constexpr uint16_t LENGHT_OF_ARRAY = 70;

    uint8_t array1[LENGHT_OF_ARRAY] = {0};
    memset(array1, dummyVal, LENGHT_OF_ARRAY);
    uint8_t array2[LENGHT_OF_ARRAY] = {0};
    memset(array2, dummyVal + 1, LENGHT_OF_ARRAY);

    // --- hand-written loop ---------------------------------------------
    // Reset the cycle counter and make sure it is running (CTRL bit 0).
    DWT->CYCCNT = 0;
    DWT->CTRL |= 0x1;
    copyTestsBarrier(array1, array2);
    for (uint8_t i = 0; i < LENGHT_OF_ARRAY; i++) {
        array2[i] = array1[i];
    }
    copyTestsBarrier(array1, array2);
    cycleCount = DWT->CYCCNT;
    NRF_LOG_INFO("copyTests LENGHT_OF_ARRAY %i", LENGHT_OF_ARRAY);
    // cycleCount is 32 bits wide, so it is logged with a 32-bit conversion;
    // "%llu" would expect a 64-bit argument.
    NRF_LOG_INFO("copyTests forloop cycles: %u", cycleCount);

    // --- memcpy ----------------------------------------------------------
    DWT->CYCCNT = 0;
    DWT->CTRL |= 0x1;
    copyTestsBarrier(array1, array2);
    memcpy(array2, array1, sizeof array1);
    copyTestsBarrier(array1, array2);
    cycleCount = DWT->CYCCNT;
    NRF_LOG_INFO("copyTests memcpy cycles: %u", cycleCount);

    std::array<uint8_t, LENGHT_OF_ARRAY> stdArray1 = {0};
    std::array<uint8_t, LENGHT_OF_ARRAY> stdArray2 = {0};
    memset(stdArray1.data(), dummyVal, LENGHT_OF_ARRAY);
    memset(stdArray2.data(), dummyVal, LENGHT_OF_ARRAY);
    // Dump addresses and the first three elements before the copy.
    NRF_LOG_INFO("copyTests &stdArray1 %p &stdArray1.data %p val0 %i val1 %i val2 %i", &stdArray1, stdArray1.data(), stdArray1.at(0), stdArray1.at(1), stdArray1.at(2));
    NRF_LOG_INFO("copyTests &stdArray2 %p &stdArray2.data %p val0 %i val1 %i val2 %i", &stdArray2, stdArray2.data(), stdArray2.at(0), stdArray2.at(1), stdArray2.at(2));

    // --- std::array copy assignment ---------------------------------------
    DWT->CYCCNT = 0;
    DWT->CTRL |= 0x1;
    copyTestsBarrier(stdArray1.data(), stdArray2.data());
    stdArray2 = stdArray1;
    copyTestsBarrier(stdArray1.data(), stdArray2.data());
    cycleCount = DWT->CYCCNT;
    NRF_LOG_INFO("copyTests stdArraycopy cycles: %u", cycleCount);
    // Dump the same values again after the copy.
    NRF_LOG_INFO("copyTests &stdArray1 %p &stdArray1.data %p val0 %i val1 %i val2 %i", &stdArray1, stdArray1.data(), stdArray1.at(0), stdArray1.at(1), stdArray1.at(2));
    NRF_LOG_INFO("copyTests &stdArray2 %p &stdArray2.data %p val0 %i val1 %i val2 %i", &stdArray2, stdArray2.data(), stdArray2.at(0), stdArray2.at(1), stdArray2.at(2));

    // --- std::copy ---------------------------------------------------------
    // NOTE(review): this measurement goes through the clockCycleCounting
    // helper instead of touching DWT directly; presumably it wraps the same
    // counter, so its call overhead may be included in the result - confirm.
    clockCycleCounting::resetClockCylce();
    copyTestsBarrier(stdArray1.data(), stdArray2.data());
    std::copy(stdArray1.begin(), stdArray1.end(), stdArray2.begin());
    copyTestsBarrier(stdArray1.data(), stdArray2.data());
    cycleCount = clockCycleCounting::getClockCycleCount();
    NRF_LOG_INFO("IteratingforVsStdArray std::copy cycles: %u", cycleCount);

    return 0;
}