This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Copy instruction cycles

I tested various copy methods in terms of instruction cycles, that I determined with DWT.

For that I copied an uint8_t array size 70 by

  • for Loop --> 63 Cycles
  • memcpy --> 514 Cycles
  • std::array copy --> 514 Cycles

Are theses result valid? I always thought that memcpy is highly optimized and would take less cycles then coping by hand with a for loop.

 My Setup:

  • gcc-arm-none-eabi-7-2018-q2-update
  • nrf5_SDK_17.0.2_d674dde
  • nrf52840

I Used follwing code to determine the clock cycles:

	uint8_t copyTests(uint8_t dummyVal){

	    // enable the use DWT
	    CoreDebug->DEMCR |= 0x01000000;
		NRF_LOG_INFO("Starting DWT_CYCCNT");

		volatile uint32_t cycleCount = 0xffffffff;
		static constexpr  uint16_t LENGHT_OF_ARRAY = 70;

		uint8_t array1[LENGHT_OF_ARRAY] = {0};
		memset(array1, dummyVal, LENGHT_OF_ARRAY);

		uint8_t array2[LENGHT_OF_ARRAY] = {0};
		memset(array2, dummyVal+1, LENGHT_OF_ARRAY);

	    // Reset cycle counter
		DWT->CYCCNT = 0;
	    DWT->CTRL |=0x1;

		for(uint8_t i=0; i<LENGHT_OF_ARRAY; i++){
			array2[i]=array1[i];
		}
		cycleCount = DWT->CYCCNT;
		NRF_LOG_INFO("copyTests LENGHT_OF_ARRAY %i", LENGHT_OF_ARRAY);
		NRF_LOG_INFO("copyTests forloop cycles: %llu",  cycleCount);


	    // Reset cycle counter
		DWT->CYCCNT = 0;
	    DWT->CTRL |=0x1;
		memcpy(array2, array1, sizeof array1);
		cycleCount = DWT->CYCCNT;

		NRF_LOG_INFO("copyTests memcpy cycles: %llu", cycleCount);



		std::array<uint8_t, LENGHT_OF_ARRAY> stdArray1 = {0};
		std::array<uint8_t, LENGHT_OF_ARRAY> stdArray2 = {0};

		memset(stdArray1.data(), dummyVal, LENGHT_OF_ARRAY);
		memset(stdArray2.data(), dummyVal, LENGHT_OF_ARRAY);


		NRF_LOG_INFO("copyTests &stdArray1 %p &stdArray1.data %p val0 %i val1 %i val2 %i", &stdArray1, stdArray1.data(), stdArray1.at(0), stdArray1.at(2), stdArray1.at(2));
		NRF_LOG_INFO("copyTests &stdArray2 %p &stdArray2.data %p val0 %i val1 %i val2 %i", &stdArray2, stdArray2.data(), stdArray2.at(0), stdArray2.at(2), stdArray2.at(2));

	    // Reset cycle counter
		DWT->CYCCNT = 0;
	    DWT->CTRL |=0x1;
		stdArray2 = stdArray1;
		cycleCount = DWT->CYCCNT;

		NRF_LOG_INFO("copyTests stdArraycopy  cycles: %llu", cycleCount);
		NRF_LOG_INFO("copyTests &stdArray1 %p &stdArray1.data %p val0 %i val1 %i val2 %i", &stdArray1, stdArray1.data(), stdArray1.at(0), stdArray1.at(2), stdArray1.at(2));
		NRF_LOG_INFO("copyTests &stdArray2 %p &stdArray2.data %p val0 %i val1 %i val2 %i", &stdArray2, stdArray2.data(), stdArray2.at(0), stdArray2.at(2), stdArray2.at(2));



		clockCycleCounting::resetClockCylce();
		std::copy(stdArray1.begin(), stdArray1.end(), stdArray2.begin());
		cycleCount = clockCycleCounting::getClockCycleCount();

		NRF_LOG_INFO("IteratingforVsStdArray std::copy cycles: %llu", cycleCount);

		return 0;
	}

Parents Reply Children
No Data
Related