Performance on hardware (cc3xx) is significantly slower than performance on software (oberon). Does anyone have a good explanation for this? I have also tested ECDSA and can see that hardware is faster. I also tried a different elliptic curve, but the result is the same.
I added the benchmarking code shown below; everything else can be found in the nrf/samples/crypto/ecdh sample.
Development setup:
Macbook Air Apple M1
Toolchain version: 2.6.0
nRF5340 Development Kit board
This is my main.c file:
This is my prj.conf:
This is my nrf5340dk_nrf5340_cpuapp.conf:
This is my main.c file:
/*
 * Copyright (c) 2021 Nordic Semiconductor ASA
 *
 * SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
 */

#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>
#include <zephyr/logging/log.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <psa/crypto.h>
#include <psa/crypto_extra.h>
#include <zephyr/timing/timing.h>
#include <math.h>
#include <zephyr/drivers/timer/system_timer.h>
#include <zephyr/sys/__assert.h>

#ifdef CONFIG_BUILD_WITH_TFM
#include <tfm_ns_interface.h>
#endif

#define APP_SUCCESS		(0)
#define APP_ERROR		(-1)
#define APP_SUCCESS_MESSAGE "Example finished successfully!"
#define APP_ERROR_MESSAGE "Example exited with error!"

#define PRINT_HEX(p_label, p_text, len)\
	({\
		LOG_INF("---- %s (len: %u): ----", p_label, len);\
		LOG_HEXDUMP_INF(p_text, len, "Content:");\
		LOG_INF("---- %s end ----", p_label);\
	})

LOG_MODULE_REGISTER(ecdh, LOG_LEVEL_DBG);

/* ====================================================================== */
/*		Global variables/defines for the ECDH example		  */

#define NRF_CRYPTO_EXAMPLE_ECDH_KEY_BITS (256)
#define NRF_CRYPTO_EXAMPLE_ECDH_PUBLIC_KEY_SIZE (65)

psa_key_id_t key_id_alice;

/* ====================================================================== */

/**
 * @brief Initialize the PSA Crypto subsystem.
 *
 * @return APP_SUCCESS if psa_crypto_init() reported PSA_SUCCESS,
 *         APP_ERROR otherwise.
 */
int crypto_init(void)
{
	psa_status_t status = psa_crypto_init();

	if (status != PSA_SUCCESS) {
		return APP_ERROR;
	}

	return APP_SUCCESS;
}

/**
 * @brief Generate a volatile secp256r1 key pair usable for ECDH derivation.
 *
 * @param[out] key_id  Receives the identifier of the generated key. The
 *                     caller owns the key and must release it with
 *                     psa_destroy_key().
 *
 * @return APP_SUCCESS on success, APP_ERROR if psa_generate_key() failed.
 */
int create_ecdh_keypair(psa_key_id_t *key_id)
{
	psa_status_t status;
	psa_key_attributes_t key_attributes = PSA_KEY_ATTRIBUTES_INIT;

	/* Key settings: ECDH on the secp256r1 curve, volatile, derive-only. */
	psa_set_key_usage_flags(&key_attributes, PSA_KEY_USAGE_DERIVE);
	psa_set_key_lifetime(&key_attributes, PSA_KEY_LIFETIME_VOLATILE);
	psa_set_key_algorithm(&key_attributes, PSA_ALG_ECDH);
	psa_set_key_type(&key_attributes, PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1));
	psa_set_key_bits(&key_attributes, NRF_CRYPTO_EXAMPLE_ECDH_KEY_BITS);

	/* Generate a key pair */
	status = psa_generate_key(&key_attributes, key_id);

	/* Reset on every path, not only on success. */
	psa_reset_key_attributes(&key_attributes);

	if (status != PSA_SUCCESS) {
		LOG_INF("psa_generate_key failed! (Error: %d)", status);
		return APP_ERROR;
	}

	return APP_SUCCESS;
}

/**
 * @brief Benchmark ECDH key pair generation and print cycle statistics.
 *
 * Performs one untimed warm-up generation, then times up to 100 calls to
 * create_ecdh_keypair() with the Zephyr timing API and prints total, mean,
 * minimum, maximum and sample standard deviation in timer cycles.
 *
 * NOTE(review): only psa_generate_key() (plus attribute setup/reset) is
 * inside the timed region. Whether a given crypto driver also derives the
 * public key during generation is not visible from this file - confirm
 * before comparing backends against each other.
 *
 * @return APP_SUCCESS if every run completed, APP_ERROR otherwise.
 */
int main(void)
{
	const char *benchmark_name = "ECDH Keypair Generation";
	const int requested_runs = 100;
	int completed = 0;
	int rc = APP_SUCCESS;
	int status;
	psa_status_t destroy_status;
	uint64_t total_cycles = 0;
	uint64_t min_cycles = UINT64_MAX;
	uint64_t max_cycles = 0;
	/* Welford accumulators: running mean and sum of squared deviations. */
	double running_mean = 0.0;
	double m2 = 0.0;

	/* Init crypto */
	if (crypto_init() != APP_SUCCESS) {
		LOG_INF(APP_ERROR_MESSAGE);
		return APP_ERROR;
	}

	printf("Starting %s benchmark (%d runs)...\n", benchmark_name, requested_runs);

	/* Warm-up run: if this fails the timed loop cannot succeed either. */
	status = create_ecdh_keypair(&key_id_alice);
	if (status != APP_SUCCESS) {
		printf("%s warm-up failed with %d\n", benchmark_name, status);
		return APP_ERROR;
	}

	destroy_status = psa_destroy_key(key_id_alice);
	if (destroy_status != PSA_SUCCESS) {
		LOG_INF("psa_destroy_key failed! (Error: %d)", destroy_status);
		return APP_ERROR;
	}

	timing_init();
	timing_start();

	for (int i = 0; i < requested_runs; i++) {
		timing_t start_time = timing_counter_get();

		/* Function to benchmark */
		status = create_ecdh_keypair(&key_id_alice);

		timing_t end_time = timing_counter_get();

		if (status != APP_SUCCESS) {
			printf("%s error on run %d\n", benchmark_name, i);
			rc = APP_ERROR;
			break;
		}

		uint64_t cycles = timing_cycles_get(&start_time, &end_time);

		completed++;
		total_cycles += cycles;
		if (cycles < min_cycles) {
			min_cycles = cycles;
		}
		if (cycles > max_cycles) {
			max_cycles = cycles;
		}

		/*
		 * Update the variance accumulator in floating point. The
		 * integer form sum(x^2) - sum(x)^2 / n truncates and can
		 * overflow 64 bits for long-running operations.
		 */
		double delta = (double)cycles - running_mean;

		running_mean += delta / (double)completed;
		m2 += delta * ((double)cycles - running_mean);

		/* Release the key outside the timed region. */
		destroy_status = psa_destroy_key(key_id_alice);
		if (destroy_status != PSA_SUCCESS) {
			LOG_INF("psa_destroy_key failed! (Error: %d)", destroy_status);
			rc = APP_ERROR;
			break;
		}
	}

	timing_stop();

	printf("Frequency: %" PRIu32 " MHz\n", (uint32_t)timing_freq_get_mhz());

	if (completed == 0) {
		/* No samples: every statistic below would divide by zero. */
		printf("%s benchmark produced no samples\n", benchmark_name);
		return APP_ERROR;
	}

	/* Floating-point division; integer division dropped the fraction. */
	double mean = (double)total_cycles / (double)completed;
	/* Sample variance needs at least two samples. */
	double var = (completed > 1) ? m2 / (double)(completed - 1) : 0.0;
	double std = sqrt(var);

	printf("%s benchmark results:\n", benchmark_name);
	printf(" Runs: %d\n", completed);
	printf(" Total: %" PRIu64 " cycles\n", total_cycles);
	printf(" Average: %.3f cycles\n", mean);
	printf(" Minimum: %" PRIu64 " cycles\n", min_cycles);
	printf(" Maximum: %" PRIu64 " cycles\n", max_cycles);
	printf(" Std: %.3f cycles\n", std);

	return rc;
}
This is my prj.conf:
# The Zephyr CMSIS emulation assumes that ticks are ms, currently
CONFIG_SYS_CLOCK_TICKS_PER_SEC=1000

CONFIG_MAIN_STACK_SIZE=4096
CONFIG_HEAP_MEM_POOL_SIZE=4096

# Enable logging
CONFIG_CONSOLE=y
CONFIG_LOG=y

# Enable nordic security backend and PSA APIs
CONFIG_NRF_SECURITY=y
CONFIG_MBEDTLS_PSA_CRYPTO_C=y

CONFIG_MBEDTLS_ENABLE_HEAP=y
CONFIG_MBEDTLS_HEAP_SIZE=8192

CONFIG_PSA_WANT_ALG_ECDH=y
CONFIG_PSA_WANT_KEY_TYPE_ECC_KEY_PAIR=y
CONFIG_PSA_WANT_ECC_SECP_R1_256=y

# For key generation
CONFIG_PSA_WANT_GENERATE_RANDOM=y

# Timing functions
CONFIG_TIMING_FUNCTIONS=y
CONFIG_CBPRINTF_FP_SUPPORT=y
This is my nrf5340dk_nrf5340_cpuapp.conf:
# Using hardware crypto accelerator
CONFIG_PSA_CRYPTO_DRIVER_OBERON=n
CONFIG_PSA_CRYPTO_DRIVER_CC3XX=y
When I run this with CONFIG_PSA_CRYPTO_DRIVER_OBERON=y and CONFIG_PSA_CRYPTO_DRIVER_CC3XX=n, I get this output:
*** Booting nRF Connect SDK v3.5.99-ncs1 ***
Starting ECDH Keypair Generation benchmark (100 runs)...
Frequency: 64 MHz
ECDH Keypair Generation benchmark results:
Runs: 100
Total: 1149048 cycles
Average: 11490.000 cycles
Minimum: 11490 cycles
Maximum: 11520 cycles
Std: 3.000 cycles
*** Booting nRF Connect SDK v3.5.99-ncs1 ***
However, when I run it with CONFIG_PSA_CRYPTO_DRIVER_OBERON=n and CONFIG_PSA_CRYPTO_DRIVER_CC3XX=y, I get this output:
*** Booting nRF Connect SDK v3.5.99-ncs1 ***
Starting ECDH Keypair Generation benchmark (100 runs)...
Frequency: 64 MHz
ECDH Keypair Generation benchmark results:
Runs: 100
Total: 97657851 cycles
Average: 976578.000 cycles
Minimum: 929049 cycles
Maximum: 1014195 cycles
Std: 13743.361 cycles
*** Booting nRF Connect SDK v3.5.99-ncs1 ***