Added routines to convert data from 32-bit integer type to float32_t and vice-versa.
Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com> --- lib/mldev/mldev_utils.h | 92 +++++++++++++ lib/mldev/mldev_utils_neon.c | 242 +++++++++++++++++++++++++++++++++ lib/mldev/mldev_utils_scalar.c | 98 +++++++++++++ lib/mldev/version.map | 4 + 4 files changed, 436 insertions(+) diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h index 220afb42f0d..1d041531b43 100644 --- a/lib/mldev/mldev_utils.h +++ b/lib/mldev/mldev_utils.h @@ -236,6 +236,98 @@ __rte_internal int rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output); +/** + * @internal + * + * Convert a buffer containing numbers in single precision floating format (float32) to signed + * 32-bit integer format (INT32). + * + * @param[in] scale + * Scale factor for conversion. + * @param[in] nb_elements + * Number of elements in the buffer. + * @param[in] input + * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * @param[out] output + * Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * + * @return + * - 0, Success. + * - < 0, Error code on failure. + */ +__rte_internal +int +rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output); + +/** + * @internal + * + * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision + * floating format (float32). + * + * @param[in] scale + * Scale factor for conversion. + * @param[in] nb_elements + * Number of elements in the buffer. + * @param[in] input + * Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * @param[out] output + * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * + * @return + * - 0, Success. + * - < 0, Error code on failure. + */ +__rte_internal +int +rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output); + +/** + * @internal + * + * Convert a buffer containing numbers in single precision floating format (float32) to unsigned + * 32-bit integer format (UINT32). + * + * @param[in] scale + * Scale factor for conversion. + * @param[in] nb_elements + * Number of elements in the buffer. + * @param[in] input + * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * @param[out] output + * Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * + * @return + * - 0, Success. + * - < 0, Error code on failure. + */ +__rte_internal +int +rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output); + +/** + * @internal + * + * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single + * precision floating format (float32). + * + * @param[in] scale + * Scale factor for conversion. + * @param[in] nb_elements + * Number of elements in the buffer. + * @param[in] input + * Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * @param[out] output + * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes. + * + * @return + * - 0, Success. + * - < 0, Error code on failure. + */ +__rte_internal +int +rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output); + /** * @internal * diff --git a/lib/mldev/mldev_utils_neon.c b/lib/mldev/mldev_utils_neon.c index c7baec012b8..250fa43fa73 100644 --- a/lib/mldev/mldev_utils_neon.c +++ b/lib/mldev/mldev_utils_neon.c @@ -600,6 +600,248 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void return 0; } +static inline void +__float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output) +{ + float32x4_t f32x4; + int32x4_t s32x4; + + /* load 4 x float elements */ + f32x4 = vld1q_f32(input); + + /* scale */ + f32x4 = vmulq_n_f32(f32x4, scale); + + /* convert to int32x4_t using round to nearest with ties away rounding mode */ + s32x4 = vcvtaq_s32_f32(f32x4); + + /* store 4 elements */ + vst1q_s32(output, s32x4); +} + +static inline void +__float32_to_int32_neon_s32x1(float scale, float *input, int32_t *output) +{ + /* scale and convert, round to nearest with ties away rounding mode */ + *output = vcvtas_s32_f32(scale * (*input)); +} + +int +rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output) +{ + float *input_buffer; + int32_t *output_buffer; + uint64_t nb_iterations; + uint32_t vlen; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (float *)input; + output_buffer = (int32_t *)output; + vlen = 2 * sizeof(float) / sizeof(int32_t); + nb_iterations = nb_elements / vlen; + + /* convert vlen elements in each iteration */ + for (i = 0; i < nb_iterations; i++) { + __float32_to_int32_neon_s32x4(scale, input_buffer, output_buffer); + input_buffer += vlen; + output_buffer += vlen; + } + + /* convert leftover elements */ + i = i * vlen; + for (; i < nb_elements; i++) { + __float32_to_int32_neon_s32x1(scale, input_buffer, output_buffer); + input_buffer++; + output_buffer++; + } + + return 0; +} + +static inline void +__int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output) +{ + float32x4_t f32x4; + int32x4_t s32x4; + + /* load 4 x int32_t elements */ + s32x4 = vld1q_s32(input); + + /* convert int32_t to float */ + f32x4 = vcvtq_f32_s32(s32x4); + + /* scale */ + f32x4 = vmulq_n_f32(f32x4, scale); + + /* store float32x4_t */ + vst1q_f32(output, f32x4); +} + +static inline void +__int32_to_float32_neon_f32x1(float scale, int32_t *input, float *output) +{ + *output = scale * vcvts_f32_s32(*input); +} + +int +rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output) +{ + int32_t *input_buffer; + float *output_buffer; + uint64_t nb_iterations; + uint32_t vlen; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (int32_t *)input; + output_buffer = (float *)output; + vlen = 2 * sizeof(float) / sizeof(int32_t); + nb_iterations = nb_elements / vlen; + + /* convert vlen elements in each iteration */ + for (i = 0; i < nb_iterations; i++) { + __int32_to_float32_neon_f32x4(scale, input_buffer, output_buffer); + input_buffer += vlen; + output_buffer += vlen; + } + + /* convert leftover elements */ + i = i * vlen; + for (; i < nb_elements; i++) { + __int32_to_float32_neon_f32x1(scale, input_buffer, output_buffer); + input_buffer++; + output_buffer++; + } + + return 0; +} + +static inline void +__float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output) +{ + float32x4_t f32x4; + uint32x4_t u32x4; + + /* load 4 float elements */ + f32x4 = vld1q_f32(input); + + /* scale */ + f32x4 = vmulq_n_f32(f32x4, scale); + + /* convert using round to nearest with ties to away rounding mode */ + u32x4 = vcvtaq_u32_f32(f32x4); + + /* store 4 elements */ + vst1q_u32(output, u32x4); +} + +static inline void +__float32_to_uint32_neon_u32x1(float scale, float *input, uint32_t *output) +{ + /* scale and convert, round to nearest with ties away rounding mode */ + *output = vcvtas_u32_f32(scale * (*input)); +} + +int +rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output) +{ + float *input_buffer; + uint32_t *output_buffer; + uint64_t nb_iterations; + uint64_t vlen; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (float *)input; + output_buffer = (uint32_t *)output; + vlen = 2 * sizeof(float) / sizeof(uint32_t); + nb_iterations = nb_elements / vlen; + + /* convert vlen elements in each iteration */ + for (i = 0; i < nb_iterations; i++) { + __float32_to_uint32_neon_u32x4(scale, input_buffer, output_buffer); + input_buffer += vlen; + output_buffer += vlen; + } + + /* convert leftover elements */ + i = i * vlen; + for (; i < nb_elements; i++) { + __float32_to_uint32_neon_u32x1(scale, input_buffer, output_buffer); + input_buffer++; + output_buffer++; + } + + return 0; +} + +static inline void +__uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output) +{ + float32x4_t f32x4; + uint32x4_t u32x4; + + /* load 4 x uint32_t elements */ + u32x4 = vld1q_u32(input); + + /* convert uint32_t to float */ + f32x4 = vcvtq_f32_u32(u32x4); + + /* scale */ + f32x4 = vmulq_n_f32(f32x4, scale); + + /* store float32x4_t */ + vst1q_f32(output, f32x4); +} + +static inline void +__uint32_to_float32_neon_f32x1(float scale, uint32_t *input, float *output) +{ + *output = scale * vcvts_f32_u32(*input); +} + +int +rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output) +{ + uint32_t *input_buffer; + float *output_buffer; + uint64_t nb_iterations; + uint32_t vlen; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (uint32_t *)input; + output_buffer = (float *)output; + vlen = 2 * sizeof(float) / sizeof(uint32_t); + nb_iterations = nb_elements / vlen; + + /* convert vlen elements in each iteration */ + for (i = 0; i < nb_iterations; i++) { + __uint32_to_float32_neon_f32x4(scale, input_buffer, output_buffer); + input_buffer += vlen; + output_buffer += vlen; + } + + /* convert leftover elements */ + i = i * vlen; + for (; i < nb_elements; i++) { + __uint32_to_float32_neon_f32x1(scale, input_buffer, output_buffer); + input_buffer++; + output_buffer++; + } + + return 0; +} + static inline void __float32_to_float16_neon_f16x4(float32_t *input, float16_t *output) { diff --git a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c index 4d6cb880240..af1a3a103b2 100644 --- a/lib/mldev/mldev_utils_scalar.c +++ b/lib/mldev/mldev_utils_scalar.c @@ -229,6 +229,104 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void return 0; } +int +rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output) +{ + float *input_buffer; + int32_t *output_buffer; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (float *)input; + output_buffer = (int32_t *)output; + + for (i = 0; i < nb_elements; i++) { + *output_buffer = (int32_t)round((*input_buffer) * scale); + + input_buffer++; + output_buffer++; + } + + return 0; +} + +int +rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output) +{ + int32_t *input_buffer; + float *output_buffer; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (int32_t *)input; + output_buffer = (float *)output; + + for (i = 0; i < nb_elements; i++) { + *output_buffer = scale * (float)(*input_buffer); + + input_buffer++; + output_buffer++; + } + + return 0; +} + +int +rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output) +{ + float *input_buffer; + uint32_t *output_buffer; + int32_t i32; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (float *)input; + output_buffer = (uint32_t *)output; + + for (i = 0; i < nb_elements; i++) { + i32 = (int32_t)round((*input_buffer) * scale); + + if (i32 < 0) + i32 = 0; + + *output_buffer = (uint32_t)i32; + + input_buffer++; + output_buffer++; + } + + return 0; +} + +int +rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output) +{ + uint32_t *input_buffer; + float *output_buffer; + uint64_t i; + + if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL)) + return -EINVAL; + + input_buffer = (uint32_t *)input; + output_buffer = (float *)output; + + for (i = 0; i < nb_elements; i++) { + *output_buffer = scale * (float)(*input_buffer); + + input_buffer++; + output_buffer++; + } + + return 0; +} + /* Convert a single precision floating point number (float32) into a half precision * floating point number (float16) using round to nearest rounding mode. */ diff --git a/lib/mldev/version.map b/lib/mldev/version.map index 99841db6aa9..2e8f1555225 100644 --- a/lib/mldev/version.map +++ b/lib/mldev/version.map @@ -57,6 +57,10 @@ INTERNAL { rte_ml_io_int16_to_float32; rte_ml_io_float32_to_uint16; rte_ml_io_uint16_to_float32; + rte_ml_io_float32_to_int32; + rte_ml_io_int32_to_float32; + rte_ml_io_float32_to_uint32; + rte_ml_io_uint32_to_float32; rte_ml_io_float32_to_float16; rte_ml_io_float16_to_float32; rte_ml_io_float32_to_bfloat16; -- 2.42.0