| /* Copyright 2016 The Chromium OS Authors. All rights reserved. |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include <math.h> /* for abs() */ |
| #include <stdio.h> /* for printf() */ |
| #include <string.h> /* for memset() */ |
| #include <stdint.h> /* for uint64 definition */ |
| #include <stdlib.h> /* for exit() definition */ |
| #include <time.h> /* for clock_gettime */ |
| |
| #include "../drc_math.h" |
| #include "../dsp_util.h" |
| |
/* Nanoseconds per second; used when turning timespec differences into ms. */
| #define BILLION 1000000000LL |
| /* Number of iterations for performance testing. */ |
| #define ITERATIONS 400000 |
| |
#if defined(__aarch64__)
/*
 * Converts a float to a 16-bit integer with two AArch64 instructions:
 * fcvtas (float to signed 32-bit integer, nearest with ties away from zero)
 * followed by sqxtn (saturating narrow of that result to 16 bits).
 */
int16_t float_to_short(float a)
{
	int32_t ret;
	asm volatile("fcvtas %s[ret], %s[a]\n"
		     "sqxtn %h[ret], %s[ret]\n"
		     : [ret] "=w"(ret)
		     : [a] "w"(a)
		     :);
	return (int16_t)(ret);
}
#else
/*
 * Portable float to 16-bit conversion: moves the value half a unit away
 * from zero, limits it to [-32768, 32767] and lets the cast drop the
 * fractional part.
 */
int16_t float_to_short(float a)
{
	/* Bias away from zero so the truncating cast rounds to nearest. */
	if (a >= 0)
		a += 0.5f;
	else
		a -= 0.5f;

	return (int16_t)(max(-32768, min(32767, a)));
}
#endif
| |
/*
 * Reference (plain C) conversion of interleaved 16-bit samples into one
 * float buffer per channel.
 *
 * Args:
 *    input - Interleaved samples, read frame by frame; frames * channels
 *            values are consumed.
 *    output - Array of `channels` pointers; `frames` floats are written
 *             through each of them.
 *    channels - Number of channels. Nothing is written when <= 0.
 *    frames - Number of frames. Nothing is written when <= 0.
 *
 * Every sample is divided by 32768.0f before being stored.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	const int16_t *src = input;
	int frame, ch;

	/*
	 * Index the destination buffers directly rather than copying the
	 * pointers into a variable-length array: a VLA sized by `channels`
	 * is undefined for channels <= 0 and is not needed here.
	 */
	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *src++ / 32768.0f;
}
| |
/*
 * Reference (plain C) conversion of per-channel float buffers into
 * interleaved 16-bit samples.
 *
 * Args:
 *    input - Array of `channels` pointers; `frames` floats are read through
 *            each of them.
 *    output - Destination for frames * channels samples, written frame by
 *             frame.
 *    channels - Number of channels. Nothing is written when <= 0.
 *    frames - Number of frames. Nothing is written when <= 0.
 *
 * Every float is multiplied by 32768.0f and converted with float_to_short().
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int16_t *dst = output;
	int frame, ch;

	/*
	 * Index the source buffers directly rather than copying the pointers
	 * into a variable-length array: a VLA sized by `channels` is
	 * undefined for channels <= 0 and is not needed here.
	 */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			float f = input[ch][frame] * 32768.0f;
			*dst++ = float_to_short(f);
		}
	}
}
| |
| /* Use fixed size allocation to avoid performance fluctuation of allocation. */ |
| #define MAXSAMPLES 4096 |
| #define MINSAMPLES 256 |
| /* PAD buffer to check for overflows. */ |
| #define PAD 4096 |
| |
| void TestRounding(float in, int16_t expected, int samples) |
| { |
| int i; |
| int max_diff; |
| int d; |
| |
| short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| |
| memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD); |
| memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD); |
| memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD); |
| memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD); |
| |
| float *out_floats_ptr_c[2]; |
| float *out_floats_ptr_opt[2]; |
| |
| out_floats_ptr_c[0] = out_floats_left_c; |
| out_floats_ptr_c[1] = out_floats_right_c; |
| out_floats_ptr_opt[0] = out_floats_left_opt; |
| out_floats_ptr_opt[1] = out_floats_right_opt; |
| |
| for (i = 0; i < MAXSAMPLES; ++i) { |
| out_floats_left_c[i] = in; |
| out_floats_right_c[i] = in; |
| } |
| |
| /* reference C interleave */ |
| dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2, |
| samples); |
| |
| /* measure optimized interleave */ |
| for (i = 0; i < ITERATIONS; ++i) { |
| dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt, |
| 2, SND_PCM_FORMAT_S16_LE, samples); |
| } |
| |
| max_diff = 0; |
| for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) { |
| d = abs(out_shorts_c[i] - out_shorts_opt[i]); |
| if (d > max_diff) { |
| max_diff = d; |
| } |
| } |
| printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n", |
| max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0], |
| expected, |
| max_diff == 0 ? "PASS" : |
| (out_shorts_opt[0] == expected ? |
| "EXPECTED DIFFERENCE" : |
| "UNEXPECTED DIFFERENCE")); |
| |
| /* measure reference C deinterleave */ |
| dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2, |
| samples); |
| |
| /* measure optimized deinterleave */ |
| dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2, |
| SND_PCM_FORMAT_S16_LE, samples); |
| |
| d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4); |
| if (d) |
| printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0], |
| out_floats_ptr_opt[0][0]); |
| d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4); |
| if (d) |
| printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0], |
| out_floats_ptr_opt[1][0]); |
| |
| free(in_shorts); |
| free(out_floats_left_c); |
| free(out_floats_right_c); |
| free(out_floats_left_opt); |
| free(out_floats_right_opt); |
| free(out_shorts_c); |
| free(out_shorts_opt); |
| } |
| |
| int main(int argc, char **argv) |
| { |
| float e = 0.000000001f; |
| int samples = 16; |
| |
| dsp_enable_flush_denormal_to_zero(); |
| |
| // Print headings for TestRounding output. |
| printf("test interleave compare maxdif, float, float * 32k " |
| "C SIMD expect pass\n"); |
| |
| // test clamping |
| TestRounding(1.0f, 32767, samples); |
| TestRounding(-1.0f, -32768, samples); |
| TestRounding(1.1f, 32767, samples); |
| TestRounding(-1.1f, -32768, samples); |
| TestRounding(2000000000.f / 32768.f, 32767, samples); |
| TestRounding(-2000000000.f / 32768.f, -32768, samples); |
| |
| /* Infinity produces zero on arm64. */ |
| #if defined(__aarch64__) |
| #define EXPECTED_INF_RESULT 0 |
| #define EXPECTED_NEGINF_RESULT 0 |
| #elif defined(__i386__) || defined(__x86_64__) |
| #define EXPECTED_INF_RESULT -32768 |
| #define EXPECTED_NEGINF_RESULT 0 |
| #else |
| #define EXPECTED_INF_RESULT 32767 |
| #define EXPECTED_NEGINF_RESULT -32768 |
| #endif |
| |
| TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples); |
| TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples); |
| |
| // test infinity |
| union ieee754_float inf; |
| inf.ieee.negative = 0; |
| inf.ieee.exponent = 0xfe; |
| inf.ieee.mantissa = 0x7fffff; |
| TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail |
| inf.ieee.negative = 1; |
| inf.ieee.exponent = 0xfe; |
| inf.ieee.mantissa = 0x7fffff; |
| TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail |
| |
| // test rounding |
| TestRounding(0.25f, 8192, samples); |
| TestRounding(-0.25f, -8192, samples); |
| TestRounding(0.50f, 16384, samples); |
| TestRounding(-0.50f, -16384, samples); |
| TestRounding(1.0f / 32768.0f, 1, samples); |
| TestRounding(-1.0f / 32768.0f, -1, samples); |
| TestRounding(1.0f / 32768.0f + e, 1, samples); |
| TestRounding(-1.0f / 32768.0f - e, -1, samples); |
| TestRounding(1.0f / 32768.0f - e, 1, samples); |
| TestRounding(-1.0f / 32768.0f + e, -1, samples); |
| |
| /* Rounding on 'tie' is different for Intel. */ |
| #if defined(__i386__) || defined(__x86_64__) |
| TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */ |
| TestRounding(-0.5f / 32768.0f, 0, samples); |
| #else |
| TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */ |
| TestRounding(-0.5f / 32768.0f, -1, samples); |
| #endif |
| |
| TestRounding(0.5f / 32768.0f + e, 1, samples); |
| TestRounding(-0.5f / 32768.0f - e, 1, samples); |
| TestRounding(0.5f / 32768.0f - e, 0, samples); |
| TestRounding(-0.5f / 32768.0f + e, 0, samples); |
| |
| TestRounding(1.5f / 32768.0f, 2, samples); |
| TestRounding(-1.5f / 32768.0f, -2, samples); |
| TestRounding(1.5f / 32768.0f + e, 2, samples); |
| TestRounding(-1.5f / 32768.0f - e, -2, samples); |
| TestRounding(1.5f / 32768.0f - e, 1, samples); |
| TestRounding(-1.5f / 32768.0f + e, -1, samples); |
| |
| /* Test denormals */ |
| union ieee754_float denorm; |
| denorm.ieee.negative = 0; |
| denorm.ieee.exponent = 0; |
| denorm.ieee.mantissa = 1; |
| TestRounding(denorm.f, 0, samples); |
| denorm.ieee.negative = 1; |
| denorm.ieee.exponent = 0; |
| denorm.ieee.mantissa = 1; |
| TestRounding(denorm.f, 0, samples); |
| |
| /* Test NaNs. Caveat Results vary by implementation. */ |
| #if defined(__i386__) || defined(__x86_64__) |
| #define EXPECTED_NAN_RESULT -32768 |
| #else |
| #define EXPECTED_NAN_RESULT 0 |
| #endif |
| union ieee754_float nan; /* Quiet NaN */ |
| nan.ieee.negative = 0; |
| nan.ieee.exponent = 0xff; |
| nan.ieee.mantissa = 0x400001; |
| TestRounding(nan.f, EXPECTED_NAN_RESULT, samples); |
| nan.ieee.negative = 0; |
| nan.ieee.exponent = 0xff; |
| nan.ieee.mantissa = 0x000001; /* Signalling NaN */ |
| TestRounding(nan.f, EXPECTED_NAN_RESULT, samples); |
| |
| /* Test Performance */ |
| uint64_t diff; |
| struct timespec start, end; |
| int i; |
| int d; |
| |
| short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD); |
| short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD); |
| |
| memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD); |
| memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD); |
| memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD); |
| memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD); |
| memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD); |
| |
| float *out_floats_ptr_c[2]; |
| float *out_floats_ptr_opt[2]; |
| |
| out_floats_ptr_c[0] = out_floats_left_c; |
| out_floats_ptr_c[1] = out_floats_right_c; |
| out_floats_ptr_opt[0] = out_floats_left_opt; |
| out_floats_ptr_opt[1] = out_floats_right_opt; |
| |
| /* Benchmark dsp_util_interleave */ |
| for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) { |
| /* measure original C interleave */ |
| clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ |
| for (i = 0; i < ITERATIONS; ++i) { |
| dsp_util_interleave_reference(out_floats_ptr_c, |
| out_shorts_c, 2, samples); |
| } |
| clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ |
| diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - |
| start.tv_nsec) / |
| 1000000; |
| printf("interleave ORIG size = %6d, elapsed time = %llu ms\n", |
| samples, (long long unsigned int)diff); |
| |
| /* measure optimized interleave */ |
| clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ |
| for (i = 0; i < ITERATIONS; ++i) { |
| dsp_util_interleave(out_floats_ptr_c, |
| (uint8_t *)out_shorts_opt, 2, |
| SND_PCM_FORMAT_S16_LE, samples); |
| } |
| clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ |
| diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - |
| start.tv_nsec) / |
| 1000000; |
| printf("interleave SIMD size = %6d, elapsed time = %llu ms\n", |
| samples, (long long unsigned int)diff); |
| |
| /* Test C and SIMD output match */ |
| d = memcmp(out_shorts_c, out_shorts_opt, |
| MAXSAMPLES * 2 * 2 + PAD); |
| if (d) |
| printf("interleave compare %d, %d %d, %d %d\n", d, |
| out_shorts_c[0], out_shorts_c[1], |
| out_shorts_opt[0], out_shorts_opt[1]); |
| } |
| |
| /* Benchmark dsp_util_deinterleave */ |
| for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) { |
| /* Measure original C deinterleave */ |
| clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ |
| for (i = 0; i < ITERATIONS; ++i) { |
| dsp_util_deinterleave_reference( |
| in_shorts, out_floats_ptr_c, 2, samples); |
| } |
| clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ |
| diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - |
| start.tv_nsec) / |
| 1000000; |
| printf("deinterleave ORIG size = %6d, " |
| "elapsed time = %llu ms\n", |
| samples, (long long unsigned int)diff); |
| |
| /* Measure optimized deinterleave */ |
| clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ |
| for (i = 0; i < ITERATIONS; ++i) { |
| dsp_util_deinterleave((uint8_t *)in_shorts, |
| out_floats_ptr_opt, 2, |
| SND_PCM_FORMAT_S16_LE, samples); |
| } |
| clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ |
| diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - |
| start.tv_nsec) / |
| 1000000; |
| printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n", |
| samples, (long long unsigned int)diff); |
| |
| /* Test C and SIMD output match */ |
| d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], |
| samples * 4); |
| if (d) |
| printf("left compare %d, %f %f\n", d, |
| out_floats_ptr_c[0][0], |
| out_floats_ptr_opt[0][0]); |
| d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], |
| samples * 4); |
| if (d) |
| printf("right compare %d, %f %f\n", d, |
| out_floats_ptr_c[1][0], |
| out_floats_ptr_opt[1][0]); |
| } |
| |
| free(in_shorts); |
| free(out_floats_left_c); |
| free(out_floats_right_c); |
| free(out_floats_left_opt); |
| free(out_floats_right_opt); |
| free(out_shorts_c); |
| free(out_shorts_opt); |
| |
| return 0; |
| } |