110 lines
3.6 KiB
C
110 lines
3.6 KiB
C
/* Copyright (C) 2010-2021 The RetroArch team
|
|
*
|
|
* ---------------------------------------------------------------------------------------
|
|
* The following license statement only applies to this file (s16_to_float.c).
|
|
* ---------------------------------------------------------------------------------------
|
|
*
|
|
* Permission is hereby granted, free of charge,
|
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation the rights to
|
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#if defined(__SSE2__)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
#include <boolean.h>
|
|
#include <features/features_cpu.h>
|
|
#include <audio/conversion/s16_to_float.h>
|
|
|
|
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
|
|
static bool s16_to_float_neon_enabled = false;
|
|
|
|
#include <arm_neon.h>
|
|
|
|
void convert_s16_to_float(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
unsigned i = 0;
|
|
|
|
if (s16_to_float_neon_enabled)
|
|
{
|
|
float gf = gain / (1 << 15);
|
|
float32x4_t vgf = {gf, gf, gf, gf};
|
|
while (samples >= 8)
|
|
{
|
|
float32x4x2_t oreg;
|
|
int16x4x2_t inreg = vld1_s16_x2(in); // why were these interleaved before?
|
|
int32x4_t p1 = vmovl_s16(inreg.val[0]);
|
|
int32x4_t p2 = vmovl_s16(inreg.val[1]);
|
|
oreg.val[0] = vmulq_f32(vcvtq_f32_s32(p1), vgf);
|
|
oreg.val[1] = vmulq_f32(vcvtq_f32_s32(p2), vgf);
|
|
vst1q_f32_x2(out, oreg);
|
|
in += 8;
|
|
out += 8;
|
|
samples -= 8;
|
|
}
|
|
}
|
|
|
|
gain /= 0x8000;
|
|
|
|
for (; i < samples; i++)
|
|
out[i] = (float)in[i] * gain;
|
|
}
|
|
|
|
void convert_s16_to_float_init_simd(void)
|
|
{
|
|
uint64_t cpu = cpu_features_get();
|
|
|
|
if (cpu & RETRO_SIMD_NEON)
|
|
s16_to_float_neon_enabled = true;
|
|
}
|
|
#else
|
|
/**
 * convert_s16_to_float:
 * @out     : output buffer; receives @samples floats
 * @in      : input buffer of signed 16-bit samples
 * @samples : number of samples to convert
 * @gain    : gain applied to every converted sample
 *
 * Converts signed 16-bit samples to float, scaling each one by
 * (gain / 0x8000).
 *
 * When compiled with SSE2, samples are converted eight at a time
 * with vector instructions and only the remainder (fewer than eight
 * samples) goes through the scalar loop.
 **/
void convert_s16_to_float(float *out,
      const int16_t *in, size_t samples, float gain)
{
   /* size_t, matching the type of 'samples': an 'unsigned' index would
    * wrap before reaching a count larger than UINT_MAX and the loops
    * below would never terminate. */
   size_t i = 0;

#if defined(__SSE2__)
   /* The unpack below places each sample in the upper 16 bits of a
    * 32-bit lane (i.e. sample * 0x10000), so dividing by 0x80000000
    * yields the same scale as the scalar path's division by 0x8000. */
   float fgain   = gain / UINT32_C(0x80000000);
   __m128 factor = _mm_set1_ps(fgain);

   /* 'samples - i >= 8' cannot wrap because i never exceeds samples. */
   for (i = 0; samples - i >= 8; i += 8, in += 8, out += 8)
   {
      __m128i input   = _mm_loadu_si128((const __m128i *)in);
      /* Interleave with zero so every 16-bit sample becomes the high
       * half of a signed 32-bit integer (sign is preserved). */
      __m128i regs_l  = _mm_unpacklo_epi16(_mm_setzero_si128(), input);
      __m128i regs_r  = _mm_unpackhi_epi16(_mm_setzero_si128(), input);
      __m128 output_l = _mm_mul_ps(_mm_cvtepi32_ps(regs_l), factor);
      __m128 output_r = _mm_mul_ps(_mm_cvtepi32_ps(regs_r), factor);

      _mm_storeu_ps(out + 0, output_l);
      _mm_storeu_ps(out + 4, output_r);
   }

   /* 'in' and 'out' already point past the vectorized part, so restart
    * the index at zero and keep only the leftover sample count. */
   samples = samples - i;
   i       = 0;
#endif

   gain /= 0x8000;

   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
}
|
|
|
|
/**
 * convert_s16_to_float_init_simd:
 *
 * Intentionally empty: this variant of the converter has no
 * runtime SIMD state to initialize.
 **/
void convert_s16_to_float_init_simd(void)
{
}
|
|
#endif
|
|
|