1433 lines
44 KiB
C
1433 lines
44 KiB
C
#include <stdlib.h>
|
|
#include <string.h>
|
|
#define _USE_MATH_DEFINES
|
|
#include <math.h>
|
|
#if defined(_USE_SSE) && (defined(_M_IX86) || defined(__i386__) || \
|
|
defined(_M_X64) || defined(__amd64__))
|
|
#include <xmmintrin.h>
|
|
#define RESAMPLER_SSE
|
|
#endif
|
|
#ifdef __APPLE__
|
|
#include <TargetConditionals.h>
|
|
#if TARGET_CPU_ARM || TARGET_CPU_ARM64
|
|
#define RESAMPLER_NEON
|
|
#endif
|
|
#elif (defined(__arm__) && defined(__ARM_NEON__))
|
|
#define RESAMPLER_NEON
|
|
#endif
|
|
#ifdef RESAMPLER_NEON
|
|
#include <arm_neon.h>
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
#define ALIGNED _declspec(align(16))
|
|
#else
|
|
#define ALIGNED __attribute__((aligned(16)))
|
|
#endif
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846
|
|
#endif
|
|
|
|
#include "internal/resampler.h"
|
|
|
|
enum { RESAMPLER_SHIFT = 10 };
|
|
enum { RESAMPLER_SHIFT_EXTRA = 8 };
|
|
enum { RESAMPLER_RESOLUTION = 1 << RESAMPLER_SHIFT };
|
|
enum {
|
|
RESAMPLER_RESOLUTION_EXTRA = 1 << (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA)
|
|
};
|
|
enum { SINC_WIDTH = 16 };
|
|
enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH };
|
|
enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 };
|
|
|
|
static const float RESAMPLER_BLEP_CUTOFF = 0.90f;
|
|
static const float RESAMPLER_BLAM_CUTOFF = 0.93f;
|
|
static const float RESAMPLER_SINC_CUTOFF = 0.999f;
|
|
|
|
ALIGNED static float cubic_lut[CUBIC_SAMPLES];
|
|
|
|
static float sinc_lut[SINC_SAMPLES + 1];
|
|
static float window_lut[SINC_SAMPLES + 1];
|
|
|
|
enum { resampler_buffer_size = SINC_WIDTH * 4 };
|
|
|
|
static int fEqual(const float b, const float a) { return fabs(a - b) < 1.0e-6; }
|
|
|
|
static float sinc(float x) {
|
|
return fEqual(x, 0.0) ? 1.0 : sin(x * M_PI) / (x * M_PI);
|
|
}
|
|
|
|
#ifdef RESAMPLER_SSE
|
|
#ifdef _MSC_VER
|
|
#include <intrin.h>
|
|
#elif defined(__clang__) || defined(__GNUC__)
|
|
static inline void __cpuid(int *data, int selector) {
|
|
#if defined(__PIC__) && defined(__i386__)
|
|
__asm("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi"
|
|
: "=a"(data[0]), "=S"(data[1]), "=c"(data[2]), "=d"(data[3])
|
|
: "0"(selector));
|
|
#elif defined(__PIC__) && defined(__amd64__)
|
|
__asm("xchg{q} {%%}rbx, %q1; cpuid; xchg{q} {%%}rbx, %q1"
|
|
: "=a"(data[0]), "=&r"(data[1]), "=c"(data[2]), "=d"(data[3])
|
|
: "0"(selector));
|
|
#else
|
|
__asm("cpuid"
|
|
: "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3])
|
|
: "0"(selector));
|
|
#endif
|
|
}
|
|
#else
|
|
#define __cpuid(a, b) memset((a), 0, sizeof(int) * 4)
|
|
#endif
|
|
|
|
static int query_cpu_feature_sse() {
|
|
int buffer[4];
|
|
__cpuid(buffer, 1);
|
|
if ((buffer[3] & (1 << 25)) == 0)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static int resampler_has_sse = 0;
|
|
#endif
|
|
|
|
void resampler_init(void) {
|
|
unsigned i;
|
|
double dx = (float)(SINC_WIDTH) / SINC_SAMPLES, x = 0.0;
|
|
for (i = 0; i < SINC_SAMPLES + 1; ++i, x += dx) {
|
|
float y = x / SINC_WIDTH;
|
|
#if 0
|
|
// Blackman
|
|
float window = 0.42659 - 0.49656 * cos(M_PI + M_PI * y) + 0.076849 * cos(2.0 * M_PI * y);
|
|
#elif 1
|
|
// Nuttal 3 term
|
|
float window =
|
|
0.40897 + 0.5 * cos(M_PI * y) + 0.09103 * cos(2.0 * M_PI * y);
|
|
#elif 0
|
|
// C.R.Helmrich's 2 term window
|
|
float window =
|
|
0.79445 * cos(0.5 * M_PI * y) + 0.20555 * cos(1.5 * M_PI * y);
|
|
#elif 0
|
|
// Lanczos
|
|
float window = sinc(y);
|
|
#endif
|
|
sinc_lut[i] = fabs(x) < SINC_WIDTH ? sinc(x) : 0.0;
|
|
window_lut[i] = window;
|
|
}
|
|
dx = 1.0 / (float)(RESAMPLER_RESOLUTION);
|
|
x = 0.0;
|
|
for (i = 0; i < RESAMPLER_RESOLUTION; ++i, x += dx) {
|
|
cubic_lut[i * 4] = (float)(-0.5 * x * x * x + x * x - 0.5 * x);
|
|
cubic_lut[i * 4 + 1] = (float)(1.5 * x * x * x - 2.5 * x * x + 1.0);
|
|
cubic_lut[i * 4 + 2] =
|
|
(float)(-1.5 * x * x * x + 2.0 * x * x + 0.5 * x);
|
|
cubic_lut[i * 4 + 3] = (float)(0.5 * x * x * x - 0.5 * x * x);
|
|
}
|
|
#ifdef RESAMPLER_SSE
|
|
resampler_has_sse = query_cpu_feature_sse();
|
|
#endif
|
|
}
|
|
|
|
typedef struct resampler {
|
|
int write_pos, write_filled;
|
|
int read_pos, read_filled;
|
|
float phase;
|
|
float phase_inc;
|
|
float inv_phase;
|
|
float inv_phase_inc;
|
|
unsigned char quality;
|
|
signed char delay_added;
|
|
signed char delay_removed;
|
|
float last_amp;
|
|
float accumulator;
|
|
float buffer_in[resampler_buffer_size * 2];
|
|
float buffer_out[resampler_buffer_size + SINC_WIDTH * 2 - 1];
|
|
} resampler;
|
|
|
|
void *resampler_create(void) {
|
|
resampler *r = (resampler *)malloc(sizeof(resampler));
|
|
if (!r)
|
|
return 0;
|
|
|
|
r->write_pos = SINC_WIDTH - 1;
|
|
r->write_filled = 0;
|
|
r->read_pos = 0;
|
|
r->read_filled = 0;
|
|
r->phase = 0;
|
|
r->phase_inc = 0;
|
|
r->inv_phase = 0;
|
|
r->inv_phase_inc = 0;
|
|
r->quality = RESAMPLER_QUALITY_MAX;
|
|
r->delay_added = -1;
|
|
r->delay_removed = -1;
|
|
r->last_amp = 0;
|
|
r->accumulator = 0;
|
|
memset(r->buffer_in, 0, sizeof(r->buffer_in));
|
|
memset(r->buffer_out, 0, sizeof(r->buffer_out));
|
|
|
|
return r;
|
|
}
|
|
|
|
void resampler_delete(void *_r) { free(_r); }
|
|
|
|
void *resampler_dup(const void *_r) {
|
|
void *r_out = malloc(sizeof(resampler));
|
|
if (!r_out)
|
|
return 0;
|
|
|
|
resampler_dup_inplace(r_out, _r);
|
|
|
|
return r_out;
|
|
}
|
|
|
|
void resampler_dup_inplace(void *_d, const void *_s) {
|
|
const resampler *r_in = (const resampler *)_s;
|
|
resampler *r_out = (resampler *)_d;
|
|
|
|
r_out->write_pos = r_in->write_pos;
|
|
r_out->write_filled = r_in->write_filled;
|
|
r_out->read_pos = r_in->read_pos;
|
|
r_out->read_filled = r_in->read_filled;
|
|
r_out->phase = r_in->phase;
|
|
r_out->phase_inc = r_in->phase_inc;
|
|
r_out->inv_phase = r_in->inv_phase;
|
|
r_out->inv_phase_inc = r_in->inv_phase_inc;
|
|
r_out->quality = r_in->quality;
|
|
r_out->delay_added = r_in->delay_added;
|
|
r_out->delay_removed = r_in->delay_removed;
|
|
r_out->last_amp = r_in->last_amp;
|
|
r_out->accumulator = r_in->accumulator;
|
|
memcpy(r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in));
|
|
memcpy(r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out));
|
|
}
|
|
|
|
void resampler_set_quality(void *_r, int quality) {
|
|
resampler *r = (resampler *)_r;
|
|
if (quality < RESAMPLER_QUALITY_MIN)
|
|
quality = RESAMPLER_QUALITY_MIN;
|
|
else if (quality > RESAMPLER_QUALITY_MAX)
|
|
quality = RESAMPLER_QUALITY_MAX;
|
|
if (r->quality != quality) {
|
|
if (quality == RESAMPLER_QUALITY_BLEP ||
|
|
r->quality == RESAMPLER_QUALITY_BLEP ||
|
|
quality == RESAMPLER_QUALITY_BLAM ||
|
|
r->quality == RESAMPLER_QUALITY_BLAM) {
|
|
r->read_pos = 0;
|
|
r->read_filled = 0;
|
|
r->last_amp = 0;
|
|
r->accumulator = 0;
|
|
memset(r->buffer_out, 0, sizeof(r->buffer_out));
|
|
}
|
|
r->delay_added = -1;
|
|
r->delay_removed = -1;
|
|
}
|
|
r->quality = (unsigned char)quality;
|
|
}
|
|
|
|
int resampler_get_free_count(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
return resampler_buffer_size - r->write_filled;
|
|
}
|
|
|
|
static int resampler_min_filled(resampler *r) {
|
|
switch (r->quality) {
|
|
default:
|
|
case RESAMPLER_QUALITY_ZOH:
|
|
case RESAMPLER_QUALITY_BLEP:
|
|
return 1;
|
|
|
|
case RESAMPLER_QUALITY_LINEAR:
|
|
case RESAMPLER_QUALITY_BLAM:
|
|
return 2;
|
|
|
|
case RESAMPLER_QUALITY_CUBIC:
|
|
return 4;
|
|
|
|
case RESAMPLER_QUALITY_SINC:
|
|
return SINC_WIDTH * 2;
|
|
}
|
|
}
|
|
|
|
static int resampler_input_delay(resampler *r) {
|
|
switch (r->quality) {
|
|
default:
|
|
case RESAMPLER_QUALITY_ZOH:
|
|
case RESAMPLER_QUALITY_BLEP:
|
|
case RESAMPLER_QUALITY_LINEAR:
|
|
case RESAMPLER_QUALITY_BLAM:
|
|
return 0;
|
|
|
|
case RESAMPLER_QUALITY_CUBIC:
|
|
return 1;
|
|
|
|
case RESAMPLER_QUALITY_SINC:
|
|
return SINC_WIDTH - 1;
|
|
}
|
|
}
|
|
|
|
static int resampler_output_delay(resampler *r) {
|
|
switch (r->quality) {
|
|
default:
|
|
case RESAMPLER_QUALITY_ZOH:
|
|
case RESAMPLER_QUALITY_LINEAR:
|
|
case RESAMPLER_QUALITY_CUBIC:
|
|
case RESAMPLER_QUALITY_SINC:
|
|
return 0;
|
|
|
|
case RESAMPLER_QUALITY_BLEP:
|
|
case RESAMPLER_QUALITY_BLAM:
|
|
return SINC_WIDTH - 1;
|
|
}
|
|
}
|
|
|
|
int resampler_ready(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
return r->write_filled > resampler_min_filled(r);
|
|
}
|
|
|
|
void resampler_clear(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
r->write_pos = SINC_WIDTH - 1;
|
|
r->write_filled = 0;
|
|
r->read_pos = 0;
|
|
r->read_filled = 0;
|
|
r->phase = 0;
|
|
r->delay_added = -1;
|
|
r->delay_removed = -1;
|
|
memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0]));
|
|
memset(r->buffer_in + resampler_buffer_size, 0,
|
|
(SINC_WIDTH - 1) * sizeof(r->buffer_in[0]));
|
|
if (r->quality == RESAMPLER_QUALITY_BLEP ||
|
|
r->quality == RESAMPLER_QUALITY_BLAM) {
|
|
r->inv_phase = 0;
|
|
r->last_amp = 0;
|
|
r->accumulator = 0;
|
|
memset(r->buffer_out, 0, sizeof(r->buffer_out));
|
|
}
|
|
}
|
|
|
|
void resampler_set_rate(void *_r, double new_factor) {
|
|
resampler *r = (resampler *)_r;
|
|
r->phase_inc = new_factor;
|
|
new_factor = 1.0 / new_factor;
|
|
r->inv_phase_inc = new_factor;
|
|
}
|
|
|
|
void resampler_write_sample(void *_r, short s) {
|
|
resampler *r = (resampler *)_r;
|
|
|
|
if (r->delay_added < 0) {
|
|
r->delay_added = 0;
|
|
r->write_filled = resampler_input_delay(r);
|
|
}
|
|
|
|
if (r->write_filled < resampler_buffer_size) {
|
|
float s32 = s;
|
|
s32 *= 256.0;
|
|
|
|
r->buffer_in[r->write_pos] = s32;
|
|
r->buffer_in[r->write_pos + resampler_buffer_size] = s32;
|
|
|
|
++r->write_filled;
|
|
|
|
r->write_pos = (r->write_pos + 1) % resampler_buffer_size;
|
|
}
|
|
}
|
|
|
|
void resampler_write_sample_fixed(void *_r, int s, unsigned char depth) {
|
|
resampler *r = (resampler *)_r;
|
|
|
|
if (r->delay_added < 0) {
|
|
r->delay_added = 0;
|
|
r->write_filled = resampler_input_delay(r);
|
|
}
|
|
|
|
if (r->write_filled < resampler_buffer_size) {
|
|
float s32 = s;
|
|
s32 /= (double)(1 << (depth - 1));
|
|
|
|
r->buffer_in[r->write_pos] = s32;
|
|
r->buffer_in[r->write_pos + resampler_buffer_size] = s32;
|
|
|
|
++r->write_filled;
|
|
|
|
r->write_pos = (r->write_pos + 1) % resampler_buffer_size;
|
|
}
|
|
}
|
|
|
|
void resampler_write_sample_float(void *_r, float s) {
|
|
resampler *r = (resampler *)_r;
|
|
|
|
if (r->delay_added < 0) {
|
|
r->delay_added = 0;
|
|
r->write_filled = resampler_input_delay(r);
|
|
}
|
|
|
|
if (r->write_filled < resampler_buffer_size) {
|
|
r->buffer_in[r->write_pos] = s;
|
|
r->buffer_in[r->write_pos + resampler_buffer_size] = s;
|
|
|
|
++r->write_filled;
|
|
|
|
r->write_pos = (r->write_pos + 1) % resampler_buffer_size;
|
|
}
|
|
}
|
|
|
|
static int resampler_run_zoh(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 1;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
sample = *in;
|
|
*out++ = sample;
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
|
|
#ifndef RESAMPLER_NEON
|
|
static int resampler_run_blep(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 1;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = *in++ - last_amp;
|
|
|
|
if (sample) {
|
|
float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernel[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
for (i = 0; i < SINC_WIDTH * 2; ++i)
|
|
out[i] += sample * kernel[i];
|
|
}
|
|
|
|
inv_phase += inv_phase_inc;
|
|
|
|
out += (int)inv_phase;
|
|
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_SSE
|
|
static int resampler_run_blep_sse(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 1;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = *in++ - last_amp;
|
|
|
|
if (sample) {
|
|
float kernel_sum = 0.0f;
|
|
__m128 kernel[SINC_WIDTH / 2];
|
|
__m128 temp1, temp2;
|
|
__m128 samplex;
|
|
float *kernelf = (float *)(&kernel);
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
samplex = _mm_set1_ps(sample);
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = _mm_load_ps((const float *)(kernel + i));
|
|
temp1 = _mm_mul_ps(temp1, samplex);
|
|
temp2 = _mm_loadu_ps((const float *)out + i * 4);
|
|
temp1 = _mm_add_ps(temp1, temp2);
|
|
_mm_storeu_ps((float *)out + i * 4, temp1);
|
|
}
|
|
}
|
|
|
|
inv_phase += inv_phase_inc;
|
|
|
|
out += (int)inv_phase;
|
|
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_NEON
|
|
static int resampler_run_blep(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 1;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = *in++ - last_amp;
|
|
|
|
if (sample) {
|
|
float kernel_sum = 0.0f;
|
|
float32x4_t kernel[SINC_WIDTH / 2];
|
|
float32x4_t temp1, temp2;
|
|
float32x4_t samplex;
|
|
float *kernelf = (float *)(&kernel);
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
samplex = vdupq_n_f32(sample);
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = vld1q_f32((const float32_t *)(kernel + i));
|
|
temp2 = vld1q_f32((const float32_t *)out + i * 4);
|
|
temp2 = vmlaq_f32(temp2, temp1, samplex);
|
|
vst1q_f32((float32_t *)out + i * 4, temp2);
|
|
}
|
|
}
|
|
|
|
inv_phase += inv_phase_inc;
|
|
|
|
out += (int)inv_phase;
|
|
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
static int resampler_run_linear(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
sample = in[0] + (in[1] - in[0]) * phase;
|
|
*out++ = sample;
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
|
|
#ifndef RESAMPLER_NEON
|
|
static int resampler_run_blam(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = in[0];
|
|
if (phase_inc < 1.0f)
|
|
sample += (in[1] - in[0]) * phase;
|
|
sample -= last_amp;
|
|
|
|
if (sample) {
|
|
float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernel[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
for (i = 0; i < SINC_WIDTH * 2; ++i)
|
|
out[i] += sample * kernel[i];
|
|
}
|
|
|
|
if (inv_phase_inc < 1.0f) {
|
|
++in;
|
|
inv_phase += inv_phase_inc;
|
|
out += (int)inv_phase;
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} else {
|
|
phase += phase_inc;
|
|
++out;
|
|
in += (int)phase;
|
|
phase = fmod(phase, 1.0f);
|
|
}
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_SSE
|
|
static int resampler_run_blam_sse(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = in[0];
|
|
if (phase_inc < 1.0f) {
|
|
sample += (in[1] - in[0]) * phase;
|
|
}
|
|
sample -= last_amp;
|
|
|
|
if (sample) {
|
|
float kernel_sum = 0.0f;
|
|
__m128 kernel[SINC_WIDTH / 2];
|
|
__m128 temp1, temp2;
|
|
__m128 samplex;
|
|
float *kernelf = (float *)(&kernel);
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
samplex = _mm_set1_ps(sample);
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = _mm_load_ps((const float *)(kernel + i));
|
|
temp1 = _mm_mul_ps(temp1, samplex);
|
|
temp2 = _mm_loadu_ps((const float *)out + i * 4);
|
|
temp1 = _mm_add_ps(temp1, temp2);
|
|
_mm_storeu_ps((float *)out + i * 4, temp1);
|
|
}
|
|
}
|
|
|
|
if (inv_phase_inc < 1.0f) {
|
|
++in;
|
|
inv_phase += inv_phase_inc;
|
|
out += (int)inv_phase;
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} else {
|
|
phase += phase_inc;
|
|
++out;
|
|
|
|
if (phase >= 1.0f) {
|
|
++in;
|
|
phase = fmod(phase, 1.0f);
|
|
}
|
|
}
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_NEON
|
|
static int resampler_run_blam(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float last_amp = r->last_amp;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
float inv_phase = r->inv_phase;
|
|
float inv_phase_inc = r->inv_phase_inc;
|
|
|
|
const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
|
|
const int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float sample;
|
|
|
|
if (out + SINC_WIDTH * 2 > out_end)
|
|
break;
|
|
|
|
sample = in[0];
|
|
if (phase_inc < 1.0f)
|
|
sample += (in[1] - in[0]) * phase;
|
|
sample -= last_amp;
|
|
|
|
if (sample) {
|
|
float kernel_sum = 0.0;
|
|
float32x4_t kernel[SINC_WIDTH / 2];
|
|
float32x4_t temp1, temp2;
|
|
float32x4_t samplex;
|
|
float *kernelf = (float *)(&kernel);
|
|
int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
int i = SINC_WIDTH;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
last_amp += sample;
|
|
sample /= kernel_sum;
|
|
samplex = vdupq_n_f32(sample);
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = vld1q_f32((const float32_t *)(kernel + i));
|
|
temp2 = vld1q_f32((const float32_t *)out + i * 4);
|
|
temp2 = vmlaq_f32(temp2, temp1, samplex);
|
|
vst1q_f32((float32_t *)out + i * 4, temp2);
|
|
}
|
|
}
|
|
|
|
if (inv_phase_inc < 1.0f) {
|
|
++in;
|
|
inv_phase += inv_phase_inc;
|
|
out += (int)inv_phase;
|
|
inv_phase = fmod(inv_phase, 1.0f);
|
|
} else {
|
|
phase += phase_inc;
|
|
++out;
|
|
|
|
if (phase >= 1.0f) {
|
|
++in;
|
|
phase = fmod(phase, 1.0f);
|
|
}
|
|
}
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
r->inv_phase = inv_phase;
|
|
r->last_amp = last_amp;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifndef RESAMPLER_NEON
|
|
static int resampler_run_cubic(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 4;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
do {
|
|
float *kernel;
|
|
int i;
|
|
float sample;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
kernel = cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4;
|
|
|
|
for (sample = 0, i = 0; i < 4; ++i)
|
|
sample += in[i] * kernel[i];
|
|
*out++ = sample;
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_SSE
|
|
static int resampler_run_cubic_sse(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 4;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
do {
|
|
__m128 temp1, temp2;
|
|
__m128 samplex = _mm_setzero_ps();
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
temp1 = _mm_loadu_ps((const float *)(in));
|
|
temp2 = _mm_load_ps(
|
|
(const float *)(cubic_lut +
|
|
(int)(phase * RESAMPLER_RESOLUTION) * 4));
|
|
temp1 = _mm_mul_ps(temp1, temp2);
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
temp1 = _mm_movehl_ps(temp1, samplex);
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
temp1 = samplex;
|
|
temp1 = _mm_shuffle_ps(temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1));
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
_mm_store_ss(out, samplex);
|
|
++out;
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_NEON
|
|
static int resampler_run_cubic(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= 4;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
do {
|
|
float32x4_t temp1, temp2;
|
|
float32x2_t half;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
temp1 = vld1q_f32((const float32_t *)(in));
|
|
temp2 = vld1q_f32(
|
|
(const float32_t *)(cubic_lut +
|
|
(int)(phase * RESAMPLER_RESOLUTION) * 4));
|
|
temp1 = vmulq_f32(temp1, temp2);
|
|
half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1));
|
|
*out++ = vget_lane_f32(vpadd_f32(half, half), 0);
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifndef RESAMPLER_NEON
|
|
static int resampler_run_sinc(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= SINC_WIDTH * 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
int step = phase_inc > 1.0f
|
|
? (int)(RESAMPLER_RESOLUTION / phase_inc *
|
|
RESAMPLER_SINC_CUTOFF)
|
|
: (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
|
|
int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
float kernel[SINC_WIDTH * 2], kernel_sum = 0.0;
|
|
int i = SINC_WIDTH;
|
|
int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
float sample;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernel[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i)
|
|
sample += in[i] * kernel[i];
|
|
*out++ = (float)(sample / kernel_sum);
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_SSE
|
|
static int resampler_run_sinc_sse(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= SINC_WIDTH * 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
int step = phase_inc > 1.0f
|
|
? (int)(RESAMPLER_RESOLUTION / phase_inc *
|
|
RESAMPLER_SINC_CUTOFF)
|
|
: (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
|
|
int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
// accumulate in extended precision
|
|
float kernel_sum = 0.0;
|
|
__m128 kernel[SINC_WIDTH / 2];
|
|
__m128 temp1, temp2;
|
|
__m128 samplex = _mm_setzero_ps();
|
|
float *kernelf = (float *)(&kernel);
|
|
int i = SINC_WIDTH;
|
|
int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = _mm_loadu_ps((const float *)(in + i * 4));
|
|
temp2 = _mm_load_ps((const float *)(kernel + i));
|
|
temp1 = _mm_mul_ps(temp1, temp2);
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
}
|
|
kernel_sum = 1.0 / kernel_sum;
|
|
temp1 = _mm_movehl_ps(temp1, samplex);
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
temp1 = samplex;
|
|
temp1 = _mm_shuffle_ps(temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1));
|
|
samplex = _mm_add_ps(samplex, temp1);
|
|
temp1 = _mm_set_ss(kernel_sum);
|
|
samplex = _mm_mul_ps(samplex, temp1);
|
|
_mm_store_ss(out, samplex);
|
|
++out;
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
#ifdef RESAMPLER_NEON
|
|
static int resampler_run_sinc(resampler *r, float **out_, float *out_end) {
|
|
int in_size = r->write_filled;
|
|
float const *in_ =
|
|
r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
|
|
int used = 0;
|
|
in_size -= SINC_WIDTH * 2;
|
|
if (in_size > 0) {
|
|
float *out = *out_;
|
|
float const *in = in_;
|
|
float const *const in_end = in + in_size;
|
|
float phase = r->phase;
|
|
float phase_inc = r->phase_inc;
|
|
|
|
int step = phase_inc > 1.0f
|
|
? (int)(RESAMPLER_RESOLUTION / phase_inc *
|
|
RESAMPLER_SINC_CUTOFF)
|
|
: (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
|
|
int window_step = RESAMPLER_RESOLUTION;
|
|
|
|
do {
|
|
// accumulate in extended precision
|
|
float kernel_sum = 0.0;
|
|
float32x4_t kernel[SINC_WIDTH / 2];
|
|
float32x4_t temp1, temp2;
|
|
float32x4_t samplex = {0};
|
|
float32x2_t half;
|
|
float *kernelf = (float *)(&kernel);
|
|
int i = SINC_WIDTH;
|
|
int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
|
|
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
|
|
|
|
if (out >= out_end)
|
|
break;
|
|
|
|
for (; i >= -SINC_WIDTH + 1; --i) {
|
|
int pos = i * step;
|
|
int window_pos = i * window_step;
|
|
kernel_sum += kernelf[i + SINC_WIDTH - 1] =
|
|
sinc_lut[abs(phase_adj - pos)] *
|
|
window_lut[abs(phase_reduced - window_pos)];
|
|
}
|
|
for (i = 0; i < SINC_WIDTH / 2; ++i) {
|
|
temp1 = vld1q_f32((const float32_t *)(in + i * 4));
|
|
temp2 = vld1q_f32((const float32_t *)(kernel + i));
|
|
samplex = vmlaq_f32(samplex, temp1, temp2);
|
|
}
|
|
kernel_sum = 1.0 / kernel_sum;
|
|
samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum));
|
|
half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex));
|
|
*out++ = vget_lane_f32(vpadd_f32(half, half), 0);
|
|
|
|
phase += phase_inc;
|
|
|
|
in += (int)phase;
|
|
|
|
phase = fmod(phase, 1.0f);
|
|
} while (in < in_end);
|
|
|
|
r->phase = phase;
|
|
*out_ = out;
|
|
|
|
used = (int)(in - in_);
|
|
|
|
r->write_filled -= used;
|
|
}
|
|
|
|
return used;
|
|
}
|
|
#endif
|
|
|
|
static void resampler_fill(resampler *r) {
|
|
int min_filled = resampler_min_filled(r);
|
|
int quality = r->quality;
|
|
while (r->write_filled > min_filled &&
|
|
r->read_filled < resampler_buffer_size) {
|
|
int write_pos = (r->read_pos + r->read_filled) % resampler_buffer_size;
|
|
int write_size = resampler_buffer_size - write_pos;
|
|
float *out = r->buffer_out + write_pos;
|
|
if (write_size > (resampler_buffer_size - r->read_filled))
|
|
write_size = resampler_buffer_size - r->read_filled;
|
|
switch (quality) {
|
|
case RESAMPLER_QUALITY_ZOH:
|
|
resampler_run_zoh(r, &out, out + write_size);
|
|
break;
|
|
|
|
case RESAMPLER_QUALITY_BLEP: {
|
|
int used;
|
|
int write_extra = 0;
|
|
if (write_pos >= r->read_pos)
|
|
write_extra = r->read_pos;
|
|
if (write_extra > SINC_WIDTH * 2 - 1)
|
|
write_extra = SINC_WIDTH * 2 - 1;
|
|
memcpy(r->buffer_out + resampler_buffer_size, r->buffer_out,
|
|
write_extra * sizeof(r->buffer_out[0]));
|
|
#ifdef RESAMPLER_SSE
|
|
if (resampler_has_sse)
|
|
used = resampler_run_blep_sse(r, &out,
|
|
out + write_size + write_extra);
|
|
else
|
|
#endif
|
|
used =
|
|
resampler_run_blep(r, &out, out + write_size + write_extra);
|
|
memcpy(r->buffer_out, r->buffer_out + resampler_buffer_size,
|
|
write_extra * sizeof(r->buffer_out[0]));
|
|
if (!used)
|
|
return;
|
|
break;
|
|
}
|
|
|
|
case RESAMPLER_QUALITY_LINEAR:
|
|
resampler_run_linear(r, &out, out + write_size);
|
|
break;
|
|
|
|
case RESAMPLER_QUALITY_BLAM: {
|
|
float *out_ = out;
|
|
int write_extra = 0;
|
|
if (write_pos >= r->read_pos)
|
|
write_extra = r->read_pos;
|
|
if (write_extra > SINC_WIDTH * 2 - 1)
|
|
write_extra = SINC_WIDTH * 2 - 1;
|
|
memcpy(r->buffer_out + resampler_buffer_size, r->buffer_out,
|
|
write_extra * sizeof(r->buffer_out[0]));
|
|
#ifdef RESAMPLER_SSE
|
|
if (resampler_has_sse)
|
|
resampler_run_blam_sse(r, &out, out + write_size + write_extra);
|
|
else
|
|
#endif
|
|
resampler_run_blam(r, &out, out + write_size + write_extra);
|
|
memcpy(r->buffer_out, r->buffer_out + resampler_buffer_size,
|
|
write_extra * sizeof(r->buffer_out[0]));
|
|
if (out == out_)
|
|
return;
|
|
break;
|
|
}
|
|
|
|
case RESAMPLER_QUALITY_CUBIC:
|
|
#ifdef RESAMPLER_SSE
|
|
if (resampler_has_sse)
|
|
resampler_run_cubic_sse(r, &out, out + write_size);
|
|
else
|
|
#endif
|
|
resampler_run_cubic(r, &out, out + write_size);
|
|
break;
|
|
|
|
case RESAMPLER_QUALITY_SINC:
|
|
#ifdef RESAMPLER_SSE
|
|
if (resampler_has_sse)
|
|
resampler_run_sinc_sse(r, &out, out + write_size);
|
|
else
|
|
#endif
|
|
resampler_run_sinc(r, &out, out + write_size);
|
|
break;
|
|
}
|
|
r->read_filled += out - r->buffer_out - write_pos;
|
|
}
|
|
}
|
|
|
|
static void resampler_fill_and_remove_delay(resampler *r) {
|
|
resampler_fill(r);
|
|
if (r->delay_removed < 0) {
|
|
int delay = resampler_output_delay(r);
|
|
r->delay_removed = 0;
|
|
while (delay--)
|
|
resampler_remove_sample(r, 1);
|
|
}
|
|
}
|
|
|
|
int resampler_get_sample_count(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
if (r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP &&
|
|
r->quality != RESAMPLER_QUALITY_BLAM) ||
|
|
r->inv_phase_inc))
|
|
resampler_fill_and_remove_delay(r);
|
|
return r->read_filled;
|
|
}
|
|
|
|
int resampler_get_sample(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
if (r->read_filled < 1 && r->phase_inc)
|
|
resampler_fill_and_remove_delay(r);
|
|
if (r->read_filled < 1)
|
|
return 0;
|
|
if (r->quality == RESAMPLER_QUALITY_BLEP ||
|
|
r->quality == RESAMPLER_QUALITY_BLAM)
|
|
return (int)(r->buffer_out[r->read_pos] + r->accumulator);
|
|
else
|
|
return (int)r->buffer_out[r->read_pos];
|
|
}
|
|
|
|
float resampler_get_sample_float(void *_r) {
|
|
resampler *r = (resampler *)_r;
|
|
if (r->read_filled < 1 && r->phase_inc)
|
|
resampler_fill_and_remove_delay(r);
|
|
if (r->read_filled < 1)
|
|
return 0;
|
|
if (r->quality == RESAMPLER_QUALITY_BLEP ||
|
|
r->quality == RESAMPLER_QUALITY_BLAM)
|
|
return r->buffer_out[r->read_pos] + r->accumulator;
|
|
else
|
|
return r->buffer_out[r->read_pos];
|
|
}
|
|
|
|
void resampler_remove_sample(void *_r, int decay) {
|
|
resampler *r = (resampler *)_r;
|
|
if (r->read_filled > 0) {
|
|
if (r->quality == RESAMPLER_QUALITY_BLEP ||
|
|
r->quality == RESAMPLER_QUALITY_BLAM) {
|
|
r->accumulator += r->buffer_out[r->read_pos];
|
|
r->buffer_out[r->read_pos] = 0;
|
|
if (decay) {
|
|
r->accumulator -= r->accumulator * (1.0f / 8192.0f);
|
|
if (fabs(r->accumulator) < 1e-20f)
|
|
r->accumulator = 0;
|
|
}
|
|
}
|
|
--r->read_filled;
|
|
r->read_pos = (r->read_pos + 1) % resampler_buffer_size;
|
|
}
|
|
}
|