From d62a6463af35bb0438b11791ebb5dd80ff06e378 Mon Sep 17 00:00:00 2001 From: Chris Moeller Date: Sat, 14 Mar 2015 17:30:00 -0700 Subject: [PATCH] Updated vio2sf. --- .../vio2sf/vio2sf.xcodeproj/project.pbxproj | 8 + .../vio2sf/vio2sf/src/vio2sf/desmume/MMU.c | 28 +- .../vio2sf/src/vio2sf/desmume/NDSSystem.c | 13 + .../vio2sf/vio2sf/src/vio2sf/desmume/armcpu.c | 4 +- .../vio2sf/vio2sf/src/vio2sf/desmume/barray.c | 212 +++++ .../vio2sf/vio2sf/src/vio2sf/desmume/barray.h | 55 ++ .../vio2sf/src/vio2sf/desmume/resampler.c | 870 ++++++++++++++---- .../vio2sf/src/vio2sf/desmume/resampler.h | 23 +- .../vio2sf/vio2sf/src/vio2sf/desmume/state.c | 7 +- .../vio2sf/vio2sf/src/vio2sf/desmume/state.h | 4 +- .../HighlyComplete/HCDecoder.mm | 8 +- 11 files changed, 1050 insertions(+), 182 deletions(-) create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.c create mode 100644 Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.h diff --git a/Frameworks/vio2sf/vio2sf.xcodeproj/project.pbxproj b/Frameworks/vio2sf/vio2sf.xcodeproj/project.pbxproj index b59cbe637..9ac21d841 100644 --- a/Frameworks/vio2sf/vio2sf.xcodeproj/project.pbxproj +++ b/Frameworks/vio2sf/vio2sf.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ 833B1A3E180BAD0200414852 /* isqrt.c in Sources */ = {isa = PBXBuildFile; fileRef = 833B1A3A180BAD0200414852 /* isqrt.c */; }; 833B1A3F180BAD0200414852 /* isqrt.h in Headers */ = {isa = PBXBuildFile; fileRef = 833B1A3B180BAD0200414852 /* isqrt.h */; }; + 83699ABA1AB3D8EB00F5A6E3 /* barray.c in Sources */ = {isa = PBXBuildFile; fileRef = 83699AB81AB3D8EB00F5A6E3 /* barray.c */; }; + 83699ABB1AB3D8EB00F5A6E3 /* barray.h in Headers */ = {isa = PBXBuildFile; fileRef = 83699AB91AB3D8EB00F5A6E3 /* barray.h */; }; 83DD1A0318EA634F00DADA1A /* resampler.c in Sources */ = {isa = PBXBuildFile; fileRef = 83DD1A0118EA634F00DADA1A /* resampler.c */; }; 83DD1A0418EA634F00DADA1A /* resampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 83DD1A0218EA634F00DADA1A /* resampler.h */; }; 83DE0C14180A9BD400269051 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 83DE0C12180A9BD400269051 /* InfoPlist.strings */; }; @@ -55,6 +57,8 @@ /* Begin PBXFileReference section */ 833B1A3A180BAD0200414852 /* isqrt.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = isqrt.c; sourceTree = ""; }; 833B1A3B180BAD0200414852 /* isqrt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = isqrt.h; sourceTree = ""; }; + 83699AB81AB3D8EB00F5A6E3 /* barray.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = barray.c; sourceTree = ""; }; + 83699AB91AB3D8EB00F5A6E3 /* barray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = barray.h; sourceTree = ""; }; 83DD1A0118EA634F00DADA1A /* resampler.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resampler.c; sourceTree = ""; }; 83DD1A0218EA634F00DADA1A /* resampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resampler.h; sourceTree = ""; }; 83DE0C06180A9BD400269051 /* vio2sf.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = vio2sf.framework; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -173,6 +177,8 @@ 83DE0C45180A9CA400269051 /* desmume */ = { isa = PBXGroup; children = ( + 83699AB81AB3D8EB00F5A6E3 /* barray.c */, + 83699AB91AB3D8EB00F5A6E3 /* barray.h */, 83DD1A0118EA634F00DADA1A /* resampler.c */, 83DD1A0218EA634F00DADA1A /* resampler.h */, 833B1A3A180BAD0200414852 /* isqrt.c */, @@ -241,6 +247,7 @@ 83DD1A0418EA634F00DADA1A /* resampler.h in Headers */, 83DE0C89180A9CA400269051 /* config.h in Headers */, 83DE0C8D180A9CA400269051 /* debug.h in Headers */, + 83699ABB1AB3D8EB00F5A6E3 /* barray.h in Headers */, 83DE0C98180A9CA400269051 /* mem.h in Headers */, 83DE0C90180A9CA400269051 /* FIFO.h in Headers */, 83DE0C92180A9CA400269051 /* GPU.h in Headers */, @@ -325,6 +332,7 @@ 83DE0CA6180A9CA400269051 /* thumb_tabdef.inc in Sources */, 83DE0C96180A9CA400269051 /* mc.c in Sources */, 83DE0C91180A9CA400269051 /* GPU.c in Sources */, + 83699ABA1AB3D8EB00F5A6E3 /* barray.c in Sources */, 83DE0CA4180A9CA400269051 /* thumb_instructions.c in Sources */, 83DE0C84180A9CA400269051 /* armcpu.c in Sources */, 83DE0C94180A9CA400269051 /* matrix.c in Sources */, diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c index 25ad43c05..d982b93fc 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/MMU.c @@ -44,6 +44,8 @@ #include "registers.h" #include "isqrt.h" +#include "barray.h" + #if VIO2SF_GPU_ENABLE #include "render3D.h" #else @@ -543,6 +545,14 @@ u8 FASTCALL MMU_read8(NDS_state *state, u32 proc, u32 adr) return WIFI_read16(&state->wifiMac,adr) & 0xFF; } #endif + + if (state->array_rom_coverage) + { + if (state->MMU->MMU_MEM[proc][(adr>>20)&0xFF] == state->MMU->CART_ROM) + { + bit_array_set(state->array_rom_coverage, (adr & state->MMU->MMU_MASK[proc][(adr>>20)&0xFF]) / 4); + } + } return state->MMU->MMU_MEM[proc][(adr>>20)&0xFF][adr&state->MMU->MMU_MASK[proc][(adr>>20)&0xFF]]; } @@ -618,7 +628,15 @@ u16 FASTCALL MMU_read16(NDS_state *state, u32 proc, u32 adr) } } - /* Returns data from memory */ + if (state->array_rom_coverage) + { + if (state->MMU->MMU_MEM[proc][(adr>>20)&0xFF] == state->MMU->CART_ROM) + { + bit_array_set(state->array_rom_coverage, (adr & state->MMU->MMU_MASK[proc][(adr>>20)&0xFF]) / 4); + } + } + + /* Returns data from memory */ return T1ReadWord(state->MMU->MMU_MEM[proc][(adr >> 20) & 0xFF], adr & state->MMU->MMU_MASK[proc][(adr >> 20) & 0xFF]); } @@ -792,6 +810,14 @@ u32 FASTCALL MMU_read32(NDS_state *state, u32 proc, u32 adr) } } + if (state->array_rom_coverage) + { + if (state->MMU->MMU_MEM[proc][(adr>>20)&0xFF] == state->MMU->CART_ROM) + { + bit_array_set(state->array_rom_coverage, (adr & state->MMU->MMU_MASK[proc][(adr>>20)&0xFF]) / 4); + } + } + /* Returns data from memory */ return T1ReadLong(state->MMU->MMU_MEM[proc][(adr >> 20) & 0xFF], adr & state->MMU->MMU_MASK[proc][(adr >> 20) & 0xFF]); } diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c index 8872a8947..c0950160a 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/NDSSystem.c @@ -29,6 +29,8 @@ //#include "cflash.h" #include "spu_exports.h" +#include "barray.h" + //#include "ROMReader.h" /* the count of bytes copied from the firmware into memory */ @@ -245,6 +247,13 @@ NDS_header * NDS_getROMHeader(NDS_state *state) header->logoCRC16 = T1ReadWord(state->MMU->CART_ROM, 348); header->headerCRC16 = T1ReadWord(state->MMU->CART_ROM, 350); memcpy(header->reserved, state->MMU->CART_ROM + 352, 160); + + if (state->array_rom_coverage) + { + int i; + for (i = 0; i < 0x200 / 4; ++i) + bit_array_set(state->array_rom_coverage, i); + } return header; @@ -284,6 +293,8 @@ void NDS_Reset( NDS_state *state) for(i = 0; i < (header->ARM9binSize>>2); ++i) { + if (state->array_rom_coverage) + bit_array_set(state->array_rom_coverage, src/4); MMU_write32(state, 0, dst, T1ReadLong(state->MMU->CART_ROM, src)); dst += 4; src += 4; @@ -294,6 +305,8 @@ void NDS_Reset( NDS_state *state) for(i = 0; i < (header->ARM7binSize>>2); ++i) { + if (state->array_rom_coverage) + bit_array_set(state->array_rom_coverage, src/4); MMU_write32(state, 1, dst, T1ReadLong(state->MMU->CART_ROM, src)); dst += 4; src += 4; diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/armcpu.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/armcpu.c index 6d9af88ac..07e9767a5 100755 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/armcpu.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/armcpu.c @@ -78,8 +78,8 @@ const unsigned char arm_cond_table[16*16] = { 0x00,0xFF,0xFF,0x00,0x00,0xFF,0xFF,0x20, }; -armcpu_t NDS_ARM7; -armcpu_t NDS_ARM9; +/*armcpu_t NDS_ARM7; +armcpu_t NDS_ARM9;*/ #define SWAP(a, b, c) do \ { \ diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.c new file mode 100644 index 000000000..873f5a5ac --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.c @@ -0,0 +1,212 @@ +#include "barray.h" + +#include + + +void * bit_array_create(size_t size) +{ + size_t bsize = ((size + 7) >> 3) + sizeof(size_t); + void * ret = calloc(1, bsize); + if (ret) *(size_t *)ret = size; + return ret; +} + +void bit_array_destroy(void * array) +{ + if (array) free(array); +} + +void * bit_array_dup(const void * array) +{ + if (array) + { + const size_t * size = (const size_t *) array; + size_t bsize = ((*size + 7) >> 3) + sizeof(*size); + void * ret = malloc(bsize); + if (ret) memcpy(ret, array, bsize); + return ret; + } + return NULL; +} + +void bit_array_reset(void * array) +{ + if (array) + { + size_t * size = (size_t *) array; + size_t bsize = (*size + 7) >> 3; + memset(size + 1, 0, bsize); + } +} + + +size_t bit_array_size(const void * array) +{ + if (array) + { + return *(const size_t *) array; + } + return 0; +} + +size_t bit_array_count(const void * array) +{ + if (array) + { + size_t i; + size_t count = 0; + const size_t * size = (const size_t *) array; + for (i = 0; i < *size; ++i) + count += bit_array_test(array, i); + return count; + } + return 0; +} + +void bit_array_set(void * array, size_t bit) +{ + if (array) + { + size_t * size = (size_t *) array; + if (bit < *size) + { + unsigned char * ptr = (unsigned char *)(size + 1); + ptr[bit >> 3] |= (1U << (bit & 7)); + } + } +} + +void bit_array_set_range(void * array, size_t bit, size_t count) +{ + if (array && count) + { + size_t * size = (size_t *) array; + if (bit < *size) + { + unsigned char * ptr = (unsigned char *)(size + 1); + size_t i; + for (i = bit; i < *size && i < bit + count; ++i) + ptr[i >> 3] |= (1U << (i & 7)); + } + } +} + +int bit_array_test(const void * array, size_t bit) +{ + if (array) + { + const size_t * size = (const size_t *) array; + if (bit < *size) + { + const unsigned char * ptr = (const unsigned char *)(size + 1); + if (ptr[bit >> 3] & (1U << (bit & 7))) + { + return 1; + } + } + } + return 0; +} + +int bit_array_test_range(const void * array, size_t bit, size_t count) +{ + if (array) + { + const size_t * size = (const size_t *) array; + if (bit < *size) + { + const unsigned char * ptr = (const unsigned char *)(size + 1); + if ((bit & 7) && (count > 8)) + { + while ((bit < *size) && count && (bit & 7)) + { + if (ptr[bit >> 3] & (1U << (bit & 7))) return 1; + bit++; + count--; + } + } + if (!(bit & 7)) + { + while (((*size - bit) >= 8) && (count >= 8)) + { + if (ptr[bit >> 3]) return 1; + bit += 8; + count -= 8; + } + } + while ((bit < *size) && count) + { + if (ptr[bit >> 3] & (1U << (bit & 7))) return 1; + bit++; + count--; + } + } + } + return 0; +} + +void bit_array_clear(void * array, size_t bit) +{ + if (array) + { + size_t * size = (size_t *) array; + if (bit < *size) + { + unsigned char * ptr = (unsigned char *)(size + 1); + ptr[bit >> 3] &= ~(1U << (bit & 7)); + } + } +} + +void bit_array_clear_range(void * array, size_t bit, size_t count) +{ + if (array && count) + { + size_t * size = (size_t *) array; + if (bit < *size) + { + unsigned char * ptr = (unsigned char *)(size + 1); + size_t i; + for (i = bit; i < *size && i < bit + count; ++i) + ptr[i >> 3] &= ~(1U << (i & 7)); + } + } +} + +void bit_array_merge(void * dest, const void * source, size_t offset) +{ + if (dest && source) + { + size_t * dsize = (size_t *) dest; + const size_t * ssize = (const size_t *) source; + size_t soffset = 0; + while (offset < *dsize && soffset < *ssize) + { + if (bit_array_test(source, soffset)) + { + bit_array_set(dest, offset); + } + soffset++; + offset++; + } + } +} + +void bit_array_mask(void * dest, const void * source, size_t offset) +{ + if (dest && source) + { + size_t * dsize = (size_t *) dest; + const size_t * ssize = (const size_t *) source; + size_t soffset = 0; + while (offset < *dsize && soffset < *ssize) + { + if (bit_array_test(source, soffset)) + { + bit_array_clear(dest, offset); + } + soffset++; + offset++; + } + } +} diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.h new file mode 100644 index 000000000..5027f18d9 --- /dev/null +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/barray.h @@ -0,0 +1,55 @@ +#ifndef _B_ARRAY_H_ +#define _B_ARRAY_H_ + +#include + +#ifdef BARRAY_DECORATE +#define PASTE(a,b) a ## b +#define EVALUATE(a,b) PASTE(a,b) +#define bit_array_create EVALUATE(BARRAY_DECORATE,_bit_array_create) +#define bit_array_destroy EVALUATE(BARRAY_DECORATE,_bit_array_destroy) +#define bit_array_dup EVALUATE(BARRAY_DECORATE,_bit_array_dup) +#define bit_array_size EVALUATE(BARRAY_DECORATE,_bit_array_size) +#define bit_array_reset EVALUATE(BARRAY_DECORATE,_bit_array_reset) +#define bit_array_count EVALUATE(BARRAY_DECORATE,_bit_array_count) +#define bit_array_set EVALUATE(BARRAY_DECORATE,_bit_array_set) +#define bit_array_set_range EVALUATE(BARRAY_DECORATE,_bit_array_set_range) +#define bit_array_test EVALUATE(BARRAY_DECORATE,_bit_array_test) +#define bit_array_test_range EVALUATE(BARRAY_DECORATE,_bit_array_test_range) +#define bit_array_clear EVALUATE(BARRAY_DECORATE,_bit_array_clear) +#define bit_array_clear_range EVALUATE(BARRAY_DECORATE,_bit_array_clear_range) +#define bit_array_merge EVALUATE(BARRAY_DECORATE,_bit_array_merge) +#define bit_array_mask EVALUATE(BARRAY_DECORATE,_bit_array_mask) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void * bit_array_create(size_t size); +void bit_array_destroy(void * array); +void * bit_array_dup(const void * array); + +size_t bit_array_size(const void * array); + +void bit_array_reset(void * array); + +size_t bit_array_count(const void * array); + +void bit_array_set(void * array, size_t bit); +void bit_array_set_range(void * array, size_t bit, size_t count); + +int bit_array_test(const void * array, size_t bit); +int bit_array_test_range(const void * array, size_t bit, size_t count); + +void bit_array_clear(void * array, size_t bit); +void bit_array_clear_range(void * array, size_t bit, size_t count); + +void bit_array_merge(void * array, const void * source, size_t offset); +void bit_array_mask(void * array, const void * source, size_t offset); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.c index 6a828caeb..bcfc28f0a 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.c @@ -6,6 +6,13 @@ #include #define RESAMPLER_SSE #endif +#ifdef __APPLE__ +#include +#if TARGET_CPU_ARM || TARGET_CPU_ARM64 +#include +#define RESAMPLER_NEON +#endif +#endif #ifdef _MSC_VER #define ALIGNED _declspec(align(16)) @@ -20,11 +27,17 @@ #include "resampler.h" enum { RESAMPLER_SHIFT = 10 }; +enum { RESAMPLER_SHIFT_EXTRA = 8 }; enum { RESAMPLER_RESOLUTION = 1 << RESAMPLER_SHIFT }; +enum { RESAMPLER_RESOLUTION_EXTRA = 1 << (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA) }; enum { SINC_WIDTH = 16 }; enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH }; enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 }; +static const float RESAMPLER_BLEP_CUTOFF = 0.90f; +static const float RESAMPLER_BLAM_CUTOFF = 0.93f; +static const float RESAMPLER_SINC_CUTOFF = 0.999f; + ALIGNED static float cubic_lut[CUBIC_SAMPLES]; static float sinc_lut[SINC_SAMPLES + 1]; @@ -127,10 +140,10 @@ typedef struct resampler { int write_pos, write_filled; int read_pos, read_filled; - unsigned int phase; - unsigned int phase_inc; - unsigned int inv_phase; - unsigned int inv_phase_inc; + float phase; + float phase_inc; + float inv_phase; + float inv_phase_inc; unsigned char quality; signed char delay_added; signed char delay_removed; @@ -171,25 +184,10 @@ void resampler_delete(void * _r) void * resampler_dup(const void * _r) { - const resampler * r_in = ( const resampler * ) _r; - resampler * r_out = ( resampler * ) malloc( sizeof(resampler) ); + void * r_out = malloc( sizeof(resampler) ); if ( !r_out ) return 0; - r_out->write_pos = r_in->write_pos; - r_out->write_filled = r_in->write_filled; - r_out->read_pos = r_in->read_pos; - r_out->read_filled = r_in->read_filled; - r_out->phase = r_in->phase; - r_out->phase_inc = r_in->phase_inc; - r_out->inv_phase = r_in->inv_phase; - r_out->inv_phase_inc = r_in->inv_phase_inc; - r_out->quality = r_in->quality; - r_out->delay_added = r_in->delay_added; - r_out->delay_removed = r_in->delay_removed; - r_out->last_amp = r_in->last_amp; - r_out->accumulator = r_in->accumulator; - memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) ); - memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) ); + resampler_dup_inplace(r_out, _r); return r_out; } @@ -225,7 +223,8 @@ void resampler_set_quality(void *_r, int quality) quality = RESAMPLER_QUALITY_MAX; if ( r->quality != quality ) { - if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP ) + if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP || + quality == RESAMPLER_QUALITY_BLAM || r->quality == RESAMPLER_QUALITY_BLAM ) { r->read_pos = 0; r->read_filled = 0; @@ -255,6 +254,7 @@ static int resampler_min_filled(resampler *r) return 1; case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 2; case RESAMPLER_QUALITY_CUBIC: @@ -273,6 +273,7 @@ static int resampler_input_delay(resampler *r) case RESAMPLER_QUALITY_ZOH: case RESAMPLER_QUALITY_BLEP: case RESAMPLER_QUALITY_LINEAR: + case RESAMPLER_QUALITY_BLAM: return 0; case RESAMPLER_QUALITY_CUBIC: @@ -295,6 +296,7 @@ static int resampler_output_delay(resampler *r) return 0; case RESAMPLER_QUALITY_BLEP: + case RESAMPLER_QUALITY_BLAM: return SINC_WIDTH - 1; } } @@ -317,19 +319,24 @@ void resampler_clear(void *_r) r->delay_removed = -1; memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); memset(r->buffer_in + resampler_buffer_size, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0])); - if (r->quality == RESAMPLER_QUALITY_BLEP) + if (r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM) + { + r->inv_phase = 0; + r->last_amp = 0; + r->accumulator = 0; memset(r->buffer_out, 0, sizeof(r->buffer_out)); + } } void resampler_set_rate(void *_r, double new_factor) { resampler * r = ( resampler * ) _r; - r->phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION ); + r->phase_inc = new_factor; new_factor = 1.0 / new_factor; - r->inv_phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION ); + r->inv_phase_inc = new_factor; } -void resampler_write_sample(void *_r, int s) +void resampler_write_sample(void *_r, short s) { resampler * r = ( resampler * ) _r; @@ -342,6 +349,7 @@ void resampler_write_sample(void *_r, int s) if ( r->write_filled < resampler_buffer_size ) { float s32 = s; + s32 *= 256.0; r->buffer_in[ r->write_pos ] = s32; r->buffer_in[ r->write_pos + resampler_buffer_size ] = s32; @@ -387,8 +395,8 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -402,13 +410,13 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> RESAMPLER_SHIFT; + in += (int)phase; - phase &= RESAMPLER_RESOLUTION-1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); - r->phase = (unsigned short) phase; + r->phase = phase; *out_ = out; used = (int)(in - in_); @@ -419,6 +427,7 @@ static int resampler_run_zoh(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON static int resampler_run_blep(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -431,37 +440,45 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; - int i = SINC_WIDTH; float sample; if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(inv_phase - pos); - kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i) - out[i] += sample * kernel[i]; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) + out[i] += sample * kernel[i]; + } inv_phase += inv_phase_inc; - out += inv_phase >> RESAMPLER_SHIFT; + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION-1; + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -476,6 +493,7 @@ static int resampler_run_blep(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) @@ -490,49 +508,134 @@ static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end) float const* in = in_; float const* const in_end = in + in_size; float last_amp = r->last_amp; - int inv_phase = r->inv_phase; - int inv_phase_inc = r->inv_phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; - const int step = RESAMPLER_RESOLUTION; + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; do { - // accumulate in extended precision - float kernel_sum = 0.0; - __m128 kernel[SINC_WIDTH / 2]; - __m128 temp1, temp2; - __m128 samplex; float sample; - float *kernelf = (float*)(&kernel); - int i = SINC_WIDTH; if ( out + SINC_WIDTH * 2 > out_end ) break; - for (; i >= -SINC_WIDTH + 1; --i) - { - int pos = i * step; - int abs_pos = abs(inv_phase - pos); - kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos]; - } sample = *in++ - last_amp; - last_amp += sample; - sample /= kernel_sum; - samplex = _mm_set1_ps( sample ); - for (i = 0; i < SINC_WIDTH / 2; ++i) + + if (sample) { - temp1 = _mm_load_ps( (const float *)( kernel + i ) ); - temp1 = _mm_mul_ps( temp1, samplex ); - temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); - temp1 = _mm_add_ps( temp1, temp2 ); - _mm_storeu_ps( (float *) out + i * 4, temp1 ); + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } } inv_phase += inv_phase_inc; - out += inv_phase >> RESAMPLER_SHIFT; + out += (int)inv_phase; - inv_phase &= RESAMPLER_RESOLUTION - 1; + inv_phase = fmod(inv_phase, 1.0f); + } + while ( in < in_end ); + + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blep(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 1; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = *in++ - last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp2 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp2 ); + } + } + + inv_phase += inv_phase_inc; + + out += (int)inv_phase; + + inv_phase = fmod(inv_phase, 1.0f); } while ( in < in_end ); @@ -560,8 +663,8 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -570,14 +673,14 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - sample = in[0] + (in[1] - in[0]) * ((float)phase / RESAMPLER_RESOLUTION); + sample = in[0] + (in[1] - in[0]) * phase; *out++ = sample; phase += phase_inc; - in += phase >> RESAMPLER_SHIFT; + in += (int)phase; - phase &= RESAMPLER_RESOLUTION-1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -592,6 +695,287 @@ static int resampler_run_linear(resampler * r, float ** out_, float * out_end) return used; } +#ifndef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; + sample -= last_amp; + + if (sample) + { + float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f; + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + for (i = 0; i < SINC_WIDTH * 2; ++i) + out[i] += sample * kernel[i]; + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + in += (int)phase; + phase = fmod(phase, 1.0f); + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_SSE +static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + { + sample += (in[1] - in[0]) * phase; + } + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0f; + __m128 kernel[SINC_WIDTH / 2]; + __m128 temp1, temp2; + __m128 samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = _mm_set1_ps( sample ); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = _mm_load_ps( (const float *)( kernel + i ) ); + temp1 = _mm_mul_ps( temp1, samplex ); + temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); + temp1 = _mm_add_ps( temp1, temp2 ); + _mm_storeu_ps( (float *) out + i * 4, temp1 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_blam(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float last_amp = r->last_amp; + float phase = r->phase; + float phase_inc = r->phase_inc; + float inv_phase = r->inv_phase; + float inv_phase_inc = r->inv_phase_inc; + + const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION; + const int window_step = RESAMPLER_RESOLUTION; + + do + { + float sample; + + if ( out + SINC_WIDTH * 2 > out_end ) + break; + + sample = in[0]; + if (phase_inc < 1.0f) + sample += (in[1] - in[0]) * phase; + sample -= last_amp; + + if (sample) + { + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex; + float *kernelf = (float*)(&kernel); + int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + int i = SINC_WIDTH; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + last_amp += sample; + sample /= kernel_sum; + samplex = vdupq_n_f32(sample); + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( kernel + i ) ); + temp2 = vld1q_f32( (const float32_t *) out + i * 4 ); + temp2 = vmlaq_f32( temp2, temp1, samplex ); + vst1q_f32( (float32_t *) out + i * 4, temp2 ); + } + } + + if (inv_phase_inc < 1.0f) + { + ++in; + inv_phase += inv_phase_inc; + out += (int)inv_phase; + inv_phase = fmod(inv_phase, 1.0f); + } + else + { + phase += phase_inc; + ++out; + + if (phase >= 1.0f) + { + ++in; + phase = fmod(phase, 1.0f); + } + } + } + while ( in < in_end ); + + r->phase = phase; + r->inv_phase = inv_phase; + r->last_amp = last_amp; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -603,8 +987,8 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; do { @@ -615,7 +999,7 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) if ( out >= out_end ) break; - kernel = cubic_lut + phase * 4; + kernel = cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4; for (sample = 0, i = 0; i < 4; ++i) sample += in[i] * kernel[i]; @@ -623,63 +1007,9 @@ static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> RESAMPLER_SHIFT; + in += (int)phase; - phase &= RESAMPLER_RESOLUTION-1; - } - while ( in < in_end ); - - r->phase = phase; - *out_ = out; - - used = (int)(in - in_); - - r->write_filled -= used; - } - - return used; -} - -#ifdef RESAMPLER_SSE -static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) -{ - int in_size = r->write_filled; - float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; - int used = 0; - in_size -= 4; - if ( in_size > 0 ) - { - float* out = *out_; - float const* in = in_; - float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; - - do - { - __m128 temp1, temp2; - __m128 samplex = _mm_setzero_ps(); - - if ( out >= out_end ) - break; - - temp1 = _mm_loadu_ps( (const float *)( in ) ); - temp2 = _mm_load_ps( (const float *)( cubic_lut + phase * 4 ) ); - temp1 = _mm_mul_ps( temp1, temp2 ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = _mm_movehl_ps( temp1, samplex ); - samplex = _mm_add_ps( samplex, temp1 ); - temp1 = samplex; - temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); - samplex = _mm_add_ps( samplex, temp1 ); - _mm_store_ss( out, samplex ); - ++out; - - phase += phase_inc; - - in += phase >> RESAMPLER_SHIFT; - - phase &= RESAMPLER_RESOLUTION - 1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -695,6 +1025,111 @@ static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end } #endif +#ifdef RESAMPLER_SSE +static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + __m128 temp1, temp2; + __m128 samplex = _mm_setzero_ps(); + + if ( out >= out_end ) + break; + + temp1 = _mm_loadu_ps( (const float *)( in ) ); + temp2 = _mm_load_ps( (const float *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = _mm_mul_ps( temp1, temp2 ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = _mm_movehl_ps( temp1, samplex ); + samplex = _mm_add_ps( samplex, temp1 ); + temp1 = samplex; + temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); + samplex = _mm_add_ps( samplex, temp1 ); + _mm_store_ss( out, samplex ); + ++out; + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_cubic(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= 4; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + do + { + float32x4_t temp1, temp2; + float32x2_t half; + + if ( out >= out_end ) + break; + + temp1 = vld1q_f32( (const float32_t *)( in ) ); + temp2 = vld1q_f32( (const float32_t *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) ); + temp1 = vmulq_f32( temp1, temp2 ); + half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifndef RESAMPLER_NEON static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) { int in_size = r->write_filled; @@ -706,17 +1141,18 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do { float kernel[SINC_WIDTH * 2], kernel_sum = 0.0; int i = SINC_WIDTH; - int phase_adj = phase * step / RESAMPLER_RESOLUTION; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; float sample; if ( out >= out_end ) @@ -726,7 +1162,7 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) { int pos = i * step; int window_pos = i * window_step; - kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase - window_pos)]; + kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; } for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i) sample += in[i] * kernel[i]; @@ -734,9 +1170,9 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> RESAMPLER_SHIFT; + in += (int)phase; - phase &= RESAMPLER_RESOLUTION-1; + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -750,6 +1186,7 @@ static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) return used; } +#endif #ifdef RESAMPLER_SSE static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) @@ -763,10 +1200,10 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) float* out = *out_; float const* in = in_; float const* const in_end = in + in_size; - int phase = r->phase; - int phase_inc = r->phase_inc; + float phase = r->phase; + float phase_inc = r->phase_inc; - int step = phase_inc > RESAMPLER_RESOLUTION ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION / phase_inc : RESAMPLER_RESOLUTION; + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); int window_step = RESAMPLER_RESOLUTION; do @@ -778,7 +1215,8 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) __m128 samplex = _mm_setzero_ps(); float *kernelf = (float*)(&kernel); int i = SINC_WIDTH; - int phase_adj = phase * step / RESAMPLER_RESOLUTION; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; if ( out >= out_end ) break; @@ -787,7 +1225,7 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) { int pos = i * step; int window_pos = i * window_step; - kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase - window_pos)]; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; } for (i = 0; i < SINC_WIDTH / 2; ++i) { @@ -809,9 +1247,80 @@ static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end) phase += phase_inc; - in += phase >> RESAMPLER_SHIFT; + in += (int)phase; - phase &= RESAMPLER_RESOLUTION - 1; + phase = fmod(phase, 1.0f); + } + while ( in < in_end ); + + r->phase = phase; + *out_ = out; + + used = (int)(in - in_); + + r->write_filled -= used; + } + + return used; +} +#endif + +#ifdef RESAMPLER_NEON +static int resampler_run_sinc(resampler * r, float ** out_, float * out_end) +{ + int in_size = r->write_filled; + float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled; + int used = 0; + in_size -= SINC_WIDTH * 2; + if ( in_size > 0 ) + { + float* out = *out_; + float const* in = in_; + float const* const in_end = in + in_size; + float phase = r->phase; + float phase_inc = r->phase_inc; + + int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF); + int window_step = RESAMPLER_RESOLUTION; + + do + { + // accumulate in extended precision + float kernel_sum = 0.0; + float32x4_t kernel[SINC_WIDTH / 2]; + float32x4_t temp1, temp2; + float32x4_t samplex = {0}; + float32x2_t half; + float *kernelf = (float*)(&kernel); + int i = SINC_WIDTH; + int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION); + int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION; + + if ( out >= out_end ) + break; + + for (; i >= -SINC_WIDTH + 1; --i) + { + int pos = i * step; + int window_pos = i * window_step; + kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)]; + } + for (i = 0; i < SINC_WIDTH / 2; ++i) + { + temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) ); + temp2 = vld1q_f32( (const float32_t *)( kernel + i ) ); + samplex = vmlaq_f32( samplex, temp1, temp2 ); + } + kernel_sum = 1.0 / kernel_sum; + samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum)); + half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex)); + *out++ = vget_lane_f32(vpadd_f32(half, half), 0); + + phase += phase_inc; + + in += (int)phase; + + phase = fmod(phase, 1.0f); } while ( in < in_end ); @@ -854,9 +1363,11 @@ static void resampler_fill(resampler * r) if ( write_extra > SINC_WIDTH * 2 - 1 ) write_extra = SINC_WIDTH * 2 - 1; memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE if ( resampler_has_sse ) used = resampler_run_blep_sse( r, &out, out + write_size + write_extra ); else +#endif used = resampler_run_blep( r, &out, out + write_size + write_extra ); memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); if (!used) @@ -868,6 +1379,27 @@ static void resampler_fill(resampler * r) resampler_run_linear( r, &out, out + write_size ); break; + case RESAMPLER_QUALITY_BLAM: + { + float * out_ = out; + int write_extra = 0; + if ( write_pos >= r->read_pos ) + write_extra = r->read_pos; + if ( write_extra > SINC_WIDTH * 2 - 1 ) + write_extra = SINC_WIDTH * 2 - 1; + memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) ); +#ifdef RESAMPLER_SSE + if ( resampler_has_sse ) + resampler_run_blam_sse( r, &out, out + write_size + write_extra ); + else +#endif + resampler_run_blam( r, &out, out + write_size + write_extra ); + memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); + if ( out == out_ ) + return; + break; + } + case RESAMPLER_QUALITY_CUBIC: #ifdef RESAMPLER_SSE if ( resampler_has_sse ) @@ -898,14 +1430,14 @@ static void resampler_fill_and_remove_delay(resampler * r) int delay = resampler_output_delay( r ); r->delay_removed = 0; while ( delay-- ) - resampler_remove_sample( r ); + resampler_remove_sample( r, 1 ); } } int resampler_get_sample_count(void *_r) { resampler * r = ( resampler * ) _r; - if ( r->read_filled < 1 && (r->quality != RESAMPLER_QUALITY_BLEP || r->inv_phase_inc)) + if ( r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP && r->quality != RESAMPLER_QUALITY_BLAM) || r->inv_phase_inc)) resampler_fill_and_remove_delay( r ); return r->read_filled; } @@ -917,24 +1449,40 @@ int resampler_get_sample(void *_r) resampler_fill_and_remove_delay( r ); if ( r->read_filled < 1 ) return 0; - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) return (int)(r->buffer_out[ r->read_pos ] + r->accumulator); else return (int)r->buffer_out[ r->read_pos ]; } -void resampler_remove_sample(void *_r) +float resampler_get_sample_float(void *_r) +{ + resampler * r = ( resampler * ) _r; + if ( r->read_filled < 1 && r->phase_inc) + resampler_fill_and_remove_delay( r ); + if ( r->read_filled < 1 ) + return 0; + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) + return r->buffer_out[ r->read_pos ] + r->accumulator; + else + return r->buffer_out[ r->read_pos ]; +} + +void resampler_remove_sample(void *_r, int decay) { resampler * r = ( resampler * ) _r; if ( r->read_filled > 0 ) { - if ( r->quality == RESAMPLER_QUALITY_BLEP ) + if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM ) { r->accumulator += r->buffer_out[ r->read_pos ]; r->buffer_out[ r->read_pos ] = 0; - r->accumulator -= r->accumulator * (1.0 / 8192.0); - if (fabs(r->accumulator) < 1e-20) - r->accumulator = 0; + if (decay) + { + r->accumulator -= r->accumulator * (1.0f / 8192.0f); + if (fabs(r->accumulator) < 1e-20f) + r->accumulator = 0; + } } --r->read_filled; r->read_pos = ( r->read_pos + 1 ) % resampler_buffer_size; diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.h index 2432999a7..0050ebf1a 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.h +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/resampler.h @@ -13,18 +13,16 @@ #define resampler_set_quality EVALUATE(RESAMPLER_DECORATE,_resampler_set_quality) #define resampler_get_free_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_free_count) #define resampler_write_sample EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample) +#define resampler_write_sample_fixed EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample_fixed) #define resampler_set_rate EVALUATE(RESAMPLER_DECORATE,_resampler_set_rate) #define resampler_ready EVALUATE(RESAMPLER_DECORATE,_resampler_ready) #define resampler_clear EVALUATE(RESAMPLER_DECORATE,_resampler_clear) #define resampler_get_sample_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_count) #define resampler_get_sample EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample) +#define resampler_get_sample_float EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_float) #define resampler_remove_sample EVALUATE(RESAMPLER_DECORATE,_resampler_remove_sample) #endif -#ifdef __cplusplus -extern "C" { -#endif - void resampler_init(void); void * resampler_create(void); @@ -38,24 +36,23 @@ enum RESAMPLER_QUALITY_ZOH = 0, RESAMPLER_QUALITY_BLEP = 1, RESAMPLER_QUALITY_LINEAR = 2, - RESAMPLER_QUALITY_CUBIC = 3, - RESAMPLER_QUALITY_SINC = 4, - RESAMPLER_QUALITY_MAX = 4 + RESAMPLER_QUALITY_BLAM = 3, + RESAMPLER_QUALITY_CUBIC = 4, + RESAMPLER_QUALITY_SINC = 5, + RESAMPLER_QUALITY_MAX = 5 }; void resampler_set_quality(void *, int quality); int resampler_get_free_count(void *); -void resampler_write_sample(void *, int sample); +void resampler_write_sample(void *, short sample); +void resampler_write_sample_fixed(void *, int sample, unsigned char depth); void resampler_set_rate( void *, double new_factor ); int resampler_ready(void *); void resampler_clear(void *); int resampler_get_sample_count(void *); int resampler_get_sample(void *); -void resampler_remove_sample(void *); - -#ifdef __cplusplus -} -#endif +float resampler_get_sample_float(void *); +void resampler_remove_sample(void *, int decay); #endif diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c index 5db94bfb8..e44174288 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.c @@ -16,6 +16,7 @@ #include "armcpu.h" #include "cp15.h" #include "spu_exports.h" +#include "barray.h" #include "state.h" @@ -279,12 +280,16 @@ void state_deinit(struct NDS_state *state) state->MainScreen = NULL; if (state->SubScreen) free(state->SubScreen); state->SubScreen = NULL; + if (state->array_rom_coverage) bit_array_destroy(state->array_rom_coverage); + state->array_rom_coverage = NULL; } -void state_setrom(struct NDS_state *state, u8 * rom, u32 rom_size) +void state_setrom(struct NDS_state *state, u8 * rom, u32 rom_size, unsigned int enable_coverage_checking) { assert(!(rom_size & (rom_size - 1))); NDS_SetROM(state, rom, rom_size - 1); + if (enable_coverage_checking) + state->array_rom_coverage = bit_array_create(rom_size / 4); NDS_Reset(state); state->execute = TRUE; } diff --git a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h index c1f4c2bbd..f19d88a7f 100644 --- a/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h +++ b/Frameworks/vio2sf/vio2sf/src/vio2sf/desmume/state.h @@ -77,13 +77,15 @@ typedef struct NDS_state s16 *sample_buffer; unsigned long sample_pointer; unsigned long sample_size; + + void * array_rom_coverage; } NDS_state; int state_init(NDS_state *state); void state_deinit(NDS_state *state); -void state_setrom(NDS_state *state, u8 * rom, u32 rom_size); +void state_setrom(NDS_state *state, u8 * rom, u32 rom_size, unsigned int enable_coverage_checking); void state_loadstate(NDS_state *state, const u8 * ss, u32 ss_size); diff --git a/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm b/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm index f02d396f7..581c3c09f 100644 --- a/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm +++ b/Plugins/HighlyComplete/HighlyComplete/HCDecoder.mm @@ -1099,10 +1099,12 @@ static int usf_info(void * context, const char * name, const char * value) resampling_int = 1; else if ([resampling isEqualToString:@"linear"]) resampling_int = 2; - else if ([resampling isEqualToString:@"cubic"]) + else if ([resampling isEqualToString:@"blam"]) resampling_int = 3; - else if ([resampling isEqualToString:@"sinc"]) + else if ([resampling isEqualToString:@"cubic"]) resampling_int = 4; + else if ([resampling isEqualToString:@"sinc"]) + resampling_int = 5; core->dwInterpolation = resampling_int; core->dwChannelMute = 0; @@ -1121,7 +1123,7 @@ static int usf_info(void * context, const char * name, const char * value) emulatorExtra = state.rom; if ( state.rom ) - state_setrom(core, state.rom, (u32) state.rom_size ); + state_setrom(core, state.rom, (u32) state.rom_size, 0 ); state_loadstate(core, state.state, (u32) state.state_size);