diff --git a/Frameworks/Dumb/dumb/include/internal/it.h b/Frameworks/Dumb/dumb/include/internal/it.h index 8ca5fada6..f30cfcd2b 100644 --- a/Frameworks/Dumb/dumb/include/internal/it.h +++ b/Frameworks/Dumb/dumb/include/internal/it.h @@ -924,4 +924,10 @@ long _dumb_it_read_sample_data_adpcm4(IT_SAMPLE *sample, DUMBFILE *f); void _dumb_it_interleave_stereo_sample(IT_SAMPLE *sample); +/* Calling either of these is optional */ +void _dumb_init_cubic(); +#ifdef _USE_SSE +void _dumb_init_sse(); +#endif + #endif /* INTERNAL_IT_H */ diff --git a/Frameworks/Dumb/dumb/src/helpers/resamp3.inc b/Frameworks/Dumb/dumb/src/helpers/resamp3.inc index e0bc8ee78..d96085819 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resamp3.inc +++ b/Frameworks/Dumb/dumb/src/helpers/resamp3.inc @@ -65,7 +65,7 @@ long dumb_resample(DUMB_RESAMPLER *resampler, sample_t *dst, long dst_size, VOLU if (VOLUMES_ARE_ZERO) dst = NULL; - init_cubic(); + _dumb_init_cubic(); quality = resampler->quality; @@ -382,7 +382,7 @@ void dumb_resample_get_current_sample(DUMB_RESAMPLER *resampler, VOLUME_PARAMETE if (VOLUMES_ARE_ZERO) { MIX_ZEROS(=); return; } - init_cubic(); + _dumb_init_cubic(); quality = resampler->quality; diff --git a/Frameworks/Dumb/dumb/src/helpers/resample.c b/Frameworks/Dumb/dumb/src/helpers/resample.c index cb3683266..7c7ecb363 100644 --- a/Frameworks/Dumb/dumb/src/helpers/resample.c +++ b/Frameworks/Dumb/dumb/src/helpers/resample.c @@ -160,12 +160,11 @@ int dumb_resampling_quality = DUMB_RQ_CUBIC; static short cubicA0[1025], cubicA1[1025]; -/*static*/ void init_cubic(void) +void _dumb_init_cubic(void) { unsigned int t; /* 3*1024*1024*1024 is within range if it's unsigned */ static int done = 0; if (done) return; - done = 1; for (t = 0; t < 1025; t++) { /* int casts to pacify warnings about negating unsigned values */ cubicA0[t] = -(int)( t*t*t >> 17) + (int)( t*t >> 6) - (int)(t << 3); @@ -173,6 +172,8 @@ static short cubicA0[1025], cubicA1[1025]; } lanczos_init(); + + done = 1; } diff --git a/Frameworks/Dumb/dumb/src/it/itrender.c b/Frameworks/Dumb/dumb/src/it/itrender.c index 1a9e2f905..bc898ef51 100644 --- a/Frameworks/Dumb/dumb/src/it/itrender.c +++ b/Frameworks/Dumb/dumb/src/it/itrender.c @@ -19,6 +19,7 @@ #include #include +#include #include "dumb.h" #include "internal/dumb.h" @@ -786,11 +787,56 @@ static void it_filter_sse(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample #undef LOG10 -int _dumb_it_use_sse = 0; +#if defined(_M_IX86) || defined(__i386__) + +#ifdef _MSC_VER +#include +#else +static inline void +__cpuid(int *data, int selector) +{ + asm("cpuid" + : "=a" (data[0]), + "=b" (data[1]), + "=c" (data[2]), + "=d" (data[3]) + : "a"(selector)); +} +#endif + +static int query_cpu_feature_sse() { + int buffer[4]; + __cpuid(buffer,1); + if ((buffer[3]&(1<<25)) == 0) return 0; + return 1; +} + +static int _dumb_it_use_sse = 0; + +void _dumb_init_sse() +{ + static int initialized = 0; + if (!initialized) + { + _dumb_it_use_sse = query_cpu_feature_sse(); + initialized = 1; + } +} + +#elif defined(_M_X64) || defined(__amd64__) + +static const int _dumb_it_use_sse = 1; + +#else + +static const int _dumb_it_use_sse = 0; + +#endif static void it_filter(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, long pos, sample_t *src, long size, int step, int sampfreq, int cutoff, int resonance) { #if defined(_USE_SSE) + _dumb_init_sse(); if ( _dumb_it_use_sse ) it_filter_sse( cr, state, dst, pos, src, size, step, sampfreq, cutoff, resonance ); else #endif @@ -938,7 +984,7 @@ static void reset_channel_effects(IT_CHANNEL *channel) channel->xm_volslide = 0; channel->panslide = 0; channel->channelvolslide = 0; - channel->arpeggio_table = &arpeggio_mod; + channel->arpeggio_table = (const unsigned char *) &arpeggio_mod; memset(channel->arpeggio_offsets, 0, sizeof(channel->arpeggio_offsets)); channel->retrig = 0; if (channel->xm_retrig) { @@ -2490,7 +2536,7 @@ Yxy This uses a table 4 times larger (hence 4 times slower) than channel->arpeggio_offsets[0] = 0; channel->arpeggio_offsets[1] = (v & 0xF0) >> 4; channel->arpeggio_offsets[2] = (v & 0x0F); - channel->arpeggio_table = ((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))==IT_WAS_AN_XM) ? &arpeggio_xm : &arpeggio_mod; + channel->arpeggio_table = (const unsigned char *)(((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))==IT_WAS_AN_XM) ? &arpeggio_xm : &arpeggio_mod); } break; case IT_SET_CHANNEL_VOLUME: @@ -3124,15 +3170,15 @@ Yxy This uses a table 4 times larger (hence 4 times slower) than switch (entry->effect) { case IT_OKT_ARPEGGIO_3: - channel->arpeggio_table = &arpeggio_okt_3; + channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_3; break; case IT_OKT_ARPEGGIO_4: - channel->arpeggio_table = &arpeggio_okt_4; + channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_4; break; case IT_OKT_ARPEGGIO_5: - channel->arpeggio_table = &arpeggio_okt_5; + channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_5; break; } }