Made the OpenMPT / legacy OpenMPT and mpg123 libraries pre-built. Changed the OpenMPT and vgmstream plugins to import the libraries as they are now. Made mpg123 embedded and imported by the main binary, since it's now shared by two plugins. Signed-off-by: Christopher Snowhill <kode54@gmail.com>
268 lines
5.6 KiB
C
268 lines
5.6 KiB
C
/*
	decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)

	copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway for MMX (apparently)
	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
	Both have agreed to distribution under LGPL 2.1 .

	Transformed back into standalone asm, with help of
	gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}

	The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
	This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
	That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
	Maybe I'll change it in future, but now I need something that works.

	Original comment from MPlayer source follows. Regarding the license history see
	synth_mmx.S, which the original comment about this being licensed under GPL is
	relating to.
*/

/*
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */

#include "mangle.h"

/*
	Bit masks used by SYNTH_NAME when it stores two samples with one 8-byte write.
	Each mask is two identical 32-bit lanes, 8-byte aligned so it can be used
	directly as a pand memory operand.

	one_null (0xffff0000 per lane) keeps the upper 16-bit word of every lane; it is
	applied to the quadword already present in the output buffer.
	null_one (0x0000ffff per lane) keeps the lower 16-bit word of every lane; it is
	applied to the freshly computed samples.
	OR-ing both results replaces only the lower word of each lane.
*/
	.data
	ALIGN8
one_null:
	.long	-65536	/* 0xffff0000 */
	.long	-65536
	ALIGN8
null_one:
	.long	65535	/* 0x0000ffff */
	.long	65535

/*
	void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins)

	Synthesis routine template for 32-bit x86 (AT&T syntax, run through the C
	preprocessor). SYNTH_NAME and MPL_DCT64 are macros that have to be defined by
	the file including this template.

	Arguments are read from the stack relative to %ebp:
	   8(%ebp) bandPtr   handed on unchanged as third argument of MPL_DCT64
	  12(%ebp) channel   output starts at samples + 2*channel bytes; any value other
	                     than 1 also steps *bo back by one (modulo 16) and selects the
	                     buffer area at buffs + 1088 bytes
	  16(%ebp) samples   output; one 16-bit word is replaced every 4 bytes
	  20(%ebp) buffs     work buffer; filled through MPL_DCT64, then read by pmaddwd
	                     as packed signed 16-bit words
	  24(%ebp) bo        ring position, updated in place (see channel)
	  28(%ebp) decwins   window table; read by pmaddwd as packed signed 16-bit words

	Work per call: one call to MPL_DCT64, then 32 output samples
	(8 passes x 2 + 1 in the first half, 7 passes x 2 + 1 in the negated second half).
	Every sample is a sum of 16 word products, shifted right by 13 and saturated
	to 16 bits.

	Registers: %ebx, %esi, %edi and %ebp are saved and restored; %eax, %ecx, %edx
	and %mm0-%mm7 are clobbered. emms is executed before returning. The code relies
	on MPL_DCT64 leaving %ebx, %esi, %edi and %ebp intact. No return value is set.
*/
	.text
	ALIGN16
.globl SYNTH_NAME
SYNTH_NAME:
	pushl	%ebp
/* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
	movl	%esp, %ebp

/* Now the old stack addresses are preserved via %ebp. */
#ifdef PIC
	subl	$8,%esp /* What has been called temp before, plus the slot for the PIC register. */
#else
	subl	$4,%esp /* What has been called temp before. */
#endif
	pushl	%edi
	pushl	%esi
	pushl	%ebx

#ifdef PIC
/* %ebx is used as a general register below, so the PIC base is kept in %eax
   and parked on the stack. GET_GOT comes from mangle.h; presumably it loads
   the GOT address into _EBX_ - confirm against mangle.h. */
#undef _EBX_
#define _EBX_ %eax
	GET_GOT
#define EBXSAVE -4(%ebp)
	movl	_EBX_, EBXSAVE /* save PIC register */
#endif

/* Local slot just above the three saved registers. The offset is only right
   while nothing else has been pushed. */
#define TEMP 12(%esp)
/* APP */
	movl	12(%ebp),%ecx		/* ecx = channel */
	movl	16(%ebp),%edi		/* edi = samples */
	movl	$15,%ebx		/* ebx = mask for the 16-step ring position */
	movl	24(%ebp),%edx		/* edx = bo */
	leal	(%edi,%ecx,2),%edi	/* edi = samples + 2*channel bytes */
	decl	%ecx
	movl	20(%ebp),%esi		/* esi = buffs */
	movl	(%edx),%eax		/* eax = *bo */
	jecxz	1f			/* channel == 1: keep *bo and the buffer base */
	decl	%eax
	andl	%ebx,%eax
	leal	1088(%esi),%esi		/* switch to the buffer area 1088 bytes further on */
	movl	%eax,(%edx)		/* *bo = (*bo - 1) & 15 */
1:
	leal	(%esi,%eax,2),%edx	/* edx = base + 2*bo */
	movl	%eax,TEMP		/* TEMP = bo */
	incl	%eax
	andl	%ebx,%eax		/* eax = (bo + 1) & 15 */
	leal	544(%esi,%eax,2),%ecx	/* ecx = base + 544 + 2*((bo + 1) & 15) */
	incl	%ebx			/* ebx = 16 */
	testl	$1, %eax
	jnz	2f			/* (bo + 1) & 15 odd: keep the setup as it is */
	xchgl	%edx,%ecx		/* otherwise swap the two MPL_DCT64 targets, */
	incl	TEMP			/* use bo + 1 as window offset */
	leal	544(%esi),%esi		/* and read from the area 544 bytes further on */
2:
	/* MPL_DCT64(ecx, edx, bandPtr): arguments pushed last to first,
	   the 12 bytes are taken off the stack again after the call. */
	pushl	8(%ebp)
	pushl	%edx
	pushl	%ecx
	call	MPL_DCT64
	addl	$12, %esp
	leal	1(%ebx), %ecx		/* ecx = 17, sample count of the first half */
	subl	TEMP,%ebx		/* ebx = 16 - TEMP; last use of TEMP before the push */
	pushl	%ecx
	/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
	movl	28(%ebp),%ecx
	leal	(%ecx,%ebx,2), %edx	/* edx = decwins + 2*ebx bytes */
	movl	(%esp),%ecx /* restore, but leave value on stack */
	shrl	$1, %ecx		/* 8 passes with two samples each */
#ifdef PIC
	movl	EBXSAVE, _EBX_		/* %eax was reused above, reload it for LOCAL_VAR */
#endif
	ALIGN16
/* First half, two samples per pass: mm0-mm3 collect the first sample,
   mm4-mm7 the second one. */
3:
	movq	(%edx),%mm0
	movq	64(%edx),%mm4
	pmaddwd	(%esi),%mm0
	pmaddwd	32(%esi),%mm4
	movq	8(%edx),%mm1
	movq	72(%edx),%mm5
	pmaddwd	8(%esi),%mm1
	pmaddwd	40(%esi),%mm5
	movq	16(%edx),%mm2
	movq	80(%edx),%mm6
	pmaddwd	16(%esi),%mm2
	pmaddwd	48(%esi),%mm6
	movq	24(%edx),%mm3
	movq	88(%edx),%mm7
	pmaddwd	24(%esi),%mm3
	pmaddwd	56(%esi),%mm7
	paddd	%mm1,%mm0
	paddd	%mm5,%mm4
	paddd	%mm2,%mm0
	paddd	%mm6,%mm4
	paddd	%mm3,%mm0
	paddd	%mm7,%mm4
	movq	%mm0,%mm1
	movq	%mm4,%mm5
	psrlq	$32,%mm1
	psrlq	$32,%mm5
	paddd	%mm1,%mm0		/* low dword = sum over all 16 products */
	paddd	%mm5,%mm4
	psrad	$13,%mm0
	psrad	$13,%mm4
	packssdw	%mm0,%mm0	/* saturate to signed 16 bit */
	packssdw	%mm4,%mm4
	movq	(%edi), %mm1		/* current content of the output */
	punpckldq	%mm4, %mm0	/* first sample in the low lane, second in the high lane */
	pand	LOCAL_VAR(one_null), %mm1	/* keep the upper word of each lane */
	pand	LOCAL_VAR(null_one), %mm0	/* keep the lower word of each lane */
	por	%mm0, %mm1
	movq	%mm1,(%edi)
	leal	64(%esi),%esi
	leal	128(%edx),%edx
	leal	8(%edi),%edi
	decl	%ecx
	jnz	3b
	popl	%ecx
	andl	$1, %ecx
	jecxz	4f			/* even count: no single sample left over */
	/* Single left-over sample of the first half (17 is odd). */
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0
	movq	%mm0,%mm1
	psrlq	$32,%mm1
	paddd	%mm1,%mm0
	psrad	$13,%mm0
	packssdw	%mm0,%mm0
	movd	%mm0,%eax		/* clobbers the PIC register copy in %eax */
	movw	%ax, (%edi)		/* 16-bit store, the neighbouring word stays untouched */
	leal	32(%esi),%esi
	leal	64(%edx),%edx
	leal	4(%edi),%edi
4:
	subl	$64,%esi		/* from here on the buffer pointer moves downwards */
	movl	$7,%ecx			/* 7 passes with two samples each */

#ifdef PIC
	movl	EBXSAVE, _EBX_		/* reload after the movd above */
#endif
	ALIGN16
/* Second half, two samples per pass. The second sample reads the buffer at
   negative offsets, and both results are negated before they are stored. */
5:
	movq	(%edx),%mm0
	movq	64(%edx),%mm4
	pmaddwd	(%esi),%mm0
	pmaddwd	-32(%esi),%mm4
	movq	8(%edx),%mm1
	movq	72(%edx),%mm5
	pmaddwd	8(%esi),%mm1
	pmaddwd	-24(%esi),%mm5
	movq	16(%edx),%mm2
	movq	80(%edx),%mm6
	pmaddwd	16(%esi),%mm2
	pmaddwd	-16(%esi),%mm6
	movq	24(%edx),%mm3
	movq	88(%edx),%mm7
	pmaddwd	24(%esi),%mm3
	pmaddwd	-8(%esi),%mm7
	paddd	%mm1,%mm0
	paddd	%mm5,%mm4
	paddd	%mm2,%mm0
	paddd	%mm6,%mm4
	paddd	%mm3,%mm0
	paddd	%mm7,%mm4
	movq	%mm0,%mm1
	movq	%mm4,%mm5
	psrlq	$32,%mm1
	psrlq	$32,%mm5
	paddd	%mm0,%mm1		/* sums end up in mm1/mm5 this time */
	paddd	%mm4,%mm5
	psrad	$13,%mm1
	psrad	$13,%mm5
	packssdw	%mm1,%mm1
	packssdw	%mm5,%mm5
	psubd	%mm0,%mm0		/* mm0 = 0 */
	psubd	%mm4,%mm4		/* mm4 = 0 */
	psubsw	%mm1,%mm0		/* mm0 = -mm1 with signed saturation */
	psubsw	%mm5,%mm4		/* mm4 = -mm5 with signed saturation */
	movq	(%edi), %mm1
	punpckldq	%mm4, %mm0
	pand	LOCAL_VAR(one_null), %mm1
	pand	LOCAL_VAR(null_one), %mm0
	por	%mm0, %mm1
	movq	%mm1,(%edi)
	subl	$64,%esi
	addl	$128,%edx
	leal	8(%edi),%edi
	decl	%ecx
	jnz	5b
	/* Last sample of the second half, negated like the ones before. */
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0
	movq	%mm0,%mm1
	psrlq	$32,%mm1
	paddd	%mm0,%mm1
	psrad	$13,%mm1
	packssdw	%mm1,%mm1
	psubd	%mm0,%mm0
	psubsw	%mm1,%mm0
	movd	%mm0,%eax
	movw	%ax,(%edi)
	emms				/* leave MMX state so x87 code can run again */

/* NO_APP */
	popl	%ebx
	popl	%esi
	popl	%edi
	mov	%ebp, %esp		/* drops the local slot(s) */
	popl	%ebp
	ret