Cog/Frameworks/lazyusf/lazyusf/rsp/vu/cf.h

191 lines
6 KiB
C

/******************************************************************************\
* Authors: Iconoclast *
* Release: 2013.12.04 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
* and related and neighboring rights to this software to the public domain *
* worldwide. This software is distributed without any warranty. *
* *
* You should have received a copy of the CC0 Public Domain Dedication along *
* with this software. *
* If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. *
\******************************************************************************/
#ifndef _CF_H
#define _CF_H
/*
* For a non-cycle-accurate RSP emulator using SSE2, the following
* scalar definitions of the control registers are obsolete.
*/
#if (0)
/*
* Many vector units have pairs of "vector condition flags" registers.
* In SGI's vector unit implementation, these are denoted as the
* "vector control registers" under coprocessor 2.
*
* VCF-0 is the carry-out flags register: $vco.
* VCF-1 is the compare code flags register: $vcc.
* VCF-2 is the compare extension flags register: $vce.
* There is no fourth RSP flags register.
*/
unsigned short VCO; /* VCF-0: carry-out flags register ($vco) */
unsigned short VCC; /* VCF-1: compare code flags register ($vcc) */
unsigned char VCE; /* VCF-2: compare extension flags register ($vce) */
#endif
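/*
* These normally should have type `int` because they are Boolean T/F arrays.
* However, SSE2 uses 128-bit XMM registers and Win32 `int` storage is 32-bit,
* so eight `int` flags would need 8*32 = 256 bits, more than one register.
* Using `short` (8*16 = 128 bits) fits each flag array in one XMM register,
* which reduces the number of pack operations needed.
*/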
#ifndef ARCH_MIN_SSE2
/*
 * Scalar build: pack the sixteen per-lane flags into one 16-bit $vco value.
 *
 * Bit i (0..7) is taken from state->co[i]; bit i+8 is taken from state->ne[i].
 *
 * Only the least significant bit of each element is used.  That is the same
 * bit the SSE2 build of this function extracts, and the only bit set_VCO()
 * ever stores, so both builds return the same value.  Masking also prevents
 * an element that is not strictly 0 or 1 from spilling into neighbouring
 * bits, and avoids left-shifting a negative value (undefined behaviour).
 */
unsigned short get_VCO(usf_state_t * state)
{
    unsigned int packed = 0x0000;
    int lane;

    for (lane = 0; lane < 8; lane++) {
        packed |= (unsigned int)(state->co[lane] & 1) << (lane + 0x0);
        packed |= (unsigned int)(state->ne[lane] & 1) << (lane + 0x8);
    }
    return (unsigned short)packed;
}
/*
 * Scalar build: pack the sixteen per-lane flags into one 16-bit $vcc value.
 *
 * Bit i (0..7) is taken from state->comp[i]; bit i+8 is taken from
 * state->clip[i].
 *
 * Only the least significant bit of each element is used.  That is the same
 * bit the SSE2 build of this function extracts, and the only bit set_VCC()
 * ever stores, so both builds return the same value.  Masking also prevents
 * an element that is not strictly 0 or 1 from spilling into neighbouring
 * bits, and avoids left-shifting a negative value (undefined behaviour).
 */
unsigned short get_VCC(usf_state_t * state)
{
    unsigned int packed = 0x0000;
    int lane;

    for (lane = 0; lane < 8; lane++) {
        packed |= (unsigned int)(state->comp[lane] & 1) << (lane + 0x0);
        packed |= (unsigned int)(state->clip[lane] & 1) << (lane + 0x8);
    }
    return (unsigned short)packed;
}
/*
 * Scalar build: pack the eight per-lane flags into one 8-bit $vce value.
 *
 * Bit i (0..7) is taken from state->vce[i].
 *
 * Only the least significant bit of each element is used.  That is the same
 * bit the SSE2 build of this function extracts, and the only bit set_VCE()
 * ever stores, so both builds return the same value.  Masking also prevents
 * an element that is not strictly 0 or 1 from spilling into neighbouring
 * bits, and avoids left-shifting a negative value (undefined behaviour).
 */
unsigned char get_VCE(usf_state_t * state)
{
    unsigned int packed = 0x00;
    int lane;

    for (lane = 0; lane < 8; lane++) {
        packed |= (unsigned int)(state->vce[lane] & 1) << lane;
    }
    return (unsigned char)packed;
}
#else
/*
 * SSE2 build: pack the sixteen per-lane flags into one 16-bit $vco value.
 *
 * state->co supplies bits 0..7 and state->ne supplies bits 8..15.
 * Both arrays are read with aligned 128-bit loads.
 */
unsigned short get_VCO(usf_state_t * state)
{
    /* Shift each 16-bit lane left by 15 so its bit 0 becomes the sign bit. */
    const __m128i carry = _mm_slli_epi16(_mm_load_si128((__m128i *)state->co), 15);
    const __m128i noteq = _mm_slli_epi16(_mm_load_si128((__m128i *)state->ne), 15);

    /*
     * Narrow the sixteen INT16 lanes to INT8 with signed saturation; the
     * sign of every lane survives.  `carry` fills the low eight bytes and
     * `noteq` the high eight.
     */
    const __m128i bytes = _mm_packs_epi16(carry, noteq);

    /* PMOVMSKB gathers the sign bit of each byte into one bit of the result. */
    return (unsigned short)(_mm_movemask_epi8(bytes) & 0x0000FFFF);
}
/*
 * SSE2 build: pack the sixteen per-lane flags into one 16-bit $vcc value.
 *
 * state->comp supplies bits 0..7 and state->clip supplies bits 8..15.
 * Both arrays are read with aligned 128-bit loads.
 */
unsigned short get_VCC(usf_state_t * state)
{
    /* Shift each 16-bit lane left by 15 so its bit 0 becomes the sign bit. */
    const __m128i compare = _mm_slli_epi16(_mm_load_si128((__m128i *)state->comp), 15);
    const __m128i clipped = _mm_slli_epi16(_mm_load_si128((__m128i *)state->clip), 15);

    /*
     * Narrow the sixteen INT16 lanes to INT8 with signed saturation; the
     * sign of every lane survives.  `compare` fills the low eight bytes and
     * `clipped` the high eight.
     */
    const __m128i bytes = _mm_packs_epi16(compare, clipped);

    /* PMOVMSKB gathers the sign bit of each byte into one bit of the result. */
    return (unsigned short)(_mm_movemask_epi8(bytes) & 0x0000FFFF);
}
/*
 * SSE2 build: pack the eight per-lane flags into one 8-bit $vce value.
 *
 * state->vce supplies bits 0..7; it is read with an aligned 128-bit load.
 */
unsigned char get_VCE(usf_state_t * state)
{
    /* The upper eight bytes of the packed vector are unused, so feed zeros. */
    const __m128i upper = _mm_setzero_si128();

    /* Shift each 16-bit lane left by 15 so its bit 0 becomes the sign bit. */
    const __m128i lower = _mm_slli_epi16(_mm_load_si128((__m128i *)state->vce), 15);

    /* Narrow INT16 lanes to INT8 with signed saturation (sign is kept). */
    const __m128i bytes = _mm_packs_epi16(lower, upper);

    /* PMOVMSKB gathers the sign bit of each byte; keep only the low eight. */
    return (unsigned char)(_mm_movemask_epi8(bytes) & 0x000000FF);
}
#endif
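/*
* CTC2 resources: unpack a flags value back into the per-lane Boolean arrays.
* These remain scalar loops; it is not yet clear how to vectorize this
* direction (flags word to array) with SSE2.
*/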
/*
 * Unpack a 16-bit $vco value into the per-lane flag arrays.
 *
 * Bit i (0..7) of VCO is written to state->co[i] and bit i+8 is written to
 * state->ne[i]; every element written is 0 or 1.
 */
void set_VCO(usf_state_t * state, unsigned short VCO)
{
    const unsigned int low_byte = VCO & 0x00FFu;
    const unsigned int high_byte = (VCO >> 8) & 0x00FFu;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        state->co[lane] = (low_byte >> lane) & 1;
    }
    for (lane = 0; lane < 8; ++lane) {
        state->ne[lane] = (high_byte >> lane) & 1;
    }
}
/*
 * Unpack a 16-bit $vcc value into the per-lane flag arrays.
 *
 * Bit i (0..7) of VCC is written to state->comp[i] and bit i+8 is written to
 * state->clip[i]; every element written is 0 or 1.
 */
void set_VCC(usf_state_t * state, unsigned short VCC)
{
    const unsigned int low_byte = VCC & 0x00FFu;
    const unsigned int high_byte = (VCC >> 8) & 0x00FFu;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        state->comp[lane] = (low_byte >> lane) & 1;
    }
    for (lane = 0; lane < 8; ++lane) {
        state->clip[lane] = (high_byte >> lane) & 1;
    }
}
/*
 * Unpack an 8-bit $vce value into the per-lane flag array.
 *
 * Bit i (0..7) of VCE is written to state->vce[i]; every element written is
 * 0 or 1.
 */
void set_VCE(usf_state_t * state, unsigned char VCE)
{
    const unsigned int flags = VCE;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        state->vce[lane] = (flags >> lane) & 1;
    }
}
#endif