Added AltiVec code, but it does not work yet, so it is disabled for now

This commit is contained in:
vspader 2006-05-12 18:01:53 +00:00
parent dfeaca5ed5
commit 904144a22b
5 changed files with 370 additions and 185 deletions

View file

@ -349,27 +349,10 @@
8CA0B48909CAD9E300B4B731 /* UnBitArray.h */, 8CA0B48909CAD9E300B4B731 /* UnBitArray.h */,
8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */, 8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */,
8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */, 8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */,
8CA0B39709CAD77400B4B731 /* Assembly */,
); );
name = "Library Headers"; name = "Library Headers";
sourceTree = "<group>"; sourceTree = "<group>";
}; };
8CA0B39709CAD77400B4B731 /* Assembly */ = {
isa = PBXGroup;
children = (
8CA0B39809CAD77400B4B731 /* .deps */,
);
name = Assembly;
path = src/MACLib/Assembly;
sourceTree = "<group>";
};
8CA0B39809CAD77400B4B731 /* .deps */ = {
isa = PBXGroup;
children = (
);
path = .deps;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */ /* Begin PBXHeadersBuildPhase section */
@ -566,10 +549,13 @@
FRAMEWORK_VERSION = A; FRAMEWORK_VERSION = A;
GCC_GENERATE_DEBUGGING_SYMBOLS = NO; GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = G5; GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PRECOMPILE_PREFIX_HEADER = YES; GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = MAC_Prefix.pch; GCC_PREFIX_HEADER = MAC_Prefix.pch;
INFOPLIST_FILE = Info.plist; INFOPLIST_FILE = Info.plist;
INSTALL_PATH = "@executable_path/../Frameworks"; INSTALL_PATH = "@executable_path/../Frameworks";
OTHER_LDFLAGS = "-Wl,-read_only_relocs,warning";
PER_ARCH_CFLAGS_ppc = "-maltivec";
PRODUCT_NAME = MAC; PRODUCT_NAME = MAC;
USER_HEADER_SEARCH_PATHS = "mac-src"; USER_HEADER_SEARCH_PATHS = "mac-src";
WRAPPER_EXTENSION = framework; WRAPPER_EXTENSION = framework;
@ -591,6 +577,7 @@
buildSettings = { buildSettings = {
GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES; GCC_WARN_UNUSED_VARIABLE = YES;
PER_ARCH_CFLAGS_ppc = "-maltivec";
PREBINDING = NO; PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
}; };

View file

@ -1,168 +1,346 @@
#include "All.h" #include "All.h"
#include "GlobalFunctions.h" #include "GlobalFunctions.h"
#include "NNFilter.h" #include "NNFilter.h"
#include "Assembly/Assembly.h" #include "Assembly/Assembly.h"
CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion) #ifdef __ppc__
{ #include <altivec.h>
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1); #endif
m_nOrder = nOrder;
m_nShift = nShift; CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
m_nVersion = nVersion; {
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1);
m_bMMXAvailable = GetMMXAvailable(); m_nOrder = nOrder;
m_nShift = nShift;
m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder); m_nVersion = nVersion;
m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
m_paryM = new short [m_nOrder]; m_bMMXAvailable = GetMMXAvailable();
m_AltiVecAvailable = IsAltiVecAvailable();
#ifdef NN_TEST_MMX
srand(GetTickCount()); m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder);
#endif m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
} m_paryM = new short [m_nOrder];
CNNFilter::~CNNFilter() #ifdef NN_TEST_MMX
{ srand(GetTickCount());
SAFE_ARRAY_DELETE(m_paryM) #endif
} }
void CNNFilter::Flush() CNNFilter::~CNNFilter()
{ {
memset(&m_paryM[0], 0, m_nOrder * sizeof(short)); SAFE_ARRAY_DELETE(m_paryM)
m_rbInput.Flush(); }
m_rbDeltaM.Flush();
m_nRunningAverage = 0; void CNNFilter::Flush()
} {
memset(&m_paryM[0], 0, m_nOrder * sizeof(short));
int CNNFilter::Compress(int nInput) m_rbInput.Flush();
{ m_rbDeltaM.Flush();
// convert the input to a short and store it m_nRunningAverage = 0;
m_rbInput[0] = GetSaturatedShortFromInt(nInput); }
// figure a dot product int CNNFilter::Compress(int nInput)
int nDotProduct; {
if (m_bMMXAvailable) // convert the input to a short and store it
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder); m_rbInput[0] = GetSaturatedShortFromInt(nInput);
else
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder); // figure a dot product
int nDotProduct;
// calculate the output if(m_AltiVecAvailable)
int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift); {
nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// adapt // printf("Dot product altivec: %i\n", nDotProduct);
if (m_bMMXAvailable) nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder); // printf("Dot product: %i\n", nDotProduct);
else }
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder); else if (m_bMMXAvailable)
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
int nTempABS = abs(nInput); else
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
if (nTempABS > (m_nRunningAverage * 3))
m_rbDeltaM[0] = ((nInput >> 25) & 64) - 32; // calculate the output
else if (nTempABS > (m_nRunningAverage * 4) / 3) int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
m_rbDeltaM[0] = ((nInput >> 26) & 32) - 16;
else if (nTempABS > 0) // adapt
m_rbDeltaM[0] = ((nInput >> 27) & 16) - 8; if(m_AltiVecAvailable)
else // AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
m_rbDeltaM[0] = 0; AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
else if (m_bMMXAvailable)
m_nRunningAverage += (nTempABS - m_nRunningAverage) / 16; Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder);
else
m_rbDeltaM[-1] >>= 1; AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
m_rbDeltaM[-2] >>= 1;
m_rbDeltaM[-8] >>= 1; int nTempABS = abs(nInput);
// increment and roll if necessary if (nTempABS > (m_nRunningAverage * 3))
m_rbInput.IncrementSafe(); m_rbDeltaM[0] = ((nInput >> 25) & 64) - 32;
m_rbDeltaM.IncrementSafe(); else if (nTempABS > (m_nRunningAverage * 4) / 3)
m_rbDeltaM[0] = ((nInput >> 26) & 32) - 16;
return nOutput; else if (nTempABS > 0)
} m_rbDeltaM[0] = ((nInput >> 27) & 16) - 8;
else
int CNNFilter::Decompress(int nInput) m_rbDeltaM[0] = 0;
{
// figure a dot product m_nRunningAverage += (nTempABS - m_nRunningAverage) / 16;
int nDotProduct;
m_rbDeltaM[-1] >>= 1;
if (m_bMMXAvailable) m_rbDeltaM[-2] >>= 1;
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder); m_rbDeltaM[-8] >>= 1;
else
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder); // increment and roll if necessary
m_rbInput.IncrementSafe();
// adapt m_rbDeltaM.IncrementSafe();
if (m_bMMXAvailable)
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder); return nOutput;
else }
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
int CNNFilter::Decompress(int nInput)
// store the output value {
int nOutput = nInput + ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift); // figure a dot product
int nDotProduct;
// update the input buffer
m_rbInput[0] = GetSaturatedShortFromInt(nOutput); if(m_AltiVecAvailable)
{
if (m_nVersion >= 3980) // nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
{ // printf("Dot product altivec: %i\n", nDotProduct);
int nTempABS = abs(nOutput); nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// printf("Dot product: %i\n", nDotProduct);
if (nTempABS > (m_nRunningAverage * 3)) }
m_rbDeltaM[0] = ((nOutput >> 25) & 64) - 32; else if (m_bMMXAvailable)
else if (nTempABS > (m_nRunningAverage * 4) / 3) nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
m_rbDeltaM[0] = ((nOutput >> 26) & 32) - 16; else
else if (nTempABS > 0) nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
m_rbDeltaM[0] = ((nOutput >> 27) & 16) - 8;
else // adapt
m_rbDeltaM[0] = 0; if(m_AltiVecAvailable)
AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
m_nRunningAverage += (nTempABS - m_nRunningAverage) / 16; // AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
else if (m_bMMXAvailable)
m_rbDeltaM[-1] >>= 1; Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder);
m_rbDeltaM[-2] >>= 1; else
m_rbDeltaM[-8] >>= 1; AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
}
else // store the output value
{ int nOutput = nInput + ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
m_rbDeltaM[0] = (nOutput == 0) ? 0 : ((nOutput >> 28) & 8) - 4;
m_rbDeltaM[-4] >>= 1; // update the input buffer
m_rbDeltaM[-8] >>= 1; m_rbInput[0] = GetSaturatedShortFromInt(nOutput);
}
if (m_nVersion >= 3980)
// increment and roll if necessary {
m_rbInput.IncrementSafe(); int nTempABS = abs(nOutput);
m_rbDeltaM.IncrementSafe();
if (nTempABS > (m_nRunningAverage * 3))
return nOutput; m_rbDeltaM[0] = ((nOutput >> 25) & 64) - 32;
} else if (nTempABS > (m_nRunningAverage * 4) / 3)
m_rbDeltaM[0] = ((nOutput >> 26) & 32) - 16;
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder) else if (nTempABS > 0)
{ m_rbDeltaM[0] = ((nOutput >> 27) & 16) - 8;
nOrder >>= 4; else
m_rbDeltaM[0] = 0;
if (nDirection < 0)
{ m_nRunningAverage += (nTempABS - m_nRunningAverage) / 16;
while (nOrder--)
{ m_rbDeltaM[-1] >>= 1;
EXPAND_16_TIMES(*pM++ += *pAdapt++;) m_rbDeltaM[-2] >>= 1;
} m_rbDeltaM[-8] >>= 1;
} }
else if (nDirection > 0) else
{ {
while (nOrder--) m_rbDeltaM[0] = (nOutput == 0) ? 0 : ((nOutput >> 28) & 8) - 4;
{ m_rbDeltaM[-4] >>= 1;
EXPAND_16_TIMES(*pM++ -= *pAdapt++;) m_rbDeltaM[-8] >>= 1;
} }
}
} // increment and roll if necessary
m_rbInput.IncrementSafe();
int CNNFilter::CalculateDotProductNoMMX(short * pA, short * pB, int nOrder) m_rbDeltaM.IncrementSafe();
{
int nDotProduct = 0; return nOutput;
nOrder >>= 4; }
while (nOrder--) #ifdef __ppc__
{ void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
EXPAND_16_TIMES(nDotProduct += *pA++ * *pB++;) {
} vector signed short LSQ2, LSQ4, v1, v2;
vector unsigned char mask2;
return nDotProduct;
} nOrder >>= 4;
//mask1 = vec_lvsl(0,pM);
mask2 = vec_lvsl(0,pAdapt);
//align = vec_lvsr(0,pM);
//zero = (vector unsigned char)(0);
//(vector signed char) one = (vector signed char)(-1);
//mask3 = vec_perm((vector unsigned char)(0),(vector unsigned char)(-1),align);
//LSQ3 = vec_ld(0,pM);
LSQ4 = vec_ld(0,pAdapt);
if (nDirection < 0)
{
while (nOrder--)
{
v1 = vec_ld(0,pM);
LSQ2 = vec_ld(16,pAdapt);
v2 = vec_perm(LSQ4,LSQ2,mask2);
v1 = vec_add(v1,v2);
vec_st(v1,0,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
vec_st(LSQ3,0,pM);
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
vec_st(LSQ4,16,pM);
*/
v1 = vec_ld(16,pM);
LSQ4 = vec_ld(32,pAdapt);
v2 = vec_perm(LSQ2,LSQ4,mask2);
v1 = vec_add(v1,v2);
vec_st(v1,16,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
vec_st(LSQ,16,pM);
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
vec_st(LSQ2,32,pM);
*/
//memcpy(pM,buffer,32);
pM = pM + 16;
pAdapt = pAdapt + 16;
}
}
else if (nDirection > 0)
{
while (nOrder--)
{
v1 = vec_ld(0,pM);
LSQ2 = vec_ld(16,pAdapt);
v2 = vec_perm(LSQ4,LSQ2,mask2);
v1 = vec_sub(v1,v2);
vec_st(v1,0,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
vec_st(LSQ3,0,pM);
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
vec_st(LSQ4,16,pM);
*/
v1 = vec_ld(16,pM);
LSQ4 = vec_ld(32,pAdapt);
v2 = vec_perm(LSQ2,LSQ4,mask2);
v1 = vec_sub(v1,v2);
vec_st(v1,16,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
vec_st(LSQ,16,pM);
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
vec_st(LSQ2,32,pM);
*/
//memcpy(pM,buffer,32);
pM = pM + 16;
pAdapt = pAdapt + 16;
}
}
}
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
{
vector signed short LSQ, LSQ3, v1, v2;
vector unsigned char mask1;
vector signed int vzero = (vector signed int)(0);
vector signed int sum = (vector signed int)(0);
// sum = vec_xor(sum,sum);
//int nDotProduct;
int p[4];
nOrder >>= 4;
mask1 = vec_lvsl(0,pA);
//mask2 = vec_lvsl(0,pB);
LSQ3 = vec_ld(0, pA);
//LSQ4 = vec_ld(0, pB);
while (nOrder--)
{
LSQ = vec_ld(16,pA);
v1 = vec_perm(LSQ3,LSQ,mask1);
v2 = vec_ld(0,pB);
sum = vec_msum(v1,v2,sum);
LSQ3 = vec_ld(32,pA);
v1 = vec_perm(LSQ,LSQ3,mask1);
v2 = vec_ld(16,pB);
sum = vec_msum(v1,v2,sum);
pA = pA + 16;
pB = pB + 16;
}
sum = vec_sums(sum,vzero);
vec_st(sum,0,p);
return p[3];
}
#else
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
{
AdaptNoMMX(pM, pAdapt, nDirection, nOrder);
}
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
{
return CalculateDotProductNoMMX(pA, pB, nOrder);
}
#endif
/*
 * Scalar coefficient update.
 *
 * nDirection < 0 : every pAdapt element is added to the matching pM element
 * nDirection > 0 : every pAdapt element is subtracted from the matching pM element
 * nDirection == 0: pM is left untouched
 *
 * Elements are handled in groups of 16 (nOrder >> 4 groups); a remainder
 * smaller than 16 is not processed.
 */
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder)
{
    if (nDirection == 0)
        return;

    if (nDirection < 0)
    {
        for (int nGroups = nOrder >> 4; nGroups != 0; --nGroups)
        {
            EXPAND_16_TIMES(*pM++ += *pAdapt++;)
        }
        return;
    }

    for (int nGroups = nOrder >> 4; nGroups != 0; --nGroups)
    {
        EXPAND_16_TIMES(*pM++ -= *pAdapt++;)
    }
}
/*
 * Scalar dot product of pA and pB, accumulated in an int.
 *
 * Elements are consumed in groups of 16 (nOrder >> 4 groups); a remainder
 * smaller than 16 is not included in the sum.
 */
int CNNFilter::CalculateDotProductNoMMX(short * pA, short * pB, int nOrder)
{
    int nAccumulator = 0;

    for (int nGroups = nOrder >> 4; nGroups != 0; --nGroups)
    {
        EXPAND_16_TIMES(nAccumulator += *pA++ * *pB++;)
    }

    return nAccumulator;
}

View file

@ -22,6 +22,7 @@ private:
int m_nShift; int m_nShift;
int m_nVersion; int m_nVersion;
BOOL m_bMMXAvailable; BOOL m_bMMXAvailable;
int m_AltiVecAvailable;
int m_nRunningAverage; int m_nRunningAverage;
CRollBuffer<short> m_rbInput; CRollBuffer<short> m_rbInput;
@ -36,6 +37,9 @@ private:
inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder); inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder);
inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder); inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder);
int CalculateDotProductAltiVec(short * pA, short * pB, int nOrder);
void AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder);
}; };
#endif // #ifndef APE_NNFILTER_H #endif // #ifndef APE_NNFILTER_H

View file

@ -2,6 +2,7 @@
#include "GlobalFunctions.h" #include "GlobalFunctions.h"
#include "IO.h" #include "IO.h"
#include "CharacterHelper.h" #include "CharacterHelper.h"
#include <sys/sysctl.h>
/* /*
#ifndef __GNUC_IA32__ #ifndef __GNUC_IA32__
@ -38,6 +39,19 @@ extern "C" BOOL GetMMXAvailable(void)
#endif // #ifndef __GNUC_IA32__ #endif // #ifndef __GNUC_IA32__
*/ */
/*
 * Asks the kernel, through the CTL_HW / HW_VECTORUNIT sysctl, whether the
 * machine reports a vector unit.
 *
 * Returns 1 when the query succeeds and reports a non-zero value, and 0 when
 * it reports zero or the sysctl call itself fails.
 *
 * NOTE(review): this is a run-time hardware query, not a compile-time check;
 * presumably non-PowerPC hosts can also report a vector unit here, so callers
 * should not treat a non-zero result as proof that AltiVec code was built.
 * Confirm against the sysctl documentation.
 */
int IsAltiVecAvailable( void )
{
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);

    // This runs for every CNNFilter that is constructed, so it must stay
    // silent: no diagnostic output from library code.
    if (sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0) != 0)
        return 0;

    return hasVectorUnit != 0;
}
int ReadSafe(CIO * pIO, void * pBuffer, int nBytes) int ReadSafe(CIO * pIO, void * pBuffer, int nBytes)
{ {
unsigned int nBytesRead = 0; unsigned int nBytesRead = 0;

View file

@ -6,6 +6,8 @@ Definitions
*************************************************************************************/ *************************************************************************************/
class CIO; class CIO;
int IsAltiVecAvailable( void );
/************************************************************************************* /*************************************************************************************
Read / Write from an IO source and return failure if the number of bytes specified Read / Write from an IO source and return failure if the number of bytes specified
isn't read or written isn't read or written