Added AltiVec code, but it does not work yet, so it is disabled for now

This commit is contained in:
vspader 2006-05-12 18:01:53 +00:00
parent dfeaca5ed5
commit 904144a22b
5 changed files with 370 additions and 185 deletions

View file

@ -349,27 +349,10 @@
8CA0B48909CAD9E300B4B731 /* UnBitArray.h */,
8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */,
8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */,
8CA0B39709CAD77400B4B731 /* Assembly */,
);
name = "Library Headers";
sourceTree = "<group>";
};
8CA0B39709CAD77400B4B731 /* Assembly */ = {
isa = PBXGroup;
children = (
8CA0B39809CAD77400B4B731 /* .deps */,
);
name = Assembly;
path = src/MACLib/Assembly;
sourceTree = "<group>";
};
8CA0B39809CAD77400B4B731 /* .deps */ = {
isa = PBXGroup;
children = (
);
path = .deps;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
@ -566,10 +549,13 @@
FRAMEWORK_VERSION = A;
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = MAC_Prefix.pch;
INFOPLIST_FILE = Info.plist;
INSTALL_PATH = "@executable_path/../Frameworks";
OTHER_LDFLAGS = "-Wl,-read_only_relocs,warning";
PER_ARCH_CFLAGS_ppc = "-maltivec";
PRODUCT_NAME = MAC;
USER_HEADER_SEARCH_PATHS = "mac-src";
WRAPPER_EXTENSION = framework;
@ -591,6 +577,7 @@
buildSettings = {
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PER_ARCH_CFLAGS_ppc = "-maltivec";
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};

View file

@ -3,6 +3,10 @@
#include "NNFilter.h"
#include "Assembly/Assembly.h"
#ifdef __ppc__
#include <altivec.h>
#endif
CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
{
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1);
@ -11,6 +15,7 @@ CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
m_nVersion = nVersion;
m_bMMXAvailable = GetMMXAvailable();
m_AltiVecAvailable = IsAltiVecAvailable();
m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder);
m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
@ -41,7 +46,14 @@ int CNNFilter::Compress(int nInput)
// figure a dot product
int nDotProduct;
if (m_bMMXAvailable)
if(m_AltiVecAvailable)
{
nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// printf("Dot product altivec: %i\n", nDotProduct);
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// printf("Dot product: %i\n", nDotProduct);
}
else if (m_bMMXAvailable)
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
else
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
@ -50,7 +62,10 @@ int CNNFilter::Compress(int nInput)
int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
// adapt
if (m_bMMXAvailable)
if(m_AltiVecAvailable)
// AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
else if (m_bMMXAvailable)
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder);
else
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
@ -84,13 +99,23 @@ int CNNFilter::Decompress(int nInput)
// figure a dot product
int nDotProduct;
if (m_bMMXAvailable)
if(m_AltiVecAvailable)
{
// nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// printf("Dot product altivec: %i\n", nDotProduct);
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// printf("Dot product: %i\n", nDotProduct);
}
else if (m_bMMXAvailable)
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
else
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
// adapt
if (m_bMMXAvailable)
if(m_AltiVecAvailable)
AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
// AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
else if (m_bMMXAvailable)
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder);
else
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
@ -134,6 +159,159 @@ int CNNFilter::Decompress(int nInput)
return nOutput;
}
#ifdef __ppc__
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
{
vector signed short LSQ2, LSQ4, v1, v2;
vector unsigned char mask2;
nOrder >>= 4;
//mask1 = vec_lvsl(0,pM);
mask2 = vec_lvsl(0,pAdapt);
//align = vec_lvsr(0,pM);
//zero = (vector unsigned char)(0);
//(vector signed char) one = (vector signed char)(-1);
//mask3 = vec_perm((vector unsigned char)(0),(vector unsigned char)(-1),align);
//LSQ3 = vec_ld(0,pM);
LSQ4 = vec_ld(0,pAdapt);
if (nDirection < 0)
{
while (nOrder--)
{
v1 = vec_ld(0,pM);
LSQ2 = vec_ld(16,pAdapt);
v2 = vec_perm(LSQ4,LSQ2,mask2);
v1 = vec_add(v1,v2);
vec_st(v1,0,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
vec_st(LSQ3,0,pM);
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
vec_st(LSQ4,16,pM);
*/
v1 = vec_ld(16,pM);
LSQ4 = vec_ld(32,pAdapt);
v2 = vec_perm(LSQ2,LSQ4,mask2);
v1 = vec_add(v1,v2);
vec_st(v1,16,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
vec_st(LSQ,16,pM);
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
vec_st(LSQ2,32,pM);
*/
//memcpy(pM,buffer,32);
pM = pM + 16;
pAdapt = pAdapt + 16;
}
}
else if (nDirection > 0)
{
while (nOrder--)
{
v1 = vec_ld(0,pM);
LSQ2 = vec_ld(16,pAdapt);
v2 = vec_perm(LSQ4,LSQ2,mask2);
v1 = vec_sub(v1,v2);
vec_st(v1,0,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
vec_st(LSQ3,0,pM);
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
vec_st(LSQ4,16,pM);
*/
v1 = vec_ld(16,pM);
LSQ4 = vec_ld(32,pAdapt);
v2 = vec_perm(LSQ2,LSQ4,mask2);
v1 = vec_sub(v1,v2);
vec_st(v1,16,pM);
/*
v1 = vec_perm(v1,v1,align);
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
vec_st(LSQ,16,pM);
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
vec_st(LSQ2,32,pM);
*/
//memcpy(pM,buffer,32);
pM = pM + 16;
pAdapt = pAdapt + 16;
}
}
}
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
{
vector signed short LSQ, LSQ3, v1, v2;
vector unsigned char mask1;
vector signed int vzero = (vector signed int)(0);
vector signed int sum = (vector signed int)(0);
// sum = vec_xor(sum,sum);
//int nDotProduct;
int p[4];
nOrder >>= 4;
mask1 = vec_lvsl(0,pA);
//mask2 = vec_lvsl(0,pB);
LSQ3 = vec_ld(0, pA);
//LSQ4 = vec_ld(0, pB);
while (nOrder--)
{
LSQ = vec_ld(16,pA);
v1 = vec_perm(LSQ3,LSQ,mask1);
v2 = vec_ld(0,pB);
sum = vec_msum(v1,v2,sum);
LSQ3 = vec_ld(32,pA);
v1 = vec_perm(LSQ,LSQ3,mask1);
v2 = vec_ld(16,pB);
sum = vec_msum(v1,v2,sum);
pA = pA + 16;
pB = pB + 16;
}
sum = vec_sums(sum,vzero);
vec_st(sum,0,p);
return p[3];
}
#else
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
{
AdaptNoMMX(pM, pAdapt, nDirection, nOrder);
}
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
{
return CalculateDotProductNoMMX(pA, pB, nOrder);
}
#endif
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder)
{
nOrder >>= 4;

View file

@ -22,6 +22,7 @@ private:
int m_nShift;
int m_nVersion;
BOOL m_bMMXAvailable;
int m_AltiVecAvailable;
int m_nRunningAverage;
CRollBuffer<short> m_rbInput;
@ -36,6 +37,9 @@ private:
inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder);
inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder);
int CalculateDotProductAltiVec(short * pA, short * pB, int nOrder);
void AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder);
};
#endif // #ifndef APE_NNFILTER_H

View file

@ -2,6 +2,7 @@
#include "GlobalFunctions.h"
#include "IO.h"
#include "CharacterHelper.h"
#include <sys/sysctl.h>
/*
#ifndef __GNUC_IA32__
@ -38,6 +39,19 @@ extern "C" BOOL GetMMXAvailable(void)
#endif // #ifndef __GNUC_IA32__
*/
/**
 * Reports whether the host CPU has a vector (AltiVec) unit.
 *
 * Asks the kernel through sysctl with the { CTL_HW, HW_VECTORUNIT } selector
 * pair. On platforms whose headers do not define those selectors the
 * function compiles to a constant 0, so callers fall back to their
 * non-AltiVec paths.
 *
 * The function is silent: it is called from code that may run many times, so
 * it must not write diagnostics to stdout.
 *
 * @return 1 if the query succeeded and reported a non-zero value, 0 otherwise
 *         (including when the query fails or is not supported).
 */
int IsAltiVecAvailable( void )
{
#if defined(CTL_HW) && defined(HW_VECTORUNIT)
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);

    // A failed query is treated as "no vector unit".
    if (sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0) != 0)
        return 0;

    return hasVectorUnit != 0;
#else
    return 0;
#endif
}
int ReadSafe(CIO * pIO, void * pBuffer, int nBytes)
{
unsigned int nBytesRead = 0;

View file

@ -6,6 +6,8 @@ Definitions
*************************************************************************************/
class CIO;
int IsAltiVecAvailable( void );
/*************************************************************************************
Read / Write from an IO source and return failure if the number of bytes specified
isn't read or written