Added AltiVec code, but it does not work yet, so it is disabled for now
This commit is contained in:
parent
dfeaca5ed5
commit
904144a22b
5 changed files with 370 additions and 185 deletions
|
@ -349,27 +349,10 @@
|
|||
8CA0B48909CAD9E300B4B731 /* UnBitArray.h */,
|
||||
8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */,
|
||||
8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */,
|
||||
8CA0B39709CAD77400B4B731 /* Assembly */,
|
||||
);
|
||||
name = "Library Headers";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8CA0B39709CAD77400B4B731 /* Assembly */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8CA0B39809CAD77400B4B731 /* .deps */,
|
||||
);
|
||||
name = Assembly;
|
||||
path = src/MACLib/Assembly;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8CA0B39809CAD77400B4B731 /* .deps */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
);
|
||||
path = .deps;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXHeadersBuildPhase section */
|
||||
|
@ -566,10 +549,13 @@
|
|||
FRAMEWORK_VERSION = A;
|
||||
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_OPTIMIZATION_LEVEL = 3;
|
||||
GCC_PRECOMPILE_PREFIX_HEADER = YES;
|
||||
GCC_PREFIX_HEADER = MAC_Prefix.pch;
|
||||
INFOPLIST_FILE = Info.plist;
|
||||
INSTALL_PATH = "@executable_path/../Frameworks";
|
||||
OTHER_LDFLAGS = "-Wl,-read_only_relocs,warning";
|
||||
PER_ARCH_CFLAGS_ppc = "-maltivec";
|
||||
PRODUCT_NAME = MAC;
|
||||
USER_HEADER_SEARCH_PATHS = "mac-src";
|
||||
WRAPPER_EXTENSION = framework;
|
||||
|
@ -591,6 +577,7 @@
|
|||
buildSettings = {
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
PER_ARCH_CFLAGS_ppc = "-maltivec";
|
||||
PREBINDING = NO;
|
||||
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
|
||||
};
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
#include "NNFilter.h"
|
||||
#include "Assembly/Assembly.h"
|
||||
|
||||
#ifdef __ppc__
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
|
||||
CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
|
||||
{
|
||||
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1);
|
||||
|
@ -11,6 +15,7 @@ CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
|
|||
m_nVersion = nVersion;
|
||||
|
||||
m_bMMXAvailable = GetMMXAvailable();
|
||||
m_AltiVecAvailable = IsAltiVecAvailable();
|
||||
|
||||
m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
||||
m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
||||
|
@ -41,7 +46,14 @@ int CNNFilter::Compress(int nInput)
|
|||
|
||||
// figure a dot product
|
||||
int nDotProduct;
|
||||
if (m_bMMXAvailable)
|
||||
if(m_AltiVecAvailable)
|
||||
{
|
||||
nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
// printf("Dot product altivec: %i\n", nDotProduct);
|
||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
// printf("Dot product: %i\n", nDotProduct);
|
||||
}
|
||||
else if (m_bMMXAvailable)
|
||||
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
else
|
||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
|
@ -50,7 +62,10 @@ int CNNFilter::Compress(int nInput)
|
|||
int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
|
||||
|
||||
// adapt
|
||||
if (m_bMMXAvailable)
|
||||
if(m_AltiVecAvailable)
|
||||
// AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||
else if (m_bMMXAvailable)
|
||||
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder);
|
||||
else
|
||||
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||
|
@ -84,13 +99,23 @@ int CNNFilter::Decompress(int nInput)
|
|||
// figure a dot product
|
||||
int nDotProduct;
|
||||
|
||||
if (m_bMMXAvailable)
|
||||
if(m_AltiVecAvailable)
|
||||
{
|
||||
// nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
// printf("Dot product altivec: %i\n", nDotProduct);
|
||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
// printf("Dot product: %i\n", nDotProduct);
|
||||
}
|
||||
else if (m_bMMXAvailable)
|
||||
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
else
|
||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||
|
||||
// adapt
|
||||
if (m_bMMXAvailable)
|
||||
if(m_AltiVecAvailable)
|
||||
AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||
// AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||
else if (m_bMMXAvailable)
|
||||
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder);
|
||||
else
|
||||
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||
|
@ -134,6 +159,159 @@ int CNNFilter::Decompress(int nInput)
|
|||
return nOutput;
|
||||
}
|
||||
|
||||
#ifdef __ppc__
|
||||
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||
{
|
||||
vector signed short LSQ2, LSQ4, v1, v2;
|
||||
vector unsigned char mask2;
|
||||
|
||||
nOrder >>= 4;
|
||||
|
||||
//mask1 = vec_lvsl(0,pM);
|
||||
mask2 = vec_lvsl(0,pAdapt);
|
||||
//align = vec_lvsr(0,pM);
|
||||
//zero = (vector unsigned char)(0);
|
||||
//(vector signed char) one = (vector signed char)(-1);
|
||||
//mask3 = vec_perm((vector unsigned char)(0),(vector unsigned char)(-1),align);
|
||||
|
||||
//LSQ3 = vec_ld(0,pM);
|
||||
LSQ4 = vec_ld(0,pAdapt);
|
||||
|
||||
if (nDirection < 0)
|
||||
{
|
||||
while (nOrder--)
|
||||
{
|
||||
|
||||
v1 = vec_ld(0,pM);
|
||||
LSQ2 = vec_ld(16,pAdapt);
|
||||
v2 = vec_perm(LSQ4,LSQ2,mask2);
|
||||
v1 = vec_add(v1,v2);
|
||||
vec_st(v1,0,pM);
|
||||
|
||||
/*
|
||||
v1 = vec_perm(v1,v1,align);
|
||||
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
|
||||
vec_st(LSQ3,0,pM);
|
||||
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
|
||||
vec_st(LSQ4,16,pM);
|
||||
*/
|
||||
|
||||
v1 = vec_ld(16,pM);
|
||||
LSQ4 = vec_ld(32,pAdapt);
|
||||
v2 = vec_perm(LSQ2,LSQ4,mask2);
|
||||
v1 = vec_add(v1,v2);
|
||||
vec_st(v1,16,pM);
|
||||
|
||||
/*
|
||||
v1 = vec_perm(v1,v1,align);
|
||||
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
|
||||
vec_st(LSQ,16,pM);
|
||||
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
|
||||
vec_st(LSQ2,32,pM);
|
||||
*/
|
||||
|
||||
//memcpy(pM,buffer,32);
|
||||
pM = pM + 16;
|
||||
pAdapt = pAdapt + 16;
|
||||
}
|
||||
}
|
||||
else if (nDirection > 0)
|
||||
{
|
||||
while (nOrder--)
|
||||
{
|
||||
|
||||
v1 = vec_ld(0,pM);
|
||||
LSQ2 = vec_ld(16,pAdapt);
|
||||
v2 = vec_perm(LSQ4,LSQ2,mask2);
|
||||
v1 = vec_sub(v1,v2);
|
||||
vec_st(v1,0,pM);
|
||||
|
||||
/*
|
||||
v1 = vec_perm(v1,v1,align);
|
||||
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
|
||||
vec_st(LSQ3,0,pM);
|
||||
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
|
||||
vec_st(LSQ4,16,pM);
|
||||
*/
|
||||
|
||||
v1 = vec_ld(16,pM);
|
||||
LSQ4 = vec_ld(32,pAdapt);
|
||||
v2 = vec_perm(LSQ2,LSQ4,mask2);
|
||||
v1 = vec_sub(v1,v2);
|
||||
vec_st(v1,16,pM);
|
||||
|
||||
/*
|
||||
v1 = vec_perm(v1,v1,align);
|
||||
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
|
||||
vec_st(LSQ,16,pM);
|
||||
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
|
||||
vec_st(LSQ2,32,pM);
|
||||
*/
|
||||
|
||||
//memcpy(pM,buffer,32);
|
||||
pM = pM + 16;
|
||||
pAdapt = pAdapt + 16;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
|
||||
{
|
||||
vector signed short LSQ, LSQ3, v1, v2;
|
||||
vector unsigned char mask1;
|
||||
|
||||
vector signed int vzero = (vector signed int)(0);
|
||||
vector signed int sum = (vector signed int)(0);
|
||||
// sum = vec_xor(sum,sum);
|
||||
|
||||
//int nDotProduct;
|
||||
int p[4];
|
||||
nOrder >>= 4;
|
||||
|
||||
mask1 = vec_lvsl(0,pA);
|
||||
//mask2 = vec_lvsl(0,pB);
|
||||
|
||||
|
||||
LSQ3 = vec_ld(0, pA);
|
||||
//LSQ4 = vec_ld(0, pB);
|
||||
|
||||
while (nOrder--)
|
||||
{
|
||||
|
||||
LSQ = vec_ld(16,pA);
|
||||
v1 = vec_perm(LSQ3,LSQ,mask1);
|
||||
v2 = vec_ld(0,pB);
|
||||
sum = vec_msum(v1,v2,sum);
|
||||
|
||||
LSQ3 = vec_ld(32,pA);
|
||||
v1 = vec_perm(LSQ,LSQ3,mask1);
|
||||
v2 = vec_ld(16,pB);
|
||||
sum = vec_msum(v1,v2,sum);
|
||||
|
||||
pA = pA + 16;
|
||||
pB = pB + 16;
|
||||
|
||||
}
|
||||
|
||||
sum = vec_sums(sum,vzero);
|
||||
vec_st(sum,0,p);
|
||||
|
||||
return p[3];
|
||||
}
|
||||
|
||||
#else
|
||||
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||
{
|
||||
AdaptNoMMX(pM, pAdapt, nDirection, nOrder);
|
||||
}
|
||||
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
|
||||
{
|
||||
return CalculateDotProductNoMMX(pA, pB, nOrder);
|
||||
}
|
||||
#endif
|
||||
|
||||
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||
{
|
||||
nOrder >>= 4;
|
||||
|
|
|
@ -22,6 +22,7 @@ private:
|
|||
int m_nShift;
|
||||
int m_nVersion;
|
||||
BOOL m_bMMXAvailable;
|
||||
int m_AltiVecAvailable;
|
||||
int m_nRunningAverage;
|
||||
|
||||
CRollBuffer<short> m_rbInput;
|
||||
|
@ -36,6 +37,9 @@ private:
|
|||
|
||||
inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder);
|
||||
inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder);
|
||||
int CalculateDotProductAltiVec(short * pA, short * pB, int nOrder);
|
||||
void AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder);
|
||||
|
||||
};
|
||||
|
||||
#endif // #ifndef APE_NNFILTER_H
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "GlobalFunctions.h"
|
||||
#include "IO.h"
|
||||
#include "CharacterHelper.h"
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
/*
|
||||
#ifndef __GNUC_IA32__
|
||||
|
@ -38,6 +39,19 @@ extern "C" BOOL GetMMXAvailable(void)
|
|||
#endif // #ifndef __GNUC_IA32__
|
||||
*/
|
||||
|
||||
/*
 * Reports whether the AltiVec code paths may be used.
 *
 * Returns 1 when this is a PowerPC build and the kernel reports a vector
 * unit, 0 otherwise (including when the sysctl query itself fails).
 *
 * The CTL_HW / HW_VECTORUNIT selector describes a vector unit in general,
 * not AltiVec specifically, so the query is only made in builds that also
 * compile the AltiVec routines (__ppc__). Every other build answers 0 so
 * that callers keep using their non-AltiVec paths.
 *
 * The function is silent: it is called whenever a filter is constructed,
 * so it must not write diagnostics to stdout.
 */
int IsAltiVecAvailable( void )
{
#ifdef __ppc__
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);

    // a failed query is treated as "no vector unit"
    if (sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0) != 0)
        return 0;

    return hasVectorUnit != 0;
#else
    return 0;
#endif
}
|
||||
|
||||
int ReadSafe(CIO * pIO, void * pBuffer, int nBytes)
|
||||
{
|
||||
unsigned int nBytesRead = 0;
|
||||
|
|
|
@ -6,6 +6,8 @@ Definitions
|
|||
*************************************************************************************/
|
||||
class CIO;
|
||||
|
||||
int IsAltiVecAvailable( void );
|
||||
|
||||
/*************************************************************************************
|
||||
Read / Write from an IO source and return failure if the number of bytes specified
|
||||
isn't read or written
|
||||
|
|
Loading…
Reference in a new issue