Added AltiVec code, but it does not work yet, so it is disabled for now
This commit is contained in:
parent
dfeaca5ed5
commit
904144a22b
5 changed files with 370 additions and 185 deletions
|
@ -349,27 +349,10 @@
|
||||||
8CA0B48909CAD9E300B4B731 /* UnBitArray.h */,
|
8CA0B48909CAD9E300B4B731 /* UnBitArray.h */,
|
||||||
8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */,
|
8CA0B48A09CAD9E300B4B731 /* UnBitArrayBase.h */,
|
||||||
8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */,
|
8CA0B48B09CAD9E300B4B731 /* WAVInputSource.h */,
|
||||||
8CA0B39709CAD77400B4B731 /* Assembly */,
|
|
||||||
);
|
);
|
||||||
name = "Library Headers";
|
name = "Library Headers";
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
8CA0B39709CAD77400B4B731 /* Assembly */ = {
|
|
||||||
isa = PBXGroup;
|
|
||||||
children = (
|
|
||||||
8CA0B39809CAD77400B4B731 /* .deps */,
|
|
||||||
);
|
|
||||||
name = Assembly;
|
|
||||||
path = src/MACLib/Assembly;
|
|
||||||
sourceTree = "<group>";
|
|
||||||
};
|
|
||||||
8CA0B39809CAD77400B4B731 /* .deps */ = {
|
|
||||||
isa = PBXGroup;
|
|
||||||
children = (
|
|
||||||
);
|
|
||||||
path = .deps;
|
|
||||||
sourceTree = "<group>";
|
|
||||||
};
|
|
||||||
/* End PBXGroup section */
|
/* End PBXGroup section */
|
||||||
|
|
||||||
/* Begin PBXHeadersBuildPhase section */
|
/* Begin PBXHeadersBuildPhase section */
|
||||||
|
@ -566,10 +549,13 @@
|
||||||
FRAMEWORK_VERSION = A;
|
FRAMEWORK_VERSION = A;
|
||||||
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
|
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
|
||||||
GCC_MODEL_TUNING = G5;
|
GCC_MODEL_TUNING = G5;
|
||||||
|
GCC_OPTIMIZATION_LEVEL = 3;
|
||||||
GCC_PRECOMPILE_PREFIX_HEADER = YES;
|
GCC_PRECOMPILE_PREFIX_HEADER = YES;
|
||||||
GCC_PREFIX_HEADER = MAC_Prefix.pch;
|
GCC_PREFIX_HEADER = MAC_Prefix.pch;
|
||||||
INFOPLIST_FILE = Info.plist;
|
INFOPLIST_FILE = Info.plist;
|
||||||
INSTALL_PATH = "@executable_path/../Frameworks";
|
INSTALL_PATH = "@executable_path/../Frameworks";
|
||||||
|
OTHER_LDFLAGS = "-Wl,-read_only_relocs,warning";
|
||||||
|
PER_ARCH_CFLAGS_ppc = "-maltivec";
|
||||||
PRODUCT_NAME = MAC;
|
PRODUCT_NAME = MAC;
|
||||||
USER_HEADER_SEARCH_PATHS = "mac-src";
|
USER_HEADER_SEARCH_PATHS = "mac-src";
|
||||||
WRAPPER_EXTENSION = framework;
|
WRAPPER_EXTENSION = framework;
|
||||||
|
@ -591,6 +577,7 @@
|
||||||
buildSettings = {
|
buildSettings = {
|
||||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||||
|
PER_ARCH_CFLAGS_ppc = "-maltivec";
|
||||||
PREBINDING = NO;
|
PREBINDING = NO;
|
||||||
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
|
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
#include "NNFilter.h"
|
#include "NNFilter.h"
|
||||||
#include "Assembly/Assembly.h"
|
#include "Assembly/Assembly.h"
|
||||||
|
|
||||||
|
#ifdef __ppc__
|
||||||
|
#include <altivec.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
|
CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
|
||||||
{
|
{
|
||||||
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1);
|
if ((nOrder <= 0) || ((nOrder % 16) != 0)) throw(1);
|
||||||
|
@ -11,6 +15,7 @@ CNNFilter::CNNFilter(int nOrder, int nShift, int nVersion)
|
||||||
m_nVersion = nVersion;
|
m_nVersion = nVersion;
|
||||||
|
|
||||||
m_bMMXAvailable = GetMMXAvailable();
|
m_bMMXAvailable = GetMMXAvailable();
|
||||||
|
m_AltiVecAvailable = IsAltiVecAvailable();
|
||||||
|
|
||||||
m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
m_rbInput.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
||||||
m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
m_rbDeltaM.Create(NN_WINDOW_ELEMENTS, m_nOrder);
|
||||||
|
@ -41,7 +46,14 @@ int CNNFilter::Compress(int nInput)
|
||||||
|
|
||||||
// figure a dot product
|
// figure a dot product
|
||||||
int nDotProduct;
|
int nDotProduct;
|
||||||
if (m_bMMXAvailable)
|
if(m_AltiVecAvailable)
|
||||||
|
{
|
||||||
|
nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
// printf("Dot product altivec: %i\n", nDotProduct);
|
||||||
|
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
// printf("Dot product: %i\n", nDotProduct);
|
||||||
|
}
|
||||||
|
else if (m_bMMXAvailable)
|
||||||
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
else
|
else
|
||||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
@ -50,7 +62,10 @@ int CNNFilter::Compress(int nInput)
|
||||||
int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
|
int nOutput = nInput - ((nDotProduct + (1 << (m_nShift - 1))) >> m_nShift);
|
||||||
|
|
||||||
// adapt
|
// adapt
|
||||||
if (m_bMMXAvailable)
|
if(m_AltiVecAvailable)
|
||||||
|
// AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||||
|
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||||
|
else if (m_bMMXAvailable)
|
||||||
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder);
|
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nOutput, m_nOrder);
|
||||||
else
|
else
|
||||||
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nOutput, m_nOrder);
|
||||||
|
@ -84,13 +99,23 @@ int CNNFilter::Decompress(int nInput)
|
||||||
// figure a dot product
|
// figure a dot product
|
||||||
int nDotProduct;
|
int nDotProduct;
|
||||||
|
|
||||||
if (m_bMMXAvailable)
|
if(m_AltiVecAvailable)
|
||||||
|
{
|
||||||
|
// nDotProduct = CalculateDotProductAltiVec(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
// printf("Dot product altivec: %i\n", nDotProduct);
|
||||||
|
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
// printf("Dot product: %i\n", nDotProduct);
|
||||||
|
}
|
||||||
|
else if (m_bMMXAvailable)
|
||||||
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
nDotProduct = CalculateDotProduct(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
else
|
else
|
||||||
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
nDotProduct = CalculateDotProductNoMMX(&m_rbInput[-m_nOrder], &m_paryM[0], m_nOrder);
|
||||||
|
|
||||||
// adapt
|
// adapt
|
||||||
if (m_bMMXAvailable)
|
if(m_AltiVecAvailable)
|
||||||
|
AdaptAltiVec(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||||
|
// AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||||
|
else if (m_bMMXAvailable)
|
||||||
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder);
|
Adapt(&m_paryM[0], &m_rbDeltaM[-m_nOrder], -nInput, m_nOrder);
|
||||||
else
|
else
|
||||||
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
AdaptNoMMX(&m_paryM[0], &m_rbDeltaM[-m_nOrder], nInput, m_nOrder);
|
||||||
|
@ -134,6 +159,159 @@ int CNNFilter::Decompress(int nInput)
|
||||||
return nOutput;
|
return nOutput;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __ppc__
|
||||||
|
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||||
|
{
|
||||||
|
vector signed short LSQ2, LSQ4, v1, v2;
|
||||||
|
vector unsigned char mask2;
|
||||||
|
|
||||||
|
nOrder >>= 4;
|
||||||
|
|
||||||
|
//mask1 = vec_lvsl(0,pM);
|
||||||
|
mask2 = vec_lvsl(0,pAdapt);
|
||||||
|
//align = vec_lvsr(0,pM);
|
||||||
|
//zero = (vector unsigned char)(0);
|
||||||
|
//(vector signed char) one = (vector signed char)(-1);
|
||||||
|
//mask3 = vec_perm((vector unsigned char)(0),(vector unsigned char)(-1),align);
|
||||||
|
|
||||||
|
//LSQ3 = vec_ld(0,pM);
|
||||||
|
LSQ4 = vec_ld(0,pAdapt);
|
||||||
|
|
||||||
|
if (nDirection < 0)
|
||||||
|
{
|
||||||
|
while (nOrder--)
|
||||||
|
{
|
||||||
|
|
||||||
|
v1 = vec_ld(0,pM);
|
||||||
|
LSQ2 = vec_ld(16,pAdapt);
|
||||||
|
v2 = vec_perm(LSQ4,LSQ2,mask2);
|
||||||
|
v1 = vec_add(v1,v2);
|
||||||
|
vec_st(v1,0,pM);
|
||||||
|
|
||||||
|
/*
|
||||||
|
v1 = vec_perm(v1,v1,align);
|
||||||
|
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ3,0,pM);
|
||||||
|
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ4,16,pM);
|
||||||
|
*/
|
||||||
|
|
||||||
|
v1 = vec_ld(16,pM);
|
||||||
|
LSQ4 = vec_ld(32,pAdapt);
|
||||||
|
v2 = vec_perm(LSQ2,LSQ4,mask2);
|
||||||
|
v1 = vec_add(v1,v2);
|
||||||
|
vec_st(v1,16,pM);
|
||||||
|
|
||||||
|
/*
|
||||||
|
v1 = vec_perm(v1,v1,align);
|
||||||
|
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ,16,pM);
|
||||||
|
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ2,32,pM);
|
||||||
|
*/
|
||||||
|
|
||||||
|
//memcpy(pM,buffer,32);
|
||||||
|
pM = pM + 16;
|
||||||
|
pAdapt = pAdapt + 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (nDirection > 0)
|
||||||
|
{
|
||||||
|
while (nOrder--)
|
||||||
|
{
|
||||||
|
|
||||||
|
v1 = vec_ld(0,pM);
|
||||||
|
LSQ2 = vec_ld(16,pAdapt);
|
||||||
|
v2 = vec_perm(LSQ4,LSQ2,mask2);
|
||||||
|
v1 = vec_sub(v1,v2);
|
||||||
|
vec_st(v1,0,pM);
|
||||||
|
|
||||||
|
/*
|
||||||
|
v1 = vec_perm(v1,v1,align);
|
||||||
|
LSQ3 = vec_sel(LSQ3,v1,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ3,0,pM);
|
||||||
|
LSQ4 = vec_sel(v1,LSQ,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ4,16,pM);
|
||||||
|
*/
|
||||||
|
|
||||||
|
v1 = vec_ld(16,pM);
|
||||||
|
LSQ4 = vec_ld(32,pAdapt);
|
||||||
|
v2 = vec_perm(LSQ2,LSQ4,mask2);
|
||||||
|
v1 = vec_sub(v1,v2);
|
||||||
|
vec_st(v1,16,pM);
|
||||||
|
|
||||||
|
/*
|
||||||
|
v1 = vec_perm(v1,v1,align);
|
||||||
|
LSQ = vec_sel(LSQ,v1,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ,16,pM);
|
||||||
|
LSQ2 = vec_sel(v1,LSQ3,(vector unsigned short)mask3);
|
||||||
|
vec_st(LSQ2,32,pM);
|
||||||
|
*/
|
||||||
|
|
||||||
|
//memcpy(pM,buffer,32);
|
||||||
|
pM = pM + 16;
|
||||||
|
pAdapt = pAdapt + 16;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
|
||||||
|
{
|
||||||
|
vector signed short LSQ, LSQ3, v1, v2;
|
||||||
|
vector unsigned char mask1;
|
||||||
|
|
||||||
|
vector signed int vzero = (vector signed int)(0);
|
||||||
|
vector signed int sum = (vector signed int)(0);
|
||||||
|
// sum = vec_xor(sum,sum);
|
||||||
|
|
||||||
|
//int nDotProduct;
|
||||||
|
int p[4];
|
||||||
|
nOrder >>= 4;
|
||||||
|
|
||||||
|
mask1 = vec_lvsl(0,pA);
|
||||||
|
//mask2 = vec_lvsl(0,pB);
|
||||||
|
|
||||||
|
|
||||||
|
LSQ3 = vec_ld(0, pA);
|
||||||
|
//LSQ4 = vec_ld(0, pB);
|
||||||
|
|
||||||
|
while (nOrder--)
|
||||||
|
{
|
||||||
|
|
||||||
|
LSQ = vec_ld(16,pA);
|
||||||
|
v1 = vec_perm(LSQ3,LSQ,mask1);
|
||||||
|
v2 = vec_ld(0,pB);
|
||||||
|
sum = vec_msum(v1,v2,sum);
|
||||||
|
|
||||||
|
LSQ3 = vec_ld(32,pA);
|
||||||
|
v1 = vec_perm(LSQ,LSQ3,mask1);
|
||||||
|
v2 = vec_ld(16,pB);
|
||||||
|
sum = vec_msum(v1,v2,sum);
|
||||||
|
|
||||||
|
pA = pA + 16;
|
||||||
|
pB = pB + 16;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sum = vec_sums(sum,vzero);
|
||||||
|
vec_st(sum,0,p);
|
||||||
|
|
||||||
|
return p[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
void CNNFilter::AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||||
|
{
|
||||||
|
AdaptNoMMX(pM, pAdapt, nDirection, nOrder);
|
||||||
|
}
|
||||||
|
int CNNFilter::CalculateDotProductAltiVec(short * pA, short * pB, int nOrder)
|
||||||
|
{
|
||||||
|
return CalculateDotProductNoMMX(pA, pB, nOrder);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder)
|
void CNNFilter::AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder)
|
||||||
{
|
{
|
||||||
nOrder >>= 4;
|
nOrder >>= 4;
|
||||||
|
|
|
@ -22,6 +22,7 @@ private:
|
||||||
int m_nShift;
|
int m_nShift;
|
||||||
int m_nVersion;
|
int m_nVersion;
|
||||||
BOOL m_bMMXAvailable;
|
BOOL m_bMMXAvailable;
|
||||||
|
int m_AltiVecAvailable;
|
||||||
int m_nRunningAverage;
|
int m_nRunningAverage;
|
||||||
|
|
||||||
CRollBuffer<short> m_rbInput;
|
CRollBuffer<short> m_rbInput;
|
||||||
|
@ -36,6 +37,9 @@ private:
|
||||||
|
|
||||||
inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder);
|
inline int CalculateDotProductNoMMX(short * pA, short * pB, int nOrder);
|
||||||
inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder);
|
inline void AdaptNoMMX(short * pM, short * pAdapt, int nDirection, int nOrder);
|
||||||
|
int CalculateDotProductAltiVec(short * pA, short * pB, int nOrder);
|
||||||
|
void AdaptAltiVec(short * pM, short * pAdapt, int nDirection, int nOrder);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // #ifndef APE_NNFILTER_H
|
#endif // #ifndef APE_NNFILTER_H
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#include "GlobalFunctions.h"
|
#include "GlobalFunctions.h"
|
||||||
#include "IO.h"
|
#include "IO.h"
|
||||||
#include "CharacterHelper.h"
|
#include "CharacterHelper.h"
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#ifndef __GNUC_IA32__
|
#ifndef __GNUC_IA32__
|
||||||
|
@ -38,6 +39,19 @@ extern "C" BOOL GetMMXAvailable(void)
|
||||||
#endif // #ifndef __GNUC_IA32__
|
#endif // #ifndef __GNUC_IA32__
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
 * Reports whether the host CPU advertises a vector unit.
 *
 * Asks the kernel for the { CTL_HW, HW_VECTORUNIT } sysctl value.
 *
 * Returns 1 when the query succeeds and the reported value is non-zero.
 * Returns 0 when the value is zero, when sysctl() fails, or when the
 * platform headers do not define the selectors at all.
 *
 * The function writes nothing to stdout: it runs every time a CNNFilter is
 * constructed, and library code must not pollute the host program's output.
 */
int IsAltiVecAvailable( void )
{
#if defined(CTL_HW) && defined(HW_VECTORUNIT)
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);

    // only trust hasVectorUnit once the query itself has succeeded
    if (sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0) != 0)
        return 0;

    return hasVectorUnit != 0;
#else
    // selector not known on this platform: report "no vector unit"
    return 0;
#endif
}
|
||||||
|
|
||||||
int ReadSafe(CIO * pIO, void * pBuffer, int nBytes)
|
int ReadSafe(CIO * pIO, void * pBuffer, int nBytes)
|
||||||
{
|
{
|
||||||
unsigned int nBytesRead = 0;
|
unsigned int nBytesRead = 0;
|
||||||
|
|
|
@ -6,6 +6,8 @@ Definitions
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
class CIO;
|
class CIO;
|
||||||
|
|
||||||
|
int IsAltiVecAvailable( void );
|
||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
Read / Write from an IO source and return failure if the number of bytes specified
|
Read / Write from an IO source and return failure if the number of bytes specified
|
||||||
isn't read or written
|
isn't read or written
|
||||||
|
|
Loading…
Reference in a new issue