#include "PtexPlatform.h"
#include <cstddef>
#include "PtexUtils.h"
#include "PtexHalf.h"
#include "PtexSeparableKernel.h"
namespace {
    // The three functions below share one calling signature so they can all be
    // stored in PtexSeparableKernel::applyFunctions.  Each one walks the
    // k.uw x k.vw footprint of the kernel inside a face whose rows hold
    // k.res.u() texels, starting at texel (k.u, k.v):
    //
    //   - every row is first reduced into rowResult by pairing consecutive
    //     texels with consecutive k.ku weights (VecMult* for the first texel,
    //     VecAccum* for the remaining ones);
    //   - rowResult is then handed, together with that row's k.kv weight, to
    //     VecAccum* with `result` as destination.
    //
    // NOTE(review): the PtexUtils functors are defined elsewhere; this code
    // relies on VecMult* fully overwriting its destination (rowResult is
    // uninitialized before the call) and presumably on VecAccum* adding
    // weight-scaled values to it -- confirm against PtexUtils.h.
    //
    // All texel offsets are computed in std::ptrdiff_t: the start offset is
    // (row * row-width + column) * channels, which can exceed the range of int
    // for large faces.


    // Fixed channel count, packed texels: the distance between texels equals
    // the compile-time channel count nChan.
    //
    //   k      - kernel footprint, weights and face resolution (see above).
    //   result - destination of the per-row accumulation (nChan doubles).
    //   data   - face texels, interpreted as an array of T.
    //
    // The two trailing int arguments are ignored.
    template<class T, int nChan>
    void Apply(PtexSeparableKernel& k, double* result, void* data, int /*nChan*/, int /*nTxChan*/)
    {
        // The row loop always consumes one texel before testing for the end of
        // the row, so a non-positive extent would run past the footprint.
        if (k.uw <= 0 || k.vw <= 0) return;

        // Channel count is a compile-time constant: a plain array is enough.
        double rowResult[nChan];

        const std::ptrdiff_t rowlen  = static_cast<std::ptrdiff_t>(k.res.u()) * nChan;
        const std::ptrdiff_t datalen = static_cast<std::ptrdiff_t>(k.uw) * nChan;
        const std::ptrdiff_t rowskip = rowlen - datalen;   // end of footprint row -> start of next
        double* kvp = k.kv;
        T* p = static_cast<T*>(data)
             + (static_cast<std::ptrdiff_t>(k.v) * k.res.u() + k.u) * nChan;
        T* const pEnd = p + k.vw * rowlen;
        while (p != pEnd)
        {
            double* kup = k.ku;
            T* const pRowEnd = p + datalen;
            // First texel of the row initializes rowResult.
            PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
            p += nChan;
            // Remaining texels of the row are folded into rowResult.
            while (p != pRowEnd) {
                PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
                p += nChan;
            }
            // Fold the finished row into the caller's result with its v weight.
            PtexUtils::VecAccum<double,nChan>()(result, rowResult, *kvp++);
            p += rowskip;
        }
    }


    // Fixed channel count, strided texels: nChan channels (compile-time) are
    // processed per texel, but consecutive texels are nTxChan values apart.
    //
    //   k       - kernel footprint, weights and face resolution (see above).
    //   result  - destination of the per-row accumulation (nChan doubles).
    //   data    - face texels, interpreted as an array of T.
    //   nTxChan - number of T values stored per texel (the texel stride).
    //
    // The third (int) argument is ignored.
    template<class T, int nChan>
    void ApplyS(PtexSeparableKernel& k, double* result, void* data, int /*nChan*/, int nTxChan)
    {
        // See Apply: a non-positive extent would run past the footprint.
        if (k.uw <= 0 || k.vw <= 0) return;

        // Channel count is a compile-time constant: a plain array is enough.
        double rowResult[nChan];

        const std::ptrdiff_t rowlen  = static_cast<std::ptrdiff_t>(k.res.u()) * nTxChan;
        const std::ptrdiff_t datalen = static_cast<std::ptrdiff_t>(k.uw) * nTxChan;
        const std::ptrdiff_t rowskip = rowlen - datalen;   // end of footprint row -> start of next
        double* kvp = k.kv;
        T* p = static_cast<T*>(data)
             + (static_cast<std::ptrdiff_t>(k.v) * k.res.u() + k.u) * nTxChan;
        T* const pEnd = p + k.vw * rowlen;
        while (p != pEnd)
        {
            double* kup = k.ku;
            T* const pRowEnd = p + datalen;
            // First texel of the row initializes rowResult.
            PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
            p += nTxChan;
            // Remaining texels of the row are folded into rowResult.
            while (p != pRowEnd) {
                PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
                p += nTxChan;
            }
            // Fold the finished row into the caller's result with its v weight.
            PtexUtils::VecAccum<double,nChan>()(result, rowResult, *kvp++);
            p += rowskip;
        }
    }


    // Runtime channel count: nChan channels are processed per texel and
    // consecutive texels are nTxChan values apart.
    //
    //   k       - kernel footprint, weights and face resolution (see above).
    //   result  - destination of the per-row accumulation (nChan doubles).
    //   data    - face texels, interpreted as an array of T.
    //   nChan   - number of channels to process per texel.
    //   nTxChan - number of T values stored per texel (the texel stride).
    template<class T>
    void ApplyN(PtexSeparableKernel& k, double* result, void* data, int nChan, int nTxChan)
    {
        // See Apply for the extent check.  A non-positive channel count is
        // rejected as well: a negative one would turn into an enormous
        // allocation size below.
        if (k.uw <= 0 || k.vw <= 0 || nChan <= 0) return;

        // Channel count is only known at run time, so the scratch row comes
        // from alloca (stack storage released when this function returns).
        double* rowResult = static_cast<double*>(alloca(nChan*sizeof(double)));

        const std::ptrdiff_t rowlen  = static_cast<std::ptrdiff_t>(k.res.u()) * nTxChan;
        const std::ptrdiff_t datalen = static_cast<std::ptrdiff_t>(k.uw) * nTxChan;
        const std::ptrdiff_t rowskip = rowlen - datalen;   // end of footprint row -> start of next
        double* kvp = k.kv;
        T* p = static_cast<T*>(data)
             + (static_cast<std::ptrdiff_t>(k.v) * k.res.u() + k.u) * nTxChan;
        T* const pEnd = p + k.vw * rowlen;
        while (p != pEnd)
        {
            double* kup = k.ku;
            T* const pRowEnd = p + datalen;
            // First texel of the row initializes rowResult.
            PtexUtils::VecMultN<T>()(rowResult, p, nChan, *kup++);
            p += nTxChan;
            // Remaining texels of the row are folded into rowResult.
            while (p != pRowEnd) {
                PtexUtils::VecAccumN<T>()(rowResult, p, nChan, *kup++);
                p += nTxChan;
            }
            // Fold the finished row into the caller's result with its v weight.
            PtexUtils::VecAccumN<double>()(result, rowResult, nChan, *kvp++);
            p += rowskip;
        }
    }
}
// Table of kernel-application functions: 10 rows of 4 entries (40 in total).
// Within every row the sample type runs uint8_t, uint16_t, PtexHalf, float.
//
// Rows 0-4: ApplyN (channel count passed at run time), followed by
//           Apply<T,1> .. Apply<T,4>, which ignore both int arguments and
//           step through the data by their compile-time channel count.
// Rows 5-9: ApplyN again, followed by ApplyS<T,1> .. ApplyS<T,4>, which step
//           through the data by the nTxChan argument.
//
// NOTE(review): the expression that indexes this table is not in this file;
// presumably it selects the half (rows 0-4 vs 5-9) by whether the texel
// stride differs from the channel count, the row by channel count (row 0/5
// as the generic fallback), and the column by data type.  Confirm against the
// declaration of applyFunctions before adding or reordering entries.
PtexSeparableKernel::ApplyFn
PtexSeparableKernel::applyFunctions[] = {
// Packed variants (first half of the table).
ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
Apply<uint8_t,1>, Apply<uint16_t,1>, Apply<PtexHalf,1>, Apply<float,1>,
Apply<uint8_t,2>, Apply<uint16_t,2>, Apply<PtexHalf,2>, Apply<float,2>,
Apply<uint8_t,3>, Apply<uint16_t,3>, Apply<PtexHalf,3>, Apply<float,3>,
Apply<uint8_t,4>, Apply<uint16_t,4>, Apply<PtexHalf,4>, Apply<float,4>,
// Strided variants (second half of the table).
ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
ApplyS<uint8_t,1>, ApplyS<uint16_t,1>, ApplyS<PtexHalf,1>, ApplyS<float,1>,
ApplyS<uint8_t,2>, ApplyS<uint16_t,2>, ApplyS<PtexHalf,2>, ApplyS<float,2>,
ApplyS<uint8_t,3>, ApplyS<uint16_t,3>, ApplyS<PtexHalf,3>, ApplyS<float,3>,
ApplyS<uint8_t,4>, ApplyS<uint16_t,4>, ApplyS<PtexHalf,4>, ApplyS<float,4>,
};