/* Excerpt of a block-processing routine that works on a state buffer and a
 * coefficient array taken from the instance S.
 * NOTE(review): the function name, braces, loop headers and most of the inner
 * loop bodies are missing from this listing; the comments below describe only
 * the statements that are shown. */
/* pSrc is read sample by sample further down; pDst is presumably the output
 * buffer, but no write to it is visible in this excerpt -- TODO confirm. */
122 ne10_float32_t * pSrc,
123 ne10_float32_t * pDst,
124 ne10_uint32_t blockSize)
/* Local copies of the state and coefficient pointers held in S. */
127 ne10_float32_t *pState = S->
pState;
128 ne10_float32_t *pCoeffs = S->
pCoeffs;
129 ne10_float32_t *pStateCurnt;
130 ne10_float32_t *px, *pb;
131 ne10_uint32_t numTaps = S->
numTaps;
132 ne10_uint32_t i, tapCnt, blkCnt;
/* Four accumulators and four sample temporaries: samples are handled in
 * groups of four (see blkCnt below). */
136 ne10_float32_t acc0, acc1, acc2, acc3;
137 ne10_float32_t x0, x1, x2, x3, c0;
/* Write cursor starts at state element (numTaps - 1); the input samples
 * copied below are stored from there onward. */
142 pStateCurnt = & (S->
pState[ (numTaps - 1u)]);
/* Number of complete groups of four samples in the block. */
152 blkCnt = blockSize >> 2;
/* Copy four input samples into the state buffer. */
159 *pStateCurnt++ = *pSrc++;
160 *pStateCurnt++ = *pSrc++;
161 *pStateCurnt++ = *pSrc++;
162 *pStateCurnt++ = *pSrc++;
/* Number of complete groups of four taps. */
182 tapCnt = numTaps >> 2u;
/* Taps left over after the groups of four. */
246 tapCnt = numTaps % 0x4u;
/* Samples left over after the groups of four (0..3). */
285 blkCnt = blockSize % 0x4u;
/* Copy one input sample into the state buffer. */
290 *pStateCurnt++ = *pSrc++;
/* One multiply-accumulate into acc0; both px and pb advance by one element.
 * NOTE(review): the assignments of px and pb are not visible here --
 * presumably state and coefficient cursors; confirm against the full file. */
306 acc0 += *px++ * *pb++;
/* Copy (numTaps - 1) state values from pState to pStateCurnt, four per group.
 * NOTE(review): any re-pointing of pStateCurnt before this copy is not shown
 * in this excerpt. */
328 tapCnt = (numTaps - 1u) >> 2u;
333 *pStateCurnt++ = *pState++;
334 *pStateCurnt++ = *pState++;
335 *pStateCurnt++ = *pState++;
336 *pStateCurnt++ = *pState++;
/* Remaining (numTaps - 1) % 4 state values, copied one at a time. */
343 tapCnt = (numTaps - 1u) % 0x4u;
348 *pStateCurnt++ = *pState++;
/* Excerpt of a block-processing routine whose output count is the input block
 * size divided by the instance factor S->M.
 * NOTE(review): the function name, braces, loop headers and most loop bodies
 * are missing from this listing; the comments describe only the visible
 * statements. */
/* pSrc is read below; pDst is presumably the output buffer -- no write to it
 * is visible in this excerpt. */
453 ne10_float32_t * pSrc,
454 ne10_float32_t * pDst,
455 ne10_uint32_t blockSize)
/* Local copies of the state and coefficient pointers held in S. */
457 ne10_float32_t *pState = S->
pState;
458 ne10_float32_t *pCoeffs = S->
pCoeffs;
459 ne10_float32_t *pStateCurnt;
460 ne10_float32_t *px, *pb;
462 ne10_float32_t x0, c0;
463 ne10_uint32_t numTaps = S->
numTaps;
/* outBlockSize is blockSize / S->M using unsigned integer division, so any
 * remainder of blockSize modulo M is discarded by this expression. */
464 ne10_uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->
M;
/* Write cursor starts at state element (numTaps - 1). */
471 pStateCurnt = S->
pState + (numTaps - 1u);
/* Loop counter initialised to the output sample count. */
474 blkCnt = outBlockSize;
/* Copy one input sample into the state buffer. */
483 *pStateCurnt++ = *pSrc++;
/* Number of complete groups of four taps. */
498 tapCnt = numTaps >> 2;
/* Taps left over after the groups of four. */
545 tapCnt = numTaps % 0x4u;
/* Advance the state read pointer by M elements. */
564 pState = pState + S->
M;
/* Copy (numTaps - 1) state values from pState to pStateCurnt, four per group.
 * NOTE(review): any re-pointing of pStateCurnt before this copy is not shown
 * in this excerpt. */
580 i = (numTaps - 1u) >> 2;
585 *pStateCurnt++ = *pState++;
586 *pStateCurnt++ = *pState++;
587 *pStateCurnt++ = *pState++;
588 *pStateCurnt++ = *pState++;
/* Remaining (numTaps - 1) % 4 state values, copied one at a time. */
594 i = (numTaps - 1u) % 0x04u;
599 *pStateCurnt++ = *pState++;
/* Excerpt of a block-processing routine that indexes the coefficient array by
 * the instance factor S->L and works with a per-phase length phaseLen.
 * NOTE(review): the function name, braces, loop headers and the declarations
 * of phaseLen, tapCnt and sum0 are missing from this listing; the comments
 * describe only the visible statements. */
/* pSrc is read below; pDst is presumably the output buffer -- no write to it
 * is visible in this excerpt. */
713 ne10_float32_t * pSrc,
714 ne10_float32_t * pDst,
715 ne10_uint32_t blockSize)
/* Local copies of the state and coefficient pointers held in S. */
717 ne10_float32_t *pState = S->
pState;
718 ne10_float32_t *pCoeffs = S->
pCoeffs;
719 ne10_float32_t *pStateCurnt;
720 ne10_float32_t *ptr1, *ptr2;
726 ne10_float32_t x0, c0;
727 ne10_uint32_t i, blkCnt, j;
/* Write cursor starts at state element (phaseLen - 1). */
733 pStateCurnt = S->
pState + (phaseLen - 1u);
/* Copy one input sample into the state buffer. */
742 *pStateCurnt++ = *pSrc++;
/* Coefficient cursor for the current j: offset (L - j) from the start of the
 * coefficient array. */
758 ptr2 = pCoeffs + (S->
L - j);
/* Number of complete groups of four taps within one phase. */
762 tapCnt = phaseLen >> 2u;
/* Taps left over after the groups of four. */
821 tapCnt = phaseLen % 0x4u;
/* One multiply-accumulate into sum0; ptr1 advances by one element here, ptr2
 * is only dereferenced in this statement (any stride applied to ptr2 is not
 * visible in this excerpt). */
826 sum0 += * (ptr1++) * (*ptr2);
/* Copy (phaseLen - 1) state values from pState to pStateCurnt, four per group.
 * NOTE(review): any re-pointing of pStateCurnt before this copy is not shown
 * in this excerpt. */
860 tapCnt = (phaseLen - 1u) >> 2u;
865 *pStateCurnt++ = *pState++;
866 *pStateCurnt++ = *pState++;
867 *pStateCurnt++ = *pState++;
868 *pStateCurnt++ = *pState++;
/* Remaining (phaseLen - 1) % 4 state values, copied one at a time. */
874 tapCnt = (phaseLen - 1u) % 0x04u;
878 *pStateCurnt++ = *pState++;
/* Excerpt of a block-processing routine that updates paired f/g values stage
 * by stage, four samples at a time, using a coefficient read through pk.
 * NOTE(review): the function name, braces, the declarations of pk, px and
 * numStages, and every load/store of state and output are missing from this
 * listing; the comments describe only the visible arithmetic. */
973 ne10_float32_t * pSrc,
974 ne10_float32_t * pDst,
975 ne10_uint32_t blockSize)
977 ne10_float32_t *pState;
978 ne10_float32_t *pCoeffs = S->
pCoeffs;
/* Per-sample working values for samples 1..4; only sample 1 has a gcurr. */
985 ne10_float32_t fcurr1, fnext1, gcurr1, gnext1;
986 ne10_float32_t fcurr2, fnext2, gnext2;
987 ne10_float32_t fcurr3, fnext3, gnext3;
988 ne10_float32_t fcurr4, fnext4, gnext4;
990 ne10_uint32_t blkCnt, stageCnt;
/* Number of complete groups of four samples in the block. */
995 blkCnt = blockSize >> 2;
/* First stage, sample 1: fnext = fcurr + k * gcurr, gnext = fcurr * k + gcurr,
 * with k = *pk. */
1018 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1020 gnext1 = (fcurr1 * (*pk)) + gcurr1;
/* First stage, samples 2..4: the same form, with the previous sample's fcurr
 * taking the place of gcurr. */
1024 fnext2 = fcurr2 + ( (*pk) * fcurr1);
1025 gnext2 = (fcurr2 * (*pk)) + fcurr1;
1037 fnext3 = fcurr3 + ( (*pk) * fcurr2);
1038 gnext3 = (fcurr3 * (*pk)) + fcurr2;
1041 fnext4 = fcurr4 + ( (*pk) * fcurr3);
/* Last use of this coefficient: pk is post-incremented here. */
1042 gnext4 = (fcurr4 * (*pk++)) + fcurr3;
/* Remaining (numStages - 1) stages are handled in groups of four. */
1051 stageCnt = (numStages - 1u) >> 2u;
1057 while (stageCnt > 0u)
/* Group 1 of 4: fnext* computed from fcurr*; each sample uses the preceding
 * sample's gnext value as it stood before this group's g update. */
1068 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1071 fnext2 = fcurr2 + ( (*pk) * gnext1);
1073 fnext3 = fcurr3 + ( (*pk) * gnext2);
1075 fnext4 = fcurr4 + ( (*pk) * gnext3);
/* g values are updated from 4 down to 1 so each reads the lower-index gnext
 * before it is overwritten; pk advances on the last statement. */
1079 gnext4 = (fcurr4 * (*pk)) + gnext3;
1080 gnext3 = (fcurr3 * (*pk)) + gnext2;
1081 gnext2 = (fcurr2 * (*pk)) + gnext1;
1082 gnext1 = (fcurr1 * (*pk++)) + gcurr1;
/* Group 2 of 4: roles swap -- fcurr* are computed from fnext*. */
1094 fcurr1 = fnext1 + ( (*pk) * gcurr1);
1096 fcurr2 = fnext2 + ( (*pk) * gnext1);
1098 fcurr3 = fnext3 + ( (*pk) * gnext2);
1100 fcurr4 = fnext4 + ( (*pk) * gnext3);
1104 gnext4 = (fnext4 * (*pk)) + gnext3;
1105 gnext3 = (fnext3 * (*pk)) + gnext2;
1106 gnext2 = (fnext2 * (*pk)) + gnext1;
1107 gnext1 = (fnext1 * (*pk++)) + gcurr1;
/* Group 3 of 4: same form as group 1 (fnext* from fcurr*). */
1119 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1122 fnext2 = fcurr2 + ( (*pk) * gnext1);
1124 fnext3 = fcurr3 + ( (*pk) * gnext2);
1126 fnext4 = fcurr4 + ( (*pk) * gnext3);
1130 gnext4 = (fcurr4 * (*pk)) + gnext3;
1131 gnext3 = (fcurr3 * (*pk)) + gnext2;
1132 gnext2 = (fcurr2 * (*pk)) + gnext1;
1133 gnext1 = (fcurr1 * (*pk++)) + gcurr1;
/* Group 4 of 4: same form as group 2 (fcurr* from fnext*). */
1144 fcurr1 = fnext1 + ( (*pk) * gcurr1);
1146 fcurr2 = fnext2 + ( (*pk) * gnext1);
1148 fcurr3 = fnext3 + ( (*pk) * gnext2);
1150 fcurr4 = fnext4 + ( (*pk) * gnext3);
1154 gnext4 = (fnext4 * (*pk)) + gnext3;
1155 gnext3 = (fnext3 * (*pk)) + gnext2;
1156 gnext2 = (fnext2 * (*pk)) + gnext1;
1157 gnext1 = (fnext1 * (*pk++)) + gcurr1;
/* Stages left over after the groups of four, processed one stage per pass. */
1163 stageCnt = (numStages - 1u) % 0x4u;
1165 while (stageCnt > 0u)
1173 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1174 fnext2 = fcurr2 + ( (*pk) * gnext1);
1175 fnext3 = fcurr3 + ( (*pk) * gnext2);
1176 fnext4 = fcurr4 + ( (*pk) * gnext3);
1179 gnext4 = (fcurr4 * (*pk)) + gnext3;
1180 gnext3 = (fcurr3 * (*pk)) + gnext2;
1181 gnext2 = (fcurr2 * (*pk)) + gnext1;
1182 gnext1 = (fcurr1 * (*pk++)) + gcurr1;
/* Samples left over after the groups of four (0..3), processed singly. */
1206 blkCnt = blockSize % 0x4u;
/* Single-sample first stage; pk advances after the g update. */
1224 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1226 gnext1 = (fcurr1 * (*pk++)) + gcurr1;
/* Single-sample path walks all remaining (numStages - 1) stages one by one. */
1235 stageCnt = (numStages - 1u);
1238 while (stageCnt > 0u)
1248 fnext1 = fcurr1 + ( (*pk) * gcurr1);
1250 gnext1 = (fcurr1 * (*pk++)) + gcurr1;
/* Excerpt of a block-processing routine that writes the input block into a
 * circular state buffer and, per tap, reads a delayed block into a scratch
 * buffer and scales it by that tap's coefficient.
 * NOTE(review): the function name, braces, loop headers, conditionals and the
 * declarations of px and pTapDelay are missing from this listing, and the
 * excerpt ends mid-function; the comments describe only visible statements. */
1440 ne10_float32_t * pSrc,
1441 ne10_float32_t * pDst,
1442 ne10_float32_t * pScratchIn,
1443 ne10_uint32_t blockSize)
/* Local copies of the state and coefficient pointers held in S. */
1446 ne10_float32_t *pState = S->
pState;
1447 ne10_float32_t *pCoeffs = S->
pCoeffs;
/* py aliases the state buffer; pb aliases the caller-supplied scratch buffer. */
1449 ne10_float32_t *py = pState;
1450 ne10_float32_t *pb = pScratchIn;
1451 ne10_float32_t *pOut;
/* Length passed to the circular read/write helpers: maxDelay + blockSize. */
1453 ne10_uint32_t delaySize = S->
maxDelay + blockSize;
1454 ne10_uint16_t numTaps = S->
numTaps;
1455 ne10_int32_t readIndex;
1456 ne10_uint32_t tapCnt, blkCnt;
/* Fetch the first coefficient and advance the coefficient pointer. */
1457 ne10_float32_t coeff = *pCoeffs++;
/* Write blockSize input samples into the state buffer through the circular
 * write helper, which is handed &S->stateIndex. The float buffers are passed
 * via casts to ne10_int32_t *; the helper's signature is not visible here. */
1463 ne10_circular_write_float ( (ne10_int32_t *) py, delaySize, &S->
stateIndex, 1,
1464 (ne10_int32_t *) pSrc, 1, blockSize);
/* Read position for the first tap: stateIndex - blockSize - tap delay, in
 * signed arithmetic; pTapDelay advances to the next tap's delay. */
1468 readIndex = ( (ne10_int32_t) S->
stateIndex - (ne10_int32_t) blockSize) - *pTapDelay++;
/* Shift readIndex up by delaySize. NOTE(review): presumably guarded by a
 * negative-index check that is not visible in this excerpt -- confirm. */
1473 readIndex += (ne10_int32_t) delaySize;
/* Circular read into the scratch buffer pb; the trailing argument(s) of this
 * call are cut off in the listing. */
1480 ne10_circular_read_float ( (ne10_int32_t *) py, delaySize, &readIndex, 1,
1481 (ne10_int32_t *) pb, (ne10_int32_t *) pb, blockSize, 1,
/* First tap: output = sample * coeff (plain store), four samples per group. */
1495 blkCnt = blockSize >> 2u;
1500 *pOut++ = *px++ * coeff;
1501 *pOut++ = *px++ * coeff;
1502 *pOut++ = *px++ * coeff;
1503 *pOut++ = *px++ * coeff;
/* Remaining blockSize % 4 samples of the first tap. */
1511 blkCnt = blockSize % 0x4u;
1516 *pOut++ = *px++ * coeff;
/* Read position for the next tap, computed the same way as above. */
1527 readIndex = ( (ne10_int32_t) S->
stateIndex - (ne10_int32_t) blockSize) - *pTapDelay++;
1532 readIndex += (ne10_int32_t) delaySize;
/* One tap has already been handled above; numTaps - 1 remain. */
1536 tapCnt = (ne10_uint32_t) numTaps - 1u;
1545 ne10_circular_read_float ( (ne10_int32_t *) py, delaySize, &readIndex, 1,
1546 (ne10_int32_t *) pb, (ne10_int32_t *) pb, blockSize, 1,
/* Subsequent taps: sample * coeff is accumulated (+=) into the existing
 * output values instead of overwriting them. */
1557 blkCnt = blockSize >> 2u;
1562 *pOut++ += *px++ * coeff;
1563 *pOut++ += *px++ * coeff;
1564 *pOut++ += *px++ * coeff;
1565 *pOut++ += *px++ * coeff;
/* Remaining blockSize % 4 samples of this tap. */
1573 blkCnt = blockSize % 0x4u;
1578 *pOut++ += *px++ * coeff;
1590 (ne10_int32_t) blockSize) - *pTapDelay++;
1595 readIndex += (ne10_int32_t) delaySize;