/hardware/arduino/sam/system/CMSIS/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_iir_lattice_q15.c

https://github.com/sgk/Arduino · C · 403 lines · 166 code · 88 blank · 149 comment · 8 complexity · be472e713a634f05101856153b226b15 MD5 · raw file

  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 15. July 2011
  5. * $Revision: V1.0.10
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_iir_lattice_q15.c
  9. *
  10. * Description: Q15 IIR lattice filter processing function.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Version 1.0.10 2011/7/15
  15. * Big Endian support added and Merged M0 and M3/M4 Source code.
  16. *
  17. * Version 1.0.3 2010/11/29
  18. * Re-organized the CMSIS folders and updated documentation.
  19. *
  20. * Version 1.0.2 2010/11/11
  21. * Documentation updated.
  22. *
  23. * Version 1.0.1 2010/10/05
  24. * Production release and review comments incorporated.
  25. *
  26. * Version 1.0.0 2010/09/20
  27. * Production release and review comments incorporated
  28. *
  29. * Version 0.0.7 2010/06/10
  30. * Misra-C changes done
  31. * -------------------------------------------------------------------- */
  32. #include "arm_math.h"
  33. /**
  34. * @ingroup groupFilters
  35. */
  36. /**
  37. * @addtogroup IIR_Lattice
  38. * @{
  39. */
  40. /**
  41. * @brief Processing function for the Q15 IIR lattice filter.
  42. * @param[in] *S points to an instance of the Q15 IIR lattice structure.
  43. * @param[in] *pSrc points to the block of input data.
  44. * @param[out] *pDst points to the block of output data.
  45. * @param[in] blockSize number of samples to process.
  46. * @return none.
  47. *
  48. * @details
  49. * <b>Scaling and Overflow Behavior:</b>
  50. * \par
  51. * The function is implemented using a 64-bit internal accumulator.
  52. * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  53. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  54. * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  55. * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  56. * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  57. */
  58. void arm_iir_lattice_q15(
  59. const arm_iir_lattice_instance_q15 * S,
  60. q15_t * pSrc,
  61. q15_t * pDst,
  62. uint32_t blockSize)
  63. {
  64. #ifndef ARM_MATH_CM0
  65. /* Run the below code for Cortex-M4 and Cortex-M3 */
  66. q31_t fcurr, fnext, gcurr = 0, gnext; /* Temporary variables for lattice stages */
  67. q15_t gnext1, gnext2; /* Temporary variables for lattice stages */
  68. uint32_t stgCnt; /* Temporary variables for counts */
  69. q63_t acc; /* Accumlator */
  70. uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
  71. q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
  72. uint32_t numStages = S->numStages; /* number of stages */
  73. q15_t *pState; /* State pointer */
  74. q15_t *pStateCurnt; /* State current pointer */
  75. q15_t out; /* Temporary variable for output */
  76. q31_t v; /* Temporary variable for ladder coefficient */
  77. blkCnt = blockSize;
  78. pState = &S->pState[0];
  79. /* Sample processing */
  80. while(blkCnt > 0u)
  81. {
  82. /* Read Sample from input buffer */
  83. /* fN(n) = x(n) */
  84. fcurr = *pSrc++;
  85. /* Initialize state read pointer */
  86. px1 = pState;
  87. /* Initialize state write pointer */
  88. px2 = pState;
  89. /* Set accumulator to zero */
  90. acc = 0;
  91. /* Initialize Ladder coeff pointer */
  92. pv = &S->pvCoeffs[0];
  93. /* Initialize Reflection coeff pointer */
  94. pk = &S->pkCoeffs[0];
  95. /* Process sample for first tap */
  96. gcurr = *px1++;
  97. /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
  98. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  99. fnext = __SSAT(fnext, 16);
  100. /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
  101. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  102. gnext = __SSAT(gnext, 16);
  103. /* write gN(n) into state for next sample processing */
  104. *px2++ = (q15_t) gnext;
  105. /* y(n) += gN(n) * vN */
  106. acc += (q31_t) ((gnext * (*pv++)));
  107. /* Update f values for next coefficient processing */
  108. fcurr = fnext;
  109. /* Loop unrolling. Process 4 taps at a time. */
  110. tapCnt = (numStages - 1u) >> 2;
  111. while(tapCnt > 0u)
  112. {
  113. /* Process sample for 2nd, 6th ...taps */
  114. /* Read gN-2(n-1) from state buffer */
  115. gcurr = *px1++;
  116. /* Process sample for 2nd, 6th .. taps */
  117. /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
  118. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  119. fnext = __SSAT(fnext, 16);
  120. /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
  121. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  122. gnext1 = (q15_t) __SSAT(gnext, 16);
  123. /* write gN-1(n) into state */
  124. *px2++ = (q15_t) gnext1;
  125. /* Process sample for 3nd, 7th ...taps */
  126. /* Read gN-3(n-1) from state */
  127. gcurr = *px1++;
  128. /* Process sample for 3rd, 7th .. taps */
  129. /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
  130. fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
  131. fcurr = __SSAT(fcurr, 16);
  132. /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
  133. gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
  134. gnext2 = (q15_t) __SSAT(gnext, 16);
  135. /* write gN-2(n) into state */
  136. *px2++ = (q15_t) gnext2;
  137. /* Read vN-1 and vN-2 at a time */
  138. v = *__SIMD32(pv)++;
  139. /* Pack gN-1(n) and gN-2(n) */
  140. #ifndef ARM_MATH_BIG_ENDIAN
  141. gnext = __PKHBT(gnext1, gnext2, 16);
  142. #else
  143. gnext = __PKHBT(gnext2, gnext1, 16);
  144. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  145. /* y(n) += gN-1(n) * vN-1 */
  146. /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
  147. /* y(n) += gN-2(n) * vN-2 */
  148. /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
  149. acc = __SMLALD(gnext, v, acc);
  150. /* Process sample for 4th, 8th ...taps */
  151. /* Read gN-4(n-1) from state */
  152. gcurr = *px1++;
  153. /* Process sample for 4th, 8th .. taps */
  154. /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
  155. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  156. fnext = __SSAT(fnext, 16);
  157. /* gN-3(n) = kN-3 * fN-1(n) + gN-1(n-1) */
  158. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  159. gnext1 = (q15_t) __SSAT(gnext, 16);
  160. /* write gN-3(n) for the next sample process */
  161. *px2++ = (q15_t) gnext1;
  162. /* Process sample for 5th, 9th ...taps */
  163. /* Read gN-5(n-1) from state */
  164. gcurr = *px1++;
  165. /* Process sample for 5th, 9th .. taps */
  166. /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
  167. fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
  168. fcurr = __SSAT(fcurr, 16);
  169. /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
  170. gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
  171. gnext2 = (q15_t) __SSAT(gnext, 16);
  172. /* write gN-4(n) for the next sample process */
  173. *px2++ = (q15_t) gnext2;
  174. /* Read vN-3 and vN-4 at a time */
  175. v = *__SIMD32(pv)++;
  176. /* Pack gN-3(n) and gN-4(n) */
  177. #ifndef ARM_MATH_BIG_ENDIAN
  178. gnext = __PKHBT(gnext1, gnext2, 16);
  179. #else
  180. gnext = __PKHBT(gnext2, gnext1, 16);
  181. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  182. /* y(n) += gN-4(n) * vN-4 */
  183. /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
  184. /* y(n) += gN-3(n) * vN-3 */
  185. /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
  186. acc = __SMLALD(gnext, v, acc);
  187. tapCnt--;
  188. }
  189. fnext = fcurr;
  190. /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  191. tapCnt = (numStages - 1u) % 0x4u;
  192. while(tapCnt > 0u)
  193. {
  194. gcurr = *px1++;
  195. /* Process sample for last taps */
  196. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  197. fnext = __SSAT(fnext, 16);
  198. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  199. gnext = __SSAT(gnext, 16);
  200. /* Output samples for last taps */
  201. acc += (q31_t) (((q31_t) gnext * (*pv++)));
  202. *px2++ = (q15_t) gnext;
  203. fcurr = fnext;
  204. tapCnt--;
  205. }
  206. /* y(n) += g0(n) * v0 */
  207. acc += (q31_t) (((q31_t) fnext * (*pv++)));
  208. out = (q15_t) __SSAT(acc >> 15, 16);
  209. *px2++ = (q15_t) fnext;
  210. /* write out into pDst */
  211. *pDst++ = out;
  212. /* Advance the state pointer by 4 to process the next group of 4 samples */
  213. pState = pState + 1u;
  214. blkCnt--;
  215. }
  216. /* Processing is complete. Now copy last S->numStages samples to start of the buffer
  217. for the preperation of next frame process */
  218. /* Points to the start of the state buffer */
  219. pStateCurnt = &S->pState[0];
  220. pState = &S->pState[blockSize];
  221. stgCnt = (numStages >> 2u);
  222. /* copy data */
  223. while(stgCnt > 0u)
  224. {
  225. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  226. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  227. /* Decrement the loop counter */
  228. stgCnt--;
  229. }
  230. /* Calculation of count for remaining q15_t data */
  231. stgCnt = (numStages) % 0x4u;
  232. /* copy data */
  233. while(stgCnt > 0u)
  234. {
  235. *pStateCurnt++ = *pState++;
  236. /* Decrement the loop counter */
  237. stgCnt--;
  238. }
  239. #else
  240. /* Run the below code for Cortex-M0 */
  241. q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */
  242. uint32_t stgCnt; /* Temporary variables for counts */
  243. q63_t acc; /* Accumlator */
  244. uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
  245. q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
  246. uint32_t numStages = S->numStages; /* number of stages */
  247. q15_t *pState; /* State pointer */
  248. q15_t *pStateCurnt; /* State current pointer */
  249. q15_t out; /* Temporary variable for output */
  250. blkCnt = blockSize;
  251. pState = &S->pState[0];
  252. /* Sample processing */
  253. while(blkCnt > 0u)
  254. {
  255. /* Read Sample from input buffer */
  256. /* fN(n) = x(n) */
  257. fcurr = *pSrc++;
  258. /* Initialize state read pointer */
  259. px1 = pState;
  260. /* Initialize state write pointer */
  261. px2 = pState;
  262. /* Set accumulator to zero */
  263. acc = 0;
  264. /* Initialize Ladder coeff pointer */
  265. pv = &S->pvCoeffs[0];
  266. /* Initialize Reflection coeff pointer */
  267. pk = &S->pkCoeffs[0];
  268. tapCnt = numStages;
  269. while(tapCnt > 0u)
  270. {
  271. gcurr = *px1++;
  272. /* Process sample */
  273. /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
  274. fnext = fcurr - ((gcurr * (*pk)) >> 15);
  275. fnext = __SSAT(fnext, 16);
  276. /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
  277. gnext = ((fnext * (*pk++)) >> 15) + gcurr;
  278. gnext = __SSAT(gnext, 16);
  279. /* Output samples */
  280. /* y(n) += gN(n) * vN */
  281. acc += (q31_t) ((gnext * (*pv++)));
  282. /* write gN(n) into state for next sample processing */
  283. *px2++ = (q15_t) gnext;
  284. /* Update f values for next coefficient processing */
  285. fcurr = fnext;
  286. tapCnt--;
  287. }
  288. /* y(n) += g0(n) * v0 */
  289. acc += (q31_t) ((fnext * (*pv++)));
  290. out = (q15_t) __SSAT(acc >> 15, 16);
  291. *px2++ = (q15_t) fnext;
  292. /* write out into pDst */
  293. *pDst++ = out;
  294. /* Advance the state pointer by 1 to process the next group of samples */
  295. pState = pState + 1u;
  296. blkCnt--;
  297. }
  298. /* Processing is complete. Now copy last S->numStages samples to start of the buffer
  299. for the preperation of next frame process */
  300. /* Points to the start of the state buffer */
  301. pStateCurnt = &S->pState[0];
  302. pState = &S->pState[blockSize];
  303. stgCnt = numStages;
  304. /* copy data */
  305. while(stgCnt > 0u)
  306. {
  307. *pStateCurnt++ = *pState++;
  308. /* Decrement the loop counter */
  309. stgCnt--;
  310. }
  311. #endif /* #ifndef ARM_MATH_CM0 */
  312. }
  313. /**
  314. * @} end of IIR_Lattice group
  315. */