/modules/features2d/src/opencl/fast.cl

https://github.com/magtar/opencv · OpenCL · 162 lines · 139 code · 23 blank · 0 comment · 0 complexity · f863a74f11078e9024131741c401eb48 MD5 · raw file

  1. // OpenCL port of the FAST corner detector.
  2. // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
  // Computes the FAST corner score of the pixel at img[0].
  //
  // The 16 pixels on the radius-3 ring around img[0] are compared with the
  // centre value. For every run of 9 consecutive ring positions the smallest
  // and the largest (centre - ring) difference are taken; the result is
  //     max(0, largest run minimum, -(smallest run maximum)) - 1.
  //
  // img  - pointer to the centre pixel; pixels up to 3 rows and 3 columns away
  //        in every direction are read.
  // step - distance between vertically adjacent pixels, in uchar elements.
  inline int cornerScore(__global const uchar* img, int step)
  {
      const int center = img[0];

      // Offsets of ring positions 0..7. Positions 8..15 are the point
      // reflections through the centre, i.e. the negated offsets.
      const int ring[8] = {
          3,
          -step + 3,
          -step*2 + 2,
          -step*3 + 1,
          -step*3,
          -step*3 - 1,
          -step*2 - 2,
          -step - 3
      };

      int diff[16];
      #pragma unroll
      for (int n = 0; n < 8; ++n)
      {
          diff[n]     = center - img[ring[n]];
          diff[n + 8] = center - img[-ring[n]];
      }

      // Largest value that some 9-run of differences stays at or above
      // (never less than 0).
      int best = 0;
      #pragma unroll
      for (int k = 0; k < 16; k += 2)
      {
          // Minimum over positions k+1 .. k+8, shared by the two runs
          // k .. k+8 and k+1 .. k+9.
          int lo = diff[(k + 1) & 15];
          #pragma unroll
          for (int n = 2; n <= 8; ++n)
              lo = min(lo, diff[(k + n) & 15]);

          best = max(best, min(lo, diff[k]));
          best = max(best, min(lo, diff[(k + 9) & 15]));
      }

      // Smallest value that some 9-run of differences stays at or below,
      // seeded with -best so the final negation yields the larger magnitude.
      int worst = -best;
      #pragma unroll
      for (int k = 0; k < 16; k += 2)
      {
          int hi = diff[(k + 1) & 15];
          #pragma unroll
          for (int n = 2; n <= 8; ++n)
              hi = max(hi, diff[(k + n) & 15]);

          worst = min(worst, max(hi, diff[k]));
          worst = min(worst, max(hi, diff[(k + 9) & 15]));
      }

      return -worst - 1;
  }
  // Finds FAST corner candidates, one work-item per pixel.
  //
  // Work-item (gx, gy) examines the pixel in column gx + 3, row gy + 3 and
  // compares it with the 16 pixels on the radius-3 ring around it. The pixel
  // is reported when at least 9 consecutive ring pixels are all darker than
  // (centre - threshold) or all brighter than (centre + threshold).
  //
  // _img, step, img_offset - the centre pixel is read from
  //                          _img[row*step + column + img_offset].
  // img_rows, img_cols     - image size; a 3-pixel border is never examined.
  // kp_loc                 - output: kp_loc[0] is an atomically incremented
  //                          counter, followed by (column, row) pairs. The
  //                          counter is incremented for every detection, even
  //                          when the pair itself no longer fits.
  // max_keypoints          - number of (column, row) pairs that may be stored.
  // threshold              - minimum intensity difference to the centre.
  __kernel
  void FAST_findKeypoints(
      __global const uchar * _img, int step, int img_offset,
      int img_rows, int img_cols,
      volatile __global int* kp_loc,
      int max_keypoints, int threshold )
  {
      const int j = get_global_id(0) + 3;   // column
      const int i = get_global_id(1) + 3;   // row

      if (i >= img_rows - 3 || j >= img_cols - 3)
          return;

      __global const uchar* img = _img + mad24(i, step, j + img_offset);
      const int v  = img[0];
      const int t0 = v - threshold;         // ring pixels below this are "darker"
      const int t1 = v + threshold;         // ring pixels above this are "brighter"

      // Bit n of m0 / m1 is set when ring pixel n is darker / brighter.
      int m0 = 0, m1 = 0;

      // Classifies ring pixel `idx` (at offset `ofs`) and its opposite pixel
      // `idx + 8` (at offset `-ofs`), updating both masks.
      #define UPDATE_MASK(idx, ofs) \
          do { \
              const int tofs_ = (ofs); \
              const int v0_ = img[tofs_]; \
              const int v1_ = img[-tofs_]; \
              m0 |= ((v0_ < t0) << (idx)) | ((v1_ < t0) << (8 + (idx))); \
              m1 |= ((v0_ > t1) << (idx)) | ((v1_ > t1) << (8 + (idx))); \
          } while (0)

      // Any 9 consecutive ring positions contain position 0 or position 8,
      // so if neither of them passes the threshold there is no corner here.
      UPDATE_MASK(0, 3);
      if ((m0 | m1) == 0)
          return;

      UPDATE_MASK(2, -step*2+2);
      UPDATE_MASK(4, -step*3);
      UPDATE_MASK(6, -step*2-2);

      // Same argument for each opposite pair: a 9-run contains n or n + 8.
      // Checked here for the even pairs only (bits 0, 2, 4 and 6).
      const int evenPairs = 1 + 4 + 16 + 64;
      if (((m0 | (m0 >> 8)) & evenPairs) != evenPairs &&
          ((m1 | (m1 >> 8)) & evenPairs) != evenPairs)
          return;

      UPDATE_MASK(1, -step+3);
      UPDATE_MASK(3, -step*3+1);
      UPDATE_MASK(5, -step*3-1);
      UPDATE_MASK(7, -step-3);

      #undef UPDATE_MASK

      // ... and now for all eight opposite pairs.
      if (((m0 | (m0 >> 8)) & 255) != 255 &&
          ((m1 | (m1 >> 8)) & 255) != 255)
          return;

      // Duplicate the 16 ring bits into the upper half so that runs which
      // wrap around position 15 become contiguous bit ranges.
      m0 |= m0 << 16;
      m1 |= m1 << 16;

      // Look for 9 consecutive set bits starting at any of the 16 positions.
      int hasArc = 0;
      #pragma unroll
      for (int start = 0; start < 16; ++start)
      {
          const int run = 511 << start;
          hasArc |= ((m0 & run) == run) | ((m1 & run) == run);
      }
      if (hasArc == 0)
          return;

      const int idx = atomic_inc(kp_loc);
      if (idx < max_keypoints)
      {
          kp_loc[1 + 2*idx] = j;
          kp_loc[2 + 2*idx] = i;
      }
  }
  105. ///////////////////////////////////////////////////////////////////////////
  106. // nonmaxSupression
  // Keeps only the keypoints whose corner score is a strict local maximum,
  // one work-item per input keypoint.
  //
  // The score of keypoint (x, y) is compared with the scores of its eight
  // neighbours. A neighbour is not evaluated (and counts as beaten) when the
  // keypoint lies within 4 pixels of the image edge on that side.
  //
  // kp_in                  - input: (x, y) of keypoint n is read from
  //                          kp_in[1 + 2*n] and kp_in[2 + 2*n].
  // kp_out                 - output: kp_out[0] is an atomically incremented
  //                          counter, followed by (x, y, score) triples. The
  //                          counter is incremented for every survivor, even
  //                          when the triple itself no longer fits.
  // _img, step, img_offset - pixel (x, y) is read from
  //                          _img[y*step + x + img_offset].
  // rows, cols             - image size used for the border tests.
  // counter                - number of keypoints to process from kp_in.
  // max_keypoints          - number of triples that may be stored in kp_out.
  __kernel
  void FAST_nonmaxSupression(
      __global const int* kp_in, volatile __global int* kp_out,
      __global const uchar * _img, int step, int img_offset,
      int rows, int cols, int counter, int max_keypoints)
  {
      const int idx = get_global_id(0);
      if (idx >= counter)
          return;

      const int x = kp_in[1 + 2*idx];
      const int y = kp_in[2 + 2*idx];
      __global const uchar* img = _img + mad24(y, step, x + img_offset);
      const int s = cornerScore(img, step);

      // Sides on which the neighbouring pixels must not be scored.
      const int nearLeft   = x < 4;
      const int nearTop    = y < 4;
      const int nearRight  = x >= cols - 4;
      const int nearBottom = y >= rows - 4;

      // First stage: left and upper neighbours. The || short-circuit keeps
      // cornerScore from being called on the guarded sides.
      int beaten = (nearLeft || s > cornerScore(img - 1, step));
      beaten    += (nearTop  || s > cornerScore(img - step, step));
      if (beaten != 2)
          return;

      // Second stage: right, lower and the four diagonal neighbours.
      beaten  = (nearRight  || s > cornerScore(img + 1, step));
      beaten += (nearBottom || s > cornerScore(img + step, step));
      beaten += (nearLeft  || nearTop    || s > cornerScore(img - step - 1, step));
      beaten += (nearRight || nearTop    || s > cornerScore(img - step + 1, step));
      beaten += (nearLeft  || nearBottom || s > cornerScore(img + step - 1, step));
      beaten += (nearRight || nearBottom || s > cornerScore(img + step + 1, step));
      if (beaten != 6)
          return;

      const int new_idx = atomic_inc(kp_out);
      if (new_idx < max_keypoints)
      {
          kp_out[1 + 3*new_idx] = x;
          kp_out[2 + 3*new_idx] = y;
          kp_out[3 + 3*new_idx] = s;
      }
  }