/ocr/ocrservice/jni/opticalflow/optical_flow.h

http://eyes-free.googlecode.com/ · C Header · 291 lines · 143 code · 67 blank · 81 comment · 6 complexity · 6bac870daae3242af61ecdeb059402c8 MD5 · raw file

  1. // Copyright 2009 Google Inc. All Rights Reserved.
  2. // Author: andrewharp@google.com (Andrew Harp)
  3. #ifndef JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_
  4. #define JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_
  5. #include "types.h"
  6. #include "utils.h"
  7. // Feature detection.
  8. #define MAX_TEMP_FEATURES 4096
  9. #define MAX_FEATURES 128
  10. // Number of floats each feature takes up when exporting to an array.
  11. #define FEATURE_STEP 7
  12. // Number of frame deltas to keep around in the circular queue.
  13. #define NUM_FRAMES 128
  14. // Redetect if we ever have less than this number of features.
  15. #define MIN_FEATURES 6
  16. // How long to wait between forcing complete feature regeneration.
  17. #define REGEN_FEATURES_MS 400
  18. // Number of iterations to do tracking on each feature at each pyramid level.
  19. #define NUM_ITERATIONS 3
  20. // Number of pyramid levels used for tracking.
  21. #define NUM_LEVELS 4
  22. // Window size to integrate over to find local image derivative.
  23. #define WINDOW_SIZE 3
  24. // Total area of integration windows.
  25. #define ARRAY_SIZE (2 * WINDOW_SIZE + 1) * (2 * WINDOW_SIZE + 1)
  26. // Error that's considered good enough to early abort tracking.
  27. #define THRESHOLD 0.03f
  28. // Maximum number of deviations a feature delta can be from the weighted
  29. // average before being thrown out for region-based queries.
  30. #define NUM_DEVIATIONS 2.0f
  31. // Resolution of feature grid to seed features with.
  32. #define FEATURE_GRID_WIDTH 4
  33. #define FEATURE_GRID_HEIGHT 3
  34. // Whether to normalize feature windows for intensity.
  35. #define NORMALIZE
  36. namespace flow {
  37. template <typename T>
  38. class Image;
  39. // Class that encapsulates all bulky processed data for a frame.
  40. class ImageData {
  41. public:
  42. explicit ImageData(Size size) {
  43. timestamp_ = 0;
  44. image_ = new Image<uint8>(size);
  45. for (int32 i = 0; i < NUM_LEVELS; ++i) {
  46. pyramid_[i] = (i == 0) ? image_ : new Image<uint8>(size);
  47. spatial_x_[i] = new Image<int32>(size);
  48. spatial_y_[i] = new Image<int32>(size);
  49. size.width /= 2;
  50. size.height /= 2;
  51. }
  52. }
  53. ~ImageData() {
  54. // image_ will be deleted along with the rest of the pyramids.
  55. for (int32 i = 0; i < NUM_LEVELS; ++i) {
  56. SAFE_DELETE(pyramid_[i]);
  57. SAFE_DELETE(spatial_x_[i]);
  58. SAFE_DELETE(spatial_y_[i]);
  59. }
  60. }
  61. void init(const uint8* const new_frame, const int32 stride,
  62. const clock_t timestamp, const int32 downsample_factor_) {
  63. timestamp_ = timestamp;
  64. image_->fromArray(new_frame, stride, downsample_factor_);
  65. timeLog("Downsampled image");
  66. // Create the smoothed pyramids.
  67. computeSmoothedPyramid(*image_, NUM_LEVELS, pyramid_);
  68. timeLog("Created smoothed pyramids");
  69. // Create the spatial derivatives for frame 1.
  70. computeSpatialPyramid((const Image<uint8>**)pyramid_,
  71. NUM_LEVELS, spatial_x_, spatial_y_);
  72. timeLog("Created spatial derivatives");
  73. }
  74. clock_t timestamp_;
  75. Image<uint8>* image_;
  76. Image<uint8>* pyramid_[NUM_LEVELS];
  77. Image<int32>* spatial_x_[NUM_LEVELS];
  78. Image<int32>* spatial_y_[NUM_LEVELS];
  79. };
// A class that records a timestamped frame's features and their
// translation deltas for optical flow.
  82. class FramePair {
  83. public:
  84. // Cleans up the FramePair so that they can be reused.
  85. void init(const clock_t end_time);
  86. // Throws out outliers based on the input weighting.
  87. Point2D getWeightedDelta(const float32* const weights) const;
  88. // Weights points based on the query_point and cutoff_dist, then
  89. // returns getWeightedDelta. Essentially tells you where a point at the
  90. // beginning of a frame ends up.
  91. Point2D queryFlow(const Point2D& query_point,
  92. const float32 cutoff_dist) const;
  93. // Just count up and return the number of features from the first frame that
  94. // were found in the second frame.
  95. inline int32 countFoundFeatures() const {
  96. int32 num_found_features = 0;
  97. for (int32 i = 0; i < number_of_features_; ++i) {
  98. if (optical_flow_found_feature_[i]) {
  99. ++num_found_features;
  100. }
  101. }
  102. return num_found_features;
  103. }
  104. // The time at frame2.
  105. clock_t end_time;
  106. // This array will contain the features found in frame 1.
  107. Point2D frame1_features_[MAX_FEATURES];
  108. // Contain the locations of the points from frame 1 in frame 2.
  109. Point2D frame2_features_[MAX_FEATURES];
  110. // The number of features in frame 1.
  111. int32 number_of_features_;
  112. // Keeps track of which features were actually found from one frame
  113. // another.
  114. // The i-th element of this array will be non-zero if and only if the i-th
  115. // feature of frame 1 was found in frame 2.
  116. bool optical_flow_found_feature_[MAX_FEATURES];
  117. };
  118. // Class encapsulating all the data and logic necessary for performing optical
  119. // flow. The general order of operations on a per frame basis is:
  120. //
  121. // // Notify optical flow that a new frame is available.
  122. // nextFrame(...);
  123. //
  124. // // Tell it any regions we want it to pay special attention to.
  125. // addInterestRegion(...);
  126. //
  127. // // Have it compute the flow.
  128. // computeFlow();
  129. //
  130. // // Look up the delta from a given point at a given time to the current time.
  131. // getAccumulatedDelta(...);
  132. class OpticalFlow {
  133. public:
  134. OpticalFlow(const int32 frame_width, const int32 frame_height,
  135. const int32 downsample_factor);
  136. ~OpticalFlow();
  137. // Add a new frame to the optical flow. Will update all the non-feature
  138. // related member variables.
  139. //
  140. // new_frame should be a buffer of grayscale values, one byte per pixel,
  141. // at the original frame_width and frame_height used to initialize the
  142. // OpticalFlow object. Downsampling will be handled internally.
  143. //
  144. // time_stamp should be a time in milliseconds that later calls to this and
  145. // other methods will be relative to.
  146. void nextFrame(const uint8* const new_frame, const clock_t timestamp);
  147. // Find the features in the frame before the current frame.
  148. // If only one frame exists, features will be found in that frame.
  149. void computeFeatures(const bool cached_ok = false);
  150. // Process the most recent two frames, and fill in the feature arrays.
  151. void computeFlow();
  152. // Copy the feature arrays after computeFlow is called.
  153. // out_data should be at least MAX_FEATURES * FEATURE_STEP long.
  154. // Currently, its format is [x1 y1 found x2 y2 score] repeated N times,
  155. // where N is the number of features tracked. N is returned as the result.
  156. int32 getFeatures(const bool only_found, float32* const out_data) const;
  157. // Tells you the overall flow for region of a given radius at a given time to
  158. // the present.
  159. Point2D getAccumulatedDelta(const Point2D& position,
  160. const float radius,
  161. const clock_t timestamp) const;
  162. // Pay special attention to the area inside this box on the next
  163. // optical flow pass.
  164. void addInterestRegion(const int32 num_x, const int32 num_y,
  165. float32 left, float32 top,
  166. float32 right, float32 bottom);
  167. // Finds the correspondences for all the points in the current pair of frames.
  168. // Stores the results in the given FramePair.
  169. void findCorrespondences(FramePair* const curr_change) const;
  170. // An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
  171. bool findFlowAtPoint(const float32 u_x, const float32 u_y,
  172. float32* final_x, float32* final_y) const;
  173. void printInfo() const {
  174. #ifdef VERBOSE_LOGGING
  175. const int32 first_frame_index = geNthIndexFromStart(0);
  176. const FramePair& first_frame_pair = frame_pairs_[first_frame_index];
  177. const int32 last_frame_index = geNthIndexFromEnd(0);
  178. const FramePair& last_frame_pair = frame_pairs_[last_frame_index];
  179. LOGV ("Queue size: %d, last/first: %4d %4d: %8ld - %8ld = %8ld",
  180. num_frames_, last_frame_index, first_frame_index,
  181. last_frame_pair.end_time, first_frame_pair.end_time,
  182. last_frame_pair.end_time - first_frame_pair.end_time);
  183. #endif
  184. }
  185. private:
  186. inline int32 geNthIndexFromStart(const int32 offset) const {
  187. CHECK(offset >= 0 && offset < num_frames_,
  188. "Offset out of range! %d out of %d.", offset, num_frames_);
  189. return (first_frame_index_ + offset) % NUM_FRAMES;
  190. }
  191. inline int32 geNthIndexFromEnd(const int32 offset) const {
  192. return geNthIndexFromStart(num_frames_ - 1 - offset);
  193. }
  194. // Finds features in the previous frame and adds them to curr_change.
  195. void findFeatures(const FramePair& prev_change,
  196. FramePair* const curr_change);
  197. // Copies and compacts the found features in the second frame of prev_change
  198. // into the array at new_features.
  199. static int32 copyFeatures(const FramePair& prev_change,
  200. Point2D* const new_features);
  201. const int32 downsample_factor_;
  202. // Size of the original images.
  203. const Size original_size_;
  204. // Size of the internally allocated images (after original is downsampled).
  205. const Size working_size_;
  206. float32 avg_g_x_;
  207. float32 avg_g_y_;
  208. int32 first_frame_index_;
  209. int32 num_frames_;
  210. clock_t last_time_fresh_features_;
  211. Point2D tmp_features_[MAX_TEMP_FEATURES];
  212. FramePair frame_pairs_[NUM_FRAMES];
  213. // Scratch memory for feature candidacy detection and non-max suppression.
  214. Image<uint8>* feature_scratch_;
  215. // Regions of the image to pay special attention to.
  216. Image<bool>* interest_map_;
  217. ImageData* frame1_;
  218. ImageData* frame2_;
  219. bool frame_added_;
  220. bool features_computed_;
  221. bool flow_computed_;
  222. };
  223. } // namespace flow
  224. #endif // JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_