/ocr/ocrservice/jni/opticalflow/optical_flow.h
// Copyright 2009 Google Inc. All Rights Reserved.
// Author: andrewharp@google.com (Andrew Harp)

#ifndef JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_
#define JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_

#include "types.h"
#include "utils.h"

// Feature detection.
#define MAX_TEMP_FEATURES 4096
#define MAX_FEATURES 128

// Number of floats each feature takes up when exporting to an array.
#define FEATURE_STEP 7

// Number of frame deltas to keep around in the circular queue.
#define NUM_FRAMES 128

// Redetect if we ever have fewer than this number of features.
#define MIN_FEATURES 6

// How long to wait between forcing complete feature regeneration.
#define REGEN_FEATURES_MS 400

// Number of iterations of tracking to run on each feature at each pyramid
// level.
#define NUM_ITERATIONS 3

// Number of pyramid levels used for tracking.
#define NUM_LEVELS 4

// Window size to integrate over to find the local image derivative.
#define WINDOW_SIZE 3

// Total area of the integration windows. Parenthesized so the macro expands
// safely inside larger expressions.
#define ARRAY_SIZE ((2 * WINDOW_SIZE + 1) * (2 * WINDOW_SIZE + 1))

// Error that is considered good enough to abort tracking early.
#define THRESHOLD 0.03f

// Maximum number of deviations a feature delta can be from the weighted
// average before being thrown out for region-based queries.
#define NUM_DEVIATIONS 2.0f

// Resolution of the feature grid used to seed features.
#define FEATURE_GRID_WIDTH 4
#define FEATURE_GRID_HEIGHT 3

// Whether to normalize feature windows for intensity.
#define NORMALIZE

namespace flow {

template <typename T>
class Image;

// Class that encapsulates all bulky processed data for a frame.
class ImageData {
 public:
  explicit ImageData(Size size) {
    timestamp_ = 0;

    image_ = new Image<uint8>(size);

    for (int32 i = 0; i < NUM_LEVELS; ++i) {
      // Level 0 aliases the full-resolution image; each subsequent level is
      // allocated at half the resolution of the one before it.
      pyramid_[i] = (i == 0) ? image_ : new Image<uint8>(size);

      spatial_x_[i] = new Image<int32>(size);
      spatial_y_[i] = new Image<int32>(size);

      size.width /= 2;
      size.height /= 2;
    }
  }

  ~ImageData() {
    // image_ will be deleted along with the rest of the pyramid, since
    // pyramid_[0] points at it.
    for (int32 i = 0; i < NUM_LEVELS; ++i) {
      SAFE_DELETE(pyramid_[i]);
      SAFE_DELETE(spatial_x_[i]);
      SAFE_DELETE(spatial_y_[i]);
    }
  }

  void init(const uint8* const new_frame, const int32 stride,
            const clock_t timestamp, const int32 downsample_factor_) {
    timestamp_ = timestamp;

    image_->fromArray(new_frame, stride, downsample_factor_);
    timeLog("Downsampled image");

    // Create the smoothed pyramids.
    computeSmoothedPyramid(*image_, NUM_LEVELS, pyramid_);
    timeLog("Created smoothed pyramids");

    // Create the spatial derivatives for frame 1.
    computeSpatialPyramid((const Image<uint8>**)pyramid_,
                          NUM_LEVELS, spatial_x_, spatial_y_);
    timeLog("Created spatial derivatives");
  }

  clock_t timestamp_;
  Image<uint8>* image_;
  Image<uint8>* pyramid_[NUM_LEVELS];
  Image<int32>* spatial_x_[NUM_LEVELS];
  Image<int32>* spatial_y_[NUM_LEVELS];
};
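
// Editor's note, not in the original header: the halving loop above means
// that with NUM_LEVELS = 4 and, say, a 320x240 working image, the pyramid
// levels are 320x240, 160x120, 80x60, and 40x30. A full-resolution
// coordinate maps to a given level by dividing by 2^level; a minimal sketch
// (toLevel is a hypothetical helper, not part of this API):
//
//   inline void toLevel(const float32 x, const float32 y, const int32 level,
//                       float32* const out_x, float32* const out_y) {
//     const float32 scale = static_cast<float32>(1 << level);
//     *out_x = x / scale;
//     *out_y = y / scale;
//   }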

// A class that records the timestamped translation deltas of a frame's
// features for optical flow.
class FramePair {
 public:
  // Cleans up the FramePair so that it can be reused.
  void init(const clock_t end_time);

  // Throws out outliers based on the input weighting and returns the
  // weighted average delta of the remaining features.
  Point2D getWeightedDelta(const float32* const weights) const;

  // Weights points based on the query_point and cutoff_dist, then
  // returns getWeightedDelta. Essentially tells you where a point at the
  // beginning of a frame ends up.
  Point2D queryFlow(const Point2D& query_point,
                    const float32 cutoff_dist) const;

  // Counts up and returns the number of features from the first frame that
  // were found in the second frame.
  inline int32 countFoundFeatures() const {
    int32 num_found_features = 0;
    for (int32 i = 0; i < number_of_features_; ++i) {
      if (optical_flow_found_feature_[i]) {
        ++num_found_features;
      }
    }
    return num_found_features;
  }

  // The time at frame 2.
  clock_t end_time;

  // The features found in frame 1.
  Point2D frame1_features_[MAX_FEATURES];

  // The locations in frame 2 of the points from frame 1.
  Point2D frame2_features_[MAX_FEATURES];

  // The number of features in frame 1.
  int32 number_of_features_;

  // Keeps track of which features were actually found from one frame to the
  // next.
  // The i-th element of this array is non-zero if and only if the i-th
  // feature of frame 1 was found in frame 2.
  bool optical_flow_found_feature_[MAX_FEATURES];
};
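
// Editor's sketch, not in the original header: getWeightedDelta is declared
// above but defined in the .cc file. Assuming Point2D exposes x and y
// members, a weighted mean of the per-feature deltas could be computed as
// below, with a second pass discarding features more than NUM_DEVIATIONS
// standard deviations from the mean before recomputing:
//
//   float32 sum_x = 0.0f, sum_y = 0.0f, sum_w = 0.0f;
//   for (int32 i = 0; i < pair.number_of_features_; ++i) {
//     if (!pair.optical_flow_found_feature_[i]) {
//       continue;  // Only features found in frame 2 contribute.
//     }
//     const float32 dx =
//         pair.frame2_features_[i].x - pair.frame1_features_[i].x;
//     const float32 dy =
//         pair.frame2_features_[i].y - pair.frame1_features_[i].y;
//     sum_x += weights[i] * dx;
//     sum_y += weights[i] * dy;
//     sum_w += weights[i];
//   }
//   const float32 mean_x = (sum_w > 0.0f) ? sum_x / sum_w : 0.0f;
//   const float32 mean_y = (sum_w > 0.0f) ? sum_y / sum_w : 0.0f;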

// Class encapsulating all the data and logic necessary for performing optical
// flow. The general order of operations on a per-frame basis is:
//
//   // Notify optical flow that a new frame is available.
//   nextFrame(...);
//
//   // Tell it about any regions we want it to pay special attention to.
//   addInterestRegion(...);
//
//   // Have it compute the flow.
//   computeFlow();
//
//   // Look up the delta from a given point at a given time to the current
//   // time.
//   getAccumulatedDelta(...);
class OpticalFlow {
 public:
  OpticalFlow(const int32 frame_width, const int32 frame_height,
              const int32 downsample_factor);
  ~OpticalFlow();

  // Adds a new frame to the optical flow. Updates all the non-feature-related
  // member variables.
  //
  // new_frame should be a buffer of grayscale values, one byte per pixel,
  // at the original frame_width and frame_height used to initialize the
  // OpticalFlow object. Downsampling is handled internally.
  //
  // timestamp should be a time in milliseconds that later calls to this and
  // other methods will be relative to.
  void nextFrame(const uint8* const new_frame, const clock_t timestamp);

  // Finds the features in the frame before the current frame.
  // If only one frame exists, features will be found in that frame.
  void computeFeatures(const bool cached_ok = false);

  // Processes the most recent two frames and fills in the feature arrays.
  void computeFlow();

  // Copies the feature arrays after computeFlow is called.
  // out_data should be at least MAX_FEATURES * FEATURE_STEP long.
  // Currently its format is [x1 y1 found x2 y2 score] repeated N times,
  // where N is the number of features tracked. N is returned as the result.
  int32 getFeatures(const bool only_found, float32* const out_data) const;

  // Returns the overall flow, from the given time to the present, for a
  // region of the given radius around the given position.
  Point2D getAccumulatedDelta(const Point2D& position,
                              const float radius,
                              const clock_t timestamp) const;

  // Pay special attention to the area inside this box on the next
  // optical flow pass.
  void addInterestRegion(const int32 num_x, const int32 num_y,
                         float32 left, float32 top,
                         float32 right, float32 bottom);

  // Finds the correspondences for all the points in the current pair of
  // frames. Stores the results in the given FramePair.
  void findCorrespondences(FramePair* const curr_change) const;

  // An implementation of the pyramidal Lucas-Kanade optical flow algorithm.
  bool findFlowAtPoint(const float32 u_x, const float32 u_y,
                       float32* final_x, float32* final_y) const;

  void printInfo() const {
#ifdef VERBOSE_LOGGING
    const int32 first_frame_index = getNthIndexFromStart(0);
    const FramePair& first_frame_pair = frame_pairs_[first_frame_index];

    const int32 last_frame_index = getNthIndexFromEnd(0);
    const FramePair& last_frame_pair = frame_pairs_[last_frame_index];

    LOGV("Queue size: %d, last/first: %4d %4d: %8ld - %8ld = %8ld",
         num_frames_, last_frame_index, first_frame_index,
         last_frame_pair.end_time, first_frame_pair.end_time,
         last_frame_pair.end_time - first_frame_pair.end_time);
#endif
  }

 private:
  inline int32 getNthIndexFromStart(const int32 offset) const {
    CHECK(offset >= 0 && offset < num_frames_,
          "Offset out of range! %d out of %d.", offset, num_frames_);
    return (first_frame_index_ + offset) % NUM_FRAMES;
  }

  inline int32 getNthIndexFromEnd(const int32 offset) const {
    return getNthIndexFromStart(num_frames_ - 1 - offset);
  }

  // Finds features in the previous frame and adds them to curr_change.
  void findFeatures(const FramePair& prev_change,
                    FramePair* const curr_change);

  // Copies and compacts the found features in the second frame of prev_change
  // into the array at new_features.
  static int32 copyFeatures(const FramePair& prev_change,
                            Point2D* const new_features);

  const int32 downsample_factor_;

  // Size of the original images.
  const Size original_size_;

  // Size of the internally allocated images (after the original is
  // downsampled).
  const Size working_size_;

  float32 avg_g_x_;
  float32 avg_g_y_;

  int32 first_frame_index_;
  int32 num_frames_;

  clock_t last_time_fresh_features_;

  Point2D tmp_features_[MAX_TEMP_FEATURES];

  FramePair frame_pairs_[NUM_FRAMES];

  // Scratch memory for feature candidacy detection and non-max suppression.
  Image<uint8>* feature_scratch_;

  // Regions of the image to pay special attention to.
  Image<bool>* interest_map_;

  ImageData* frame1_;
  ImageData* frame2_;

  bool frame_added_;
  bool features_computed_;
  bool flow_computed_;
};

}  // namespace flow

#endif  // JAVA_COM_GOOGLE_ANDROID_APPS_UNVEIL_JNI_OPTICALFLOW_OPTICAL_FLOW_H_
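
// Editor's note, not in the original header: findFlowAtPoint names the
// pyramidal Lucas-Kanade algorithm. In the standard formulation, one step at
// a single pyramid level solves, over the (2 * WINDOW_SIZE + 1)^2 window W
// around the feature:
//
//   G = sum over W of | Ix*Ix  Ix*Iy |     b = sum over W of | It*Ix |
//                     | Ix*Iy  Iy*Iy |                       | It*Iy |
//
//   d = inverse(G) * b
//
// where Ix and Iy are the spatial derivatives (spatial_x_, spatial_y_) and
// It is the temporal difference between the two frames. The guess is updated
// by d for up to NUM_ITERATIONS iterations per level (stopping early once the
// error falls below THRESHOLD), from the coarsest of the NUM_LEVELS levels
// down to full resolution. The exact update in the .cc file may differ from
// this textbook version.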
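
// Editor's sketch of end-to-end usage, not in the original header. Only the
// API documented above is used; the image dimensions, downsample factor, and
// timestamp variable are illustrative assumptions:
//
//   flow::OpticalFlow tracker(320, 240, 2);  // 320x240 input, 2x downsample.
//
//   // Per frame: feed grayscale pixels (one byte each, at 320x240), then
//   // detect features and compute flow. Timestamps are in milliseconds.
//   tracker.nextFrame(gray_pixels, now_ms);
//   tracker.computeFeatures();
//   tracker.computeFlow();
//
//   // Unpack the tracked features: FEATURE_STEP floats per feature, in the
//   // [x1 y1 found x2 y2 score] layout described at getFeatures().
//   float32 data[MAX_FEATURES * FEATURE_STEP];
//   const int32 n = tracker.getFeatures(true, data);
//   for (int32 i = 0; i < n; ++i) {
//     const float32* const f = data + i * FEATURE_STEP;
//     // f[0], f[1]: position in frame 1; f[3], f[4]: position in frame 2.
//   }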