/ocr/ocrservice/jni/opticalflow/optical_flow.cpp
C++ | 652 lines | 419 code | 147 blank | 86 comment | 58 complexity | c182ffcc712f91333171ce6790039dd1 MD5 | raw file
1/* 2 * Copyright 2011, Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// Author: Andrew Harp 18 19#include "utils.h" 20#include "time_log.h" 21 22#include "image.h" 23 24#include "math.h" 25 26#include "optical_flow.h" 27#include "feature_detector.h" 28 29namespace flow { 30 31OpticalFlow::OpticalFlow(const int32 frame_width, 32 const int32 frame_height, 33 const int32 downsample_factor) : 34 downsample_factor_(downsample_factor), 35 original_size_(frame_width, frame_height), 36 working_size_(frame_width / downsample_factor_, 37 frame_height / downsample_factor_), 38 avg_g_x_(0.0f), 39 avg_g_y_(0.0f), 40 first_frame_index_(0), 41 num_frames_(0), 42 last_time_fresh_features_(0), 43 frame_added_(false), 44 features_computed_(false), 45 flow_computed_(false) { 46 for (int i = 0; i < NUM_FRAMES; ++i) { 47 frame_pairs_[i].init(0); 48 } 49 50 interest_map_ = new Image<bool>(working_size_); 51 feature_scratch_ = new Image<uint8>(working_size_); 52 53 frame1_ = new ImageData(working_size_); 54 frame2_ = new ImageData(working_size_); 55} 56 57 58OpticalFlow::~OpticalFlow() { 59 // Delete all image storage. 60 SAFE_DELETE(feature_scratch_); 61 SAFE_DELETE(interest_map_); 62 63 SAFE_DELETE(frame1_); 64 SAFE_DELETE(frame2_); 65} 66 67 68void OpticalFlow::nextFrame(const uint8* const new_frame, 69 const clock_t timestamp) { 70 frame_added_ = false; 71 features_computed_ = false; 72 flow_computed_ = false; 73 74 // Move the current framechange index up. 75 ++num_frames_; 76 77 // If we've got too many, push up the start of the queue. 78 if (num_frames_ > NUM_FRAMES) { 79 first_frame_index_ = geNthIndexFromStart(1); 80 --num_frames_; 81 } 82 83 FramePair* const curr_change = frame_pairs_ + geNthIndexFromEnd(0); 84 curr_change->init(timestamp); 85 86 interest_map_->clear(false); 87 88 // Cache out data from last frame. 89 // Don't run on frame 0 (no point) or frame 1 (because the old frame will 90 // already be in frame1_). 91 if (num_frames_ > 2) { 92 swap(&frame1_, &frame2_); 93 timeLog("Copied data from last run"); 94 } 95 96 frame2_->init(new_frame, original_size_.width, timestamp, downsample_factor_); 97 98 // Special case for the first frame: make sure the image ends up in 99 // frame1_ so that feature detection can be done on it if desired. 100 // TODO(andrewharp): Make it so that feature detection is always done 101 // on the last frame added. 102 if (num_frames_ == 1) { 103 swap(&frame1_, &frame2_); 104 } 105 106 frame_added_ = true; 107} 108 109 110void OpticalFlow::computeFlow() { 111 CHECK(frame_added_ && features_computed_ && !flow_computed_, 112 "Optical Flow function called out of order!"); 113 114 if (num_frames_ < 2) { 115 LOGV("Frame index was %d, skipping computation.", num_frames_); 116 return; 117 } 118 119 FramePair* const curr_change = &frame_pairs_[geNthIndexFromEnd(0)]; 120 121 findCorrespondences(curr_change); 122 123 flow_computed_ = true; 124} 125 126 127void OpticalFlow::findFeatures(const FramePair& prev_change, 128 FramePair* const curr_change) { 129 int32 number_of_tmp_features = 0; 130 131 // Copy features from second frame of last pass to temp features of this 132 // pass. 133 number_of_tmp_features = copyFeatures(prev_change, tmp_features_); 134 135 const float32 buffer = 30.0f; 136 number_of_tmp_features += seedFeatures(*frame1_->image_, 137 FEATURE_GRID_WIDTH, FEATURE_GRID_HEIGHT, 138 buffer, buffer, 139 (float32) working_size_.width - buffer, 140 (float32) working_size_.height - buffer, FEATURE_DEFAULT, 141 tmp_features_ + number_of_tmp_features); 142 timeLog("Seeded features."); 143 144 const int32 max_num_fast = MAX_TEMP_FEATURES - number_of_tmp_features; 145 number_of_tmp_features += 146 findFastFeatures(*frame1_->image_, max_num_fast, 147 tmp_features_ + number_of_tmp_features, 148 feature_scratch_); 149 150 if (number_of_tmp_features >= MAX_TEMP_FEATURES) { 151 LOGW("Hit cap of %d for temporary features!", number_of_tmp_features); 152 } 153 154 // Score them... 155 scoreFeatures(*frame1_->spatial_x_[0], *frame1_->spatial_y_[0], 156 number_of_tmp_features, tmp_features_); 157 158 timeLog("Scored features"); 159 160 // Now pare it down a bit. 161 curr_change->number_of_features_ = sortAndSelect( 162 number_of_tmp_features, 163 MAX_FEATURES, 164 *interest_map_, 165 tmp_features_, 166 curr_change->frame1_features_, 167 feature_scratch_); 168 169 timeLog("Sorted and selected features"); 170 171 LOGV("Picked %d (%d max) final features out of %d potential.", 172 curr_change->number_of_features_, MAX_FEATURES, number_of_tmp_features); 173 174 last_time_fresh_features_ = curr_change->end_time; 175} 176 177 178int32 OpticalFlow::copyFeatures(const FramePair& prev_change, 179 Point2D* const new_features) { 180 int32 number_of_features = 0; 181 182 // Caching values from last pass, just copy and compact. 183 for (int32 i = 0; i < prev_change.number_of_features_; ++i) { 184 if (prev_change.optical_flow_found_feature_[i]) { 185 new_features[number_of_features] = 186 prev_change.frame2_features_[i]; 187 188 new_features[number_of_features].score = 189 prev_change.frame1_features_[i].score; 190 191 ++number_of_features; 192 } 193 } 194 195 timeLog("Copied features"); 196 197 return number_of_features; 198} 199 200 201void OpticalFlow::computeFeatures(const bool cached_ok) { 202 CHECK(frame_added_ && !features_computed_ && !flow_computed_, 203 "Optical Flow function called out of order!"); 204 205 const FramePair& prev_change = frame_pairs_[geNthIndexFromEnd(1)]; 206 FramePair* const curr_change = &frame_pairs_[geNthIndexFromEnd(0)]; 207 208 const int32 num_found_features = prev_change.countFoundFeatures(); 209 const clock_t ms_since_last_refresh = 210 (curr_change->end_time - last_time_fresh_features_); 211 212 if (cached_ok && 213 num_found_features >= MIN_FEATURES && 214 ms_since_last_refresh <= REGEN_FEATURES_MS) { 215 // Reuse the found features from the last frame if we can. 216 curr_change->number_of_features_ = 217 copyFeatures(prev_change, curr_change->frame1_features_); 218 } else { 219 // Only find new features to track if we've lost too many since the last 220 // time, or it's time to regenerate anyway. 221 LOGV("Not enough features (%d/%d), or it's been too long (%ld), " 222 "finding more.", 223 num_found_features, MIN_FEATURES, ms_since_last_refresh); 224 findFeatures(prev_change, curr_change); 225 } 226 227 features_computed_ = true; 228} 229 230 231int32 OpticalFlow::getFeatures(const bool only_found, 232 float32* const out_data) const { 233 CHECK(frame_added_ && features_computed_, 234 "Optical Flow function called out of order!"); 235 236 int32 curr_feature = 0; 237 const FramePair& change = frame_pairs_[geNthIndexFromEnd(0)]; 238 239 for (int32 i = 0; i < change.number_of_features_; ++i) { 240 if (!only_found || change.optical_flow_found_feature_[i]) { 241 const int base = curr_feature * FEATURE_STEP; 242 out_data[base + 0] = change.frame1_features_[i].x * downsample_factor_; 243 out_data[base + 1] = change.frame1_features_[i].y * downsample_factor_; 244 out_data[base + 2] = change.optical_flow_found_feature_[i]; 245 out_data[base + 3] = change.frame2_features_[i].x * downsample_factor_; 246 out_data[base + 4] = change.frame2_features_[i].y * downsample_factor_; 247 out_data[base + 5] = change.frame1_features_[i].score; 248 out_data[base + 6] = change.frame1_features_[i].type; 249 ++curr_feature; 250 } 251 } 252 253 LOGV("Got %d features.", curr_feature); 254 255 return curr_feature; 256} 257 258 259// Finds the correspondences for all the points in the current pair of frames. 260// Stores the results in the given FramePair. 261void OpticalFlow::findCorrespondences(FramePair* const frame_pair) const { 262 // Features aren't found until they're found. 263 memset(frame_pair->optical_flow_found_feature_, false, 264 sizeof(*frame_pair->optical_flow_found_feature_) * MAX_FEATURES); 265 timeLog("Cleared old found features"); 266 267 int32 num_features_found = 0; 268 269 // For every feature... 270 for (int32 i_feat = 0; i_feat < frame_pair->number_of_features_; ++i_feat) { 271 Point2D* feature1 = frame_pair->frame1_features_ + i_feat; 272 Point2D* feature2 = frame_pair->frame2_features_ + i_feat; 273 274 if (findFlowAtPoint(feature1->x, feature1->y, 275 &feature2->x, &feature2->y)) { 276 frame_pair->optical_flow_found_feature_[i_feat] = true; 277 ++num_features_found; 278 } 279 } 280 281 timeLog("Found correspondences"); 282 283 LOGV("Found %d of %d feature correspondences", 284 num_features_found, frame_pair->number_of_features_); 285} 286 287 288// An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm. 289// See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details. 290bool OpticalFlow::findFlowAtPoint(const float32 u_x, const float32 u_y, 291 float32* final_x, float32* final_y) const { 292 const float32 threshold_squared = square(THRESHOLD); 293 294 // Initial guess. 295 float32 g_x = 0.0f; 296 float32 g_y = 0.0f; 297 298 // For every level in the pyramid, update the coordinates of the best match. 299 for (int32 l = NUM_LEVELS - 1; l >= 0; --l) { 300 // Shrink factor from original. 301 const int32 shrink_factor = (1 << l); 302 303 // Images I (prev) and J (next). 304 const Image<uint8>& img_I = *frame1_->pyramid_[l]; 305 const Image<uint8>& img_J = *frame2_->pyramid_[l]; 306 307 // Computed gradients. 308 const Image<int32>& I_x = *frame1_->spatial_x_[l]; 309 const Image<int32>& I_y = *frame1_->spatial_y_[l]; 310 311 // Image position vector (p := u^l), scaled for this level. 312 const float32 p_x = u_x / static_cast<float32>(shrink_factor); 313 const float32 p_y = u_y / static_cast<float32>(shrink_factor); 314 315 // LOGV("Level %d: (%d, %d) / %d -> (%d, %d)", 316 // l, u_x, u_y, shrink_factor, p_x, p_y); 317 318 // Get values for frame 1. They remain constant through the inner 319 // iteration loop. 320 float32 vals_I[ARRAY_SIZE]; 321 float32 vals_I_x[ARRAY_SIZE]; 322 float32 vals_I_y[ARRAY_SIZE]; 323 324 int32 val_idx = 0; 325 for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { 326 for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { 327 const float32 x_pos = p_x + win_x; 328 const float32 y_pos = p_y + win_y; 329 330 if (!img_I.validInterpPixel(x_pos, y_pos)) { 331 return false; 332 } 333 334 vals_I[val_idx] = img_I.getPixelInterp(x_pos, y_pos); 335 336 vals_I_x[val_idx] = I_x.getPixelInterp(x_pos, y_pos); 337 vals_I_y[val_idx] = I_y.getPixelInterp(x_pos, y_pos); 338 339 ++val_idx; 340 } 341 } 342 343 // Compute the spatial gradient matrix about point p. 344 float32 G[] = { 0, 0, 0, 0 }; 345 calculateG(vals_I_x, vals_I_y, ARRAY_SIZE, G); 346 347 // Find the inverse of G. 348 float32 G_inv[4]; 349 if (!invert2x2(G, G_inv)) { 350 // If we can't invert, hope that the next level will have better luck. 351 continue; 352 } 353 354#ifdef NORMALIZE 355 const float32 mean_I = computeMean(vals_I, ARRAY_SIZE); 356 const float32 std_dev_I = computeStdDev(vals_I, ARRAY_SIZE, mean_I); 357#endif 358 359 // Iterate NUM_ITERATIONS times or until we converge. 360 for (int32 iteration = 0; iteration < NUM_ITERATIONS; ++iteration) { 361 // Get values for frame 2. 362 float32 vals_J[ARRAY_SIZE]; 363 int32 val_idx = 0; 364 for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { 365 for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { 366 const float32 x_pos = p_x + win_x + g_x; 367 const float32 y_pos = p_y + win_y + g_y; 368 369 if (!img_I.validInterpPixel(x_pos, y_pos)) { 370 return false; 371 } 372 373 vals_J[val_idx] = img_J.getPixelInterp(x_pos, y_pos); 374 375 ++val_idx; 376 } 377 } 378 379#ifdef NORMALIZE 380 const float32 mean_J = computeMean(vals_J, ARRAY_SIZE); 381 const float32 std_dev_J = computeStdDev(vals_J, ARRAY_SIZE, mean_J); 382 383 const float32 std_dev_ratio = std_dev_I / std_dev_J; 384#endif 385 386 // Compute image mismatch vector. 387 float32 b_x = 0.0f; 388 float32 b_y = 0.0f; 389 val_idx = 0; 390 for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { 391 for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { 392 // Normalized Image difference. 393 394#ifdef NORMALIZE 395 const float32 dI = (vals_I[val_idx] - mean_I) - 396 (vals_J[val_idx] - mean_J) * std_dev_ratio; 397#else 398 const float32 dI = vals_I[val_idx] - vals_J[val_idx]; 399#endif 400 401 b_x += dI * vals_I_x[val_idx]; 402 b_y += dI * vals_I_y[val_idx]; 403 404 ++val_idx; 405 } 406 } 407 408 // Optical flow... solve n = G^-1 * b 409 const float32 n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y); 410 const float32 n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y); 411 412 // Update best guess with residual displacement from this level and 413 // iteration. 414 g_x += n_x; 415 g_y += n_y; 416 417 // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y); 418 419 // Abort early if we're already below the threshold. 420 if (square(n_x) + square(n_y) < threshold_squared) { 421 break; 422 } 423 } // Iteration. 424 425 if (l > 0) { 426 // Every lower level of the pyramid is 2x as large dimensionally. 427 g_x = 2.0f * g_x; 428 g_y = 2.0f * g_y; 429 } 430 } // Level. 431 432 // LOGV("Final displacement for feature %d was (%.2f, %.2f)", 433 // iFeat, g_x, g_y); 434 435 *final_x = u_x + g_x; 436 *final_y = u_y + g_y; 437 438 // Assign the best guess, if we're still in the image. 439 if (frame1_->pyramid_[0]->validInterpPixel(*final_x, *final_y)) { 440 return true; 441 } else { 442 return false; 443 } 444} 445 446 447void OpticalFlow::addInterestRegion(const int32 num_x, const int32 num_y, 448 float32 left, float32 top, 449 float32 right, float32 bottom) { 450 left = max(left / downsample_factor_, 0); 451 top = max(top / downsample_factor_, 0); 452 right = min(right / downsample_factor_, working_size_.width - 1); 453 bottom = min(bottom / downsample_factor_, working_size_.height - 1); 454 455 if (left > right || top > bottom) { 456 return; 457 } 458 459 // This is inclusive of the border pixels, hence the +1. 460 const int32 width = right - left + 1; 461 462 // Also inclusive, so it uses a LTE. 463 for (int32 y = top; y <= bottom; ++y) { 464 bool* row_start = interest_map_->getPixelPtr(left, y); 465 memset(row_start, true, width * sizeof(*row_start)); 466 } 467} 468 469 470Point2D OpticalFlow::getAccumulatedDelta(const Point2D& position, 471 const float32 radius, 472 const clock_t timestamp) const { 473 Point2D curr_pos(position); 474 475 // Scale down to downsampled size. 476 curr_pos.x /= downsample_factor_; 477 curr_pos.y /= downsample_factor_; 478 479 LOGV("Tracking accumulated delta from %.2f, %.2f", curr_pos.x, curr_pos.y); 480 481 const float32 cutoff_dist = radius / downsample_factor_; 482 483 // Anything that ended before the requested timestamp is of no concern to us. 484 bool found_it = false; 485 int32 num_frames_back = -1; 486 for (int32 i = 0; i < num_frames_; ++i) { 487 const FramePair& frame_pair = 488 frame_pairs_[geNthIndexFromEnd(i)]; 489 490 if (frame_pair.end_time <= timestamp) { 491 num_frames_back = i - 1; 492 493 if (num_frames_back > 0) { 494 LOGV("Went %d out of %d frames before finding frame. (index: %d)", 495 num_frames_back, num_frames_, geNthIndexFromEnd(i)); 496 } 497 498 found_it = true; 499 break; 500 } 501 } 502 503 if (!found_it) { 504 const FramePair& frame_pair = frame_pairs_[geNthIndexFromStart(0)]; 505 const FramePair& latest_frame_pair = frame_pairs_[geNthIndexFromEnd(0)]; 506 507 clock_t latest_time = latest_frame_pair.end_time; 508 509 LOGW("History did not go back far enough! %ld vs %ld", 510 latest_time - frame_pair.end_time, 511 latest_time - timestamp); 512 } 513 514 // Loop over all the frames in the queue, tracking the accumulated delta 515 // of the point from frame to frame. It's possible the point could 516 // go out of frame, but keep tracking as best we can, using points near 517 // the edge of the screen where it went out of bounds. 518 for (int32 i = num_frames_back; i >= 0; --i) { 519 const FramePair& frame_pair = frame_pairs_[geNthIndexFromEnd(i)]; 520 CHECK(frame_pair.end_time >= timestamp, "Frame timestamp was too early!"); 521 522 const Point2D delta = frame_pair.queryFlow(curr_pos, cutoff_dist); 523 curr_pos.x += delta.x; 524 curr_pos.y += delta.y; 525 } 526 527 // Scale back to original size. 528 curr_pos.x *= downsample_factor_; 529 curr_pos.y *= downsample_factor_; 530 531 // Return the delta only. 532 return curr_pos - position; 533} 534 535 536void FramePair::init(const clock_t end_time) { 537 this->end_time = end_time; 538 memset(optical_flow_found_feature_, false, 539 sizeof(*optical_flow_found_feature_) * MAX_FEATURES); 540 number_of_features_ = 0; 541} 542 543 544Point2D FramePair::queryFlow( 545 const Point2D& initial, const float32 cutoff_dist) const { 546 float32 weights[MAX_FEATURES]; 547 memset(weights, 0, sizeof(float32) * MAX_FEATURES); 548 549 // Compute the max score. 550 float32 max_score = 0.0f; 551 for (int32 i = 0; i < MAX_FEATURES; ++i) { 552 if (optical_flow_found_feature_[i]) { 553 max_score = max(max_score, frame1_features_[i].score); 554 } 555 } 556 557 const float32 cutoff_dist_squared = cutoff_dist * cutoff_dist; 558 for (int32 i = 0; i < MAX_FEATURES; ++i) { 559 if (optical_flow_found_feature_[i]) { 560 const float32 sq_x_dist = square(initial.x - frame1_features_[i].x); 561 const float32 sq_y_dist = square(initial.y - frame1_features_[i].y); 562 563 const float32 dist_squared = sq_x_dist + sq_y_dist; 564 565 // The weighting based off distance. Anything within the cuttoff 566 // distance has a weight of 1, and everything outside of that is within 567 // the range [0, 1). 568 const float32 distance_score = 569 min(cutoff_dist_squared / dist_squared, 1.0f); 570 571 // The weighting based on score strength. 0.5f - 1.0f. 572 float32 intrinsic_score = 1.0f; 573 if (max_score > 0) { 574 intrinsic_score = (frame1_features_[i].score / max_score) / 2.0f; 575 } 576 577 // The final score will be in the range [0, 1]. 578 weights[i] = distance_score * intrinsic_score; 579 } 580 } 581 582 return getWeightedDelta(weights); 583} 584 585 586Point2D FramePair::getWeightedDelta(const float32* const weights) const { 587 float32 total_weight = 0.0f; 588 float32 weighted_sum_x = 0.0f; 589 float32 weighted_sum_y = 0.0f; 590 591 Point2D deltas[MAX_FEATURES]; 592 593 // Compute weighted mean and deltas. 594 for (int32 i = 0; i < MAX_FEATURES; ++i) { 595 const float32 weight = weights[i]; 596 if (weight > 0.0f) { 597 deltas[i] = frame2_features_[i] - frame1_features_[i]; 598 weighted_sum_x += deltas[i].x * weight; 599 weighted_sum_y += deltas[i].y * weight; 600 total_weight += weight; 601 } 602 } 603 const float32 weighted_mean_x = weighted_sum_x / total_weight; 604 const float32 weighted_mean_y = weighted_sum_y / total_weight; 605 606 // Compute weighted squared standard deviation from weighted mean. 607 float32 weighted_dev_squared_sum = 0.0f; 608 for (int32 i = 0; i < MAX_FEATURES; ++i) { 609 const float32 weight = weights[i]; 610 611 if (weight > 0.0f) { 612 const float32 devX = deltas[i].x - weighted_mean_x; 613 const float32 devY = deltas[i].y - weighted_mean_y; 614 615 const float32 squared_deviation = (devX * devX) + (devY * devY); 616 weighted_dev_squared_sum += squared_deviation * weight; 617 } 618 } 619 const float32 weighted_std_dev_squared = 620 weighted_dev_squared_sum / total_weight; 621 622 // Recompute weighted mean change without outliers. 623 float32 good_weight = 0.0f; 624 float32 good_sum_x = 0.0f; 625 float32 good_sum_y = 0.0f; 626 627 for (int32 i = 0; i < MAX_FEATURES; ++i) { 628 const float32 weight = weights[i]; 629 630 if (weight > 0.0f) { 631 const float32 dev_x = deltas[i].x - weighted_mean_x; 632 const float32 dev_y = deltas[i].y - weighted_mean_y; 633 634 const float32 sqrd_deviation = (dev_x * dev_x) + (dev_y * dev_y); 635 636 // Throw out anything beyond NUM_DEVIATIONS. 637 if (sqrd_deviation <= NUM_DEVIATIONS * weighted_std_dev_squared) { 638 good_sum_x += deltas[i].x * weight; 639 good_sum_y += deltas[i].y * weight; 640 good_weight += weight; 641 } 642 } 643 } 644 645 if (good_weight > 0.0f) { 646 return Point2D(good_sum_x / good_weight, good_sum_y / good_weight); 647 } else { 648 return Point2D(0.0f, 0.0f); 649 } 650} 651 652} // namespace flow