/ocr/ocrservice/src/com/googlecode/eyesfree/ocr/client/Ocr.java
Java | 857 lines | 444 code | 141 blank | 272 comment | 53 complexity | fe71e606a7675711a449ed490e35c556 MD5 | raw file
1/* 2 * Copyright (C) 2011 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.googlecode.eyesfree.ocr.client; 18 19import android.content.ComponentName; 20import android.content.Context; 21import android.content.DialogInterface; 22import android.content.DialogInterface.OnClickListener; 23import android.content.Intent; 24import android.content.ServiceConnection; 25import android.content.pm.PackageManager; 26import android.content.pm.ResolveInfo; 27import android.graphics.Bitmap; 28import android.graphics.Bitmap.CompressFormat; 29import android.os.Bundle; 30import android.os.DeadObjectException; 31import android.os.Environment; 32import android.os.IBinder; 33import android.os.Parcel; 34import android.os.Parcelable; 35import android.os.RemoteException; 36import android.util.Log; 37 38import java.io.ByteArrayOutputStream; 39import java.io.File; 40import java.io.FileOutputStream; 41import java.io.IOException; 42import java.lang.ref.WeakReference; 43import java.util.List; 44import java.util.Set; 45 46/** 47 * Recognizes text in images. This abstracts away the complexities of using the 48 * OCR service such as setting up the IBinder connection and handling 49 * RemoteExceptions, etc. Specifically, this class initializes the OCR service 50 * and pushes recognization requests across IPC for processing in the service 51 * thread. 52 * 53 * @author alanv@google.com (Alan Viverette) 54 */ 55public class Ocr { 56 private static final String TAG = "Ocr"; 57 58 // This is the minimum version of the Ocr service that is needed by this 59 // version of the library stub. 60 private static final int MIN_VER = 1; 61 62 public static final int STATUS_SUCCESS = 0; 63 public static final int STATUS_FAILURE = 1; 64 public static final int STATUS_MISSING = 2; 65 66 public static final int ERROR = -1; 67 public static final int SUCCESS = 1; 68 69 public static final long INVALID_TOKEN = -1; 70 71 private static final int BINDER_SIZE_LIMIT = 40000; 72 73 private int mVersion = -1; 74 75 private IOcr mIOcr; 76 77 private ServiceConnection mServiceConnection; 78 79 private boolean mStorageAvailable; 80 81 private boolean mSuppressAlerts; 82 83 private WeakReference<Context> mContext; 84 85 private ResultCallback mOnResult; 86 87 private CompletionCallback mOnCompleted; 88 89 private Parameters mParameters; 90 91 /** 92 * The constructor for the OCR service client. Initializes the service if 93 * necessary and calls the supplied InitCallback when it's ready. 94 * 95 * @param context the context of the parent activity 96 * @param init the callback to call on initialization 97 */ 98 public Ocr(Context context, InitCallback init) { 99 this(context, init, false); 100 } 101 102 /** 103 * The constructor for the OCR service client. Initializes the service if 104 * necessary and calls the supplied InitCallback when it's ready. 105 * <p> 106 * You may optionally set suppressAlerts to true to turn off alert dialogs. 107 * 108 * @param context the context of the parent activity 109 * @param init the callback to call on initialization 110 * @param suppressAlerts <code>true</code> to suppress alert dialogs 111 */ 112 public Ocr(Context context, InitCallback init, boolean suppressAlerts) { 113 if (context == null) { 114 throw new IllegalArgumentException("Context must not be null"); 115 } 116 117 mContext = new WeakReference<Context>(context); 118 mSuppressAlerts = suppressAlerts; 119 mParameters = new Parameters(); 120 121 connectOcrService(init); 122 } 123 124 /** 125 * Sets the result callback. If text detection is enabled, this will be 126 * called once for each individual box before the completion callback 127 * occurs. 128 * 129 * @param callback 130 */ 131 public void setResultCallback(ResultCallback callback) { 132 mOnResult = callback; 133 } 134 135 /** 136 * Sets the completion callback. This is called when recognition is complete 137 * and receives an ArrayList of results. 138 * 139 * @param callback 140 */ 141 public void setCompletionCallback(CompletionCallback callback) { 142 mOnCompleted = callback; 143 } 144 145 /** 146 * Enqueues an image represented as a Bitmap for OCR. 147 * 148 * @param bitmap The bitmap on which to perform OCR. 149 * @return A Job representing the queued OCR job. 150 */ 151 public Job enqueue(Bitmap bitmap) { 152 if (bitmap == null) { 153 throw new IllegalArgumentException("Bitmap must be non-null"); 154 } 155 156 // TODO(alanv): Replace this with native Bitmap conversion 157 ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); 158 bitmap.compress(CompressFormat.JPEG, 85, byteStream); 159 160 byte[] jpegData = byteStream.toByteArray(); 161 162 return enqueue(jpegData); 163 } 164 165 /** 166 * Enqueues an image represented as JPEG-compressed bytes for OCR. 167 * 168 * @param jpegData The JPEG-compressed image on which to perform OCR. 169 * @return A Job representing the queued OCR job. 170 */ 171 public Job enqueue(byte[] jpegData) { 172 if (jpegData == null) { 173 throw new IllegalArgumentException("JPEG data must be non-null"); 174 } 175 176 // If we're over the binder size limit, write to disk. 177 if (jpegData.length > BINDER_SIZE_LIMIT) { 178 return cacheAndEnqueue(jpegData); 179 } 180 181 try { 182 long taskId = mIOcr.enqueueData(jpegData, mParameters); 183 return new Job(taskId); 184 } catch (DeadObjectException e) { 185 e.printStackTrace(); 186 } catch (RemoteException e) { 187 e.printStackTrace(); 188 } 189 190 return null; 191 } 192 193 /** 194 * Internal method that writes image bytes to disk when they exceed the 195 * binder transaction limit. 196 * 197 * @param data The bytes to write to disk. 198 * @return A Job representing the queued OCR job. 199 */ 200 private Job cacheAndEnqueue(byte[] data) { 201 Job job = null; 202 203 try { 204 File cacheDir = mContext.get().getExternalCacheDir(); 205 File cached = File.createTempFile("ocr", ".jpg", cacheDir); 206 207 FileOutputStream output = new FileOutputStream(cached); 208 output.write(data); 209 output.close(); 210 211 job = enqueue(cached); 212 213 if (job != null) { 214 job.mCached = cached; 215 } 216 } catch (IOException e) { 217 e.printStackTrace(); 218 } 219 220 return job; 221 } 222 223 /** 224 * Enqueues an image represented as an encoded file. The file extension must 225 * match the encoding and must be one of the following formats: 226 * <ol> 227 * <li>JPEG</li> 228 * <li>BMP</li> 229 * </ol> 230 * 231 * @param file An encoded file containing the image to OCR. 232 * @return A Job representing the queued OCR job. 233 */ 234 public Job enqueue(File file) { 235 if (file == null) { 236 throw new IllegalArgumentException("File must be non-null"); 237 } 238 239 try { 240 long taskId = mIOcr.enqueueFile(file.getAbsolutePath(), mParameters); 241 return new Job(taskId); 242 } catch (DeadObjectException e) { 243 e.printStackTrace(); 244 } catch (RemoteException e) { 245 e.printStackTrace(); 246 } 247 248 return null; 249 } 250 251 /** 252 * Returns the OCR parameters that will be used to process new enqueue 253 * requests. If changes are made, you must call setParameters to commit 254 * them. 255 * 256 * @return The parameters used when processing new OCR requests. 257 */ 258 public Parameters getParameters() { 259 return mParameters; 260 } 261 262 /** 263 * Sets the OCR parameters that will be used to process new enqueue 264 * requests. 265 * 266 * @param parameters The parameters to use when processing new OCR requests. 267 */ 268 public void setParameters(Parameters parameters) { 269 mParameters = parameters; 270 } 271 272 /** 273 * Returns the absolute path of the OCR service's language data folder. 274 * Typically this is on the user's SD card. 275 * 276 * @return the absolute path of the OCR service's language data folder 277 */ 278 public File getTessdata() { 279 if (mIOcr == null) { 280 Log.e(TAG, "getTessdata() without a connection to Ocr service."); 281 return null; 282 } 283 284 File tessdata = null; 285 286 try { 287 String tessstr = mIOcr.getTessdata(); 288 289 tessdata = tessstr == null ? null : new File(tessstr); 290 } catch (DeadObjectException e) { 291 e.printStackTrace(); 292 } catch (RemoteException e) { 293 e.printStackTrace(); 294 } 295 296 return tessdata; 297 } 298 299 /** 300 * Forces the Ocr service to refresh the list of available languages. 301 */ 302 public void reloadLanguages() { 303 if (mIOcr == null) { 304 Log.e(TAG, "reloadLanguages() without a connection to Ocr service."); 305 } 306 307 try { 308 mIOcr.reloadLanguages(); 309 } catch (DeadObjectException e) { 310 e.printStackTrace(); 311 } catch (RemoteException e) { 312 e.printStackTrace(); 313 } 314 } 315 316 /** 317 * Returns the list of available languages. 318 * 319 * @return a sorted list of available languages 320 */ 321 public List<Language> getAvailableLanguages() { 322 if (mIOcr == null) { 323 Log.e(TAG, "getAvailableLanguages() without a connection to Ocr service."); 324 return null; 325 } 326 327 List<Language> available = null; 328 329 try { 330 available = mIOcr.getAvailableLanguages(); 331 } catch (DeadObjectException e) { 332 e.printStackTrace(); 333 } catch (RemoteException e) { 334 e.printStackTrace(); 335 } 336 337 return available; 338 } 339 340 /** 341 * Disconnects from the Ocr service. 342 * <p> 343 * It is recommended that you call this as soon as you're done with the Ocr 344 * object. After this call the receiving Ocr object will be unusable. 345 */ 346 public synchronized void release() { 347 mOnCompleted = null; 348 mOnResult = null; 349 350 try { 351 Context context = mContext.get(); 352 353 if (context != null) { 354 context.unbindService(mServiceConnection); 355 } 356 } catch (IllegalArgumentException e) { 357 // Do nothing and fail silently since an error here indicates that 358 // binding never succeeded in the first place. 359 } 360 361 mIOcr = null; 362 mContext = null; 363 } 364 365 /** 366 * Internal method used to connect to the OCR service. 367 * 368 * @param init Initialization callback. 369 */ 370 private void connectOcrService(final InitCallback init) { 371 // Initialize the OCR service, run the callback after the binding is 372 // successful 373 mServiceConnection = new ServiceConnection() { 374 @Override 375 public void onServiceConnected(ComponentName name, IBinder service) { 376 mIOcr = IOcr.Stub.asInterface(service); 377 378 try { 379 mVersion = mIOcr.getVersion(); 380 381 // The Ocr service must be at least the min version needed 382 // by the library stub. Do not try to run the older Ocr with 383 // the newer library stub as the newer library may reference 384 // methods which are unavailable and cause a crash. 385 386 if (mVersion < MIN_VER) { 387 Log.e(TAG, "OCR service too old (version " + mVersion + " < " + MIN_VER 388 + ")"); 389 390 if (!mSuppressAlerts) { 391 OnClickListener onClick = new OnClickListener() { 392 @Override 393 public void onClick(DialogInterface dialog, int which) { 394 postInitialized(init, STATUS_MISSING); 395 } 396 }; 397 398 VersionAlert.createUpdateAlert(mContext.get(), null).show(); 399 } else { 400 postInitialized(init, STATUS_MISSING); 401 } 402 403 return; 404 } 405 406 mStorageAvailable = Environment.getExternalStorageDirectory().exists(); 407 408 if (!mStorageAvailable) { 409 Log.e(TAG, "External storage is not available"); 410 411 if (!mSuppressAlerts) { 412 OnClickListener onClick = new OnClickListener() { 413 @Override 414 public void onClick(DialogInterface dialog, int which) { 415 postInitialized(init, STATUS_MISSING); 416 } 417 }; 418 419 VersionAlert.createStorageAlert(mContext.get(), onClick).show(); 420 } else { 421 postInitialized(init, STATUS_MISSING); 422 } 423 424 return; 425 } 426 427 List<Language> languages = mIOcr.getAvailableLanguages(); 428 429 if (languages == null || languages.isEmpty()) { 430 Log.e(TAG, "No languages are installed"); 431 432 if (!mSuppressAlerts) { 433 OnClickListener onClick = new OnClickListener() { 434 @Override 435 public void onClick(DialogInterface dialog, int which) { 436 postInitialized(init, STATUS_MISSING); 437 } 438 }; 439 440 VersionAlert.createLanguagesAlert(mContext.get(), onClick, onClick) 441 .show(); 442 } else { 443 postInitialized(init, STATUS_MISSING); 444 } 445 446 return; 447 } 448 449 // Set the callback so that we can receive completion events 450 mIOcr.setCallback(mCallback); 451 452 } catch (RemoteException e) { 453 Log.e(TAG, "Exception caught in onServiceConnected(): " + e.toString()); 454 455 postInitialized(init, STATUS_FAILURE); 456 457 return; 458 } 459 460 postInitialized(init, STATUS_SUCCESS); 461 } 462 463 @Override 464 public void onServiceDisconnected(ComponentName name) { 465 mIOcr = null; 466 } 467 }; 468 469 Intent intent = new Intent(Intents.Service.ACTION); 470 intent.addCategory(Intent.CATEGORY_DEFAULT); 471 472 // Binding will fail only if the Ocr doesn't exist; 473 // the OcrVersionAlert will give users a chance to install 474 // the needed Ocr. 475 476 Context context = mContext.get(); 477 478 if (!context.bindService(intent, mServiceConnection, Context.BIND_AUTO_CREATE)) { 479 Log.e(TAG, "Cannot bind to OCR service, assuming not installed"); 480 481 OnClickListener onClick = new OnClickListener() { 482 @Override 483 public void onClick(DialogInterface dialog, int which) { 484 postInitialized(init, STATUS_MISSING); 485 } 486 }; 487 488 if (!mSuppressAlerts) { 489 VersionAlert.createInstallAlert(context, onClick).show(); 490 } 491 492 return; 493 } 494 } 495 496 /** 497 * Passes the initialization status to the InitCallback. 498 * 499 * @param init The initialization callback. 500 * @param status The initialization status. 501 */ 502 private void postInitialized(final InitCallback init, final int status) { 503 if (init != null) { 504 init.onInitialized(status); 505 } 506 } 507 508 /** 509 * Cancels all active and pending OCR jobs. 510 */ 511 public void stop() { 512 if (mIOcr == null) { 513 Log.e(TAG, "Attempted to call stop() without a connection to Ocr service."); 514 return; 515 } 516 517 try { 518 mIOcr.stop(); 519 } catch (DeadObjectException e) { 520 e.printStackTrace(); 521 } catch (RemoteException e) { 522 e.printStackTrace(); 523 } 524 } 525 526 /** 527 * Returns the version number of the Ocr library that the user has 528 * installed. 529 * 530 * @return te version number of the Ocr library that the user has installed 531 */ 532 public int getVersion() { 533 return mVersion; 534 } 535 536 /** 537 * Checks if the Ocr service is installed or not 538 * 539 * @return a boolean that indicates whether the Ocr service is installed 540 */ 541 public static boolean isInstalled(Context ctx) { 542 Intent intent = new Intent(Intents.Service.ACTION); 543 544 PackageManager pm = ctx.getPackageManager(); 545 ResolveInfo info = pm.resolveService(intent, 0); 546 547 if (info == null) { 548 return false; 549 } else { 550 return true; 551 } 552 } 553 554 /** 555 * Handles the callback when the Ocr service has initialized. 556 */ 557 public static interface InitCallback { 558 public void onInitialized(int status); 559 } 560 561 /** 562 * Handles the callback for when recognition is completed. 563 */ 564 public static interface CompletionCallback { 565 public void onCompleted(List<OcrResult> results); 566 } 567 568 /** 569 * Handles the callback for a single mid-recognition result. 570 */ 571 public static interface ResultCallback { 572 public void onResult(OcrResult result); 573 } 574 575 private final IOcrCallback mCallback = new IOcrCallback.Stub() { 576 @Override 577 public void onCompleted(final long token, final List<OcrResult> results) { 578 if (mOnCompleted != null) { 579 mOnCompleted.onCompleted(results); 580 } 581 } 582 583 @Override 584 public void onResult(final long token, final OcrResult result) { 585 if (mOnResult != null) { 586 mOnResult.onResult(result); 587 } 588 } 589 }; 590 591 /** 592 * Represents a single OCR job. 593 * 594 * @author alanv@google.com (Alan Viverette) 595 */ 596 public class Job { 597 long mTaskId; 598 599 File mCached; 600 601 Job(long taskId) { 602 mTaskId = taskId; 603 mCached = null; 604 } 605 606 @Override 607 protected void finalize() throws Throwable { 608 // If we have a cached file, delete it when we're done. 609 try { 610 if (mCached != null) { 611 mCached.delete(); 612 } 613 } finally { 614 super.finalize(); 615 } 616 } 617 618 /** 619 * Cancels this OCR job. 620 */ 621 public void cancel() { 622 try { 623 mIOcr.cancel(mTaskId); 624 } catch (DeadObjectException e) { 625 e.printStackTrace(); 626 } catch (RemoteException e) { 627 e.printStackTrace(); 628 } 629 } 630 } 631 632 /** 633 * Represents a set of OCR processing parameters. 634 * 635 * @author alanv@google.com (Alan Viverette) 636 */ 637 public static class Parameters implements Parcelable { 638 /** Whitelist of characters to recognize */ 639 public static final String VAR_CHAR_WHITELIST = "tessedit_char_whitelist"; 640 641 /** Blacklist of characters to not recognize */ 642 public static final String VAR_CHAR_BLACKLIST = "tessedit_char_blacklist"; 643 644 /** Detect text in image using TextDetect */ 645 public static final String FLAG_DETECT_TEXT = "detect_text"; 646 647 /** Aligns horizontal text in an image */ 648 public static final String FLAG_ALIGN_TEXT = "align_text"; 649 650 /** Perform spell-checking on results */ 651 public static final String FLAG_SPELLCHECK = "spellcheck"; 652 653 /** Write intermediate files to external storage */ 654 public static final String FLAG_DEBUG_MODE = "debug_mode"; 655 656 /** Fully automatic page segmentation. */ 657 public static final int PSM_AUTO = 0; 658 659 /** Assume a single column of text of variable sizes. */ 660 public static final int PSM_SINGLE_COLUMN = 1; 661 662 /** Assume a single uniform block of text. */ 663 public static final int PSM_SINGLE_BLOCK = 2; 664 665 /** Treat the image as a single text line. (Default) */ 666 public static final int PSM_SINGLE_LINE = 3; 667 668 /** Treat the image as a single word. */ 669 public static final int PSM_SINGLE_WORD = 4; 670 671 /** Treat the image as a single character. */ 672 public static final int PSM_SINGLE_CHAR = 5; 673 674 private static final int PSM_MODE_COUNT = 6; 675 676 private Bundle mVariables; 677 678 private Bundle mFlags; 679 680 private String mLanguage; 681 682 private int mPageSegMode; 683 684 /** 685 * Constructs a new Parameters object using the default values. 686 */ 687 public Parameters() { 688 mVariables = new Bundle(); 689 mFlags = new Bundle(); 690 mPageSegMode = PSM_SINGLE_LINE; 691 mLanguage = "eng"; 692 } 693 694 /** 695 * Sets the value of the variable identified by <code>key</code>. If the 696 * value is null, removes the variable. 697 * 698 * @param key The key that identifies the variable to set. 699 * @param value The String value to assign to the variable. 700 */ 701 public void setVariable(String key, String value) { 702 if (value == null) { 703 mVariables.remove(key); 704 } else { 705 mVariables.putString(key, value); 706 } 707 } 708 709 /** 710 * Returns the value of the variable identified by <code>key</code>, or 711 * <code>null</code> if it has not been set. 712 * 713 * @param key The key that identifies the variable to retrieve. 714 * @return The value of the variable or <code>null</code> if it has not 715 * been set. 716 */ 717 public String getVariable(String key) { 718 return mVariables.getString(key); 719 } 720 721 /** 722 * Returns the list of keys identifying variables that have been set. 723 * 724 * @return A set of Strings representing the variable keys that have 725 * been set. 726 */ 727 public Set<String> getVariableKeys() { 728 return mVariables.keySet(); 729 } 730 731 /** 732 * Sets the value of the flag identified by <code>key</code>. If the 733 * value is <code>null</code>, removes the flag. 734 * 735 * @param key The key that identifies the flag to set. 736 * @param value The boolean value to assign to the flag. 737 */ 738 public void setFlag(String key, boolean value) { 739 mFlags.putBoolean(key, value); 740 } 741 742 /** 743 * Returns the value of the flag identified by <code>key</code>. If 744 * <code>key</code> has not been set, returns <code>false</code>. 745 * 746 * @param key The key that identifies the flag to retrieve. 747 * @return The value of the flag or <code>false</code> if it has not 748 * been set. 749 */ 750 public boolean getFlag(String key) { 751 if (!mFlags.containsKey(key)) { 752 return false; 753 } else { 754 return mFlags.getBoolean(key); 755 } 756 } 757 758 /** 759 * Sets the language used by the OCR engine. Use 760 * Ocr.getAvailableLanguages() to retrieve the list of available 761 * languages. 762 * 763 * @param language A language present in Ocr.getAvailableLanguages(). 764 */ 765 public void setLanguage(Language language) { 766 mLanguage = language.iso_639_2; 767 } 768 769 /** 770 * Sets the language (as an ISO 639-2 code) used by the OCR engine. Use 771 * Ocr.getAvailableLanguages() to retrieve the list of available 772 * languages and Language.iso_639_2 to retrieve the ISO 639-2 code. If 773 * the specified language is not available, the OCR engine will default 774 * to English. 775 * 776 * @param language An ISO 639-2 code representing a supported language. 777 */ 778 public void setLanguage(String language) { 779 mLanguage = language; 780 } 781 782 /** 783 * Returns the ISO 639-2 code representing the current language that 784 * will be used by the OCR engine. 785 * 786 * @return The ISO 639-2 code representing the current languages used 787 * for OCR. 788 */ 789 public String getLanguage() { 790 return mLanguage; 791 } 792 793 /** 794 * Sets the page segmentation mode, which is used by the OCR engine to 795 * detect and group areas of text. See the Parameters.PSM_* constants 796 * for available values. 797 * 798 * @param pageSegMode A page segmentation mode from Parameters.PSM_* 799 * constants. 800 */ 801 public void setPageSegMode(int pageSegMode) { 802 if (pageSegMode < 0 || pageSegMode > PSM_MODE_COUNT) { 803 throw new IllegalArgumentException("Invalid page segmentation mode"); 804 } 805 806 mPageSegMode = pageSegMode; 807 } 808 809 /** 810 * Returns the current page segmentation mode as defined in 811 * Parameters.PSM_* constants. 812 * 813 * @return The current page segmentation mode. 814 */ 815 public int getPageSegMode() { 816 return mPageSegMode; 817 } 818 819 // ************************ 820 // * Parcelable functions * 821 // ************************ 822 823 private Parameters(Parcel src) { 824 readFromParcel(src); 825 } 826 827 @Override 828 public int describeContents() { 829 return 0; 830 } 831 832 @Override 833 public void writeToParcel(Parcel dest, int flags) { 834 dest.writeBundle(mVariables); 835 dest.writeBundle(mFlags); 836 dest.writeString(mLanguage); 837 } 838 839 private void readFromParcel(Parcel src) { 840 mVariables = src.readBundle(); 841 mFlags = src.readBundle(); 842 mLanguage = src.readString(); 843 } 844 845 public static final Parcelable.Creator<Parameters> CREATOR = new Parcelable.Creator<Parameters>() { 846 @Override 847 public Parameters createFromParcel(Parcel in) { 848 return new Parameters(in); 849 } 850 851 @Override 852 public Parameters[] newArray(int size) { 853 return new Parameters[size]; 854 } 855 }; 856 } 857}