/ocr/worldreader/src/com/google/marvin/worldreader/RecognizeActivity.java

http://eyes-free.googlecode.com/ · Java · 320 lines · 238 code · 47 blank · 35 comment · 31 complexity · b1dfffe39fb5d1d54cc64b851d34370d MD5 · raw file

  1. /*
  2. * Copyright (C) 2009 Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. package com.google.marvin.worldreader;
  17. import android.app.Activity;
  18. import android.content.Context;
  19. import android.content.Intent;
  20. import android.graphics.Bitmap;
  21. import android.graphics.BitmapFactory;
  22. import android.os.Bundle;
  23. import android.os.Handler;
  24. import android.os.Message;
  25. import android.speech.tts.TextToSpeech;
  26. import android.speech.tts.TextToSpeech.OnUtteranceCompletedListener;
  27. import android.util.Log;
  28. import android.view.Display;
  29. import android.view.KeyEvent;
  30. import android.view.View;
  31. import android.view.WindowManager;
  32. import android.widget.Button;
  33. import android.widget.ImageView;
  34. import android.widget.ProgressBar;
  35. import android.widget.TextView;
  36. import com.android.ocr.client.Config;
  37. import com.android.ocr.client.Intents;
  38. import com.android.ocr.client.Ocr;
  39. import com.android.ocr.client.Result;
  40. import com.android.ocr.client.StatusMonitor;
  41. import java.util.HashMap;
  42. /**
  43. * This activity runs text recognition and displays bounding box results. If the
  44. * OCR service fails or is missing, this activity will return null.
  45. *
  46. * Modified from com.google.marvin.ocr.intent.RecognizeActivity to speak results
  47. * out loud.
  48. *
  49. * @author alanv@google.com (Alan Viverette)
  50. */
  51. public class RecognizeActivity extends Activity implements Button.OnClickListener,
  52. OnUtteranceCompletedListener {
  53. private static final String TAG = "RecognizeActivity";
  54. private static final int ACTION_INITIALIZED = 0;
  55. private static final int ACTION_RESULT = 1;
  56. private static final int ACTION_RECOGNIZED = 2;
  57. private static final int ACTION_UPDATE = 3;
  58. private Ocr mOcr;
  59. private Bitmap mBitmap;
  60. private ImageView mImageView;
  61. private RectsView mOverlayView;
  62. private Button mCancel;
  63. private Config mConfig;
  64. private ProgressBar mProgress;
  65. private StatusMonitor mStatusMonitor;
  66. private TextToSpeech mTts;
  67. private boolean mOcrBusy;
  68. private final Handler mHandler = new Handler() {
  69. @Override
  70. public void handleMessage(Message message) {
  71. switch (message.what) {
  72. case ACTION_INITIALIZED: {
  73. if (message.arg1 == Ocr.STATUS_SUCCESS) {
  74. processConfig();
  75. } else {
  76. Log.e(TAG, "Ocr initialization failed");
  77. processResults(null);
  78. }
  79. break;
  80. }
  81. case ACTION_RESULT: {
  82. if (message.obj == null || message.obj instanceof Result) {
  83. processResult((Result) message.obj);
  84. }
  85. break;
  86. }
  87. case ACTION_RECOGNIZED: {
  88. mStatusMonitor.release();
  89. if (message.obj == null || message.obj instanceof Result[]) {
  90. processResults((Result[]) message.obj);
  91. }
  92. break;
  93. }
  94. case ACTION_UPDATE: {
  95. updateProgress((String) message.obj, message.arg1, message.arg2);
  96. break;
  97. }
  98. }
  99. }
  100. };
  101. @Override
  102. public void onCreate(Bundle savedInstanceState) {
  103. super.onCreate(savedInstanceState);
  104. setContentView(R.layout.recognize);
  105. mTts = ReaderActivity.mTts;
  106. mTts.setOnUtteranceCompletedListener(this);
  107. mProgress = (ProgressBar) findViewById(R.id.progress);
  108. mProgress.setIndeterminate(false);
  109. mImageView = (ImageView) findViewById(R.id.image);
  110. mOverlayView = (RectsView) findViewById(R.id.overlay);
  111. mCancel = (Button) findViewById(R.id.cancelOcr);
  112. mCancel.setOnClickListener(this);
  113. mConfig = (Config) getIntent().getParcelableExtra(Intents.Recognize.CONFIG);
  114. setBackground();
  115. Ocr.InitCallback onInit = new Ocr.InitCallback() {
  116. @Override
  117. public void onInitialized(int status) {
  118. Message msg = mHandler.obtainMessage(ACTION_INITIALIZED, status, 0);
  119. msg.sendToTarget();
  120. }
  121. };
  122. mOcr = new Ocr(this, onInit);
  123. mStatusMonitor = new StatusMonitor(mOcr, mHandler, ACTION_UPDATE, 500L);
  124. }
  125. @Override
  126. public boolean onKeyDown(int keyCode, KeyEvent event) {
  127. switch (keyCode) {
  128. case KeyEvent.KEYCODE_BACK: {
  129. if (mOcrBusy) {
  130. mOcr.stop();
  131. return true;
  132. } else {
  133. mTts.stop();
  134. }
  135. break;
  136. }
  137. }
  138. return false;
  139. }
  140. @Override
  141. public void onDestroy() {
  142. mBitmap.recycle();
  143. mOcr.release();
  144. mTts.setOnUtteranceCompletedListener(null);
  145. mTts.stop();
  146. super.onDestroy();
  147. }
  148. @Override
  149. public void onClick(View v) {
  150. if (v == mCancel) {
  151. KeyEvent event = new KeyEvent(KeyEvent.ACTION_DOWN, KeyEvent.KEYCODE_BACK);
  152. onKeyDown(KeyEvent.KEYCODE_BACK, event);
  153. }
  154. }
  155. private void processConfig() {
  156. Log.i(TAG, "Processing supplied configuration...");
  157. Ocr.CompletionCallback onCompleted = new Ocr.CompletionCallback() {
  158. @Override
  159. public void onCompleted(Result[] results) {
  160. Message msg = mHandler.obtainMessage(ACTION_RECOGNIZED, results);
  161. msg.sendToTarget();
  162. }
  163. };
  164. Ocr.ResultCallback onResult = new Ocr.ResultCallback() {
  165. @Override
  166. public void onResult(Result result) {
  167. Message msg = mHandler.obtainMessage(ACTION_RESULT, result);
  168. msg.sendToTarget();
  169. }
  170. };
  171. if (!mOcr.recognizeText(mConfig, onResult, onCompleted)) {
  172. Log.e(TAG, "Text recognition call failed");
  173. onCompleted.onCompleted(null);
  174. } else {
  175. mOcrBusy = true;
  176. mStatusMonitor.start();
  177. }
  178. }
  179. private void processResults(Result[] results) {
  180. if (results == null) {
  181. Log.e(TAG, "Received null results");
  182. setResult(RESULT_CANCELED);
  183. } else {
  184. Intent result = new Intent();
  185. result.setAction(Intents.Recognize.ACTION);
  186. result.putExtra(Intents.Recognize.RESULTS, results);
  187. setResult(RESULT_OK, result);
  188. Log.e(TAG, "Set OUT_RESULTS to array with length " + results.length);
  189. for (Result res : results) {
  190. Log.e(TAG, " Result: " + res.getString());
  191. }
  192. Log.e(TAG, "Confirm contains " + result.getExtras().size() + " extras");
  193. }
  194. HashMap<String, String> params = new HashMap<String, String>();
  195. params.put("utteranceId", TAG);
  196. params.put("utterance_id", TAG);
  197. params.put("utterance-id", TAG);
  198. mOcrBusy = false;
  199. mTts.speak("end", TextToSpeech.QUEUE_ADD, params);
  200. }
  201. /**
  202. * Speak the text and draw the bounding box of a single result.
  203. *
  204. * @param result
  205. */
  206. private void processResult(Result result) {
  207. String str = postProcess(result.getString());
  208. mTts.speak(str, TextToSpeech.QUEUE_ADD, null);
  209. mOverlayView.addRect(result.getBounds());
  210. }
  211. private void setBackground() {
  212. byte[] image = mConfig.image;
  213. WindowManager manager = (WindowManager) getSystemService(Context.WINDOW_SERVICE);
  214. Display display = manager.getDefaultDisplay();
  215. int width = display.getWidth();
  216. int scale = Integer.highestOneBit(mConfig.width / width);
  217. BitmapFactory.Options opts = new BitmapFactory.Options();
  218. opts.inSampleSize = Math.max(1, scale);
  219. mBitmap = BitmapFactory.decodeByteArray(image, 0, image.length, opts);
  220. mImageView.setImageBitmap(mBitmap);
  221. mOverlayView.setScaling(mConfig.width, mConfig.height, display.getWidth(), display.getHeight());
  222. }
  223. private void updateProgress(String status, int current, int max) {
  224. if (current < 0 || max <= 0) {
  225. return;
  226. }
  227. ProgressBar progress = (ProgressBar) findViewById(R.id.progress);
  228. TextView txtPercent = (TextView) findViewById(R.id.progress_percent);
  229. TextView txtNumber = (TextView) findViewById(R.id.progress_number);
  230. int intPercent = 100 * current / max;
  231. String strPercent = getString(R.string.percent, intPercent);
  232. String strNumber = getString(R.string.ratio, current, max);
  233. progress.setMax(max);
  234. progress.setProgress(current);
  235. txtPercent.setText(strPercent);
  236. txtNumber.setText(strNumber);
  237. progress.postInvalidate();
  238. txtPercent.postInvalidate();
  239. txtNumber.postInvalidate();
  240. }
  241. /**
  242. * Removes words that consist of more than 1/3 non-word characters.
  243. *
  244. * @param text the text to process
  245. * @return the processed text
  246. */
  247. private String postProcess(String text) {
  248. String[] input = text.split(" ");
  249. String output = "";
  250. for (int i = 0; i < input.length; i++) {
  251. if (input[i].length() <= 0) {
  252. continue;
  253. }
  254. int letterCount = 0;
  255. for (int j = 0; j < input[i].length(); j++) {
  256. char chr = input[i].charAt(j);
  257. if (chr == '\n' || Character.isLetterOrDigit(chr)) {
  258. letterCount++;
  259. }
  260. }
  261. if (10 * letterCount / input[i].length() > 6) {
  262. output += input[i] + " ";
  263. }
  264. }
  265. return output;
  266. }
  267. @Override
  268. public void onUtteranceCompleted(String utteranceId) {
  269. if (utteranceId.equals(TAG)) {
  270. finish();
  271. }
  272. }
  273. }