PageRenderTime 55ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/TextExtractors/BaseTextExtractor.cs

#
C# | 177 lines | 125 code | 4 blank | 48 comment | 1 complexity | 2b45b26d1da9e3e96df871aedd9db41c MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. namespace TextExtractors
  5. {
  6. //public interface IFileReader
  7. //{
  8. // // Methods
  9. // string GetTextFromFile(string fileName);
  10. // string FileDescription
  11. // {
  12. // get;
  13. // }
  14. // string FileExtension
  15. // {
  16. // get;
  17. // }
  18. // event EventHandler ProgressChanged;
  19. //}
  /// <summary>
  /// Abstract base type that exposes the <see cref="CharFromByte"/> helper
  /// to derived classes.
  /// </summary>
  public abstract class BaseTextExtractor
  {
      // Bounds of the byte range that is translated through the lookup table.
      private const byte RemappedFirst = 0x80;
      private const byte RemappedLast = 0x9F;

      // Code points for bytes 0x80..0x9F, indexed by (byte - 0x80).
      // Slots 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no dedicated mapping and
      // therefore hold the byte's own numeric value.
      private static readonly char[] RemappedChars =
      {
          '\u20AC', // 0x80 (128) euro sign
          '\u0081', // 0x81 (129) passed through unchanged
          '\u201A', // 0x82 (130) single low-9 quotation mark
          '\u0192', // 0x83 (131) latin small f with hook
          '\u201E', // 0x84 (132) double low-9 quotation mark
          '\u2026', // 0x85 (133) horizontal ellipsis
          '\u2020', // 0x86 (134) dagger
          '\u2021', // 0x87 (135) double dagger
          '\u02C6', // 0x88 (136) modifier letter circumflex accent
          '\u2030', // 0x89 (137) per mille sign
          '\u0160', // 0x8A (138) latin capital S with caron
          '\u2039', // 0x8B (139) single left-pointing angle quotation mark
          '\u0152', // 0x8C (140) latin capital ligature OE
          '\u008D', // 0x8D (141) passed through unchanged
          '\u017D', // 0x8E (142) latin capital Z with caron
          '\u008F', // 0x8F (143) passed through unchanged
          '\u0090', // 0x90 (144) passed through unchanged
          '\u2018', // 0x91 (145) left single quotation mark
          '\u2019', // 0x92 (146) right single quotation mark
          '\u201C', // 0x93 (147) left double quotation mark
          '\u201D', // 0x94 (148) right double quotation mark
          '\u2022', // 0x95 (149) bullet
          '\u2013', // 0x96 (150) en dash
          '\u2014', // 0x97 (151) em dash
          '\u02DC', // 0x98 (152) small tilde
          '\u2122', // 0x99 (153) trade mark sign
          '\u0161', // 0x9A (154) latin small s with caron
          '\u203A', // 0x9B (155) single right-pointing angle quotation mark
          '\u0153', // 0x9C (156) latin small ligature oe
          '\u009D', // 0x9D (157) passed through unchanged
          '\u017E', // 0x9E (158) latin small z with caron
          '\u0178'  // 0x9F (159) latin capital Y with diaeresis
      };

      /// <summary>
      /// Converts a single byte to a <see cref="char"/>, translating the
      /// Windows-1252 bytes in the 0x80..0x9F range that do not share their
      /// numeric value with the matching Unicode code point.
      /// </summary>
      /// <param name="iValue">The byte to convert.</param>
      /// <returns>
      /// The mapped character for the translated bytes; for every other byte,
      /// the character whose code equals the byte value. Control characters
      /// are not filtered out.
      /// </returns>
      protected static char CharFromByte(byte iValue)
      {
          // Everything outside 0x80..0x9F keeps its numeric value.
          if (iValue < RemappedFirst || iValue > RemappedLast)
          {
              return (char)iValue;
          }

          return RemappedChars[iValue - RemappedFirst];
      }
  }
  173. }