/TextExtractors/BaseTextExtractor.cs
C# | 177 lines | 125 code | 4 blank | 48 comment | 1 complexity | 2b45b26d1da9e3e96df871aedd9db41c MD5 | raw file
- using System;
- using System.Collections.Generic;
- using System.Text;
-
- namespace TextExtractors
- {
- //public interface IFileReader
- //{
- // // Methods
- // string GetTextFromFile(string fileName);
- // string FileDescription
- // {
- // get;
- // }
- // string FileExtension
- // {
- // get;
- // }
- // event EventHandler ProgressChanged;
- //}
-
/// <summary>
/// Base class for text extractors. Provides a shared helper that converts
/// a single byte into the character it represents.
/// </summary>
public abstract class BaseTextExtractor
{
    // First and last byte values covered by the lookup table below.
    private const int HighRangeStart = 0x80;
    private const int HighRangeEnd = 0x9F;

    // Characters for bytes 0x80..0x9F, indexed by (byte value - 0x80).
    // Slots for bytes that have no dedicated mapping (0x81, 0x8D, 0x8F,
    // 0x90, 0x9D) hold the code point equal to the byte value, so those
    // bytes come back unchanged.
    private static readonly char[] HighRangeMap =
    {
        '\u20AC', // 0x80 (128) [€]
        '\u0081', // 0x81 (129) no mapping, passed through
        '\u201A', // 0x82 (130) [‚]
        '\u0192', // 0x83 (131) [ƒ]
        '\u201E', // 0x84 (132) [„]
        '\u2026', // 0x85 (133) […]
        '\u2020', // 0x86 (134) [†]
        '\u2021', // 0x87 (135) [‡]
        '\u02C6', // 0x88 (136) [ˆ]
        '\u2030', // 0x89 (137) [‰]
        '\u0160', // 0x8A (138) [Š]
        '\u2039', // 0x8B (139) [‹]
        '\u0152', // 0x8C (140) [Œ]
        '\u008D', // 0x8D (141) no mapping, passed through
        '\u017D', // 0x8E (142) [Ž]
        '\u008F', // 0x8F (143) no mapping, passed through
        '\u0090', // 0x90 (144) no mapping, passed through
        '\u2018', // 0x91 (145) [‘]
        '\u2019', // 0x92 (146) [’]
        '\u201C', // 0x93 (147) [“]
        '\u201D', // 0x94 (148) [”]
        '\u2022', // 0x95 (149) [•]
        '\u2013', // 0x96 (150) [–]
        '\u2014', // 0x97 (151) [—]
        '\u02DC', // 0x98 (152) [˜]
        '\u2122', // 0x99 (153) [™]
        '\u0161', // 0x9A (154) [š]
        '\u203A', // 0x9B (155) [›]
        '\u0153', // 0x9C (156) [œ]
        '\u009D', // 0x9D (157) no mapping, passed through
        '\u017E', // 0x9E (158) [ž]
        '\u0178'  // 0x9F (159) [Ÿ]
    };

    /// <summary>
    /// Converts one byte to a character, translating the non-ANSI
    /// Windows-1252 byte values (128-159) to their Unicode characters.
    /// </summary>
    /// <param name="iValue">The byte value to convert.</param>
    /// <returns>
    /// The mapped character for the listed values in the 128-159 range;
    /// for every other value, the character whose code point equals the
    /// byte value. Unprintable control bytes are not filtered out.
    /// </returns>
    protected static char CharFromByte(byte iValue)
    {
        // Everything outside 0x80..0x9F maps one-to-one onto the same code point.
        if (iValue < HighRangeStart || iValue > HighRangeEnd)
        {
            return (char)iValue;
        }

        return HighRangeMap[iValue - HighRangeStart];
    }
}
- }