PageRenderTime 17ms CodeModel.GetById 11ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 1ms

/TextExtractors/BaseTextExtractor.cs

#
C# | 177 lines | 125 code | 4 blank | 48 comment | 1 complexity | 2b45b26d1da9e3e96df871aedd9db41c MD5 | raw file
  1using System;
  2using System.Collections.Generic;
  3using System.Text;
  4
  5namespace TextExtractors
  6{
  7    //public interface IFileReader
  8    //{
  9    //    // Methods
 10    //    string GetTextFromFile(string fileName);
 11    //    string FileDescription
 12    //    {
 13    //        get;
 14    //    }
 15    //    string FileExtension
 16    //    {
 17    //        get;
 18    //    }
 19    //    event EventHandler ProgressChanged;
 20    //}
 21
 22    abstract public class BaseTextExtractor 
 23    {
 24
 25        protected static char CharFromByte(byte iValue) // Convert non-ANSI Windows-1252 characters
 26        {
 27
 28            switch (iValue)
 29            {
 30                case 128: // [€]
 31                    {
 32                        return (char)(0x20ac);
 33                    }
 34                //ORIGINAL LINE: Case 130 // [‚]
 35                case 130: // [‚]
 36                    {
 37                        return (char)0x201A;// (8218);
 38                    }
 39                //ORIGINAL LINE: Case 131 // [ƒ]
 40                case 131: // [ƒ]
 41                    {
 42                        return (char)(402);
 43                    }
 44                //ORIGINAL LINE: Case 132 // [„]
 45                case 132: // [„]
 46                    {
 47                        return (char)(8222);
 48                    }
 49                //ORIGINAL LINE: Case 133 // […]
 50                case 133: // […]
 51                    {
 52                        return (char)(8230);
 53                    }
 54                //ORIGINAL LINE: Case 134 // [†]
 55                case 134: // [†]
 56                    {
 57                        return (char)(8224);
 58                    }
 59                //ORIGINAL LINE: Case 135 // [‡]
 60                case 135: // [‡]
 61                    {
 62                        return (char)(8225);
 63                    }
 64                //ORIGINAL LINE: Case 136 // [ˆ]
 65                case 136: // [ˆ]
 66                    {
 67                        return (char)(710);
 68                    }
 69                //ORIGINAL LINE: Case 137 // [‰]
 70                case 137: // [‰]
 71                    {
 72                        return (char)(8240);
 73                    }
 74                //ORIGINAL LINE: Case 138 // [Š]
 75                case 138: // [Š]
 76                    {
 77                        return (char)(352);
 78                    }
 79                //ORIGINAL LINE: Case 139 // [‹]
 80                case 139: // [‹]
 81                    {
 82                        return (char)(8249);
 83                    }
 84                //ORIGINAL LINE: Case 140 // [Œ]
 85                case 140: // [Œ]
 86                    {
 87                        return (char)(338);
 88                    }
 89                //ORIGINAL LINE: Case 142 // [Ž]
 90                case 142: // [Ž]
 91                    {
 92                        return (char)(381);
 93                    }
 94                //ORIGINAL LINE: Case 145 // [‘]
 95                case 145: // [‘]
 96                    {
 97                        return (char)(8216);
 98                    }
 99                //ORIGINAL LINE: Case 146 // [’]
100                case 146: // [’]
101                    {
102                        return (char)(8217);
103                    }
104                //ORIGINAL LINE: Case 147 // ["]
105                case 147: // ["]
106                    {
107                        return (char)(8220);
108                    }
109                //ORIGINAL LINE: Case 148 // ["]
110                case 148: // ["]
111                    {
112                        return (char)(8221);
113                    }
114                //ORIGINAL LINE: Case 149 // [•]
115                case 149: // [•]
116                    {
117                        return (char)(8226);
118                    }
119                //ORIGINAL LINE: Case 150 // [–]
120                case 150: // [–]
121                    {
122                        return (char)(8211);
123                    }
124                //ORIGINAL LINE: Case 151 // [—]
125                case 151: // [—]
126                    {
127                        return (char)(8212);
128                    }
129                //ORIGINAL LINE: Case 152 // [˜]
130                case 152: // [˜]
131                    {
132                        return (char)(732);
133                    }
134                //ORIGINAL LINE: Case 153 // [™]
135                case 153: // [™]
136                    {
137                        return (char)(8482);
138                    }
139                //ORIGINAL LINE: Case 154 // [š]
140                case 154: // [š]
141                    {
142                        return (char)(353);
143                    }
144                //ORIGINAL LINE: Case 155 // [›]
145                case 155: // [›]
146                    {
147                        return (char)(8250);
148                    }
149                //ORIGINAL LINE: Case 156 // [œ]
150                case 156: // [œ]
151                    {
152                        return (char)(339);
153                    }
154                //ORIGINAL LINE: Case 158 // [ž]
155                case 158: // [ž]
156                    {
157                        return (char)(382);
158                    }
159                //ORIGINAL LINE: Case 159 // [Ÿ]
160                case 159: // [Ÿ]
161                    {
162                        return (char)(376);
163                    }
164                default:
165                    return (char)iValue;
166            }
167            //ORIGINAL LINE: Case 9, 10, 11, 13, Is > 31 // Ignore unprintable chars
168            //else if ((iValue == 9) || (iValue == 10) || (iValue == 11) || (iValue == 13) || (iValue > 31)) // Ignore unprintable chars
169            //{
170            //    return (char)(iValue);
171            //}
172            //else
173            //    return (char)iValue;
174            //}
175        }
176    }
177}