/jEdit/branches/new_bufferset_api/org/gjt/sp/jedit/io/RegexEncodingDetector.java

# · Java · 100 lines · 50 code · 7 blank · 43 comment · 3 complexity · e576b0156b985016b829d868faa89e55 MD5 · raw file

  1. /*
  2. * :tabSize=8:indentSize=8:noTabs=false:
  3. * :folding=explicit:collapseFolds=1:
  4. *
  5. * Copyright (C) 2008 Kazutoshi Satoda
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or any later version.
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19. */
  20. package org.gjt.sp.jedit.io;
  21. import java.io.InputStream;
  22. import java.io.InputStreamReader;
  23. import java.io.IOException;
  24. import java.util.regex.Pattern;
  25. import java.util.regex.Matcher;
  26. import java.nio.CharBuffer;
  27. /**
  28. * An encoding detector which finds regex pattern.
  29. *
  30. * This reads the sample in the system default encoding for first some
  31. * lines and look for a regex pattern. This can fail if the
  32. * stream cannot be read in the system default encoding or the
  33. * pattern is not found at near the top of the stream.
  34. *
  35. * @since 4.3pre16
  36. * @author Kazutoshi Satoda
  37. */
  38. public class RegexEncodingDetector implements EncodingDetector
  39. {
  40. /**
  41. * A regex pattern matches to "Charset names" specified for
  42. * java.nio.charset.Charset.
  43. * @see <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html#names">Charset names</a>
  44. */
  45. public static final String VALID_ENCODING_PATTERN
  46. = "\\p{Alnum}[\\p{Alnum}\\-.:_]*";
  47. private final Pattern pattern;
  48. private final String replacement;
  49. public RegexEncodingDetector(String pattern, String replacement)
  50. {
  51. this.pattern = Pattern.compile(pattern);
  52. this.replacement = replacement;
  53. }
  54. public String detectEncoding(InputStream sample) throws IOException
  55. {
  56. InputStreamReader reader = new InputStreamReader(sample);
  57. final int bufferSize = 1024;
  58. char[] buffer = new char[bufferSize];
  59. int readSize = reader.read(buffer, 0, bufferSize);
  60. if (readSize > 0)
  61. {
  62. Matcher matcher = pattern.matcher(
  63. CharBuffer.wrap(buffer, 0, readSize));
  64. while (matcher.find())
  65. {
  66. String extracted = extractReplacement(
  67. matcher, replacement);
  68. if (EncodingServer.hasEncoding(extracted))
  69. {
  70. return extracted;
  71. }
  72. }
  73. }
  74. return null;
  75. }
  76. /**
  77. * Returns a replaced string for a Matcher which has been matched
  78. * by find() method.
  79. */
  80. private static String extractReplacement(Matcher found, String replacement)
  81. {
  82. /*
  83. * It doesn't make sense to read before start, but
  84. * appendReplacement() requires to to it.
  85. */
  86. int found_start = found.start();
  87. int found_end = found.end();
  88. int source_length = found_end - found_start;
  89. StringBuffer replaced = new StringBuffer(found_start + (source_length * 2));
  90. found.appendReplacement(replaced, replacement);
  91. return replaced.substring(found_start);
  92. }
  93. }