/jEdit/branches/new_bufferset_api/org/gjt/sp/jedit/io/RegexEncodingDetector.java
# · Java · 100 lines · 50 code · 7 blank · 43 comment · 3 complexity · e576b0156b985016b829d868faa89e55 MD5 · raw file
- /*
- * :tabSize=8:indentSize=8:noTabs=false:
- * :folding=explicit:collapseFolds=1:
- *
- * Copyright (C) 2008 Kazutoshi Satoda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
- package org.gjt.sp.jedit.io;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.io.IOException;
- import java.util.regex.Pattern;
- import java.util.regex.Matcher;
- import java.nio.CharBuffer;
- /**
- * An encoding detector which finds regex pattern.
- *
- * This reads the sample in the system default encoding for first some
- * lines and look for a regex pattern. This can fail if the
- * stream cannot be read in the system default encoding or the
- * pattern is not found at near the top of the stream.
- *
- * @since 4.3pre16
- * @author Kazutoshi Satoda
- */
- public class RegexEncodingDetector implements EncodingDetector
- {
- /**
- * A regex pattern matches to "Charset names" specified for
- * java.nio.charset.Charset.
- * @see <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html#names">Charset names</a>
- */
- public static final String VALID_ENCODING_PATTERN
- = "\\p{Alnum}[\\p{Alnum}\\-.:_]*";
- private final Pattern pattern;
- private final String replacement;
- public RegexEncodingDetector(String pattern, String replacement)
- {
- this.pattern = Pattern.compile(pattern);
- this.replacement = replacement;
- }
- public String detectEncoding(InputStream sample) throws IOException
- {
- InputStreamReader reader = new InputStreamReader(sample);
- final int bufferSize = 1024;
- char[] buffer = new char[bufferSize];
- int readSize = reader.read(buffer, 0, bufferSize);
- if (readSize > 0)
- {
- Matcher matcher = pattern.matcher(
- CharBuffer.wrap(buffer, 0, readSize));
- while (matcher.find())
- {
- String extracted = extractReplacement(
- matcher, replacement);
- if (EncodingServer.hasEncoding(extracted))
- {
- return extracted;
- }
- }
- }
- return null;
- }
- /**
- * Returns a replaced string for a Matcher which has been matched
- * by find() method.
- */
- private static String extractReplacement(Matcher found, String replacement)
- {
- /*
- * It doesn't make sense to read before start, but
- * appendReplacement() requires to to it.
- */
- int found_start = found.start();
- int found_end = found.end();
- int source_length = found_end - found_start;
- StringBuffer replaced = new StringBuffer(found_start + (source_length * 2));
- found.appendReplacement(replaced, replacement);
- return replaced.substring(found_start);
- }
- }