Rfc822Tokenizer.java - This Java code is a tokenizer for pa…

/core/java/android/text/util/Rfc822Tokenizer.java

http://github.com/android/platform_frameworks_base · Java · 314 lines · 214 code · 40 blank · 60 comment · 107 complexity · 1bc13ed944b46c2bd0ad9fc8fa1be350 MD5 · raw file

/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text.util;

import android.widget.MultiAutoCompleteTextView;

import java.util.ArrayList;
import java.util.Collection;

/**
 * This class works as a Tokenizer for MultiAutoCompleteTextView for
 * address list fields, and also provides a method for converting
 * a string of addresses (such as might be typed into such a field)
 * into a series of Rfc822Tokens.
 */
public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer {

    /**
     * This method will try to take a string like
     * "Foo Bar (something) &lt;foo\@google.com&gt;,
     * blah\@google.com (something)"
     * and convert it into one or more Rfc822Tokens, output into the supplied
     * collection.
     *
     * It does *not* decode MIME encoded-words; charset conversion
     * must already have taken place if necessary.
     * It will try to be tolerant of broken syntax instead of
     * returning an error.
     *
     * Addresses are separated by ',' or ';'. Text inside double quotes is
     * added to the name, text inside parentheses to the comment, and text
     * inside angle brackets to the address. If an entry has no
     * angle-bracketed address, its name text is used as the address and the
     * token's name is null. Entries with neither name nor address are
     * skipped.
     *
     * @param text the address list to parse
     * @param out the collection that receives the parsed tokens, in order
     */
    public static void tokenize(CharSequence text, Collection<Rfc822Token> out) {
        StringBuilder name = new StringBuilder();
        StringBuilder address = new StringBuilder();
        StringBuilder comment = new StringBuilder();

        int i = 0;
        int length = text.length();

        while (i < length) {
            char c = text.charAt(i);

            if (c == ',' || c == ';') {
                // End of one address: skip the separator and the spaces
                // that follow it, then emit what has been collected.
                i++;

                while (i < length && text.charAt(i) == ' ') {
                    i++;
                }

                emitToken(name, address, comment, out);

                name.setLength(0);
                address.setLength(0);
                comment.setLength(0);
            } else if (c == '"') {
                // Quoted string: contributes to the name verbatim
                // (including real spaces) until the closing quote.
                i++;

                while (i < length) {
                    c = text.charAt(i);

                    if (c == '"') {
                        i++;
                        break;
                    } else if (c == '\\') {
                        // Backslash escapes the next character; a trailing
                        // backslash with nothing after it is dropped.
                        if (i + 1 < length) {
                            name.append(text.charAt(i + 1));
                        }
                        i += 2;
                    } else {
                        name.append(c);
                        i++;
                    }
                }
            } else if (c == '(') {
                // Comment: the outermost parentheses are dropped, nested
                // ones are kept as part of the comment text.
                int level = 1;
                i++;

                while (i < length && level > 0) {
                    c = text.charAt(i);

                    if (c == ')') {
                        if (level > 1) {
                            comment.append(c);
                        }

                        level--;
                        i++;
                    } else if (c == '(') {
                        comment.append(c);
                        level++;
                        i++;
                    } else if (c == '\\') {
                        if (i + 1 < length) {
                            comment.append(text.charAt(i + 1));
                        }
                        i += 2;
                    } else {
                        comment.append(c);
                        i++;
                    }
                }
            } else if (c == '<') {
                // Angle-bracketed address: everything up to '>' (or the end
                // of the text if it is never closed).
                i++;

                while (i < length) {
                    c = text.charAt(i);

                    if (c == '>') {
                        i++;
                        break;
                    } else {
                        address.append(c);
                        i++;
                    }
                }
            } else if (c == ' ') {
                // Unquoted spaces are recorded as NUL placeholders so that
                // crunch() can tell them apart from quoted spaces.
                name.append('\0');
                i++;
            } else {
                name.append(c);
                i++;
            }
        }

        // Emit whatever is left after the last separator.
        emitToken(name, address, comment, out);
    }

    /**
     * Normalizes the collected name and appends a token to {@code out} if
     * there is anything to report. When there is no explicit address, the
     * name text is used as the address and the token's name is null.
     */
    private static void emitToken(StringBuilder name, StringBuilder address,
                                  StringBuilder comment,
                                  Collection<Rfc822Token> out) {
        crunch(name);

        if (address.length() > 0) {
            out.add(new Rfc822Token(name.toString(),
                                    address.toString(),
                                    comment.toString()));
        } else if (name.length() > 0) {
            out.add(new Rfc822Token(null,
                                    name.toString(),
                                    comment.toString()));
        }
    }

    /**
     * This method will try to take a string like
     * "Foo Bar (something) &lt;foo\@google.com&gt;,
     * blah\@google.com (something)"
     * and convert it into one or more Rfc822Tokens.
     * It does *not* decode MIME encoded-words; charset conversion
     * must already have taken place if necessary.
     * It will try to be tolerant of broken syntax instead of
     * returning an error.
     *
     * @param text the address list to parse
     * @return the parsed tokens, in the order they appear in {@code text}
     */
    public static Rfc822Token[] tokenize(CharSequence text) {
        ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>();
        tokenize(text, out);
        return out.toArray(new Rfc822Token[out.size()]);
    }

    /**
     * Collapses the NUL placeholders that tokenize() stores for unquoted
     * spaces. Each run of placeholders becomes a single space, except that
     * the run is removed entirely when it is at the start or end of the
     * buffer or is adjacent to a real (quoted) space.
     *
     * The buffer is compacted in place in a single pass.
     */
    private static void crunch(StringBuilder sb) {
        final int len = sb.length();
        int read = 0;
        int write = 0; // Invariant: write <= read

        while (read < len) {
            char c = sb.charAt(read);

            if (c != '\0') {
                sb.setCharAt(write++, c);
                read++;
                continue;
            }

            // Find the end of this run of placeholders.
            int runEnd = read + 1;
            while (runEnd < len && sb.charAt(runEnd) == '\0') {
                runEnd++;
            }

            // The character before a run is never a placeholder, so it has
            // been copied unchanged and sits at write - 1.
            boolean keepOneSpace = write > 0
                    && runEnd < len
                    && sb.charAt(write - 1) != ' '
                    && sb.charAt(runEnd) != ' ';

            if (keepOneSpace) {
                sb.setCharAt(write++, ' ');
            }

            read = runEnd;
        }

        sb.setLength(write);
    }

    /**
     * {@inheritDoc}
     *
     * The start is found by scanning forward from the beginning of the text
     * with {@link #findTokenEnd}, remembering the position after the last
     * separator (and the spaces following it) that lies before the cursor.
     */
    public int findTokenStart(CharSequence text, int cursor) {
        /*
         * It's hard to search backward, so search forward until
         * we reach the cursor.
         */

        int best = 0;
        int i = 0;

        while (i < cursor) {
            i = findTokenEnd(text, i);

            if (i < cursor) {
                i++; // Skip terminating punctuation

                while (i < cursor && text.charAt(i) == ' ') {
                    i++;
                }

                // Only move the start if there is text between it and the
                // cursor; otherwise keep the previous token's start.
                if (i < cursor) {
                    best = i;
                }
            }
        }

        return best;
    }

    /**
     * {@inheritDoc}
     *
     * Returns the index of the first ',' or ';' at or after the cursor that
     * is not inside a quoted string, a parenthesized comment, or an
     * angle-bracketed address, or the length of the text if there is none.
     */
    public int findTokenEnd(CharSequence text, int cursor) {
        int len = text.length();
        int i = cursor;

        while (i < len) {
            char c = text.charAt(i);

            if (c == ',' || c == ';') {
                return i;
            } else if (c == '"') {
                // Skip a quoted string, honoring backslash escapes.
                i++;

                while (i < len) {
                    c = text.charAt(i);

                    if (c == '"') {
                        i++;
                        break;
                    } else if (c == '\\' && i + 1 < len) {
                        i += 2;
                    } else {
                        i++;
                    }
                }
            } else if (c == '(') {
                // Skip a (possibly nested) comment, honoring backslash
                // escapes.
                int level = 1;
                i++;

                while (i < len && level > 0) {
                    c = text.charAt(i);

                    if (c == ')') {
                        level--;
                        i++;
                    } else if (c == '(') {
                        level++;
                        i++;
                    } else if (c == '\\' && i + 1 < len) {
                        i += 2;
                    } else {
                        i++;
                    }
                }
            } else if (c == '<') {
                // Skip an angle-bracketed address.
                i++;

                while (i < len) {
                    c = text.charAt(i);

                    if (c == '>') {
                        i++;
                        break;
                    } else {
                        i++;
                    }
                }
            } else {
                i++;
            }
        }

        return i;
    }

    /**
     * Terminates the specified address with a comma and space.
     * This assumes that the specified text already has valid syntax.
     * The Adapter subclass's convertToString() method must make that
     * guarantee.
     */
    public CharSequence terminateToken(CharSequence text) {
        return text + ", ";
    }
}
Summary ✨

This Java code is a tokenizer for parsing and formatting email addresses according to the RFC822 standard. It takes a string input, breaks it down into individual address components (name, address, comment), and returns them as Rfc822Tokens. The tokenizer handles various edge cases, such as escaped characters and broken syntax, to provide a more robust parsing experience.
Tech Fingerprint

Standard Collections
Alerts (6)

Complexity hotspot; lines 184 to 189 (total complexity: 10)
184 185 186 187 188 189