/jodd-mail/src/main/java/jodd/mail/EmailAddress.java
Java | 300 lines | 175 code | 7 blank | 118 comment | 0 complexity | ba77fd843320ef8876e9a31044fa200d MD5 | raw file
- // Copyright (c) 2003-2012, Jodd Team (jodd.org). All Rights Reserved.
- package jodd.mail;
- import java.util.regex.Pattern;
- /**
- * A utility class to parse, clean up, and extract email addresses from messages
- * per RFC2822 syntax. Designed to integrate with Javamail (this class will require that you
- * have a javamail mail.jar in your classpath), but you could easily change
- * the existing methods around to not use Javamail at all. For example, if you're changing
- * the code, see the difference between getInternetAddress and getDomain: the latter doesn't
- * depend on any javamail code. This is all a by-product of what this class was written for,
- * so feel free to modify it to suit your needs.
- * <p>
- * For real-world addresses, this class is roughly 3-4 times slower than parsing with
- * InternetAddress, but
- * it can handle a whole lot more. Because of sensible design tradeoffs made in javamail, if
- * InternetAddress has trouble parsing,
- * it might throw an exception, but often it will silently leave the entire original string
- * in the result of ia.getAddress(). This class can be trusted to only provide validated
- * results.
- * <p>
- * This class has been tested on a few thousand real-world addresses, and is live in
- * production environments, but you may want to do some of your own testing to ensure
- * that it works for you. In other words, it's not beta, but it's not guaranteed yet.
- * <p>
- * Comments/Questions/Corrections welcome: java &lt;at&gt; caseyconnor.org
- * <p>
- * Started with code by Les Hazlewood:
- * <a href="http://www.leshazlewood.com">leshazlewood.com</a>.
- * <p>
- * Modified/added: removed some functions, added support for CFWS token,
- * corrected FWSP token, added some boolean flags, added getInternetAddress and
- * extractHeaderAddresses and other methods, some optimization.
- * <p>
- * Where Mr. Hazlewood's version was more for ensuring certain forms that were passed in during
- * registrations, etc, this handles more types of verifying as well as a few forms of extracting
- * the data in predictable, cleaned-up chunks.
- * <p>
- * Note: CFWS means the "comment folded whitespace" token from 2822, in other words,
- * whitespace and comment text that is enclosed in ()'s.
- * <p>
- * <b>Limitations</b>: doesn't support nested CFWS (comments within (other) comments), doesn't
- * support mailbox groups except when flat-extracting addresses from headers or when doing
- * verification, doesn't support
- * any of the obs-* tokens. Also: the getInternetAddress and
- * extractHeaderAddresses methods return InternetAddress objects; if the personal name has
- * any quotes or \'s in it at all, the InternetAddress object will always
- * escape the name entirely and put it in quotes, so
- * multiple-token personal names with those characters somewhere in them will always be munged
- * into one big escaped string. This is not really a big deal at all, but I mention it anyway.
- * (And you could get around it by a simple modification to those methods to not use
- * InternetAddress objects.) See the docs of those methods for more info.
- * <p>
- * Note: This does not do any header-length-checking. There are no such limitations on the
- * email address grammar in 2822, though email headers in general do have length restrictions.
- * So if the return path
- * is 40000 unfolded characters long, but otherwise valid under 2822, this class will pass it.
- * <p>
- * Examples of passing (2822-valid) addresses, believe it or not:
- * <p>
- * <tt>bob @example.com</tt>
- * <BR><tt>"bob" @ example.com</tt>
- * <BR><tt>bob (comment) (other comment) @example.com (personal name)</tt>
- * <BR><tt>"&lt;bob \" (here) " &lt; (hi there) "bob(the man)smith" (hi) @ (there) example.com (hello) &gt; (again)</tt>
- * <p>
- * (none of which are permitted by javamail, incidentally)
- * <p>
- * By using getInternetAddress(), you can retrieve an InternetAddress object that, when
- * toString()'ed, would reveal that the parser had converted the above into:
- * <p>
- * <tt>&lt;bob@example.com&gt;</tt>
- * <BR><tt>&lt;bob@example.com&gt;</tt>
- * <BR><tt>"personal name" &lt;bob@example.com&gt;</tt>
- * <BR><tt>"&lt;bob \" (here)" &lt;"bob(the man)smith"@example.com&gt;</tt>
- * <P>(respectively)
- * <P>If parsing headers, however, you'll probably be calling extractHeaderAddresses().
- * <p>
- * A future improvement may be to use this class to extract info from corrupted
- * addresses, but for now, it does not permit them.
- * <p>
- * <b>Some of the configuration booleans allow a bit of tweaking
- * already. The source code can be compiled with these booleans in various
- * states. They are configured to what is probably the most commonly-useful state.</b>
- *
- * @author Les Hazlewood, Casey Connor, Igor Spasic
- */
public class EmailAddress {

    /**
     * Whether domain literals are accepted in the domain part of an address, e.g.
     * {@code someone@[192.168.1.100]}, {@code john.doe@[23:33:A2:22:16:1F]} or
     * {@code me@[my computer]}.
     * <p>
     * RFC 2822 permits these, but many applications prefer to reject them. When set to
     * {@code false}, only RFC 1035 style domain names ({@code x.y.z.com}) are accepted.
     * <p>
     * Defaults to {@code true} to remain RFC 2822 compliant.
     */
    private static final boolean ALLOW_DOMAIN_LITERALS = true;

    /**
     * Whether the "name-addr" mailbox form (optional display phrase followed by the raw
     * address in angle brackets) is accepted, e.g.
     * {@code "John Smith" <john.smith@somewhere.com>}.
     * <p>
     * RFC 2822 says this is a valid mailbox. When set to {@code false}, only the raw
     * {@code john.smith@somewhere.com} form (no quotes or angle brackets) is accepted.
     * <p>
     * Defaults to {@code true} to remain RFC 2822 compliant.
     */
    private static final boolean ALLOW_QUOTED_IDENTIFIERS = true;

    // RFC 2822 2.2.2 Structured Header Field Bodies
    private static final String wsp = "[ \\t]"; // space or tab
    // Possessive ("*+"): no token that may follow optional whitespace can itself start
    // with whitespace, so giving characters back can never help a match. Making the
    // quantifier possessive leaves the accepted language unchanged while removing the
    // ambiguity between one token's trailing whitespace and the next token's leading one.
    private static final String fwsp = wsp + "*+";

    // RFC 2822 3.2.1 Primitive tokens
    private static final String dquote = "\\\"";
    // ASCII control characters excluding white space:
    private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
    // all ASCII characters except CR and LF:
    private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";

    // RFC 2822 3.2.2 Quoted characters:
    // single backslash followed by a text char
    private static final String quotedPair = "(\\\\" + asciiText + ')';

    // RFC 2822 3.2.4 Atom:
    private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
    // The atext run is possessive ("++"): atoms are repeated inside "phrase", and a greedy
    // run there behaves like (a+)+, which backtracks exponentially on inputs such as a
    // long word list that is not followed by an angle-addr.
    private static final String atom = fwsp + atext + "++" + fwsp;
    private static final String dotAtomText = atext + '+' + '(' + "\\." + atext + "+)*";
    private static final String dotAtom = fwsp + '(' + dotAtomText + ')' + fwsp;

    // RFC 2822 3.2.5 Quoted strings:
    // noWsCtl and the rest of ASCII except the doublequote and backslash characters:
    private static final String qtext = '[' + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
    private static final String qcontent = '(' + qtext + '|' + quotedPair + ')';
    private static final String quotedString = dquote + '(' + fwsp + qcontent + ")*" + fwsp + dquote;

    // RFC 2822 3.2.6 Miscellaneous tokens
    private static final String word = "((" + atom + ")|(" + quotedString + "))";
    private static final String phrase = word + '+'; // one or more words

    // RFC 1035 tokens for domain names:
    private static final String letter = "[a-zA-Z]";
    private static final String letDig = "[a-zA-Z0-9]";
    private static final String letDigHyp = "[a-zA-Z0-9-]";
    private static final String rfcLabel = letDig + '(' + letDigHyp + "{0,61}" + letDig + ")?";
    private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";

    // RFC 2822 3.4 Address specification
    // domain text - non white space controls and the rest of ASCII chars not including [, ], or \:
    private static final String dtext = '[' + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
    // The alternation is parenthesised so it stays a single unit wherever it is embedded.
    private static final String dcontent = '(' + dtext + '|' + quotedPair + ')';
    // domain-literal = "[" *([FWS] dcontent) [FWS] "]" - one dcontent per iteration keeps
    // the repetition unambiguous (no nested quantifiers).
    private static final String domainLiteral = "\\[" + '(' + fwsp + dcontent + ")*" + fwsp + "\\]";
    private static final String rfc2822Domain = '(' + dotAtom + '|' + domainLiteral + ')';
    private static final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;
    private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
    private static final String addrSpec = localPart + '@' + domain;
    private static final String angleAddr = '<' + addrSpec + '>';
    private static final String nameAddr = '(' + phrase + ")?" + fwsp + angleAddr;
    private static final String mailbox = nameAddr + '|' + addrSpec;

    // Pick the top-level grammar depending on whether the name-addr form is allowed,
    // then compile it once for efficient re-use.
    private static final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;

    /**
     * Compiled pattern for a single mailbox. It is meant to be applied to the whole input
     * (see {@link #isValidText(String)}, which uses {@code matches()}).
     */
    public static final Pattern VALID_PATTERN = Pattern.compile(patternString);

    // instance state
    private String text;
    // Starts as "not bouncing"; flipped to true once a bounce is observed.
    private boolean bouncing = false;
    private boolean verified;
    private String label;

    /**
     * Creates an address with no text; {@link #getText()} returns {@code null} until
     * {@link #setText(String)} is called.
     */
    public EmailAddress() {
        super();
    }

    /**
     * Creates an address holding the given text. The text is stored as-is and is not
     * validated here; use {@link #isValid()} to check it.
     *
     * @param text the email address string, may be {@code null}
     */
    public EmailAddress(String text) {
        super();
        setText(text);
    }

    /**
     * Returns the actual email address string, e.g. {@code someone@somewhere.com}.
     *
     * @return the actual email address string, or {@code null} if none was set
     */
    public String getText() {
        return text;
    }

    /**
     * Sets the email address string. No validation is performed.
     *
     * @param text the email address string, may be {@code null}
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * Returns whether or not any emails sent to this email address come back as bounced
     * (undeliverable).
     * <p>
     * Default is {@code false} for convenience's sake - if a bounced message is ever
     * received for this address, this value should be set to {@code true} until
     * verification can be made.
     *
     * @return whether or not any emails sent to this email address come back as bounced
     *         (undeliverable)
     */
    public boolean isBouncing() {
        return bouncing;
    }

    /**
     * Sets the bouncing flag.
     *
     * @param bouncing {@code true} if mail sent to this address bounces
     */
    public void setBouncing(boolean bouncing) {
        this.bouncing = bouncing;
    }

    /**
     * Returns whether or not the party associated with this email has verified that it is
     * their email address.
     * <p>
     * Verification is usually done by sending an email to this address and waiting for
     * the party to respond or click a specific link in the email.
     * <p>
     * Default is {@code false}.
     *
     * @return whether or not the party associated with this email has verified that it is
     *         their email address
     */
    public boolean isVerified() {
        return verified;
    }

    /**
     * Sets the verified flag.
     *
     * @param verified {@code true} if the owner has confirmed this address
     */
    public void setVerified(boolean verified) {
        this.verified = verified;
    }

    /**
     * Party label associated with this address, for example, 'Home', 'Work', etc.
     *
     * @return a label associated with this address, or {@code null} if none was set
     */
    public String getLabel() {
        return label;
    }

    /**
     * Sets the label associated with this address.
     *
     * @param label the label, for example 'Home' or 'Work'; may be {@code null}
     */
    public void setLabel(String label) {
        this.label = label;
    }

    /**
     * Returns whether or not the text represented by this object instance is valid
     * according to the <tt>RFC 2822</tt> rules.
     *
     * @return true if the text represented by this instance is valid according
     *         to RFC 2822, false otherwise (including when no text is set)
     */
    public boolean isValid() {
        return isValidText(getText());
    }

    /**
     * Utility method that checks to see if the specified string is a valid
     * email address according to the RFC 2822 specification. The whole string must
     * match {@link #VALID_PATTERN}.
     *
     * @param email the email address string to test for validity
     * @return true if the given text is valid according to RFC 2822, false otherwise;
     *         {@code null} is never valid
     */
    public static boolean isValidText(String email) {
        if (email == null) {
            return false;
        }
        return VALID_PATTERN.matcher(email).matches();
    }

    /**
     * Two addresses are equal when their texts are equal; the bouncing, verified and
     * label attributes are not compared. Two addresses without text are equal to each
     * other.
     *
     * @param o the object to compare with
     * @return true if {@code o} is an {@code EmailAddress} with the same text
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof EmailAddress)) {
            return false;
        }
        String mine = getText();
        String theirs = ((EmailAddress) o).getText();
        // Null-safe comparison: the no-arg constructor leaves the text unset.
        return (mine == null) ? (theirs == null) : mine.equals(theirs);
    }

    /**
     * Hash code derived from the text only, consistent with {@link #equals(Object)}.
     * Note that the text is mutable, so the hash changes when {@link #setText(String)}
     * is called.
     *
     * @return the hash code of the text, or {@code 0} if no text is set
     */
    @Override
    public int hashCode() {
        String value = getText();
        return (value == null) ? 0 : value.hashCode();
    }

    /**
     * Returns the address text.
     *
     * @return same value as {@link #getText()}, which may be {@code null}
     */
    @Override
    public String toString() {
        return getText();
    }
}