/Frameworks/Core/ERExtensions/Sources/er/extensions/net/ERXEmailValidator.java
Java | 305 lines | 275 code | 10 blank | 20 comment | 0 complexity | f70aa3e3d125ec13bbccf55cecd3d0d0 MD5 | raw file
1package er.extensions.net;
2
3import java.io.Serializable;
4import java.util.Hashtable;
5import java.util.concurrent.Callable;
6import java.util.concurrent.ExecutionException;
7import java.util.concurrent.ExecutorService;
8import java.util.concurrent.Executors;
9import java.util.concurrent.Future;
10import java.util.concurrent.TimeUnit;
11import java.util.concurrent.TimeoutException;
12import java.util.regex.Pattern;
13
14import javax.naming.NameNotFoundException;
15import javax.naming.NamingException;
16import javax.naming.directory.Attribute;
17import javax.naming.directory.Attributes;
18import javax.naming.directory.DirContext;
19import javax.naming.directory.InitialDirContext;
20
21import org.apache.commons.lang.StringUtils;
22import org.apache.log4j.Logger;
23
24import com.webobjects.foundation.NSForwardException;
25
26import er.extensions.foundation.ERXValueUtilities;
27
28/**
29 * Email validation class inspired by <a
30 * href="http://leshazlewood.com/2006/11/06/emailaddress-java-class/">Les
31 * Hazlewood's email validator.</a> This class is immutable and thread safe.
32 *
33 * @author Les Hazlewood (regular expressions)
34 * @author Ramsey Gurley (threaded domain validation)
35 */
36public final class ERXEmailValidator implements Serializable {
37 /**
38 * Do I need to update serialVersionUID? See section 5.6 <cite>Type Changes
39 * Affecting Serialization</cite> on page 51 of the <a
40 * href="http://java.sun.com/j2se/1.4/pdf/serial-spec.pdf">Java Object
41 * Serialization Spec</a>
42 */
43 private static final long serialVersionUID = 1L;
44
45 private static final Logger log = Logger.getLogger(ERXEmailValidator.class);
46
47 // RFC 2822 2.2.2 Structured Header Field Bodies
48 private static final String wsp = "[ \\t]"; // space or tab
49 private static final String fwsp = wsp + "*";
50
51 // RFC 2822 3.2.1 Primitive tokens
52 private static final String dquote = "\\\"";
53 // ASCII Control characters excluding white space:
54 private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
55 // all ASCII characters except CR and LF:
56 private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";
57
58 // RFC 2822 3.2.2 Quoted characters:
59 // single backslash followed by a text char
60 private static final String quotedPair = "(\\\\" + asciiText + ")";
61
62 // RFC 2822 3.2.4 Atom:
63 private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
64 private static final String atom = fwsp + atext + "+" + fwsp;
65 private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
66 private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;
67
68 // RFC 2822 3.2.5 Quoted strings:
69 // noWsCtl and the rest of ASCII except the doublequote and backslash
70 // characters:
71 private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
72 private static final String qcontent = "(" + qtext + "|" + quotedPair + ")";
73 private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;
74
75 // RFC 2822 3.2.6 Miscellaneous tokens
76 private static final String word = "((" + atom + ")|(" + quotedString + "))";
77 private static final String phrase = word + "+"; // one or more words.
78
79 // RFC 1035 tokens for domain names:
80 private static final String letter = "[a-zA-Z]";
81 private static final String letDig = "[a-zA-Z0-9]";
82 private static final String letDigHyp = "[a-zA-Z0-9-]";
83 private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
84 private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";
85
86 // RFC 2822 3.4 Address specification
87 // domain text - non white space controls and the rest of ASCII chars not
88 // including [, ], or \:
89 private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
90 private static final String dcontent = dtext + "|" + quotedPair;
91 private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
92 private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";
93
94 private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
95
96 private final String domain;
97 private final String addrSpec;
98 private final String angleAddr;
99 private final String nameAddr;
100 private final String mailbox;
101 private final String patternString;
102 private final Pattern validPattern;
103
104 /**
105 * This second validator exists because there is an issue with validating
106 * addresses that allowQuotedIdentifiers that have no quoting and a long
107 * mailbox name. Example: blahblahblahblahblahblahblah@blah.com
108 *
109 * It seems that after about 25 chars, the regular expression matching
110 * takes exponentially longer to match the string. The same address with
111 * quoting does not exhibit the problem.
112 * Ex. "Blah blah" <blahblahblahblahblahblahblah@blah.com>
113 *
114 * Nor does using a validator that does not allow quoted identifiers. In
115 * order to work around this problem, a second internal validator is
116 * created when allowQuotedIdentifiers is true. This internal validator
117 * does not allow quoted identifiers. It is tried first and only if it
118 * returns false is the full regular expression used.
119 */
120 private final ERXEmailValidator _internal;
121
122 /**
123 *
124 * @param allowQuotedIdentifiers
125 * if true, quoted identifiers are allowed (using quotes and
126 * angle brackets around the raw address) are allowed, e.g.:
127 * "John Smith" <john.smith@somewhere.com> The RFC says
128 * this is a valid mailbox. If you don't want to allow this,
129 * because for example, you only want users to enter in a raw
130 * address (john.smith@somewhere.com - no quotes or angle
131 * brackets), then set this to false.
132 *
133 * @param allowDomainLiterals
134 * if true, domain literals are allowed in the email address,
135 * e.g.: someone@[192.168.1.100] or john.doe@[23:33:A2:22:16:1F]
136 * or me@[my computer] The RFC says these are valid email
137 * addresses, but most people don't like allowing them. If you
138 * don't want to allow them, and only want to allow valid domain
139 * names (RFC 1035, x.y.z.com, etc), set this to false.
140 */
141 public ERXEmailValidator(boolean allowQuotedIdentifiers, boolean allowDomainLiterals) {
142 domain = allowDomainLiterals ? rfc2822Domain : rfc1035DomainName;
143 addrSpec = localPart + "@" + domain;
144 angleAddr = "<" + addrSpec + ">";
145 nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
146 mailbox = nameAddr + "|" + addrSpec;
147 patternString = allowQuotedIdentifiers ? mailbox : addrSpec;
148 validPattern = Pattern.compile(patternString);
149
150 /*
151 * See javadoc for the _internal ivar
152 */
153 _internal = allowQuotedIdentifiers?new ERXEmailValidator(false, allowDomainLiterals):null;
154 }
155
156 /**
157 * Utility method that checks to see if the specified string is a valid
158 * email address according to the * RFC 2822 specification.
159 *
160 * @param email
161 * the email address string to test for validity.
162 * @return true if the given text valid according to RFC 2822, false
163 * otherwise.
164 */
165 public boolean isValidEmailString(String email) {
166 /*
167 * See javadoc for the _internal ivar
168 */
169 if(_internal != null && _internal.isValidEmailString(email)) {
170 return true;
171 }
172 return email != null && validPattern.matcher(email).matches();
173 }
174
175 /**
176 * The thread pool
177 */
178 private static final ExecutorService executorService = Executors.newCachedThreadPool();
179
180 /**
181 * Callable to actually validate the email domain.
182 */
183 private static class DomainValidator implements Callable<Boolean> {
184 private final String _hostName;
185
186 /**
187 * @param hostName
188 * the host name to validate
189 */
190 DomainValidator(String hostName) {
191 _hostName = hostName;
192 }
193
194 public Boolean call() {
195 Hashtable env = new Hashtable();
196 env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory");
197 try {
198 DirContext ictx = new InitialDirContext(env);
199 Attributes attrs = ictx.getAttributes(_hostName, new String[] { "MX" });
200 Attribute attr = attrs.get("MX");
201 return attr != null ? Boolean.TRUE : Boolean.FALSE;
202 }
203 catch (NameNotFoundException e) {
204 return Boolean.FALSE;
205 }
206 catch (NamingException e) {
207 throw NSForwardException._runtimeExceptionForThrowable(e);
208 }
209 }
210
211 }
212
213 /**
214 * Checks to see if the hostName is a valid email domain. A timeout is
215 * specified which limits the time spent waiting for the DNS lookup. If the
216 * timeout is exceeded, the method returns null.
217 *
218 * @param hostName
219 * the email hostName
220 * @param timeout
221 * the timeout in milliseconds
222 * @return true if the hostName is valid, false if no hostName or MX record
223 * is found, null if lookup times out
224 * @throws NamingException
225 *
226 * @deprecated this method will throw mysterious NullPointerExceptions if used
227 * in a loop. Evidently, something about the DirContext is not as thread safe
228 * as the javadocs claim. Do not use it.
229 */
230 public static Boolean isValidDomainString(String hostName, long timeout) {
231 if (timeout < 1) {
232 return null;
233 }
234 DomainValidator domainValidator = new DomainValidator(hostName);
235 Future<Boolean> future = executorService.submit(domainValidator);
236
237 try {
238 Boolean result = future.get(timeout, TimeUnit.MILLISECONDS);
239 return result;
240 }
241 catch (InterruptedException e) {
242 // This really shouldn't happen
243 log.info("Domain validation thread interrupted.");
244 return null;
245 }
246 catch (ExecutionException e) {
247 // Threw some naming exception?
248 log.warn("Exception thrown validating domain.", e);
249 return null;
250 }
251 catch (TimeoutException e) {
252 // If the future timed out, return null.
253 log.debug("Timeout validating email domain.");
254 return null;
255 }
256 }
257
258 /**
259 * Convenience method to validate email address string and domain. If a
260 * timeout occurs, the default boolean value is returned.
261 *
262 * @param email
263 * the email string to test
264 * @param timeout
265 * the timeout in milliseconds
266 * @param def
267 * default value if timeout occurs
268 * @return true if the email passes both validations
269 *
270 * @deprecated Deprecated because it relies on {@link ERXEmailValidator#isValidDomainString(String, long)}
271 */
272 public boolean isValidEmailAddress(String email, long timeout, boolean def) {
273 if (isValidEmailString(email)) {
274 String hostName = hostNameForEmailString(email);
275 Boolean value = ERXEmailValidator.isValidDomainString(hostName, timeout);
276 return ERXValueUtilities.booleanValueWithDefault(value, def);
277 }
278 return false;
279 }
280
281 /**
282 * Parses the host name from the email string
283 *
284 * @param email
285 * the email address
286 * @return the hostName portion of the email address
287 */
288 public static String hostNameForEmailString(String email) {
289 String hostName = StringUtils.substringAfterLast(email, "@");
290 // handle domain literals and quoted identifiers
291 hostName = StringUtils.trimToEmpty(hostName);
292 if(hostName.isEmpty()) { return hostName; }
293 int lastIndex = hostName.length() - 1;
294 if (hostName.lastIndexOf('>') == lastIndex) {
295 hostName = hostName.substring(0, lastIndex);
296 }
297 hostName = StringUtils.trimToEmpty(hostName);
298 lastIndex = hostName.length() - 1;
299 if (hostName.indexOf('[') == 0 && hostName.lastIndexOf(']') == lastIndex) {
300 hostName = hostName.substring(1, lastIndex);
301 }
302 hostName = StringUtils.trimToEmpty(hostName);
303 return hostName;
304 }
305}