PageRenderTime 60ms CodeModel.GetById 43ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/Frameworks/Core/ERExtensions/Sources/er/extensions/net/ERXEmailValidator.java

https://bitbucket.org/molequedeideias/wonder
Java | 305 lines | 275 code | 10 blank | 20 comment | 0 complexity | f70aa3e3d125ec13bbccf55cecd3d0d0 MD5 | raw file
  1package er.extensions.net;
  2
  3import java.io.Serializable;
  4import java.util.Hashtable;
  5import java.util.concurrent.Callable;
  6import java.util.concurrent.ExecutionException;
  7import java.util.concurrent.ExecutorService;
  8import java.util.concurrent.Executors;
  9import java.util.concurrent.Future;
 10import java.util.concurrent.TimeUnit;
 11import java.util.concurrent.TimeoutException;
 12import java.util.regex.Pattern;
 13
 14import javax.naming.NameNotFoundException;
 15import javax.naming.NamingException;
 16import javax.naming.directory.Attribute;
 17import javax.naming.directory.Attributes;
 18import javax.naming.directory.DirContext;
 19import javax.naming.directory.InitialDirContext;
 20
 21import org.apache.commons.lang.StringUtils;
 22import org.apache.log4j.Logger;
 23
 24import com.webobjects.foundation.NSForwardException;
 25
 26import er.extensions.foundation.ERXValueUtilities;
 27
 28/**
 29 * Email validation class inspired by <a
 30 * href="http://leshazlewood.com/2006/11/06/emailaddress-java-class/">Les
 31 * Hazlewood's email validator.</a> This class is immutable and thread safe.
 32 * 
 33 * @author Les Hazlewood (regular expressions)
 34 * @author Ramsey Gurley (threaded domain validation)
 35 */
 36public final class ERXEmailValidator implements Serializable {
 37	/**
 38	 * Do I need to update serialVersionUID? See section 5.6 <cite>Type Changes
 39	 * Affecting Serialization</cite> on page 51 of the <a
 40	 * href="http://java.sun.com/j2se/1.4/pdf/serial-spec.pdf">Java Object
 41	 * Serialization Spec</a>
 42	 */
 43	private static final long serialVersionUID = 1L;
 44
 45	private static final Logger log = Logger.getLogger(ERXEmailValidator.class);
 46
 47	// RFC 2822 2.2.2 Structured Header Field Bodies
 48	private static final String wsp = "[ \\t]"; // space or tab
 49	private static final String fwsp = wsp + "*";
 50
 51	// RFC 2822 3.2.1 Primitive tokens
 52	private static final String dquote = "\\\"";
 53	// ASCII Control characters excluding white space:
 54	private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
 55	// all ASCII characters except CR and LF:
 56	private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";
 57
 58	// RFC 2822 3.2.2 Quoted characters:
 59	// single backslash followed by a text char
 60	private static final String quotedPair = "(\\\\" + asciiText + ")";
 61
 62	// RFC 2822 3.2.4 Atom:
 63	private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
 64	private static final String atom = fwsp + atext + "+" + fwsp;
 65	private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
 66	private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;
 67
 68	// RFC 2822 3.2.5 Quoted strings:
 69	// noWsCtl and the rest of ASCII except the doublequote and backslash
 70	// characters:
 71	private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
 72	private static final String qcontent = "(" + qtext + "|" + quotedPair + ")";
 73	private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;
 74
 75	// RFC 2822 3.2.6 Miscellaneous tokens
 76	private static final String word = "((" + atom + ")|(" + quotedString + "))";
 77	private static final String phrase = word + "+"; // one or more words.
 78
 79	// RFC 1035 tokens for domain names:
 80	private static final String letter = "[a-zA-Z]";
 81	private static final String letDig = "[a-zA-Z0-9]";
 82	private static final String letDigHyp = "[a-zA-Z0-9-]";
 83	private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
 84	private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";
 85
 86	// RFC 2822 3.4 Address specification
 87	// domain text - non white space controls and the rest of ASCII chars not
 88	// including [, ], or \:
 89	private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
 90	private static final String dcontent = dtext + "|" + quotedPair;
 91	private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
 92	private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";
 93
 94	private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
 95
 96	private final String domain;
 97	private final String addrSpec;
 98	private final String angleAddr;
 99	private final String nameAddr;
100	private final String mailbox;
101	private final String patternString;
102	private final Pattern validPattern;
103	
104	/**
105	 * This second validator exists because there is an issue with validating
106	 * addresses that allowQuotedIdentifiers that have no quoting and a long
107	 * mailbox name. Example: blahblahblahblahblahblahblah@blah.com
108	 * 
109	 * It seems that after about 25 chars, the regular expression matching
110	 * takes exponentially longer to match the string. The same address with
111	 * quoting does not exhibit the problem. 
112	 * Ex. "Blah blah" <blahblahblahblahblahblahblah@blah.com>
113	 * 
114	 * Nor does using a validator that does not allow quoted identifiers. In
115	 * order to work around this problem, a second internal validator is
116	 * created when allowQuotedIdentifiers is true. This internal validator
117	 * does not allow quoted identifiers. It is tried first and only if it
118	 * returns false is the full regular expression used.
119	 */
120	private final ERXEmailValidator _internal;
121
122	/**
123	 * 
124	 * @param allowQuotedIdentifiers
125	 *            if true, quoted identifiers are allowed (using quotes and
126	 *            angle brackets around the raw address) are allowed, e.g.:
127	 *            "John Smith" &lt;john.smith@somewhere.com&gt; The RFC says
128	 *            this is a valid mailbox. If you don't want to allow this,
129	 *            because for example, you only want users to enter in a raw
130	 *            address (john.smith@somewhere.com - no quotes or angle
131	 *            brackets), then set this to false.
132	 * 
133	 * @param allowDomainLiterals
134	 *            if true, domain literals are allowed in the email address,
135	 *            e.g.: someone@[192.168.1.100] or john.doe@[23:33:A2:22:16:1F]
136	 *            or me@[my computer] The RFC says these are valid email
137	 *            addresses, but most people don't like allowing them. If you
138	 *            don't want to allow them, and only want to allow valid domain
139	 *            names (RFC 1035, x.y.z.com, etc), set this to false.
140	 */
141	public ERXEmailValidator(boolean allowQuotedIdentifiers, boolean allowDomainLiterals) {
142		domain = allowDomainLiterals ? rfc2822Domain : rfc1035DomainName;
143		addrSpec = localPart + "@" + domain;
144		angleAddr = "<" + addrSpec + ">";
145		nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
146		mailbox = nameAddr + "|" + addrSpec;
147		patternString = allowQuotedIdentifiers ? mailbox : addrSpec;
148		validPattern = Pattern.compile(patternString);
149		
150		/*
151		 * See javadoc for the _internal ivar
152		 */
153		_internal = allowQuotedIdentifiers?new ERXEmailValidator(false, allowDomainLiterals):null;
154	}
155
156	/**
157	 * Utility method that checks to see if the specified string is a valid
158	 * email address according to the * RFC 2822 specification.
159	 * 
160	 * @param email
161	 *            the email address string to test for validity.
162	 * @return true if the given text valid according to RFC 2822, false
163	 *         otherwise.
164	 */
165	public boolean isValidEmailString(String email) {
166		/*
167		 * See javadoc for the _internal ivar
168		 */
169		if(_internal != null && _internal.isValidEmailString(email)) {
170			return true;
171		}
172		return email != null && validPattern.matcher(email).matches();
173	}
174
175	/**
176	 * The thread pool
177	 */
178	private static final ExecutorService executorService = Executors.newCachedThreadPool();
179
180	/**
181	 * Callable to actually validate the email domain.
182	 */
183	private static class DomainValidator implements Callable<Boolean> {
184		private final String _hostName;
185
186		/**
187		 * @param hostName
188		 *            the host name to validate
189		 */
190		DomainValidator(String hostName) {
191			_hostName = hostName;
192		}
193
194		public Boolean call() {
195			Hashtable env = new Hashtable();
196			env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory");
197			try {
198				DirContext ictx = new InitialDirContext(env);
199				Attributes attrs = ictx.getAttributes(_hostName, new String[] { "MX" });
200				Attribute attr = attrs.get("MX");
201				return attr != null ? Boolean.TRUE : Boolean.FALSE;
202			}
203			catch (NameNotFoundException e) {
204				return Boolean.FALSE;
205			}
206			catch (NamingException e) {
207				throw NSForwardException._runtimeExceptionForThrowable(e);
208			}
209		}
210
211	}
212
213	/**
214	 * Checks to see if the hostName is a valid email domain. A timeout is
215	 * specified which limits the time spent waiting for the DNS lookup. If the
216	 * timeout is exceeded, the method returns null.
217	 * 
218	 * @param hostName
219	 *            the email hostName
220	 * @param timeout
221	 *            the timeout in milliseconds
222	 * @return true if the hostName is valid, false if no hostName or MX record
223	 *         is found, null if lookup times out
224	 * @throws NamingException
225	 * 
226	 * @deprecated this method will throw mysterious NullPointerExceptions if used
227	 * in a loop. Evidently, something about the DirContext is not as thread safe
228	 * as the javadocs claim. Do not use it.
229	 */
230	public static Boolean isValidDomainString(String hostName, long timeout) {
231		if (timeout < 1) {
232			return null;
233		}
234		DomainValidator domainValidator = new DomainValidator(hostName);
235		Future<Boolean> future = executorService.submit(domainValidator);
236
237		try {
238			Boolean result = future.get(timeout, TimeUnit.MILLISECONDS);
239			return result;
240		}
241		catch (InterruptedException e) {
242			// This really shouldn't happen
243			log.info("Domain validation thread interrupted.");
244			return null;
245		}
246		catch (ExecutionException e) {
247			// Threw some naming exception?
248			log.warn("Exception thrown validating domain.", e);
249			return null;
250		}
251		catch (TimeoutException e) {
252			// If the future timed out, return null.
253			log.debug("Timeout validating email domain.");
254			return null;
255		}
256	}
257
258	/**
259	 * Convenience method to validate email address string and domain. If a
260	 * timeout occurs, the default boolean value is returned.
261	 * 
262	 * @param email
263	 *            the email string to test
264	 * @param timeout
265	 *            the timeout in milliseconds
266	 * @param def
267	 *            default value if timeout occurs
268	 * @return true if the email passes both validations
269	 * 
270	 * @deprecated Deprecated because it relies on {@link ERXEmailValidator#isValidDomainString(String, long)}
271	 */
272	public boolean isValidEmailAddress(String email, long timeout, boolean def) {
273		if (isValidEmailString(email)) {
274			String hostName = hostNameForEmailString(email);
275			Boolean value = ERXEmailValidator.isValidDomainString(hostName, timeout);
276			return ERXValueUtilities.booleanValueWithDefault(value, def);
277		}
278		return false;
279	}
280
281	/**
282	 * Parses the host name from the email string
283	 * 
284	 * @param email
285	 *            the email address
286	 * @return the hostName portion of the email address
287	 */
288	public static String hostNameForEmailString(String email) {
289		String hostName = StringUtils.substringAfterLast(email, "@");
290		// handle domain literals and quoted identifiers
291		hostName = StringUtils.trimToEmpty(hostName);
292		if(hostName.isEmpty()) { return hostName; }
293		int lastIndex = hostName.length() - 1;
294		if (hostName.lastIndexOf('>') == lastIndex) {
295			hostName = hostName.substring(0, lastIndex);
296		}
297		hostName = StringUtils.trimToEmpty(hostName);
298		lastIndex = hostName.length() - 1;
299		if (hostName.indexOf('[') == 0 && hostName.lastIndexOf(']') == lastIndex) {
300			hostName = hostName.substring(1, lastIndex);
301		}
302		hostName = StringUtils.trimToEmpty(hostName);
303		return hostName;
304	}
305}