PageRenderTime 34ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/Frameworks/Core/ERExtensions/Sources/er/extensions/net/ERXEmailValidator.java

https://bitbucket.org/molequedeideias/wonder
Java | 305 lines | 275 code | 10 blank | 20 comment | 0 complexity | f70aa3e3d125ec13bbccf55cecd3d0d0 MD5 | raw file
  1. package er.extensions.net;
  2. import java.io.Serializable;
  3. import java.util.Hashtable;
  4. import java.util.concurrent.Callable;
  5. import java.util.concurrent.ExecutionException;
  6. import java.util.concurrent.ExecutorService;
  7. import java.util.concurrent.Executors;
  8. import java.util.concurrent.Future;
  9. import java.util.concurrent.TimeUnit;
  10. import java.util.concurrent.TimeoutException;
  11. import java.util.regex.Pattern;
  12. import javax.naming.NameNotFoundException;
  13. import javax.naming.NamingException;
  14. import javax.naming.directory.Attribute;
  15. import javax.naming.directory.Attributes;
  16. import javax.naming.directory.DirContext;
  17. import javax.naming.directory.InitialDirContext;
  18. import org.apache.commons.lang.StringUtils;
  19. import org.apache.log4j.Logger;
  20. import com.webobjects.foundation.NSForwardException;
  21. import er.extensions.foundation.ERXValueUtilities;
  22. /**
  23. * Email validation class inspired by <a
  24. * href="http://leshazlewood.com/2006/11/06/emailaddress-java-class/">Les
  25. * Hazlewood's email validator.</a> This class is immutable and thread safe.
  26. *
  27. * @author Les Hazlewood (regular expressions)
  28. * @author Ramsey Gurley (threaded domain validation)
  29. */
  30. public final class ERXEmailValidator implements Serializable {
  31. /**
  32. * Do I need to update serialVersionUID? See section 5.6 <cite>Type Changes
  33. * Affecting Serialization</cite> on page 51 of the <a
  34. * href="http://java.sun.com/j2se/1.4/pdf/serial-spec.pdf">Java Object
  35. * Serialization Spec</a>
  36. */
  37. private static final long serialVersionUID = 1L;
  38. private static final Logger log = Logger.getLogger(ERXEmailValidator.class);
  // Everything below is a regular-expression fragment. The fragments are
  // concatenated into larger ones, so their declaration order matters.
  39. // RFC 2822 2.2.2 Structured Header Field Bodies
  40. private static final String wsp = "[ \\t]"; // space or tab
  41. private static final String fwsp = wsp + "*"; // zero or more spaces/tabs
  42. // RFC 2822 3.2.1 Primitive tokens
  43. private static final String dquote = "\\\""; // regex for a literal double quote
  44. // ASCII control characters excluding NUL, TAB, LF and CR. This is a bare range
  // list without brackets: it is only usable spliced inside a character class.
  45. private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
  46. // all ASCII characters except NUL, CR and LF:
  47. private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";
  48. // RFC 2822 3.2.2 Quoted characters:
  49. // single backslash followed by a text char
  50. private static final String quotedPair = "(\\\\" + asciiText + ")";
  51. // RFC 2822 3.2.4 Atom: letters, digits and the listed punctuation characters
  52. private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
  53. private static final String atom = fwsp + atext + "+" + fwsp; // one run of atext, optionally padded with whitespace
  54. private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*"; // atext runs joined by single dots
  55. private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp; // dotAtomText, optionally padded with whitespace
  56. // RFC 2822 3.2.5 Quoted strings:
  57. // noWsCtl and the rest of ASCII except the doublequote and backslash
  58. // characters:
  59. private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
  60. private static final String qcontent = "(" + qtext + "|" + quotedPair + ")"; // one plain char or one backslash-escaped char
  61. private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote; // "..." with optional inner whitespace
  62. // RFC 2822 3.2.6 Miscellaneous tokens
  63. private static final String word = "((" + atom + ")|(" + quotedString + "))"; // an atom or a quoted string
  64. private static final String phrase = word + "+"; // one or more words.
  65. // RFC 1035 tokens for domain names:
  66. private static final String letter = "[a-zA-Z]";
  67. private static final String letDig = "[a-zA-Z0-9]";
  68. private static final String letDigHyp = "[a-zA-Z0-9-]";
  // A label is 1 to 63 characters and neither starts nor ends with a hyphen.
  69. private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
  // One or more labels followed by a final label of 2 to 6 letters.
  // NOTE(review): top-level domains longer than 6 letters, or containing digits
  // or hyphens, do not match this fragment - confirm that limit is still wanted.
  70. private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";
  71. // RFC 2822 3.4 Address specification
  72. // domain text - non white space controls and the rest of ASCII chars not
  73. // including [, ], or \:
  74. private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
  // NOTE(review): the alternation here is not parenthesised, so once spliced into
  // domainLiteral the '|' separates (fwsp dtext) from (quotedPair+) rather than
  // dtext from quotedPair - verify this is intended before touching it.
  75. private static final String dcontent = dtext + "|" + quotedPair;
  76. private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]"; // bracketed literal: [ ... ]
  77. private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")"; // dotted name or bracketed literal
  78. private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))"; // the part before the '@'
  // Per-instance regex fragments, assembled in the constructor from the static
  // fragments according to the two constructor flags.
  79. private final String domain; // rfc2822Domain when domain literals are allowed, otherwise rfc1035DomainName
  80. private final String addrSpec; // localPart + "@" + domain
  81. private final String angleAddr; // addrSpec wrapped in angle brackets
  82. private final String nameAddr; // optional phrase (display name) followed by angleAddr
  83. private final String mailbox; // nameAddr or addrSpec
  84. private final String patternString; // mailbox when quoted identifiers are allowed, otherwise addrSpec
  85. private final Pattern validPattern; // patternString, compiled once in the constructor
  86. /**
  87. * Fast-path validator. It is non-null only when this validator was built
  88. * with allowQuotedIdentifiers set to true, and it exists because of a
  89. * performance problem with unquoted addresses that have a long mailbox
  90. * name. Example: blahblahblahblahblahblahblah@blah.com
  91. *
  92. * It seems that after about 25 chars, the regular expression matching
  93. * takes exponentially longer to match the string. The same address with
  94. * quoting does not exhibit the problem.
  95. * Ex. "Blah blah" &lt;blahblahblahblahblahblahblah@blah.com&gt;
  96. *
  97. * Nor does using a validator that does not allow quoted identifiers. In
  98. * order to work around this problem, this second internal validator is
  99. * created with allowQuotedIdentifiers set to false. isValidEmailString
  100. * tries it first and only if it returns false is the full pattern used.
  101. */
  102. private final ERXEmailValidator _internal;
  /**
   * Assembles the address grammar selected by the two flags and compiles it
   * into the pattern used by {@link #isValidEmailString(String)}.
   *
   * @param allowQuotedIdentifiers
   *            when true, the full mailbox form is accepted in addition to
   *            the raw address: an optional display name followed by the
   *            address in angle brackets, e.g.
   *            "John Smith" &lt;john.smith@somewhere.com&gt;. The RFC says
   *            this is a valid mailbox. When false, only the raw address
   *            (john.smith@somewhere.com - no quotes or angle brackets) is
   *            accepted.
   * @param allowDomainLiterals
   *            when true, the domain may be a bracketed literal, e.g.
   *            someone@[192.168.1.100] or john.doe@[23:33:A2:22:16:1F] or
   *            me@[my computer]. The RFC says these are valid email
   *            addresses, but most people don't like allowing them. When
   *            false, only domain names in the RFC 1035 form (x.y.z.com,
   *            etc) are accepted.
   */
  public ERXEmailValidator(boolean allowQuotedIdentifiers, boolean allowDomainLiterals) {
    if (allowDomainLiterals) {
      domain = rfc2822Domain;
    }
    else {
      domain = rfc1035DomainName;
    }

    // Build the grammar bottom-up: address, <address>, name <address>, mailbox.
    addrSpec = localPart + "@" + domain;
    angleAddr = "<" + addrSpec + ">";
    nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
    mailbox = nameAddr + "|" + addrSpec;

    if (allowQuotedIdentifiers) {
      patternString = mailbox;
    }
    else {
      patternString = addrSpec;
    }
    validPattern = Pattern.compile(patternString);

    // A validator that accepts quoted identifiers also carries a plain
    // validator, which isValidEmailString consults first as a fast path.
    if (allowQuotedIdentifiers) {
      _internal = new ERXEmailValidator(false, allowDomainLiterals);
    }
    else {
      _internal = null;
    }
  }
  135. /**
  136. * Utility method that checks to see if the specified string is a valid
  137. * email address according to the * RFC 2822 specification.
  138. *
  139. * @param email
  140. * the email address string to test for validity.
  141. * @return true if the given text valid according to RFC 2822, false
  142. * otherwise.
  143. */
  144. public boolean isValidEmailString(String email) {
  145. /*
  146. * See javadoc for the _internal ivar
  147. */
  148. if(_internal != null && _internal.isValidEmailString(email)) {
  149. return true;
  150. }
  151. return email != null && validPattern.matcher(email).matches();
  152. }
  153. /**
  154. * The thread pool (a cached pool) to which isValidDomainString submits its DomainValidator lookups.
  155. */
  156. private static final ExecutorService executorService = Executors.newCachedThreadPool();
  157. /**
  158. * Callable to actually validate the email domain.
  159. */
  160. private static class DomainValidator implements Callable<Boolean> {
  161. private final String _hostName;
  162. /**
  163. * @param hostName
  164. * the host name to validate
  165. */
  166. DomainValidator(String hostName) {
  167. _hostName = hostName;
  168. }
  169. public Boolean call() {
  170. Hashtable env = new Hashtable();
  171. env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory");
  172. try {
  173. DirContext ictx = new InitialDirContext(env);
  174. Attributes attrs = ictx.getAttributes(_hostName, new String[] { "MX" });
  175. Attribute attr = attrs.get("MX");
  176. return attr != null ? Boolean.TRUE : Boolean.FALSE;
  177. }
  178. catch (NameNotFoundException e) {
  179. return Boolean.FALSE;
  180. }
  181. catch (NamingException e) {
  182. throw NSForwardException._runtimeExceptionForThrowable(e);
  183. }
  184. }
  185. }
  186. /**
  187. * Checks to see if the hostName is a valid email domain. A timeout is
  188. * specified which limits the time spent waiting for the DNS lookup. If the
  189. * timeout is exceeded, the method returns null.
  190. *
  191. * @param hostName
  192. * the email hostName
  193. * @param timeout
  194. * the timeout in milliseconds
  195. * @return true if the hostName is valid, false if no hostName or MX record
  196. * is found, null if lookup times out
  197. * @throws NamingException
  198. *
  199. * @deprecated this method will throw mysterious NullPointerExceptions if used
  200. * in a loop. Evidently, something about the DirContext is not as thread safe
  201. * as the javadocs claim. Do not use it.
  202. */
  203. public static Boolean isValidDomainString(String hostName, long timeout) {
  204. if (timeout < 1) {
  205. return null;
  206. }
  207. DomainValidator domainValidator = new DomainValidator(hostName);
  208. Future<Boolean> future = executorService.submit(domainValidator);
  209. try {
  210. Boolean result = future.get(timeout, TimeUnit.MILLISECONDS);
  211. return result;
  212. }
  213. catch (InterruptedException e) {
  214. // This really shouldn't happen
  215. log.info("Domain validation thread interrupted.");
  216. return null;
  217. }
  218. catch (ExecutionException e) {
  219. // Threw some naming exception?
  220. log.warn("Exception thrown validating domain.", e);
  221. return null;
  222. }
  223. catch (TimeoutException e) {
  224. // If the future timed out, return null.
  225. log.debug("Timeout validating email domain.");
  226. return null;
  227. }
  228. }
  229. /**
  230. * Convenience method to validate email address string and domain. If a
  231. * timeout occurs, the default boolean value is returned.
  232. *
  233. * @param email
  234. * the email string to test
  235. * @param timeout
  236. * the timeout in milliseconds
  237. * @param def
  238. * default value if timeout occurs
  239. * @return true if the email passes both validations
  240. *
  241. * @deprecated Deprecated because it relies on {@link ERXEmailValidator#isValidDomainString(String, long)}
  242. */
  243. public boolean isValidEmailAddress(String email, long timeout, boolean def) {
  244. if (isValidEmailString(email)) {
  245. String hostName = hostNameForEmailString(email);
  246. Boolean value = ERXEmailValidator.isValidDomainString(hostName, timeout);
  247. return ERXValueUtilities.booleanValueWithDefault(value, def);
  248. }
  249. return false;
  250. }
  251. /**
  252. * Parses the host name from the email string
  253. *
  254. * @param email
  255. * the email address
  256. * @return the hostName portion of the email address
  257. */
  258. public static String hostNameForEmailString(String email) {
  259. String hostName = StringUtils.substringAfterLast(email, "@");
  260. // handle domain literals and quoted identifiers
  261. hostName = StringUtils.trimToEmpty(hostName);
  262. if(hostName.isEmpty()) { return hostName; }
  263. int lastIndex = hostName.length() - 1;
  264. if (hostName.lastIndexOf('>') == lastIndex) {
  265. hostName = hostName.substring(0, lastIndex);
  266. }
  267. hostName = StringUtils.trimToEmpty(hostName);
  268. lastIndex = hostName.length() - 1;
  269. if (hostName.indexOf('[') == 0 && hostName.lastIndexOf(']') == lastIndex) {
  270. hostName = hostName.substring(1, lastIndex);
  271. }
  272. hostName = StringUtils.trimToEmpty(hostName);
  273. return hostName;
  274. }
  275. }