Posted By

onefastsnail on 05/24/12


Tagged


Versions (?)

is_email


 / Published in: PHP
 

  1. <?php
  2. /**
  3.  * To validate an email address according to RFCs 5321, 5322 and others
  4.  *
  5.  * Copyright © 2008-2011, Dominic Sayers <br>
  6.  * Test schema documentation Copyright © 2011, Daniel Marschall <br>
  7.  * All rights reserved.
  8.  *
  9.  * Redistribution and use in source and binary forms, with or without modification,
  10.  * are permitted provided that the following conditions are met:
  11.  *
  12.  * - Redistributions of source code must retain the above copyright notice,
  13.  * this list of conditions and the following disclaimer.
  14.  * - Redistributions in binary form must reproduce the above copyright notice,
  15.  * this list of conditions and the following disclaimer in the documentation
  16.  * and/or other materials provided with the distribution.
  17.  * - Neither the name of Dominic Sayers nor the names of its contributors may be
  18.  * used to endorse or promote products derived from this software without
  19.  * specific prior written permission.
  20.  *
  21.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  22.  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  23.  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24.  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
  25.  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26.  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27.  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  28.  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30.  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31.  *
  32.  * @package is_email
  33.  * @author Dominic Sayers <dominic@sayers.cc>
  34.  * @copyright 2008-2011 Dominic Sayers
  35.  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  36.  * @link http://www.dominicsayers.com/isemail
  37.  * @version 3.01.1 - Fixed examples and readme.txt
  38.  */
  39.  
  40. // The quality of this code has been improved greatly by using PHPLint
  41. // Copyright (c) 2010 Umberto Salsi
  42. // This is free software; see the license for copying conditions.
  43. // More info: http://www.icosaedro.it/phplint/
  44. /*.
  45. require_module 'standard';
  46. require_module 'pcre';
  47. .*/
  48.  
  49. if (!defined('ISEMAIL_VALID')) { // Guard: the whole constants block is skipped when ISEMAIL_VALID is already defined
  50. /*:diagnostic constants start:*/
  51. // This part of the code is generated using data from test/meta.xml. Beware of making manual alterations
  52. // Categories: each value is an exclusive upper bound for the diagnosis codes of that category (e.g. every ISEMAIL_DEPREC_* code is below 63)
  53. define('ISEMAIL_VALID_CATEGORY', 1);
  54. define('ISEMAIL_DNSWARN', 7);
  55. define('ISEMAIL_RFC5321', 15);
  56. define('ISEMAIL_CFWS', 31);
  57. define('ISEMAIL_DEPREC', 63);
  58. define('ISEMAIL_RFC5322', 127);
  59. define('ISEMAIL_ERR', 255);
  60.  
  61. // Diagnoses: the individual status codes that is_email() collects in $return_status
  62. // Address is valid
  63. define('ISEMAIL_VALID', 0);
  64. // Address is valid but a DNS check was not successful
  65. define('ISEMAIL_DNSWARN_NO_MX_RECORD', 5);
  66. define('ISEMAIL_DNSWARN_NO_RECORD', 6);
  67. // Address is valid for SMTP but has unusual elements
  68. define('ISEMAIL_RFC5321_TLD', 9);
  69. define('ISEMAIL_RFC5321_TLDNUMERIC', 10);
  70. define('ISEMAIL_RFC5321_QUOTEDSTRING', 11);
  71. define('ISEMAIL_RFC5321_ADDRESSLITERAL', 12);
  72. define('ISEMAIL_RFC5321_IPV6DEPRECATED', 13);
  73. // Address is valid within the message but cannot be used unmodified for the envelope
  74. define('ISEMAIL_CFWS_COMMENT', 17);
  75. define('ISEMAIL_CFWS_FWS', 18);
  76. // Address contains deprecated elements but may still be valid in restricted contexts
  77. define('ISEMAIL_DEPREC_LOCALPART', 33);
  78. define('ISEMAIL_DEPREC_FWS', 34);
  79. define('ISEMAIL_DEPREC_QTEXT', 35);
  80. define('ISEMAIL_DEPREC_QP', 36);
  81. define('ISEMAIL_DEPREC_COMMENT', 37);
  82. define('ISEMAIL_DEPREC_CTEXT', 38);
  83. define('ISEMAIL_DEPREC_CFWS_NEAR_AT', 49);
  84. // The address is only valid according to the broad definition of RFC 5322. It is otherwise invalid.
  85. define('ISEMAIL_RFC5322_DOMAIN', 65);
  86. define('ISEMAIL_RFC5322_TOOLONG', 66);
  87. define('ISEMAIL_RFC5322_LOCAL_TOOLONG', 67);
  88. define('ISEMAIL_RFC5322_DOMAIN_TOOLONG', 68);
  89. define('ISEMAIL_RFC5322_LABEL_TOOLONG', 69);
  90. define('ISEMAIL_RFC5322_DOMAINLITERAL', 70);
  91. define('ISEMAIL_RFC5322_DOMLIT_OBSDTEXT', 71);
  92. define('ISEMAIL_RFC5322_IPV6_GRPCOUNT', 72);
  93. define('ISEMAIL_RFC5322_IPV6_2X2XCOLON', 73);
  94. define('ISEMAIL_RFC5322_IPV6_BADCHAR', 74);
  95. define('ISEMAIL_RFC5322_IPV6_MAXGRPS', 75);
  96. define('ISEMAIL_RFC5322_IPV6_COLONSTRT', 76);
  97. define('ISEMAIL_RFC5322_IPV6_COLONEND', 77);
  98. // Address is invalid for any purpose
  99. define('ISEMAIL_ERR_EXPECTING_DTEXT', 129);
  100. define('ISEMAIL_ERR_NOLOCALPART', 130);
  101. define('ISEMAIL_ERR_NODOMAIN', 131);
  102. define('ISEMAIL_ERR_CONSECUTIVEDOTS', 132);
  103. define('ISEMAIL_ERR_ATEXT_AFTER_CFWS', 133);
  104. define('ISEMAIL_ERR_ATEXT_AFTER_QS', 134);
  105. define('ISEMAIL_ERR_ATEXT_AFTER_DOMLIT', 135);
  106. define('ISEMAIL_ERR_EXPECTING_QPAIR', 136);
  107. define('ISEMAIL_ERR_EXPECTING_ATEXT', 137);
  108. define('ISEMAIL_ERR_EXPECTING_QTEXT', 138);
  109. define('ISEMAIL_ERR_EXPECTING_CTEXT', 139);
  110. define('ISEMAIL_ERR_BACKSLASHEND', 140);
  111. define('ISEMAIL_ERR_DOT_START', 141);
  112. define('ISEMAIL_ERR_DOT_END', 142);
  113. define('ISEMAIL_ERR_DOMAINHYPHENSTART', 143);
  114. define('ISEMAIL_ERR_DOMAINHYPHENEND', 144);
  115. define('ISEMAIL_ERR_UNCLOSEDQUOTEDSTR', 145);
  116. define('ISEMAIL_ERR_UNCLOSEDCOMMENT', 146);
  117. define('ISEMAIL_ERR_UNCLOSEDDOMLIT', 147);
  118. define('ISEMAIL_ERR_FWS_CRLF_X2', 148);
  119. define('ISEMAIL_ERR_FWS_CRLF_END', 149);
  120. define('ISEMAIL_ERR_CR_NO_LF', 150);
  121. // End of generated code
  122. /*:diagnostic constants end:*/
  123.  
  124. // Function control: the threshold is_email() applies when $errorlevel is E_WARNING (kept for backward compatibility)
  125. define('ISEMAIL_THRESHOLD' , 16);
  126.  
  127. // Email parts: parser states held in $context while scanning; the COMPONENT_* values also serve as keys of $parsedata
  128. define('ISEMAIL_COMPONENT_LOCALPART' , 0);
  129. define('ISEMAIL_COMPONENT_DOMAIN' , 1);
  130. define('ISEMAIL_COMPONENT_LITERAL' , 2);
  131. define('ISEMAIL_CONTEXT_COMMENT' , 3);
  132. define('ISEMAIL_CONTEXT_FWS' , 4);
  133. define('ISEMAIL_CONTEXT_QUOTEDSTRING' , 5);
  134. define('ISEMAIL_CONTEXT_QUOTEDPAIR' , 6);
  135.  
  136. // Miscellaneous string constants: the characters and tags the parser compares each token against
  137. define('ISEMAIL_STRING_AT' , '@');
  138. define('ISEMAIL_STRING_BACKSLASH' , '\\');
  139. define('ISEMAIL_STRING_DOT' , '.');
  140. define('ISEMAIL_STRING_DQUOTE' , '"');
  141. define('ISEMAIL_STRING_OPENPARENTHESIS' , '(');
  142. define('ISEMAIL_STRING_CLOSEPARENTHESIS', ')');
  143. define('ISEMAIL_STRING_OPENSQBRACKET' , '[');
  144. define('ISEMAIL_STRING_CLOSESQBRACKET' , ']');
  145. define('ISEMAIL_STRING_HYPHEN' , '-');
  146. define('ISEMAIL_STRING_COLON' , ':');
  147. define('ISEMAIL_STRING_DOUBLECOLON' , '::');
  148. define('ISEMAIL_STRING_SP' , ' ');
  149. define('ISEMAIL_STRING_HTAB' , "\t");
  150. define('ISEMAIL_STRING_CR' , "\r");
  151. define('ISEMAIL_STRING_LF' , "\n");
  152. define('ISEMAIL_STRING_IPV6TAG' , 'IPv6:');
  153. // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
  154. define('ISEMAIL_STRING_SPECIALS' , '()<>[]:;@\\,."');
  155. }
  156.  
  157. /**
  158.  * Check that an email address conforms to RFCs 5321, 5322 and others
  159.  *
  160.  * As of Version 3.0, we are now distinguishing clearly between a Mailbox
  161.  * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending
  162.  * on the context, either can be regarded as a valid email address. The
  163.  * RFC 5321 Mailbox specification is more restrictive (comments, white space
  164.  * and obsolete forms are not allowed)
  165.  *
  166.  * @param string $email The email address to check
  167.  * @param boolean $checkDNS If true then a DNS check for MX records will be made
  168.  * @param mixed $errorlevel Determines the boundary between valid and invalid addresses.
  169.  * Status codes above this number will be returned as-is,
  170.  * status codes below will be returned as ISEMAIL_VALID. Thus the
  171.  * calling program can simply look for ISEMAIL_VALID if it is
  172.  * only interested in whether an address is valid or not. The
  173.  * errorlevel will determine how "picky" is_email() is about
  174.  * the address.
  175.  *
  176.  * If omitted or passed as false then is_email() will return
  177.  * true or false rather than an integer error or warning.
  178.  *
  179.  * NB Note the difference between $errorlevel = false and
  180.  * $errorlevel = 0
  181.  * @param array $parsedata If passed, returns the parsed address components
  182.  */
  183. /*.mixed.*/ function is_email($email, $checkDNS = false, $errorlevel = false, &$parsedata = array()) {
  184. // Check that $email is a valid address. Read the following RFCs to understand the constraints:
  185. // (http://tools.ietf.org/html/rfc5321)
  186. // (http://tools.ietf.org/html/rfc5322)
  187. // (http://tools.ietf.org/html/rfc4291#section-2.2)
  188. // (http://tools.ietf.org/html/rfc1123#section-2.1)
  189. // (http://tools.ietf.org/html/rfc3696) (guidance only)
  190. // version 2.0: Enhance $diagnose parameter to $errorlevel
  191. // version 3.0: Introduced status categories
  192. // revision 3.1: BUG: $parsedata was passed by value instead of by reference
  193.  
  194. if (is_bool($errorlevel)) {
  195. $threshold = ISEMAIL_VALID;
  196. $diagnose = (bool) $errorlevel;
  197. } else {
  198. $diagnose = true;
  199.  
  200. switch ((int) $errorlevel) {
  201. case E_WARNING: $threshold = ISEMAIL_THRESHOLD; break; // For backward compatibility
  202. case E_ERROR: $threshold = ISEMAIL_VALID; break; // For backward compatibility
  203. default: $threshold = (int) $errorlevel;
  204. }
  205. }
  206.  
  207. $return_status = array(ISEMAIL_VALID);
  208.  
  209. // Parse the address into components, character by character
  210. $raw_length = strlen($email);
  211. $context = ISEMAIL_COMPONENT_LOCALPART; // Where we are
  212. $context_stack = array($context); // Where we have been
  213. $context_prior = ISEMAIL_COMPONENT_LOCALPART; // Where we just came from
  214. $token = ''; // The current character
  215. $token_prior = ''; // The previous character
  216. $parsedata = array(
  217. ISEMAIL_COMPONENT_LOCALPART => '',
  218. ISEMAIL_COMPONENT_DOMAIN => ''
  219. ); // For the components of the address
  220.  
  221. $atomlist = array(
  222. ISEMAIL_COMPONENT_LOCALPART => array(''),
  223. ISEMAIL_COMPONENT_DOMAIN => array('')
  224. ); // For the dot-atom elements of the address
  225. $element_count = 0;
  226. $element_len = 0;
  227. $hyphen_flag = false; // Hyphen cannot occur at the end of a subdomain
  228. $end_or_die = false; // CFWS can only appear at the end of the element
  229.  
  230. //-echo "<table style=\"clear:left;\">"; // debug
  231. for ($i = 0; $i < $raw_length; $i++) {
  232. $token = $email[$i];
  233. //-echo "<tr><td><strong>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</strong></td>"; // debug
  234.  
  235. switch ($context) {
  236. //-------------------------------------------------------------
  237. // local-part
  238. //-------------------------------------------------------------
  239. case ISEMAIL_COMPONENT_LOCALPART:
  240. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  241. // local-part = dot-atom / quoted-string / obs-local-part
  242. //
  243. // dot-atom = [CFWS] dot-atom-text [CFWS]
  244. //
  245. // dot-atom-text = 1*atext *("." 1*atext)
  246. //
  247. // quoted-string = [CFWS]
  248. // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
  249. // [CFWS]
  250. //
  251. // obs-local-part = word *("." word)
  252. //
  253. // word = atom / quoted-string
  254. //
  255. // atom = [CFWS] 1*atext [CFWS]
  256. switch ($token) {
  257. // Comment
  258. case ISEMAIL_STRING_OPENPARENTHESIS:
  259. if ($element_len === 0)
  260. // Comments are OK at the beginning of an element
  261. $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_COMMENT : ISEMAIL_DEPREC_COMMENT;
  262. else {
  263. $return_status[] = ISEMAIL_CFWS_COMMENT;
  264. $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end
  265. }
  266.  
  267. $context_stack[] = $context;
  268. $context = ISEMAIL_CONTEXT_COMMENT;
  269. break;
  270. // Next dot-atom element
  271. case ISEMAIL_STRING_DOT:
  272. if ($element_len === 0)
  273. // Another dot, already?
  274. $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error
  275. else
  276. // The entire local-part can be a quoted string for RFC 5321
  277. // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
  278. if ($end_or_die) $return_status[] = ISEMAIL_DEPREC_LOCALPART;
  279.  
  280. $end_or_die = false; // CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms)
  281. $element_len = 0;
  282. $element_count++;
  283. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  284. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] = '';
  285.  
  286. break;
  287. // Quoted string
  288. case ISEMAIL_STRING_DQUOTE:
  289. if ($element_len === 0) {
  290. // The entire local-part can be a quoted string for RFC 5321
  291. // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
  292. $return_status[] = ($element_count === 0) ? ISEMAIL_RFC5321_QUOTEDSTRING : ISEMAIL_DEPREC_LOCALPART;
  293.  
  294. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  295. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
  296. $element_len++;
  297. $end_or_die = true; // Quoted string must be the entire element
  298. $context_stack[] = $context;
  299. $context = ISEMAIL_CONTEXT_QUOTEDSTRING;
  300. } else {
  301. $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
  302. }
  303.  
  304. break;
  305. // Folding White Space
  306. case ISEMAIL_STRING_CR:
  307. case ISEMAIL_STRING_SP:
  308. case ISEMAIL_STRING_HTAB:
  309. if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
  310.  
  311. if ($element_len === 0)
  312. $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_FWS : ISEMAIL_DEPREC_FWS;
  313. else
  314. $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end
  315.  
  316. $context_stack[] = $context;
  317. $context = ISEMAIL_CONTEXT_FWS;
  318. $token_prior = $token;
  319.  
  320. break;
  321. // @
  322. case ISEMAIL_STRING_AT:
  323. // At this point we should have a valid local-part
  324. if (count($context_stack) !== 1) die('Unexpected item on context stack');
  325.  
  326. if ($parsedata[ISEMAIL_COMPONENT_LOCALPART] === '')
  327. $return_status[] = ISEMAIL_ERR_NOLOCALPART; // Fatal error
  328. elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
  329. // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
  330. // The maximum total length of a user name or other local-part is 64
  331. // octets.
  332. elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART]) > 64)
  333. $return_status[] = ISEMAIL_RFC5322_LOCAL_TOOLONG;
  334. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  335. // Comments and folding white space
  336. // SHOULD NOT be used around the "@" in the addr-spec.
  337. //
  338. // http://tools.ietf.org/html/rfc2119
  339. // 4. SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that
  340. // there may exist valid reasons in particular circumstances when the
  341. // particular behavior is acceptable or even useful, but the full
  342. // implications should be understood and the case carefully weighed
  343. // before implementing any behavior described with this label.
  344. elseif (($context_prior === ISEMAIL_CONTEXT_COMMENT) || ($context_prior === ISEMAIL_CONTEXT_FWS))
  345. $return_status[] = ISEMAIL_DEPREC_CFWS_NEAR_AT;
  346.  
  347. // Clear everything down for the domain parsing
  348. $context = ISEMAIL_COMPONENT_DOMAIN; // Where we are
  349. $context_stack = array($context); // Where we have been
  350. $element_count = 0;
  351. $element_len = 0;
  352. $end_or_die = false; // CFWS can only appear at the end of the element
  353.  
  354. break;
  355. // atext
  356. default:
  357. // http://tools.ietf.org/html/rfc5322#section-3.2.3
  358. // atext = ALPHA / DIGIT / ; Printable US-ASCII
  359. // "!" / "#" / ; characters not including
  360. // "$" / "%" / ; specials. Used for atoms.
  361. // "&" / "'" /
  362. // "*" / "+" /
  363. // "-" / "/" /
  364. // "=" / "?" /
  365. // "^" / "_" /
  366. // "`" / "{" /
  367. // "|" / "}" /
  368. // "~"
  369. if ($end_or_die) {
  370. // We have encountered atext where it is no longer valid
  371. switch ($context_prior) {
  372. case ISEMAIL_CONTEXT_COMMENT:
  373. case ISEMAIL_CONTEXT_FWS:
  374. $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
  375. break;
  376. case ISEMAIL_CONTEXT_QUOTEDSTRING:
  377. $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_QS;
  378. break;
  379. default:
  380. die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");
  381. }
  382. } else {
  383. $context_prior = $context;
  384. $ord = ord($token);
  385.  
  386. if (($ord < 33) || ($ord > 126) || ($ord === 10) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token))))
  387. $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
  388.  
  389. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  390. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
  391. $element_len++;
  392. }
  393. }
  394.  
  395. break;
  396. //-------------------------------------------------------------
  397. // Domain
  398. //-------------------------------------------------------------
  399. case ISEMAIL_COMPONENT_DOMAIN:
  400. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  401. // domain = dot-atom / domain-literal / obs-domain
  402. //
  403. // dot-atom = [CFWS] dot-atom-text [CFWS]
  404. //
  405. // dot-atom-text = 1*atext *("." 1*atext)
  406. //
  407. // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
  408. //
  409. // dtext = %d33-90 / ; Printable US-ASCII
  410. // %d94-126 / ; characters not including
  411. // obs-dtext ; "[", "]", or "\"
  412. //
  413. // obs-domain = atom *("." atom)
  414. //
  415. // atom = [CFWS] 1*atext [CFWS]
  416.  
  417.  
  418. // http://tools.ietf.org/html/rfc5321#section-4.1.2
  419. // Mailbox = Local-part "@" ( Domain / address-literal )
  420. //
  421. // Domain = sub-domain *("." sub-domain)
  422. //
  423. // address-literal = "[" ( IPv4-address-literal /
  424. // IPv6-address-literal /
  425. // General-address-literal ) "]"
  426. // ; See Section 4.1.3
  427.  
  428. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  429. // Note: A liberal syntax for the domain portion of addr-spec is
  430. // given here. However, the domain portion contains addressing
  431. // information specified by and used in other protocols (e.g.,
  432. // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
  433. // incumbent upon implementations to conform to the syntax of
  434. // addresses for the context in which they are used.
  435. // is_email() author's note: it's not clear how to interpret this in
  436. // the context of a general email address validator. The conclusion I
  437. // have reached is this: "addressing information" must comply with
  438. // RFC 5321 (and in turn RFC 1035), anything that is "semantically
  439. // invisible" must comply only with RFC 5322.
  440. switch ($token) {
  441. // Comment
  442. case ISEMAIL_STRING_OPENPARENTHESIS:
  443. if ($element_len === 0)
  444. // Comments at the start of the domain are deprecated in the text
  445. // Comments at the start of a subdomain are obs-domain
  446. // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
  447. $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_COMMENT;
  448. else {
  449. $return_status[] = ISEMAIL_CFWS_COMMENT;
  450. $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end
  451. }
  452.  
  453. $context_stack[] = $context;
  454. $context = ISEMAIL_CONTEXT_COMMENT;
  455. break;
  456. // Next dot-atom element
  457. case ISEMAIL_STRING_DOT:
  458. if ($element_len === 0)
  459. // Another dot, already?
  460. $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error
  461. elseif ($hyphen_flag)
  462. // Previous subdomain ended in a hyphen
  463. $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
  464. else
  465. // Nowhere in RFC 5321 does it say explicitly that the
  466. // domain part of a Mailbox must be a valid domain according
  467. // to the DNS standards set out in RFC 1035, but this *is*
  468. // implied in several places. For instance, wherever the idea
  469. // of host routing is discussed the RFC says that the domain
  470. // must be looked up in the DNS. This would be nonsense unless
  471. // the domain was designed to be a valid DNS domain. Hence we
  472. // must conclude that the RFC 1035 restriction on label length
  473. // also applies to RFC 5321 domains.
  474. //
  475. // http://tools.ietf.org/html/rfc1035#section-2.3.4
  476. // labels 63 octets or less
  477. if ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
  478.  
  479. $end_or_die = false; // CFWS is OK again now we're at the beginning of an element (although it may be obsolete CFWS)
  480. $element_len = 0;
  481. $element_count++;
  482. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] = '';
  483. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  484.  
  485. break;
  486. // Domain literal
  487. case ISEMAIL_STRING_OPENSQBRACKET:
  488. if ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') {
  489. $end_or_die = true; // Domain literal must be the only component
  490. $element_len++;
  491. $context_stack[] = $context;
  492. $context = ISEMAIL_COMPONENT_LITERAL;
  493. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  494. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
  495. $parsedata[ISEMAIL_COMPONENT_LITERAL] = '';
  496. } else {
  497. $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
  498. }
  499.  
  500. break;
  501. // Folding White Space
  502. case ISEMAIL_STRING_CR:
  503. case ISEMAIL_STRING_SP:
  504. case ISEMAIL_STRING_HTAB:
  505. if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
  506.  
  507. if ($element_len === 0)
  508. $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_FWS;
  509. else {
  510. $return_status[] = ISEMAIL_CFWS_FWS;
  511. $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end
  512. }
  513.  
  514. $context_stack[] = $context;
  515. $context = ISEMAIL_CONTEXT_FWS;
  516. $token_prior = $token;
  517. break;
  518. // atext
  519. default:
  520. // RFC 5322 allows any atext...
  521. // http://tools.ietf.org/html/rfc5322#section-3.2.3
  522. // atext = ALPHA / DIGIT / ; Printable US-ASCII
  523. // "!" / "#" / ; characters not including
  524. // "$" / "%" / ; specials. Used for atoms.
  525. // "&" / "'" /
  526. // "*" / "+" /
  527. // "-" / "/" /
  528. // "=" / "?" /
  529. // "^" / "_" /
  530. // "`" / "{" /
  531. // "|" / "}" /
  532. // "~"
  533.  
  534. // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)
  535. // http://tools.ietf.org/html/rfc5321#section-4.1.2
  536. // sub-domain = Let-dig [Ldh-str]
  537. //
  538. // Let-dig = ALPHA / DIGIT
  539. //
  540. // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
  541. //
  542. if ($end_or_die) {
  543. // We have encountered atext where it is no longer valid
  544. switch ($context_prior) {
  545. case ISEMAIL_CONTEXT_COMMENT:
  546. case ISEMAIL_CONTEXT_FWS:
  547. $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
  548. break;
  549. case ISEMAIL_COMPONENT_LITERAL:
  550. $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_DOMLIT;
  551. break;
  552. default:
  553. die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");
  554. }
  555. }
  556.  
  557. $ord = ord($token);
  558. $hyphen_flag = false; // Assume this token isn't a hyphen unless we discover it is
  559.  
  560. if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token)))) {
  561. $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
  562. } elseif ($token === ISEMAIL_STRING_HYPHEN) {
  563. if ($element_len === 0) {
  564. // Hyphens can't be at the beginning of a subdomain
  565. $return_status[] = ISEMAIL_ERR_DOMAINHYPHENSTART; // Fatal error
  566. }
  567.  
  568. $hyphen_flag = true;
  569. } elseif (!(($ord > 47 && $ord < 58) || ($ord > 64 && $ord < 91) || ($ord > 96 && $ord < 123))) {
  570. // Not an RFC 5321 subdomain, but still OK by RFC 5322
  571. $return_status[] = ISEMAIL_RFC5322_DOMAIN;
  572. }
  573.  
  574. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  575. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
  576. $element_len++;
  577. }
  578.  
  579. break;
  580. //-------------------------------------------------------------
  581. // Domain literal
  582. //-------------------------------------------------------------
  583. case ISEMAIL_COMPONENT_LITERAL:
  584. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  585. // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
  586. //
  587. // dtext = %d33-90 / ; Printable US-ASCII
  588. // %d94-126 / ; characters not including
  589. // obs-dtext ; "[", "]", or "\"
  590. //
  591. // obs-dtext = obs-NO-WS-CTL / quoted-pair
  592. switch ($token) {
  593. // End of domain literal
  594. case ISEMAIL_STRING_CLOSESQBRACKET:
  595. if ((int) max($return_status) < ISEMAIL_DEPREC) {
  596. // Could be a valid RFC 5321 address literal, so let's check
  597.  
  598. // http://tools.ietf.org/html/rfc5321#section-4.1.2
  599. // address-literal = "[" ( IPv4-address-literal /
  600. // IPv6-address-literal /
  601. // General-address-literal ) "]"
  602. // ; See Section 4.1.3
  603. //
  604. // http://tools.ietf.org/html/rfc5321#section-4.1.3
  605. // IPv4-address-literal = Snum 3("." Snum)
  606. //
  607. // IPv6-address-literal = "IPv6:" IPv6-addr
  608. //
  609. // General-address-literal = Standardized-tag ":" 1*dcontent
  610. //
  611. // Standardized-tag = Ldh-str
  612. // ; Standardized-tag MUST be specified in a
  613. // ; Standards-Track RFC and registered with IANA
  614. //
  615. // dcontent = %d33-90 / ; Printable US-ASCII
  616. // %d94-126 ; excl. "[", "\", "]"
  617. //
  618. // Snum = 1*3DIGIT
  619. // ; representing a decimal integer
  620. // ; value in the range 0 through 255
  621. //
  622. // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
  623. //
  624. // IPv6-hex = 1*4HEXDIG
  625. //
  626. // IPv6-full = IPv6-hex 7(":" IPv6-hex)
  627. //
  628. // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
  629. // [IPv6-hex *5(":" IPv6-hex)]
  630. // ; The "::" represents at least 2 16-bit groups of
  631. // ; zeros. No more than 6 groups in addition to the
  632. // ; "::" may be present.
  633. //
  634. // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
  635. //
  636. // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
  637. // [IPv6-hex *3(":" IPv6-hex) ":"]
  638. // IPv4-address-literal
  639. // ; The "::" represents at least 2 16-bit groups of
  640. // ; zeros. No more than 4 groups in addition to the
  641. // ; "::" and IPv4-address-literal may be present.
  642. //
  643. // is_email() author's note: We can't use ip2long() to validate
  644. // IPv4 addresses because it accepts abbreviated addresses
  645. // (xxx.xxx.xxx), expanding the last group to complete the address.
  646. // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
  647. // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
  648. $max_groups = 8;
  649. $matchesIP = array();
  650. /*.mixed.*/ $index = false;
  651. $addressliteral = $parsedata[ISEMAIL_COMPONENT_LITERAL];
  652.  
  653. // Extract IPv4 part from the end of the address-literal (if there is one)
  654. if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressliteral, $matchesIP) > 0) {
  655. $index = strrpos($addressliteral, $matchesIP[0]);
  656. if ($index !== 0) $addressliteral = substr($addressliteral, 0, $index) . '0:0'; // Convert IPv4 part to IPv6 format for further testing
  657. }
  658.  
  659. if ($index === 0) {
  660. // Nothing there except a valid IPv4 address, so...
  661. $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
  662. } elseif (strncasecmp($addressliteral, ISEMAIL_STRING_IPV6TAG, 5) !== 0) {
  663. $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
  664. } else {
  665. $IPv6 = substr($addressliteral, 5);
  666. $matchesIP = explode(ISEMAIL_STRING_COLON, $IPv6); // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
  667. $groupCount = count($matchesIP);
  668. $index = strpos($IPv6,ISEMAIL_STRING_DOUBLECOLON);
  669.  
  670. if ($index === false) {
  671. // We need exactly the right number of groups
  672. if ($groupCount !== $max_groups)
  673. $return_status[] = ISEMAIL_RFC5322_IPV6_GRPCOUNT;
  674. } else {
  675. if ($index !== strrpos($IPv6,ISEMAIL_STRING_DOUBLECOLON))
  676. $return_status[] = ISEMAIL_RFC5322_IPV6_2X2XCOLON;
  677. else {
  678. if ($index === 0 || $index === (strlen($IPv6) - 2)) $max_groups++; // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
  679.  
  680. if ($groupCount > $max_groups)
  681. $return_status[] = ISEMAIL_RFC5322_IPV6_MAXGRPS;
  682. elseif ($groupCount === $max_groups)
  683. $return_status[] = ISEMAIL_RFC5321_IPV6DEPRECATED; // Eliding a single "::"
  684. }
  685. }
  686.  
  687. // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
  688. if ((substr($IPv6, 0, 1) === ISEMAIL_STRING_COLON) && (substr($IPv6, 1, 1) !== ISEMAIL_STRING_COLON))
  689. $return_status[] = ISEMAIL_RFC5322_IPV6_COLONSTRT; // Address starts with a single colon
  690. elseif ((substr($IPv6, -1) === ISEMAIL_STRING_COLON) && (substr($IPv6, -2, 1) !== ISEMAIL_STRING_COLON))
  691. $return_status[] = ISEMAIL_RFC5322_IPV6_COLONEND; // Address ends with a single colon
  692. elseif (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0)
  693. $return_status[] = ISEMAIL_RFC5322_IPV6_BADCHAR; // Check for unmatched characters
  694. else
  695. $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
  696. }
  697. } else
  698. $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
  699.  
  700.  
  701. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  702. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
  703. $element_len++;
  704. $context_prior = $context;
  705. $context = (int) array_pop($context_stack);
  706. break;
  707. case ISEMAIL_STRING_BACKSLASH:
  708. $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
  709. $context_stack[] = $context;
  710. $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
  711. break;
  712. // Folding White Space
  713. case ISEMAIL_STRING_CR:
  714. case ISEMAIL_STRING_SP:
  715. case ISEMAIL_STRING_HTAB:
  716. if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
  717.  
  718. $return_status[] = ISEMAIL_CFWS_FWS;
  719.  
  720. $context_stack[] = $context;
  721. $context = ISEMAIL_CONTEXT_FWS;
  722. $token_prior = $token;
  723. break;
  724. // dtext
  725. default:
  726. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  727. // dtext = %d33-90 / ; Printable US-ASCII
  728. // %d94-126 / ; characters not including
  729. // obs-dtext ; "[", "]", or "\"
  730. //
  731. // obs-dtext = obs-NO-WS-CTL / quoted-pair
  732. //
  733. // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
  734. // %d11 / ; characters that do not
  735. // %d12 / ; include the carriage
  736. // %d14-31 / ; return, line feed, and
  737. // %d127 ; white space characters
  738. $ord = ord($token);
  739.  
  740. // CR, LF, SP & HTAB have already been parsed above
  741. if (($ord > 127) || ($ord === 0) || ($token === ISEMAIL_STRING_OPENSQBRACKET)) {
  742. $return_status[] = ISEMAIL_ERR_EXPECTING_DTEXT; // Fatal error
  743. break;
  744. } elseif (($ord < 33) || ($ord === 127)) {
  745. $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
  746. }
  747.  
  748. $parsedata[ISEMAIL_COMPONENT_LITERAL] .= $token;
  749. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  750. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
  751. $element_len++;
  752. }
  753.  
  754. break;
  755. //-------------------------------------------------------------
  756. // Quoted string
  757. //-------------------------------------------------------------
  758. case ISEMAIL_CONTEXT_QUOTEDSTRING:
  759. // http://tools.ietf.org/html/rfc5322#section-3.2.4
  760. // quoted-string = [CFWS]
  761. // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
  762. // [CFWS]
  763. //
  764. // qcontent = qtext / quoted-pair
  765. switch ($token) {
  766. // Quoted pair
  767. case ISEMAIL_STRING_BACKSLASH:
  768. $context_stack[] = $context;
  769. $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
  770. break;
  771. // Folding White Space
  772. // Inside a quoted string, spaces are allowed as regular characters.
  773. // It's only FWS if we include HTAB or CRLF
  774. case ISEMAIL_STRING_CR:
  775. case ISEMAIL_STRING_HTAB:
  776. if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
  777.  
  778. // http://tools.ietf.org/html/rfc5322#section-3.2.2
  779. // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
  780. // structured header field are semantically interpreted as a single
  781. // space character.
  782.  
  783. // http://tools.ietf.org/html/rfc5322#section-3.2.4
  784. // the CRLF in any FWS/CFWS that appears within the quoted-string [is]
  785. // semantically "invisible" and therefore not part of the quoted-string
  786. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= ISEMAIL_STRING_SP;
  787. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= ISEMAIL_STRING_SP;
  788. $element_len++;
  789.  
  790. $return_status[] = ISEMAIL_CFWS_FWS;
  791. $context_stack[] = $context;
  792. $context = ISEMAIL_CONTEXT_FWS;
  793. $token_prior = $token;
  794. break;
  795. // End of quoted string
  796. case ISEMAIL_STRING_DQUOTE:
  797. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  798. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
  799. $element_len++;
  800. $context_prior = $context;
  801. $context = (int) array_pop($context_stack);
  802. break;
  803. // qtext
  804. default:
  805. // http://tools.ietf.org/html/rfc5322#section-3.2.4
  806. // qtext = %d33 / ; Printable US-ASCII
  807. // %d35-91 / ; characters not including
  808. // %d93-126 / ; "\" or the quote character
  809. // obs-qtext
  810. //
  811. // obs-qtext = obs-NO-WS-CTL
  812. //
  813. // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
  814. // %d11 / ; characters that do not
  815. // %d12 / ; include the carriage
  816. // %d14-31 / ; return, line feed, and
  817. // %d127 ; white space characters
  818. $ord = ord($token);
  819.  
  820. if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
  821. $return_status[] = ISEMAIL_ERR_EXPECTING_QTEXT; // Fatal error
  822. } elseif (($ord < 32) || ($ord === 127))
  823. $return_status[] = ISEMAIL_DEPREC_QTEXT;
  824.  
  825. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  826. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
  827. $element_len++;
  828. }
  829.  
  830. // http://tools.ietf.org/html/rfc5322#section-3.4.1
  831. // If the
  832. // string can be represented as a dot-atom (that is, it contains no
  833. // characters other than atext characters or "." surrounded by atext
  834. // characters), then the dot-atom form SHOULD be used and the quoted-
  835. // string form SHOULD NOT be used.
  836. // To do
  837. break;
  838. //-------------------------------------------------------------
  839. // Quoted pair
  840. //-------------------------------------------------------------
  841. case ISEMAIL_CONTEXT_QUOTEDPAIR:
  842. // http://tools.ietf.org/html/rfc5322#section-3.2.1
  843. // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
  844. //
  845. // VCHAR = %d33-126 ; visible (printing) characters
  846. // WSP = SP / HTAB ; white space
  847. //
  848. // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
  849. //
  850. // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
  851. // %d11 / ; characters that do not
  852. // %d12 / ; include the carriage
  853. // %d14-31 / ; return, line feed, and
  854. // %d127 ; white space characters
  855. //
  856. // i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
  857. $ord = ord($token);
  858.  
  859. if ($ord > 127)
  860. $return_status[] = ISEMAIL_ERR_EXPECTING_QPAIR; // Fatal error
  861. elseif ((($ord < 31) && ($ord !== 9)) || ($ord === 127)) // SP & HTAB are allowed
  862. $return_status[] = ISEMAIL_DEPREC_QP;
  863.  
  864. // At this point we know where this qpair occurred so
  865. // we could check to see if the character actually
  866. // needed to be quoted at all.
  867. // http://tools.ietf.org/html/rfc5321#section-4.1.2
  868. // the sending system SHOULD transmit the
  869. // form that uses the minimum quoting possible.
  870. // To do: check whether the character needs to be quoted (escaped) in this context
  871. $context_prior = $context;
  872. $context = (int) array_pop($context_stack); // End of qpair
  873. $token = ISEMAIL_STRING_BACKSLASH . $token;
  874.  
  875. switch ($context) {
  876. case ISEMAIL_CONTEXT_COMMENT:
  877. break;
  878. case ISEMAIL_CONTEXT_QUOTEDSTRING:
  879. $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
  880. $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
  881. $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
  882. break;
  883. case ISEMAIL_COMPONENT_LITERAL:
  884. $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
  885. $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
  886. $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
  887. break;
  888. default:
  889. die("Quoted pair logic invoked in an invalid context: $context");
  890. }
  891.  
  892. break;
  893. //-------------------------------------------------------------
  894. // Comment
  895. //-------------------------------------------------------------
  896. case ISEMAIL_CONTEXT_COMMENT:
  897. // http://tools.ietf.org/html/rfc5322#section-3.2.2
  898. // comment = "(" *([FWS] ccontent) [FWS] ")"
  899. //
  900. // ccontent = ctext / quoted-pair / comment
  901. switch ($token) {
  902. // Nested comment
  903. case ISEMAIL_STRING_OPENPARENTHESIS:
  904. // Nested comments are OK
  905. $context_stack[] = $context;
  906. $context = ISEMAIL_CONTEXT_COMMENT;
  907. break;
  908. // End of comment
  909. case ISEMAIL_STRING_CLOSEPARENTHESIS:
  910. $context_prior = $context;
  911. $context = (int) array_pop($context_stack);
  912.  
  913. // http://tools.ietf.org/html/rfc5322#section-3.2.2
  914. // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
  915. // structured header field are semantically interpreted as a single
  916. // space character.
  917. //
  918. // is_email() author's note: This *cannot* mean that we must add a
  919. // space to the address wherever CFWS appears. This would result in
  920. // any addr-spec that had CFWS outside a quoted string being invalid
  921. // for RFC 5321.
  922. // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
  923. // $parsedata[$context] .= ISEMAIL_STRING_SP;
  924. // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
  925. // $element_len++;
  926. // }
  927.  
  928. break;
  929. // Quoted pair
  930. case ISEMAIL_STRING_BACKSLASH:
  931. $context_stack[] = $context;
  932. $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
  933. break;
  934. // Folding White Space
  935. case ISEMAIL_STRING_CR:
  936. case ISEMAIL_STRING_SP:
  937. case ISEMAIL_STRING_HTAB:
  938. if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
  939.  
  940. $return_status[] = ISEMAIL_CFWS_FWS;
  941.  
  942. $context_stack[] = $context;
  943. $context = ISEMAIL_CONTEXT_FWS;
  944. $token_prior = $token;
  945. break;
  946. // ctext
  947. default:
  948. // http://tools.ietf.org/html/rfc5322#section-3.2.3
  949. // ctext = %d33-39 / ; Printable US-ASCII
  950. // %d42-91 / ; characters not including
  951. // %d93-126 / ; "(", ")", or "\"
  952. // obs-ctext
  953. //
  954. // obs-ctext = obs-NO-WS-CTL
  955. //
  956. // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
  957. // %d11 / ; characters that do not
  958. // %d12 / ; include the carriage
  959. // %d14-31 / ; return, line feed, and
  960. // %d127 ; white space characters
  961. $ord = ord($token);
  962.  
  963. if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
  964. $return_status[] = ISEMAIL_ERR_EXPECTING_CTEXT; // Fatal error
  965. break;
  966. } elseif (($ord < 32) || ($ord === 127)) {
  967. $return_status[] = ISEMAIL_DEPREC_CTEXT;
  968. }
  969. }
  970.  
  971. break;
  972. //-------------------------------------------------------------
  973. // Folding White Space
  974. //-------------------------------------------------------------
  975. case ISEMAIL_CONTEXT_FWS:
  976. // http://tools.ietf.org/html/rfc5322#section-3.2.2
  977. // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
  978. // ; Folding white space
  979.  
  980. // But note the erratum:
  981. // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
  982. // In the obsolete syntax, any amount of folding white space MAY be
  983. // inserted where the obs-FWS rule is allowed. This creates the
  984. // possibility of having two consecutive "folds" in a line, and
  985. // therefore the possibility that a line which makes up a folded header
  986. // field could be composed entirely of white space.
  987. //
  988. // obs-FWS = 1*([CRLF] WSP)
  989. if ($token_prior === ISEMAIL_STRING_CR) {
  990. if ($token === ISEMAIL_STRING_CR) {
  991. $return_status[] = ISEMAIL_ERR_FWS_CRLF_X2; // Fatal error
  992. break;
  993. }
  994.  
  995. if (isset($crlf_count)) {
  996. if (++$crlf_count > 1)
  997. $return_status[] = ISEMAIL_DEPREC_FWS; // Multiple folds = obsolete FWS
  998. } else $crlf_count = 1;
  999. }
  1000.  
  1001. switch ($token) {
  1002. case ISEMAIL_STRING_CR:
  1003. if ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))
  1004. $return_status[] = ISEMAIL_ERR_CR_NO_LF; // Fatal error
  1005.  
  1006. break;
  1007. case ISEMAIL_STRING_SP:
  1008. case ISEMAIL_STRING_HTAB:
  1009. break;
  1010. default:
  1011. if ($token_prior === ISEMAIL_STRING_CR) {
  1012. $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
  1013. break;
  1014. }
  1015.  
  1016. if (isset($crlf_count)) unset($crlf_count);
  1017.  
  1018. $context_prior = $context;
  1019. $context = (int) array_pop($context_stack); // End of FWS
  1020.  
  1021. // http://tools.ietf.org/html/rfc5322#section-3.2.2
  1022. // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
  1023. // structured header field are semantically interpreted as a single
  1024. // space character.
  1025. //
  1026. // is_email() author's note: This *cannot* mean that we must add a
  1027. // space to the address wherever CFWS appears. This would result in
  1028. // any addr-spec that had CFWS outside a quoted string being invalid
  1029. // for RFC 5321.
  1030. // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
  1031. // $parsedata[$context] .= ISEMAIL_STRING_SP;
  1032. // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
  1033. // $element_len++;
  1034. // }
  1035.  
  1036. $i--; // Look at this token again in the parent context
  1037. }
  1038.  
  1039. $token_prior = $token;
  1040. break;
  1041. //-------------------------------------------------------------
  1042. // A context we aren't expecting
  1043. //-------------------------------------------------------------
  1044. default:
  1045. die("Unknown context: $context");
  1046. }
  1047.  
  1048. //-echo "<td>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</td></tr>"; // debug
  1049. if ((int) max($return_status) > ISEMAIL_RFC5322) break; // No point going on if we've got a fatal error
  1050. }
  1051.  
  1052. // Some simple final tests
  1053. if ((int) max($return_status) < ISEMAIL_RFC5322) {
  1054. if ($context === ISEMAIL_CONTEXT_QUOTEDSTRING) $return_status[] = ISEMAIL_ERR_UNCLOSEDQUOTEDSTR; // Fatal error
  1055. elseif ($context === ISEMAIL_CONTEXT_QUOTEDPAIR) $return_status[] = ISEMAIL_ERR_BACKSLASHEND; // Fatal error
  1056. elseif ($context === ISEMAIL_CONTEXT_COMMENT) $return_status[] = ISEMAIL_ERR_UNCLOSEDCOMMENT; // Fatal error
  1057. elseif ($context === ISEMAIL_COMPONENT_LITERAL) $return_status[] = ISEMAIL_ERR_UNCLOSEDDOMLIT; // Fatal error
  1058. elseif ($token === ISEMAIL_STRING_CR) $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
  1059. elseif ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') $return_status[] = ISEMAIL_ERR_NODOMAIN; // Fatal error
  1060. elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
  1061. elseif ($hyphen_flag) $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
  1062. // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
  1063. // The maximum total length of a domain name or number is 255 octets.
  1064. elseif (strlen($parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 255)
  1065. $return_status[] = ISEMAIL_RFC5322_DOMAIN_TOOLONG;
  1066. // http://tools.ietf.org/html/rfc5321#section-4.1.2
  1067. // Forward-path = Path
  1068. //
  1069. // Path = "<" [ A-d-l ":" ] Mailbox ">"
  1070. //
  1071. // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
  1072. // The maximum total length of a reverse-path or forward-path is 256
  1073. // octets (including the punctuation and element separators).
  1074. //
  1075. // Thus, even without (obsolete) routing information, the Mailbox can
  1076. // only be 254 characters long. This is confirmed by this verified
  1077. // erratum to RFC 3696:
  1078. //
  1079. // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
  1080. // However, there is a restriction in RFC 2821 on the length of an
  1081. // address in MAIL and RCPT commands of 254 characters. Since addresses
  1082. // that do not fit in those fields are not normally useful, the upper
  1083. // limit on address lengths should normally be considered to be 254.
  1084. elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART] . ISEMAIL_STRING_AT . $parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 254)
  1085. $return_status[] = ISEMAIL_RFC5322_TOOLONG;
  1086. // http://tools.ietf.org/html/rfc1035#section-2.3.4
  1087. // labels 63 octets or less
  1088. elseif ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
  1089. }
  1090.  
  1091. // Check DNS?
  1092. $dns_checked = false;
  1093.  
  1094. if ($checkDNS && ((int) max($return_status) < ISEMAIL_DNSWARN) && function_exists('dns_get_record')) {
  1095. // http://tools.ietf.org/html/rfc5321#section-2.3.5
  1096. // Names that can
  1097. // be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
  1098. // in Section 5) are permitted, as are CNAME RRs whose targets can be
  1099. // resolved, in turn, to MX or address RRs.
  1100. //
  1101. // http://tools.ietf.org/html/rfc5321#section-5.1
  1102. // The lookup first attempts to locate an MX record associated with the
  1103. // name. If a CNAME record is found, the resulting name is processed as
  1104. // if it were the initial name. ... If an empty list of MXs is returned,
  1105. // the address is treated as if it was associated with an implicit MX
  1106. // RR, with a preference of 0, pointing to that host.
  1107. //
  1108. // is_email() author's note: We will regard the existence of a CNAME to be
  1109. // sufficient evidence of the domain's existence. For performance reasons
  1110. // we will not repeat the DNS lookup for the CNAME's target, but we will
  1111. // raise a warning because we didn't immediately find an MX record.
  1112. if ($element_count === 0) $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= '.'; // Checking TLD DNS seems to work only if you explicitly check from the root
  1113.  
  1114. $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_MX); // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)
  1115.  
  1116. if ((is_bool($result) && !(bool) $result))
  1117. $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // Domain can't be found in DNS
  1118. else {
  1119. if (count($result) === 0) {
  1120. $return_status[] = ISEMAIL_DNSWARN_NO_MX_RECORD; // MX-record for domain can't be found
  1121. $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_A + DNS_CNAME);
  1122.  
  1123. if (count($result) === 0)
  1124. $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // No usable records for the domain can be found
  1125. } else $dns_checked = true;
  1126. }
  1127. }
  1128.  
  1129. // Check for TLD addresses
  1130. // -----------------------
  1131. // TLD addresses are specifically allowed in RFC 5321 but they are
  1132. // unusual to say the least. We will allocate a separate
  1133. // status to these addresses on the basis that they are more likely
  1134. // to be typos than genuine addresses (unless we've already
  1135. // established that the domain does have an MX record)
  1136. //
  1137. // http://tools.ietf.org/html/rfc5321#section-2.3.5
  1138. // In the case
  1139. // of a top-level domain used by itself in an email address, a single
  1140. // string is used without any dots. This makes the requirement,
  1141. // described in more detail below, that only fully-qualified domain
  1142. // names appear in SMTP transactions on the public Internet,
  1143. // particularly important where top-level domains are involved.
  1144. //
  1145. // TLD format
  1146. // ----------
  1147. // The format of TLDs has changed a number of times. The standards
  1148. // used by IANA have been largely ignored by ICANN, leading to
  1149. // confusion over the standards being followed. These are not defined
  1150. // anywhere, except as a general component of a DNS host name (a label).
  1151. // However, this could potentially lead to 123.123.123.123 being a
  1152. // valid DNS name (rather than an IP address) and thereby creating
  1153. // an ambiguity. The most authoritative statement on TLD formats that
  1154. // the author can find is in a (rejected!) erratum to RFC 1123
  1155. // submitted by John Klensin, the author of RFC 5321:
  1156. //
  1157. // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
  1158. // However, a valid host name can never have the dotted-decimal
  1159. // form #.#.#.#, since this change does not permit the highest-level
  1160. // component label to start with a digit even if it is not all-numeric.
  1161. if (!$dns_checked && ((int) max($return_status) < ISEMAIL_DNSWARN)) {
  1162. if ($element_count === 0) $return_status[] = ISEMAIL_RFC5321_TLD;
  1163.  
  1164. if (is_numeric($atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count][0]))
  1165. $return_status[] = ISEMAIL_RFC5321_TLDNUMERIC;
  1166. }
  1167.  
  1168. $return_status = array_unique($return_status);
  1169. $final_status = (int) max($return_status);
  1170.  
  1171. if (count($return_status) !== 1) array_shift($return_status); // remove redundant ISEMAIL_VALID
  1172.  
  1173. $parsedata['status'] = $return_status;
  1174.  
  1175. if ($final_status < $threshold) $final_status = ISEMAIL_VALID;
  1176.  
  1177. return ($diagnose) ? $final_status : ($final_status < ISEMAIL_THRESHOLD);
  1178. }
  1179. ?>

Report this snippet  

You need to log in to post a comment.