PageRenderTime 709ms CodeModel.GetById 182ms app.highlight 230ms RepoModel.GetById 232ms app.codeStats 1ms

/vendor/ZF2/library/Zend/Validator/Hostname.php

https://github.com/XataWork/zf2-project
PHP | 761 lines | 504 code | 62 blank | 195 comment | 71 complexity | b292152abaf0f9e53973f863de37eb83 MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework (http://framework.zend.com/)
  4 *
  5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  7 * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 */
  9
 10namespace Zend\Validator;
 11
 12use Zend\Stdlib\ErrorHandler;
 13use Zend\Stdlib\StringUtils;
 14
 15/**
 16 * Please note there are two standalone test scripts for testing IDN characters due to problems
 17 * with file encoding.
 18 *
 19 * The first is tests/Zend/Validator/HostnameTestStandalone.php which is designed to be run on
 20 * the command line.
 21 *
 22 * The second is tests/Zend/Validator/HostnameTestForm.php which is designed to be run via HTML
 23 * to allow users to test entering UTF-8 characters in a form.
 24 */
 25class Hostname extends AbstractValidator
 26{
    // Keys identifying each validation failure. Every key has a matching entry in
    // $messageTemplates and is the value passed to error() when that check fails.
    const CANNOT_DECODE_PUNYCODE  = 'hostnameCannotDecodePunycode';
    const INVALID                 = 'hostnameInvalid';
    const INVALID_DASH            = 'hostnameDashCharacter';
    const INVALID_HOSTNAME        = 'hostnameInvalidHostname';
    const INVALID_HOSTNAME_SCHEMA = 'hostnameInvalidHostnameSchema';
    const INVALID_LOCAL_NAME      = 'hostnameInvalidLocalName';
    const INVALID_URI             = 'hostnameInvalidUri';
    const IP_ADDRESS_NOT_ALLOWED  = 'hostnameIpAddressNotAllowed';
    const LOCAL_NAME_NOT_ALLOWED  = 'hostnameLocalNameNotAllowed';
    const UNDECIPHERABLE_TLD      = 'hostnameUndecipherableTld';
    const UNKNOWN_TLD             = 'hostnameUnknownTld';
 38
    /**
     * Failure message templates, keyed by the failure constants declared on this class.
     *
     * The INVALID_HOSTNAME_SCHEMA template contains a '%tld%' placeholder whose name
     * matches the 'tld' entry of $messageVariables.
     * NOTE(review): the substitution itself is presumably performed by
     * AbstractValidator when the message is built - confirm against the parent class.
     *
     * @var array
     */
    protected $messageTemplates = array(
        self::CANNOT_DECODE_PUNYCODE  => "The input appears to be a DNS hostname but the given punycode notation cannot be decoded",
        self::INVALID                 => "Invalid type given. String expected",
        self::INVALID_DASH            => "The input appears to be a DNS hostname but contains a dash in an invalid position",
        self::INVALID_HOSTNAME        => "The input does not match the expected structure for a DNS hostname",
        self::INVALID_HOSTNAME_SCHEMA => "The input appears to be a DNS hostname but cannot match against hostname schema for TLD '%tld%'",
        self::INVALID_LOCAL_NAME      => "The input does not appear to be a valid local network name",
        self::INVALID_URI             => "The input does not appear to be a valid URI hostname",
        self::IP_ADDRESS_NOT_ALLOWED  => "The input appears to be an IP address, but IP addresses are not allowed",
        self::LOCAL_NAME_NOT_ALLOWED  => "The input appears to be a local network name but local network names are not allowed",
        self::UNDECIPHERABLE_TLD      => "The input appears to be a DNS hostname but cannot extract TLD part",
        self::UNKNOWN_TLD             => "The input appears to be a DNS hostname but cannot match TLD against known list",
    );
 55
    /**
     * Placeholder name => property name pairs made available to message templates.
     *
     * The single entry pairs the 'tld' placeholder with the $tld property, which
     * isValid() sets to the upper-cased TLD of the value being validated.
     * NOTE(review): how the pair is consumed is defined outside this file -
     * confirm in AbstractValidator.
     *
     * @var array
     */
    protected $messageVariables = array(
        'tld' => 'tld',
    );
 62
    // Bit flags for the 'allow' option. Each hostname type occupies one bit so the
    // flags can be combined with bitwise OR; isValid() tests them with bitwise AND
    // against getAllow(). ALLOW_ALL (15) is the OR of the four individual flags.
    const ALLOW_DNS   = 1;  // Allows Internet domain names (e.g., example.com)
    const ALLOW_IP    = 2;  // Allows IP addresses
    const ALLOW_LOCAL = 4;  // Allows local network names (e.g., localhost, www.localdomain)
    const ALLOW_URI   = 8;  // Allows URI hostnames
    const ALLOW_ALL   = 15;  // Allows all types of hostnames
 68
 69    /**
 70     * Array of valid top-level-domains
 71     *
 72     * @see ftp://data.iana.org/TLD/tlds-alpha-by-domain.txt  List of all TLDs by domain
 73     * @see http://www.iana.org/domains/root/db/ Official list of supported TLDs
 74     * @var array
 75     */
 76    protected $validTlds = array(
 77        'ac', 'academy', 'actor', 'ad', 'ae', 'aero', 'af', 'ag', 'agency', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar',
 78        'arpa', 'as', 'asia', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bar', 'bargains', 'bb', 'bd', 'be', 'berlin', 'best',
 79        'bf', 'bg', 'bh', 'bi', 'bike', 'biz', 'bj', 'bl', 'blue', 'bm', 'bn', 'bo', 'boutique', 'bq', 'br', 'bs', 'bt',
 80        'build', 'builders', 'buzz', 'bv', 'bw', 'by', 'bz', 'ca', 'cab', 'camera', 'camp', 'cards', 'careers', 'cat',
 81        'catering', 'cc', 'cd', 'center', 'ceo', 'cf', 'cg', 'ch', 'cheap', 'christmas', 'ci', 'ck', 'cl', 'cleaning',
 82        'clothing', 'club', 'cm', 'cn', 'co', 'codes', 'coffee', 'com', 'community', 'company', 'computer',
 83        'construction', 'contractors', 'cool', 'coop', 'cr', 'cruises', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz', 'dance',
 84        'dating', 'de', 'democrat', 'diamonds', 'directory', 'dj', 'dk', 'dm', 'do', 'domains', 'dz', 'ec', 'edu',
 85        'education', 'ee', 'eg', 'eh', 'email', 'enterprises', 'equipment', 'er', 'es', 'estate', 'et', 'eu', 'events',
 86        'expert', 'exposed', 'farm', 'fi', 'fish', 'fj', 'fk', 'flights', 'florist', 'fm', 'fo', 'foundation', 'fr',
 87        'futbol', 'ga', 'gallery', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gift', 'gl', 'glass', 'gm', 'gn', 'gov',
 88        'gp', 'gq', 'gr', 'graphics', 'gs', 'gt', 'gu', 'guitars', 'guru', 'gw', 'gy', 'hk', 'hm', 'hn', 'holdings',
 89        'holiday', 'house', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'immobilien', 'in', 'industries', 'info',
 90        'institute', 'int', 'international', 'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'kaufen',
 91        'ke', 'kg', 'kh', 'ki', 'kim', 'kitchen', 'kiwi', 'km', 'kn', 'kp', 'kr', 'kred', 'kw', 'ky', 'kz', 'la',
 92        'land', 'lb', 'lc', 'li', 'lighting', 'limo', 'link', 'lk', 'lr', 'ls', 'lt', 'lu', 'luxury', 'lv', 'ly', 'ma',
 93        'management', 'mango', 'marketing', 'mc', 'md', 'me', 'menu', 'mf', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn',
 94        'mo', 'mobi', 'moda', 'monash', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv', 'mw', 'mx', 'my', 'mz',
 95        'na', 'nagoya', 'name', 'nc', 'ne', 'net', 'neustar', 'nf', 'ng', 'ni', 'ninja', 'nl', 'no', 'np', 'nr', 'nu',
 96        'nz', 'om', 'onl', 'org', 'pa', 'partners', 'parts', 'pe', 'pf', 'pg', 'ph', 'photo', 'photography', 'photos',
 97        'pics', 'pink', 'pk', 'pl', 'plumbing', 'pm', 'pn', 'post', 'pr', 'pro', 'productions', 'properties', 'ps',
 98        'pt', 'pub', 'pw', 'py', 'qa', 'qpon', 're', 'recipes', 'red', 'rentals', 'repair', 'report', 'reviews', 'rich',
 99        'ro', 'rs', 'ru', 'ruhr', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sexy', 'sg', 'sh', 'shiksha', 'shoes', 'si',
100        'singles', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'social', 'solar', 'solutions', 'sr', 'ss', 'st', 'su',
101        'supplies', 'supply', 'support', 'sv', 'sx', 'sy', 'systems', 'sz', 'tattoo', 'tc', 'td', 'technology', 'tel',
102        'tf', 'tg', 'th', 'tienda', 'tips', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'today', 'tokyo', 'tools', 'tp', 'tr',
103        'training', 'travel', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'um', 'uno', 'us', 'uy', 'uz', 'va',
104        'vacations', 'vc', 've', 'ventures', 'vg', 'vi', 'viajes', 'villas', 'vision', 'vn', 'voting', 'voyage', 'vu',
105        'wang', 'watch', 'wed', 'wf', 'wien', 'wiki', 'works', 'ws', '测试', 'परीक्षा', '集团', '在线', '한국', 'ভারত',
106        'বাংলা', '公益', '公司', '移动', '我爱你', 'испытание', 'қаз', 'онлайн', 'сайт', 'срб', '테스트', '삼성',
107        'சிங்கப்பூர்', 'дети', 'טעסט', '中文网', '中信', '中国', '中國', 'భారత్', 'ලංකා', '測試', 'ભારત', 'भारत',
108        'آزمایشی', 'பரிட்சை', '网络', 'укр', '香港', 'δοκιμή', 'إختبار', '台湾', '台灣', 'мон',
109        'الجزائر', 'عمان', 'ایران', 'امارات', 'بازار', 'پاکستان', 'الاردن', 'بھارت', 'المغرب', 'السعودية', 'سودان', 'مليسيا', 'شبكة', 'გე',
110        'ไทย', 'سورية', 'рф', 'تونس', 'みんな', 'ਭਾਰਤ', '游戏', 'مصر', 'قطر', 'இலங்கை', 'இந்தியா', '新加坡', 'فلسطين',
111        'テスト', '政务', 'xxx', 'xyz', 'ye', 'yt', 'za', 'zm', 'zone', 'zw'
112    );
113
114    /**
115     * Array for valid Idns
116     * @see http://www.iana.org/domains/idn-tables/ Official list of supported IDN Chars
117     * (.AC) Ascension Island http://www.nic.ac/pdf/AC-IDN-Policy.pdf
118     * (.AR) Argentina http://www.nic.ar/faqidn.html
119     * (.AS) American Samoa http://www.nic.as/idn/chars.cfm
120     * (.AT) Austria http://www.nic.at/en/service/technical_information/idn/charset_converter/
121     * (.BIZ) International http://www.iana.org/domains/idn-tables/
122     * (.BR) Brazil http://registro.br/faq/faq6.html
 123     * (.BV) Bouvet Island http://www.norid.no/domeneregistrering/idn/idn_nyetegn.en.html
124     * (.CAT) Catalan http://www.iana.org/domains/idn-tables/tables/cat_ca_1.0.html
125     * (.CH) Switzerland https://nic.switch.ch/reg/ocView.action?res=EF6GW2JBPVTG67DLNIQXU234MN6SC33JNQQGI7L6#anhang1
126     * (.CL) Chile http://www.iana.org/domains/idn-tables/tables/cl_latn_1.0.html
127     * (.COM) International http://www.verisign.com/information-services/naming-services/internationalized-domain-names/index.html
128     * (.DE) Germany http://www.denic.de/en/domains/idns/liste.html
 129     * (.DK) Denmark http://www.dk-hostmaster.dk/index.php?id=151
130     * (.ES) Spain https://www.nic.es/media/2008-05/1210147705287.pdf
131     * (.FI) Finland http://www.ficora.fi/en/index/palvelut/fiverkkotunnukset/aakkostenkaytto.html
132     * (.GR) Greece https://grweb.ics.forth.gr/CharacterTable1_en.jsp
133     * (.HU) Hungary http://www.domain.hu/domain/English/szabalyzat/szabalyzat.html
134     * (.IL) Israel http://www.isoc.org.il/domains/il-domain-rules.html
135     * (.INFO) International http://www.nic.info/info/idn
136     * (.IO) British Indian Ocean Territory http://www.nic.io/IO-IDN-Policy.pdf
137     * (.IR) Iran http://www.nic.ir/Allowable_Characters_dot-iran
138     * (.IS) Iceland http://www.isnic.is/domain/rules.php
139     * (.KR) Korea http://www.iana.org/domains/idn-tables/tables/kr_ko-kr_1.0.html
140     * (.LI) Liechtenstein https://nic.switch.ch/reg/ocView.action?res=EF6GW2JBPVTG67DLNIQXU234MN6SC33JNQQGI7L6#anhang1
141     * (.LT) Lithuania http://www.domreg.lt/static/doc/public/idn_symbols-en.pdf
142     * (.MD) Moldova http://www.register.md/
143     * (.MUSEUM) International http://www.iana.org/domains/idn-tables/tables/museum_latn_1.0.html
144     * (.NET) International http://www.verisign.com/information-services/naming-services/internationalized-domain-names/index.html
145     * (.NO) Norway http://www.norid.no/domeneregistrering/idn/idn_nyetegn.en.html
146     * (.NU) Niue http://www.worldnames.net/
147     * (.ORG) International http://www.pir.org/index.php?db=content/FAQs&tbl=FAQs_Registrant&id=2
148     * (.PE) Peru https://www.nic.pe/nuevas_politicas_faq_2.php
149     * (.PL) Poland http://www.dns.pl/IDN/allowed_character_sets.pdf
150     * (.PR) Puerto Rico http://www.nic.pr/idn_rules.asp
151     * (.PT) Portugal https://online.dns.pt/dns_2008/do?com=DS;8216320233;111;+PAGE(4000058)+K-CAT-CODIGO(C.125)+RCNT(100);
152     * (.RU) Russia http://www.iana.org/domains/idn-tables/tables/ru_ru-ru_1.0.html
153     * (.SA) Saudi Arabia http://www.iana.org/domains/idn-tables/tables/sa_ar_1.0.html
154     * (.SE) Sweden http://www.iis.se/english/IDN_campaignsite.shtml?lang=en
155     * (.SH) Saint Helena http://www.nic.sh/SH-IDN-Policy.pdf
156     * (.SJ) Svalbard and Jan Mayen http://www.norid.no/domeneregistrering/idn/idn_nyetegn.en.html
157     * (.TH) Thailand http://www.iana.org/domains/idn-tables/tables/th_th-th_1.0.html
158     * (.TM) Turkmenistan http://www.nic.tm/TM-IDN-Policy.pdf
159     * (.TR) Turkey https://www.nic.tr/index.php
160     * (.UA) Ukraine http://www.iana.org/domains/idn-tables/tables/ua_cyrl_1.2.html
 161     * (.VE) Venezuela http://www.iana.org/domains/idn-tables/tables/ve_es_1.0.html
162     * (.VN) Vietnam http://www.vnnic.vn/english/5-6-300-2-2-04-20071115.htm#1.%20Introduction
163     *
164     * @var array
165     */
166    protected $validIdns = array(
167        'AC'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿāăąćĉċčďđēėęěĝġģĥħīįĵķĺļľŀłńņňŋőœŕŗřśŝşšţťŧūŭůűųŵŷźżž]{1,63}$/iu'),
168        'AR'  => array(1 => '/^[\x{002d}0-9a-zà-ãç-êìíñ-õü]{1,63}$/iu'),
169        'AS'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĵķĸĺļľłńņňŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźż]{1,63}$/iu'),
170        'AT'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿœšž]{1,63}$/iu'),
171        'BIZ' => 'Hostname/Biz.php',
172        'BR'  => array(1 => '/^[\x{002d}0-9a-zà-ãçéíó-õúü]{1,63}$/iu'),
173        'BV'  => array(1 => '/^[\x{002d}0-9a-zàáä-éêñ-ôöøüčđńŋšŧž]{1,63}$/iu'),
174        'CAT' => array(1 => '/^[\x{002d}0-9a-z·àç-éíïòóúü]{1,63}$/iu'),
175        'CH'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿœ]{1,63}$/iu'),
176        'CL'  => array(1 => '/^[\x{002d}0-9a-záéíñóúü]{1,63}$/iu'),
177        'CN'  => 'Hostname/Cn.php',
178        'COM' => 'Hostname/Com.php',
179        'DE'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿăąāćĉčċďđĕěėęēğĝġģĥħĭĩįīıĵķĺľļłńňņŋŏőōœĸŕřŗśŝšşťţŧŭůűũųūŵŷźžż]{1,63}$/iu'),
180        'DK'  => array(1 => '/^[\x{002d}0-9a-zäéöü]{1,63}$/iu'),
181        'ES'  => array(1 => '/^[\x{002d}0-9a-zàáçèéíïñòóúü·]{1,63}$/iu'),
182        'EU'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿ]{1,63}$/iu',
183            2 => '/^[\x{002d}0-9a-zāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĵķĺļľŀłńņňʼnŋōŏőœŕŗřśŝšťŧũūŭůűųŵŷźżž]{1,63}$/iu',
184            3 => '/^[\x{002d}0-9a-zșț]{1,63}$/iu',
185            4 => '/^[\x{002d}0-9a-zΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ]{1,63}$/iu',
186            5 => '/^[\x{002d}0-9a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]{1,63}$/iu',
187            6 => '/^[\x{002d}0-9a-zἀ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ-ὼώᾀ-ᾇᾐ-ᾗᾠ-ᾧᾰ-ᾴᾶᾷῂῃῄῆῇῐ-ῒΐῖῗῠ-ῧῲῳῴῶῷ]{1,63}$/iu'),
188        'FI'  => array(1 => '/^[\x{002d}0-9a-zäåö]{1,63}$/iu'),
189        'GR'  => array(1 => '/^[\x{002d}0-9a-zΆΈΉΊΌΎ-ΡΣ-ώἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼῂῃῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲῳῴῶ-ῼ]{1,63}$/iu'),
190        'HK'  => 'Hostname/Cn.php',
191        'HU'  => array(1 => '/^[\x{002d}0-9a-záéíóöúüőű]{1,63}$/iu'),
192        'IL'  => array(1 => '/^[\x{002d}0-9\x{05D0}-\x{05EA}]{1,63}$/iu',
193            2 => '/^[\x{002d}0-9a-z]{1,63}$/i'),
194        'INFO'=> array(1 => '/^[\x{002d}0-9a-zäåæéöøü]{1,63}$/iu',
195            2 => '/^[\x{002d}0-9a-záéíóöúüőű]{1,63}$/iu',
196            3 => '/^[\x{002d}0-9a-záæéíðóöúýþ]{1,63}$/iu',
197            4 => '/^[\x{AC00}-\x{D7A3}]{1,17}$/iu',
198            5 => '/^[\x{002d}0-9a-zāčēģīķļņōŗšūž]{1,63}$/iu',
199            6 => '/^[\x{002d}0-9a-ząčėęįšūųž]{1,63}$/iu',
200            7 => '/^[\x{002d}0-9a-zóąćęłńśźż]{1,63}$/iu',
201            8 => '/^[\x{002d}0-9a-záéíñóúü]{1,63}$/iu'),
202        'IO'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿăąāćĉčċďđĕěėęēğĝġģĥħĭĩįīıĵķĺľļłńňņŋŏőōœĸŕřŗśŝšşťţŧŭůűũųūŵŷźžż]{1,63}$/iu'),
203        'IS'  => array(1 => '/^[\x{002d}0-9a-záéýúíóþæöð]{1,63}$/iu'),
204        'IT'  => array(1 => '/^[\x{002d}0-9a-zàâäèéêëìîïòôöùûüæœçÿß-]{1,63}$/iu'),
205        'JP'  => 'Hostname/Jp.php',
206        'KR'  => array(1 => '/^[\x{AC00}-\x{D7A3}]{1,17}$/iu'),
207        'LI'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿœ]{1,63}$/iu'),
208        'LT'  => array(1 => '/^[\x{002d}0-9ąčęėįšųūž]{1,63}$/iu'),
209        'MD'  => array(1 => '/^[\x{002d}0-9ăâîşţ]{1,63}$/iu'),
210        'MUSEUM' => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿāăąćċčďđēėęěğġģħīįıķĺļľłńņňŋōőœŕŗřśşšţťŧūůűųŵŷźżžǎǐǒǔ\x{01E5}\x{01E7}\x{01E9}\x{01EF}ə\x{0292}ẁẃẅỳ]{1,63}$/iu'),
211        'NET' => 'Hostname/Com.php',
212        'NO'  => array(1 => '/^[\x{002d}0-9a-zàáä-éêñ-ôöøüčđńŋšŧž]{1,63}$/iu'),
213        'NU'  => 'Hostname/Com.php',
214        'ORG' => array(1 => '/^[\x{002d}0-9a-záéíñóúü]{1,63}$/iu',
215            2 => '/^[\x{002d}0-9a-zóąćęłńśźż]{1,63}$/iu',
216            3 => '/^[\x{002d}0-9a-záäåæéëíðóöøúüýþ]{1,63}$/iu',
217            4 => '/^[\x{002d}0-9a-záéíóöúüőű]{1,63}$/iu',
218            5 => '/^[\x{002d}0-9a-ząčėęįšūųž]{1,63}$/iu',
219            6 => '/^[\x{AC00}-\x{D7A3}]{1,17}$/iu',
220            7 => '/^[\x{002d}0-9a-zāčēģīķļņōŗšūž]{1,63}$/iu'),
221        'PE'  => array(1 => '/^[\x{002d}0-9a-zñáéíóúü]{1,63}$/iu'),
222        'PL'  => array(1 => '/^[\x{002d}0-9a-zāčēģīķļņōŗšūž]{1,63}$/iu',
223            2 => '/^[\x{002d}а-ик-ш\x{0450}ѓѕјљњќџ]{1,63}$/iu',
224            3 => '/^[\x{002d}0-9a-zâîăşţ]{1,63}$/iu',
225            4 => '/^[\x{002d}0-9а-яё\x{04C2}]{1,63}$/iu',
226            5 => '/^[\x{002d}0-9a-zàáâèéêìíîòóôùúûċġħż]{1,63}$/iu',
227            6 => '/^[\x{002d}0-9a-zàäåæéêòóôöøü]{1,63}$/iu',
228            7 => '/^[\x{002d}0-9a-zóąćęłńśźż]{1,63}$/iu',
229            8 => '/^[\x{002d}0-9a-zàáâãçéêíòóôõúü]{1,63}$/iu',
230            9 => '/^[\x{002d}0-9a-zâîăşţ]{1,63}$/iu',
231            10=> '/^[\x{002d}0-9a-záäéíóôúýčďĺľňŕšťž]{1,63}$/iu',
232            11=> '/^[\x{002d}0-9a-zçë]{1,63}$/iu',
233            12=> '/^[\x{002d}0-9а-ик-шђјљњћџ]{1,63}$/iu',
234            13=> '/^[\x{002d}0-9a-zćčđšž]{1,63}$/iu',
235            14=> '/^[\x{002d}0-9a-zâçöûüğış]{1,63}$/iu',
236            15=> '/^[\x{002d}0-9a-záéíñóúü]{1,63}$/iu',
237            16=> '/^[\x{002d}0-9a-zäõöüšž]{1,63}$/iu',
238            17=> '/^[\x{002d}0-9a-zĉĝĥĵŝŭ]{1,63}$/iu',
239            18=> '/^[\x{002d}0-9a-zâäéëîô]{1,63}$/iu',
240            19=> '/^[\x{002d}0-9a-zàáâäåæçèéêëìíîïðñòôöøùúûüýćčłńřśš]{1,63}$/iu',
241            20=> '/^[\x{002d}0-9a-zäåæõöøüšž]{1,63}$/iu',
242            21=> '/^[\x{002d}0-9a-zàáçèéìíòóùú]{1,63}$/iu',
243            22=> '/^[\x{002d}0-9a-zàáéíóöúüőű]{1,63}$/iu',
244            23=> '/^[\x{002d}0-9ΐά-ώ]{1,63}$/iu',
245            24=> '/^[\x{002d}0-9a-zàáâåæçèéêëðóôöøüþœ]{1,63}$/iu',
246            25=> '/^[\x{002d}0-9a-záäéíóöúüýčďěňřšťůž]{1,63}$/iu',
247            26=> '/^[\x{002d}0-9a-z·àçèéíïòóúü]{1,63}$/iu',
248            27=> '/^[\x{002d}0-9а-ъьюя\x{0450}\x{045D}]{1,63}$/iu',
249            28=> '/^[\x{002d}0-9а-яёіў]{1,63}$/iu',
250            29=> '/^[\x{002d}0-9a-ząčėęįšūųž]{1,63}$/iu',
251            30=> '/^[\x{002d}0-9a-záäåæéëíðóöøúüýþ]{1,63}$/iu',
252            31=> '/^[\x{002d}0-9a-zàâæçèéêëîïñôùûüÿœ]{1,63}$/iu',
253            32=> '/^[\x{002d}0-9а-щъыьэюяёєіїґ]{1,63}$/iu',
254            33=> '/^[\x{002d}0-9א-ת]{1,63}$/iu'),
255        'PR'  => array(1 => '/^[\x{002d}0-9a-záéíóúñäëïüöâêîôûàèùæçœãõ]{1,63}$/iu'),
256        'PT'  => array(1 => '/^[\x{002d}0-9a-záàâãçéêíóôõú]{1,63}$/iu'),
257        'RU'  => array(1 => '/^[\x{002d}0-9а-яё]{1,63}$/iu'),
258        'SA'  => array(1 => '/^[\x{002d}.0-9\x{0621}-\x{063A}\x{0641}-\x{064A}\x{0660}-\x{0669}]{1,63}$/iu'),
259        'SE'  => array(1 => '/^[\x{002d}0-9a-zäåéöü]{1,63}$/iu'),
260        'SH'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿăąāćĉčċďđĕěėęēğĝġģĥħĭĩįīıĵķĺľļłńňņŋŏőōœĸŕřŗśŝšşťţŧŭůűũųūŵŷźžż]{1,63}$/iu'),
261        'SI'  => array(
262            1 => '/^[\x{002d}0-9a-zà-öø-ÿ]{1,63}$/iu',
263            2 => '/^[\x{002d}0-9a-zāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĵķĺļľŀłńņňʼnŋōŏőœŕŗřśŝšťŧũūŭůűųŵŷźżž]{1,63}$/iu',
264            3 => '/^[\x{002d}0-9a-zșț]{1,63}$/iu'),
265        'SJ'  => array(1 => '/^[\x{002d}0-9a-zàáä-éêñ-ôöøüčđńŋšŧž]{1,63}$/iu'),
266        'TH'  => array(1 => '/^[\x{002d}0-9a-z\x{0E01}-\x{0E3A}\x{0E40}-\x{0E4D}\x{0E50}-\x{0E59}]{1,63}$/iu'),
267        'TM'  => array(1 => '/^[\x{002d}0-9a-zà-öø-ÿāăąćĉċčďđēėęěĝġģĥħīįĵķĺļľŀłńņňŋőœŕŗřśŝşšţťŧūŭůűųŵŷźżž]{1,63}$/iu'),
268        'TW'  => 'Hostname/Cn.php',
269        'TR'  => array(1 => '/^[\x{002d}0-9a-zğıüşöç]{1,63}$/iu'),
270        'UA'  => array(1 => '/^[\x{002d}0-9a-zабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџґӂʼ]{1,63}$/iu'),
271        'VE'  => array(1 => '/^[\x{002d}0-9a-záéíóúüñ]{1,63}$/iu'),
272        'VN'  => array(1 => '/^[ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯư\x{1EA0}-\x{1EF9}]{1,63}$/iu'),
273        'мон' => array(1 => '/^[\x{002d}0-9\x{0430}-\x{044F}]{1,63}$/iu'),
274        'срб' => array(1 => '/^[\x{002d}0-9а-ик-шђјљњћџ]{1,63}$/iu'),
275        'сайт' => array(1 => '/^[\x{002d}0-9а-яёіїѝйўґг]{1,63}$/iu'),
276        'онлайн' => array(1 => '/^[\x{002d}0-9а-яёіїѝйўґг]{1,63}$/iu'),
277        '中国' => 'Hostname/Cn.php',
278        '中國' => 'Hostname/Cn.php',
279        'ලංකා' => array(1 => '/^[\x{0d80}-\x{0dff}]{1,63}$/iu'),
280        '香港' => 'Hostname/Cn.php',
281        '台湾' => 'Hostname/Cn.php',
282        '台灣' => 'Hostname/Cn.php',
283        'امارات'   => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
284        'الاردن'    => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
285        'السعودية' => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
286        'ไทย' => array(1 => '/^[\x{002d}0-9a-z\x{0E01}-\x{0E3A}\x{0E40}-\x{0E4D}\x{0E50}-\x{0E59}]{1,63}$/iu'),
287        'рф' => array(1 => '/^[\x{002d}0-9а-яё]{1,63}$/iu'),
288        'تونس' => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
289        'مصر' => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
290        'இலங்கை' => array(1 => '/^[\x{0b80}-\x{0bff}]{1,63}$/iu'),
291        'فلسطين' => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
292        'شبكة'  => array(1 => '/^[\x{0621}-\x{0624}\x{0626}-\x{063A}\x{0641}\x{0642}\x{0644}-\x{0648}\x{067E}\x{0686}\x{0698}\x{06A9}\x{06AF}\x{06CC}\x{06F0}-\x{06F9}]{1,30}$/iu'),
293    );
294
    /**
     * Per-TLD overrides of the maximum label length.
     *
     * Outer key: the TLD as stored in $this->tld. Inner key: the index of a regex
     * in that TLD's $validIdns character set. Value: the maximum label length that
     * isValid() compares against the UTF-8 string length of a label matched by
     * that regex; labels without an entry here use the default limit of 63.
     *
     * NOTE(review): isValid() currently assigns the whole per-TLD array to $length
     * instead of the entry for the matched regex key, so these limits may not be
     * applied as intended - verify.
     *
     * @var array
     */
    protected $idnLength = array(
        'BIZ' => array(5 => 17, 11 => 15, 12 => 20),
        'CN'  => array(1 => 20),
        'COM' => array(3 => 17, 5 => 20),
        'HK'  => array(1 => 15),
        'INFO'=> array(4 => 17),
        'KR'  => array(1 => 17),
        'NET' => array(3 => 17, 5 => 20),
        'ORG' => array(6 => 17),
        'TW'  => array(1 => 20),
        'امارات' => array(1 => 30),
        'الاردن' => array(1 => 30),
        'السعودية' => array(1 => 30),
        'تونس' => array(1 => 30),
        'مصر' => array(1 => 30),
        'فلسطين' => array(1 => 30),
        'شبكة' => array(1 => 30),
        '中国' => array(1 => 20),
        '中國' => array(1 => 20),
        '香港' => array(1 => 20),
        '台湾' => array(1 => 20),
        '台灣' => array(1 => 20),
    );
318
    /**
     * TLD of the value most recently checked by isValid(), upper-cased with
     * strtoupper(). Used as the lookup key into $validIdns and $idnLength and
     * listed in $messageVariables under the name 'tld'.
     *
     * @var string|null Not assigned until isValid() has extracted a TLD
     */
    protected $tld;
320
    /**
     * Options for the hostname validator
     *
     * Defaults: only DNS hostnames are allowed, IDN and TLD checks are both
     * enabled, and no IP validator is set (setIpValidator() substitutes a new Ip
     * instance when it is given null).
     *
     * @var array
     */
    protected $options = array(
        'allow'       => self::ALLOW_DNS, // Allow these hostnames
        'useIdnCheck' => true,  // Check IDN domains
        'useTldCheck' => true,  // Check TLD elements
        'ipValidator' => null,  // IP validator to use
    );
332
333    /**
334     * Sets validator options.
335     *
336     * @param int  $allow       OPTIONAL Set what types of hostname to allow (default ALLOW_DNS)
337     * @param bool $useIdnCheck OPTIONAL Set whether IDN domains are validated (default true)
338     * @param bool $useTldCheck Set whether the TLD element of a hostname is validated (default true)
339     * @param Ip   $ipValidator OPTIONAL
340     * @see http://www.iana.org/cctld/specifications-policies-cctlds-01apr02.htm  Technical Specifications for ccTLDs
341     */
342    public function __construct($options = array())
343    {
344        if (!is_array($options)) {
345            $options = func_get_args();
346            $temp['allow'] = array_shift($options);
347            if (!empty($options)) {
348                $temp['useIdnCheck'] = array_shift($options);
349            }
350
351            if (!empty($options)) {
352                $temp['useTldCheck'] = array_shift($options);
353            }
354
355            if (!empty($options)) {
356                $temp['ipValidator'] = array_shift($options);
357            }
358
359            $options = $temp;
360        }
361
362        if (!array_key_exists('ipValidator', $options)) {
363            $options['ipValidator'] = null;
364        }
365
366        parent::__construct($options);
367    }
368
369    /**
370     * Returns the set ip validator
371     *
372     * @return Ip
373     */
374    public function getIpValidator()
375    {
376        return $this->options['ipValidator'];
377    }
378
379    /**
380     *
381     * @param Ip $ipValidator OPTIONAL
382     * @return Hostname;
383     */
384    public function setIpValidator(Ip $ipValidator = null)
385    {
386        if ($ipValidator === null) {
387            $ipValidator = new Ip();
388        }
389
390        $this->options['ipValidator'] = $ipValidator;
391        return $this;
392    }
393
394    /**
395     * Returns the allow option
396     *
397     * @return int
398     */
399    public function getAllow()
400    {
401        return $this->options['allow'];
402    }
403
404    /**
405     * Sets the allow option
406     *
407     * @param  int $allow
408     * @return Hostname Provides a fluent interface
409     */
410    public function setAllow($allow)
411    {
412        $this->options['allow'] = $allow;
413        return $this;
414    }
415
416    /**
417     * Returns the set idn option
418     *
419     * @return bool
420     */
421    public function getIdnCheck()
422    {
423        return $this->options['useIdnCheck'];
424    }
425
426    /**
427     * Set whether IDN domains are validated
428     *
429     * This only applies when DNS hostnames are validated
430     *
431     * @param  bool $useIdnCheck Set to true to validate IDN domains
432     * @return Hostname
433     */
434    public function useIdnCheck($useIdnCheck)
435    {
436        $this->options['useIdnCheck'] = (bool) $useIdnCheck;
437        return $this;
438    }
439
440    /**
441     * Returns the set tld option
442     *
443     * @return bool
444     */
445    public function getTldCheck()
446    {
447        return $this->options['useTldCheck'];
448    }
449
450    /**
451     * Set whether the TLD element of a hostname is validated
452     *
453     * This only applies when DNS hostnames are validated
454     *
455     * @param  bool $useTldCheck Set to true to validate TLD elements
456     * @return Hostname
457     */
458    public function useTldCheck($useTldCheck)
459    {
460        $this->options['useTldCheck'] = (bool) $useTldCheck;
461        return $this;
462    }
463
464    /**
465     * Defined by Interface
466     *
467     * Returns true if and only if the $value is a valid hostname with respect to the current allow option
468     *
469     * @param  string $value
470     * @return bool
471     */
472    public function isValid($value)
473    {
474        if (!is_string($value)) {
475            $this->error(self::INVALID);
476            return false;
477        }
478
479        $this->setValue($value);
480        // Check input against IP address schema
481        if (preg_match('/^[0-9a-f:.]*$/i', $value) && $this->getIpValidator()
482            ->setTranslator($this->getTranslator())
483            ->isValid($value)) {
484            if (!($this->getAllow() & self::ALLOW_IP)) {
485                $this->error(self::IP_ADDRESS_NOT_ALLOWED);
486                return false;
487            } else {
488                return true;
489            }
490        }
491
492        // Local hostnames are allowed to be partial (ending '.')
493        if ($this->getAllow() & self::ALLOW_LOCAL) {
494            if (substr($value, -1) === '.') {
495                $value = substr($value, 0, -1);
496                if (substr($value, -1) === '.') {
497                    // Empty hostnames (ending '..') are not allowed
498                    $this->error(self::INVALID_LOCAL_NAME);
499                    return false;
500                }
501            }
502        }
503
504        $domainParts = explode('.', $value);
505
506        // Prevent partial IP V4 addresses (ending '.')
507        if ((count($domainParts) == 4) && preg_match('/^[0-9.a-e:.]*$/i', $value) && $this->getIpValidator()
508            ->setTranslator($this->getTranslator())
509            ->isValid($value)) {
510            $this->error(self::INVALID_LOCAL_NAME);
511        }
512
513        $utf8StrWrapper = StringUtils::getWrapper('UTF-8');
514
515        // Check input against DNS hostname schema
516        if ((count($domainParts) > 1)
517            && ($utf8StrWrapper->strlen($value) >= 4)
518            && ($utf8StrWrapper->strlen($value) <= 254))
519        {
520            $status = false;
521
522            do {
523                // First check TLD
524                $matches = array();
525                if (preg_match('/([^.]{2,10})$/iu', end($domainParts), $matches)
526                    || (array_key_exists(end($domainParts), $this->validIdns))) {
527                    reset($domainParts);
528
529                    // Hostname characters are: *(label dot)(label dot label); max 254 chars
530                    // label: id-prefix [*ldh{61} id-prefix]; max 63 chars
531                    // id-prefix: alpha / digit
532                    // ldh: alpha / digit / dash
533
534                    // Match TLD against known list
535                    $this->tld = strtoupper($matches[1]);
536                    if ($this->getTldCheck()) {
537                        if (!in_array(strtolower($this->tld), $this->validTlds)
538                            && !in_array($this->tld, $this->validTlds)) {
539                            $this->error(self::UNKNOWN_TLD);
540                            $status = false;
541                            break;
542                        }
543                        // We have already validated that the TLD is fine. We don't want it to go through the below
544                        // checks as new UTF-8 TLDs will incorrectly fail if there is no IDN regex for it.
545                        array_pop($domainParts);
546                    }
547
548                    /**
549                     * Match against IDN hostnames
550                     * Note: Keep label regex short to avoid issues with long patterns when matching IDN hostnames
551                     *
552                     * @see Hostname\Interface
553                     */
554                    $regexChars = array(0 => '/^[a-z0-9\x2d]{1,63}$/i');
555                    if ($this->getIdnCheck() && isset($this->validIdns[$this->tld])) {
556                        if (is_string($this->validIdns[$this->tld])) {
557                            $regexChars += include __DIR__ . '/' . $this->validIdns[$this->tld];
558                        } else {
559                            $regexChars += $this->validIdns[$this->tld];
560                        }
561                    }
562
563                    // Check each hostname part
564                    $check = 0;
565                    foreach ($domainParts as $domainPart) {
566                        // Decode Punycode domain names to IDN
567                        if (strpos($domainPart, 'xn--') === 0) {
568                            $domainPart = $this->decodePunycode(substr($domainPart, 4));
569                            if ($domainPart === false) {
570                                return false;
571                            }
572                        }
573
574                        // Check dash (-) does not start, end or appear in 3rd and 4th positions
575                        if (($utf8StrWrapper->strpos($domainPart, '-') === 0)
576                            || (($utf8StrWrapper->strlen($domainPart) > 2) && ($utf8StrWrapper->strpos($domainPart, '-', 2) == 2) && ($utf8StrWrapper->strpos($domainPart, '-', 3) == 3))
577                            || ($utf8StrWrapper->strpos($domainPart, '-') === ($utf8StrWrapper->strlen($domainPart) - 1))) {
578                            $this->error(self::INVALID_DASH);
579                            $status = false;
580                            break 2;
581                        }
582
583                        // Check each domain part
584                        $checked = false;
585                        foreach ($regexChars as $regexKey => $regexChar) {
586                            ErrorHandler::start();
587                            $status = preg_match($regexChar, $domainPart);
588                            ErrorHandler::stop();
589                            if ($status > 0) {
590                                $length = 63;
591                                if (array_key_exists($this->tld, $this->idnLength)
592                                    && (array_key_exists($regexKey, $this->idnLength[$this->tld]))) {
593                                    $length = $this->idnLength[$this->tld];
594                                }
595
596                                if ($utf8StrWrapper->strlen($domainPart) > $length) {
597                                    $this->error(self::INVALID_HOSTNAME);
598                                    $status = false;
599                                } else {
600                                    $checked = true;
601                                    break;
602                                }
603                            }
604                        }
605
606                        if ($checked) {
607                            ++$check;
608                        }
609                    }
610
611                    // If one of the labels doesn't match, the hostname is invalid
612                    if ($check !== count($domainParts)) {
613                        $this->error(self::INVALID_HOSTNAME_SCHEMA);
614                        $status = false;
615                    }
616                } else {
617                    // Hostname not long enough
618                    $this->error(self::UNDECIPHERABLE_TLD);
619                    $status = false;
620                }
621            } while (false);
622
623            // If the input passes as an Internet domain name, and domain names are allowed, then the hostname
624            // passes validation
625            if ($status && ($this->getAllow() & self::ALLOW_DNS)) {
626                return true;
627            }
628        } elseif ($this->getAllow() & self::ALLOW_DNS) {
629            $this->error(self::INVALID_HOSTNAME);
630            $status = false;
631        }
632
633        // Check for URI Syntax (RFC3986)
634        if ($this->getAllow() & self::ALLOW_URI) {
635            if (preg_match("/^([a-zA-Z0-9-._~!$&\'()*+,;=]|%[[:xdigit:]]{2}){1,254}$/i", $value)) {
636                return true;
637            } else {
638                $this->error(self::INVALID_URI);
639            }
640        }
641
642        // Check input against local network name schema; last chance to pass validation
643        ErrorHandler::start();
644        $regexLocal = '/^(([a-zA-Z0-9\x2d]{1,63}\x2e)*[a-zA-Z0-9\x2d]{1,63}[\x2e]{0,1}){1,254}$/';
645        $status = preg_match($regexLocal, $value);
646        ErrorHandler::stop();
647
648        // If the input passes as a local network name, and local network names are allowed, then the
649        // hostname passes validation
650        $allowLocal = $this->getAllow() & self::ALLOW_LOCAL;
651        if ($status && $allowLocal) {
652            return true;
653        }
654
655        // If the input does not pass as a local network name, add a message
656        if (!$status) {
657            $this->error(self::INVALID_LOCAL_NAME);
658        }
659
660        // If local network names are not allowed, add a message
661        if ($status && !$allowLocal) {
662            $this->error(self::LOCAL_NAME_NOT_ALLOWED);
663        }
664
665        return false;
666    }
667
668    /**
669     * Decodes a punycode encoded string to it's original utf8 string
670     * Returns false in case of a decoding failure.
671     *
672     * @param  string $encoded Punycode encoded string to decode
673     * @return string|false
674     */
675    protected function decodePunycode($encoded)
676    {
677        if (!preg_match('/^[a-z0-9-]+$/i', $encoded)) {
678            // no punycode encoded string
679            $this->error(self::CANNOT_DECODE_PUNYCODE);
680            return false;
681        }
682
683        $decoded = array();
684        $separator = strrpos($encoded, '-');
685        if ($separator > 0) {
686            for ($x = 0; $x < $separator; ++$x) {
687                // prepare decoding matrix
688                $decoded[] = ord($encoded[$x]);
689            }
690        }
691
692        $lengthd = count($decoded);
693        $lengthe = strlen($encoded);
694
695        // decoding
696        $init  = true;
697        $base  = 72;
698        $index = 0;
699        $char  = 0x80;
700
701        for ($indexe = ($separator) ? ($separator + 1) : 0; $indexe < $lengthe; ++$lengthd) {
702            for ($oldIndex = $index, $pos = 1, $key = 36; 1; $key += 36) {
703                $hex   = ord($encoded[$indexe++]);
704                $digit = ($hex - 48 < 10) ? $hex - 22
705                       : (($hex - 65 < 26) ? $hex - 65
706                       : (($hex - 97 < 26) ? $hex - 97
707                       : 36));
708
709                $index += $digit * $pos;
710                $tag    = ($key <= $base) ? 1 : (($key >= $base + 26) ? 26 : ($key - $base));
711                if ($digit < $tag) {
712                    break;
713                }
714
715                $pos = (int) ($pos * (36 - $tag));
716            }
717
718            $delta   = intval($init ? (($index - $oldIndex) / 700) : (($index - $oldIndex) / 2));
719            $delta  += intval($delta / ($lengthd + 1));
720            for ($key = 0; $delta > 910 / 2; $key += 36) {
721                $delta = intval($delta / 35);
722            }
723
724            $base   = intval($key + 36 * $delta / ($delta + 38));
725            $init   = false;
726            $char  += (int) ($index / ($lengthd + 1));
727            $index %= ($lengthd + 1);
728            if ($lengthd > 0) {
729                for ($i = $lengthd; $i > $index; $i--) {
730                    $decoded[$i] = $decoded[($i - 1)];
731                }
732            }
733
734            $decoded[$index++] = $char;
735        }
736
737        // convert decoded ucs4 to utf8 string
738        foreach ($decoded as $key => $value) {
739            if ($value < 128) {
740                $decoded[$key] = chr($value);
741            } elseif ($value < (1 << 11)) {
742                $decoded[$key]  = chr(192 + ($value >> 6));
743                $decoded[$key] .= chr(128 + ($value & 63));
744            } elseif ($value < (1 << 16)) {
745                $decoded[$key]  = chr(224 + ($value >> 12));
746                $decoded[$key] .= chr(128 + (($value >> 6) & 63));
747                $decoded[$key] .= chr(128 + ($value & 63));
748            } elseif ($value < (1 << 21)) {
749                $decoded[$key]  = chr(240 + ($value >> 18));
750                $decoded[$key] .= chr(128 + (($value >> 12) & 63));
751                $decoded[$key] .= chr(128 + (($value >> 6) & 63));
752                $decoded[$key] .= chr(128 + ($value & 63));
753            } else {
754                $this->error(self::CANNOT_DECODE_PUNYCODE);
755                return false;
756            }
757        }
758
759        return implode($decoded);
760    }
761}