PageRenderTime 16ms CodeModel.GetById 13ms app.highlight 1ms RepoModel.GetById 1ms app.codeStats 0ms

/libraries/vendor/joomla/string/src/phputf8/utils/patterns.php

https://gitlab.com/vitaliylukin91/idea-rating
PHP | 64 lines | 30 code | 3 blank | 31 comment | 0 complexity | 0a730cb180908e3784f4caf1c017d1a8 MD5 | raw file
<?php
/**
* PCRE regular expressions for UTF-8. Note: this file is not actually used by
* the rest of the library, but these regular expressions can be useful to have
* available.
* @see http://www.w3.org/International/questions/qa-forms-utf-8
* @package utf8
*/

//--------------------------------------------------------------------
/**
* PCRE pattern that checks whether a whole string is valid UTF-8.
*
* Comes from the W3C FAQ "Multilingual Forms", modified to accept the full
* ASCII range including control characters.
*
* The value is a bare pattern: it is anchored with ^ and $ but carries no
* delimiters or modifiers, so the caller has to add them, for example
* preg_match('/' . $UTF8_VALID . '/', $str). The \xNN escapes live in
* single-quoted PHP strings, so they reach PCRE unchanged and are resolved
* by the regex engine as byte values; the pattern is meant for byte-wise
* matching, i.e. without the "u" modifier.
*
* @see http://www.w3.org/International/questions/qa-forms-utf-8
* @package utf8
*/
$UTF8_VALID = '^('.
    '[\x00-\x7F]'.                          # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.              # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.          # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.   # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.          # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.       # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.           # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.       # plane 16
    ')*$';

//--------------------------------------------------------------------
/**
* PCRE pattern that matches a single well-formed UTF-8 character.
*
* Comes from the W3C FAQ "Multilingual Forms", modified to accept the full
* ASCII range including control characters.
*
* The pattern is not anchored, and every alternative is wrapped in its own
* capturing group, so the index of the group that matched identifies which
* byte-sequence form was found. Like the other patterns in this file, it
* carries no delimiters or modifiers; the caller has to add them, for
* example preg_match_all('/' . $UTF8_MATCH . '/', $str, $matches). The \xNN
* escapes are passed to PCRE unchanged (single-quoted PHP strings) and are
* meant to be matched byte-wise, i.e. without the "u" modifier.
*
* @see http://www.w3.org/International/questions/qa-forms-utf-8
* @package utf8
*/
$UTF8_MATCH =
    '([\x00-\x7F])'.                          # ASCII (including control chars)
    '|([\xC2-\xDF][\x80-\xBF])'.              # non-overlong 2-byte
    '|(\xE0[\xA0-\xBF][\x80-\xBF])'.          # excluding overlongs
    '|([\xE1-\xEC\xEE\xEF][\x80-\xBF]{2})'.   # straight 3-byte
    '|(\xED[\x80-\x9F][\x80-\xBF])'.          # excluding surrogates
    '|(\xF0[\x90-\xBF][\x80-\xBF]{2})'.       # planes 1-3
    '|([\xF1-\xF3][\x80-\xBF]{3})'.           # planes 4-15
    '|(\xF4[\x80-\x8F][\x80-\xBF]{2})';       # plane 16

//--------------------------------------------------------------------
/**
* PCRE pattern to locate bad bytes in a UTF-8 string.
*
* Comes from the W3C FAQ "Multilingual Forms", modified to accept the full
* ASCII range including control characters.
*
* The alternatives for well-formed sequences are tried first; the last
* alternative, (.{1}), picks up a single character that none of them
* consumed. Capture group 1 is the whole match (valid sequence or stray
* byte); capture group 2 is filled only when the stray-byte alternative
* matched. Like the other patterns in this file, it carries no delimiters
* or modifiers; the caller has to add them, for example
* preg_match_all('/' . $UTF8_BAD . '/', $str, $matches). The \xNN escapes
* are passed to PCRE unchanged (single-quoted PHP strings) and are meant to
* be matched byte-wise, i.e. without the "u" modifier.
*
* @see http://www.w3.org/International/questions/qa-forms-utf-8
* @package utf8
*/
$UTF8_BAD =
    '([\x00-\x7F]'.                          # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
    '|(.{1}))';                              # invalid byte