PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/SemanticMediaWiki/includes/datavalues/SMW_DV_URI.php

#
PHP | 289 lines | 202 code | 30 blank | 57 comment | 58 complexity | e7b761a9d4dadde6451b70187fb571f2 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * @file
  4. * @ingroup SMWDataValues
  5. */
// Operation modes of SMWURIValue. SMWURIValue::__construct() selects one of
// them from the type id it is given.
// NOTE(review): the value 2 is not assigned anywhere in this file; whether it
// is reserved or was retired is not visible here.
define( 'SMW_URI_MODE_EMAIL', 1 ); // email addresses (type id '_ema')
define( 'SMW_URI_MODE_URI', 3 ); // general URIs/URLs (type ids '_uri', '_url', '__spu', and any other id)
define( 'SMW_URI_MODE_ANNOURI', 4 ); // ANNURI values (type id '_anu'); parsed like general URIs
define( 'SMW_URI_MODE_TEL', 5 ); // telephone numbers (type id '_tel')
  10. /**
  11. * This datavalue implements URL/URI/ANNURI/PHONE/EMAIL datavalues suitable for
  12. * defining the respective types of properties.
  13. *
  14. * @author Nikolas Iwan
  15. * @author Markus Kr??tzsch
  16. * @ingroup SMWDataValues
  17. * @bug Correctly create safe HTML and Wiki text.
  18. */
  19. class SMWURIValue extends SMWDataValue {
  20. /**
  21. * The value as returned by getWikitext() and getLongText().
  22. * @var string
  23. */
  24. protected $m_wikitext;
  25. /**
  26. * One of the basic modes of operation for this class (emails, URL,
  27. * telephone number URI, ...).
  28. * @var integer
  29. */
  30. private $m_mode;
  31. public function __construct( $typeid ) {
  32. parent::__construct( $typeid );
  33. switch ( $typeid ) {
  34. case '_ema':
  35. $this->m_mode = SMW_URI_MODE_EMAIL;
  36. break;
  37. case '_anu':
  38. $this->m_mode = SMW_URI_MODE_ANNOURI;
  39. break;
  40. case '_tel':
  41. $this->m_mode = SMW_URI_MODE_TEL;
  42. break;
  43. case '_uri': case '_url': case '__spu': default:
  44. $this->m_mode = SMW_URI_MODE_URI;
  45. break;
  46. }
  47. }
  48. protected function parseUserValue( $value ) {
  49. $value = trim( $value );
  50. $this->m_wikitext = $value;
  51. if ( $this->m_caption === false ) {
  52. $this->m_caption = $this->m_wikitext;
  53. }
  54. $scheme = $hierpart = $query = $fragment = '';
  55. if ( $value === '' ) { // do not accept empty strings
  56. $this->addError( wfMsgForContent( 'smw_emptystring' ) );
  57. $this->m_dataitem = new SMWDIUri( 'http', '//example.com', '', '', $this->m_typeid ); // define data item to have some value
  58. return;
  59. }
  60. switch ( $this->m_mode ) {
  61. case SMW_URI_MODE_URI: case SMW_URI_MODE_ANNOURI:
  62. $parts = explode( ':', $value, 2 ); // try to split "schema:rest"
  63. if ( count( $parts ) == 1 ) { // possibly add "http" as default
  64. $value = 'http://' . $value;
  65. $parts[1] = $parts[0];
  66. $parts[0] = 'http';
  67. }
  68. // check against blacklist
  69. $uri_blacklist = explode( "\n", wfMsgForContent( 'smw_uri_blacklist' ) );
  70. foreach ( $uri_blacklist as $uri ) {
  71. $uri = trim( $uri );
  72. if ( $uri == mb_substr( $value, 0, mb_strlen( $uri ) ) ) { // disallowed URI!
  73. $this->addError( wfMsgForContent( 'smw_baduri', $value ) );
  74. $this->m_dataitem = new SMWDIUri( 'http', '//example.com', '', '', $this->m_typeid ); // define data item to have some value
  75. return;
  76. }
  77. }
  78. // decompose general URI components
  79. $scheme = $parts[0];
  80. $parts = explode( '?', $parts[1], 2 ); // try to split "hier-part?queryfrag"
  81. if ( count( $parts ) == 2 ) {
  82. $hierpart = $parts[0];
  83. $parts = explode( '#', $parts[1], 2 ); // try to split "query#frag"
  84. $query = $parts[0];
  85. $fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
  86. } else {
  87. $query = '';
  88. $parts = explode( '#', $parts[0], 2 ); // try to split "hier-part#frag"
  89. $hierpart = $parts[0];
  90. $fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
  91. }
  92. // We do not validate the URI characters (the data item will do this) but we do some escaping:
  93. // encode most characters, but leave special symbols as given by user:
  94. $hierpart = str_replace( array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ), array( ':', '/', '#', '@', '?', '=', '&', '%' ), rawurlencode( $hierpart ) );
  95. $query = str_replace( array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ), array( ':', '/', '#', '@', '?', '=', '&', '%' ), rawurlencode( $query ) );
  96. $fragment = str_replace( array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ), array( ':', '/', '#', '@', '?', '=', '&', '%' ), rawurlencode( $fragment ) );
  97. /// NOTE: we do not support raw [ (%5D) and ] (%5E), although they are needed for ldap:// (but rarely in a wiki)
  98. /// NOTE: "+" gets encoded, as it is interpreted as space by most browsers when part of a URL;
  99. /// this prevents tel: from working directly, but we have a datatype for this anyway.
  100. break;
  101. case SMW_URI_MODE_TEL:
  102. $scheme = 'tel';
  103. if ( substr( $value, 0, 4 ) === 'tel:' ) { // accept optional "tel"
  104. $value = substr( $value, 4 );
  105. $this->m_wikitext = $value;
  106. }
  107. $hierpart = preg_replace( '/(?<=[0-9]) (?=[0-9])/', '\1-\2', $value );
  108. $hierpart = str_replace( ' ', '', $hierpart );
  109. if ( substr( $hierpart, 0, 2 ) == '00' ) {
  110. $hierpart = '+' . substr( $hierpart, 2 );
  111. }
  112. if ( ( strlen( preg_replace( '/[^0-9]/', '', $hierpart ) ) < 6 ) ||
  113. ( preg_match( '<[-+./][-./]>', $hierpart ) ) ||
  114. ( !SMWURIValue::isValidTelURI( 'tel:' . $hierpart ) ) ) { /// TODO: introduce error-message for "bad" phone number
  115. $this->addError( wfMsgForContent( 'smw_baduri', $this->m_wikitext ) );
  116. }
  117. break;
  118. case SMW_URI_MODE_EMAIL:
  119. $scheme = 'mailto';
  120. if ( strpos( $value, 'mailto:' ) === 0 ) { // accept optional "mailto"
  121. $value = substr( $value, 7 );
  122. $this->m_wikitext = $value;
  123. }
  124. $check = method_exists( 'Sanitizer', 'validateEmail' ) ? Sanitizer::validateEmail( $value ) : self::validateEmail( $value );
  125. if ( !$check ) {
  126. /// TODO: introduce error-message for "bad" email
  127. $this->addError( wfMsgForContent( 'smw_baduri', $value ) );
  128. break;
  129. }
  130. $hierpart = str_replace( array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ), array( ':', '/', '#', '@', '?', '=', '&', '%' ), rawurlencode( $value ) );
  131. }
  132. // Now create the URI data item:
  133. try {
  134. $this->m_dataitem = new SMWDIUri( $scheme, $hierpart, $query, $fragment, $this->m_typeid );
  135. } catch ( SMWDataItemException $e ) {
  136. $this->addError( wfMsgForContent( 'smw_baduri', $this->m_wikitext ) );
  137. $this->m_dataitem = new SMWDIUri( 'http', '//example.com', '', '', $this->m_typeid ); // define data item to have some value
  138. }
  139. }
  140. /**
  141. * Returns true if the argument is a valid RFC 3966 phone number.
  142. * Only global phone numbers are supported, and no full validation
  143. * of parameters (appended via ;param=value) is performed.
  144. */
  145. protected static function isValidTelURI( $s ) {
  146. $tel_uri_regex = '<^tel:\+[0-9./-]*[0-9][0-9./-]*(;[0-9a-zA-Z-]+=(%[0-9a-zA-Z][0-9a-zA-Z]|[0-9a-zA-Z._~:/?#[\]@!$&\'()*+,;=-])*)*$>';
  147. return (bool) preg_match( $tel_uri_regex, $s );
  148. }
  149. /**
  150. * @see SMWDataValue::loadDataItem()
  151. * @param $dataitem SMWDataItem
  152. * @return boolean
  153. */
  154. protected function loadDataItem( SMWDataItem $dataItem ) {
  155. if ( $dataItem->getDIType() == SMWDataItem::TYPE_URI ) {
  156. $this->m_dataitem = $dataItem;
  157. if ( $this->m_mode == SMW_URI_MODE_EMAIL ) {
  158. $this->m_wikitext = substr( $dataItem->getURI(), 7 );
  159. } elseif ( $this->m_mode == SMW_URI_MODE_TEL ) {
  160. $this->m_wikitext = substr( $dataItem->getURI(), 4 );
  161. } else {
  162. $this->m_wikitext = $dataItem->getURI();
  163. }
  164. $this->m_caption = $this->m_wikitext;
  165. return true;
  166. } else {
  167. return false;
  168. }
  169. }
  170. public function getShortWikiText( $linked = null ) {
  171. $url = $this->getURL();
  172. if ( is_null( $linked ) || ( $linked === false ) || ( $this->m_outformat == '-' ) || ( $url === '' ) || ( $this->m_caption === '' ) ) {
  173. return $this->m_caption;
  174. } else {
  175. return '[' . $url . ' ' . $this->m_caption . ']';
  176. }
  177. }
  178. public function getShortHTMLText( $linker = null ) {
  179. $url = $this->getURL();
  180. if ( is_null( $linker ) || ( !$this->isValid() ) || ( $this->m_outformat == '-' ) || ( $url === '' ) || ( $this->m_caption === '' ) ) {
  181. return $this->m_caption;
  182. } else {
  183. return $linker->makeExternalLink( $url, $this->m_caption );
  184. }
  185. }
  186. public function getLongWikiText( $linked = null ) {
  187. if ( !$this->isValid() ) {
  188. return $this->getErrorText();
  189. }
  190. $url = $this->getURL();
  191. if ( is_null( $linked ) || ( $linked === false ) || ( $this->m_outformat == '-' ) || ( $url === '' ) ) {
  192. return $this->m_wikitext;
  193. } else {
  194. return '[' . $url . ' ' . $this->m_wikitext . ']';
  195. }
  196. }
  197. public function getLongHTMLText( $linker = null ) {
  198. if ( !$this->isValid() ) {
  199. return $this->getErrorText();
  200. }
  201. $url = $this->getURL();
  202. if ( is_null( $linker ) || ( $this->m_outformat == '-' ) || ( $url === '' ) ) {
  203. return htmlspecialchars( $this->m_wikitext );
  204. } else {
  205. return $linker->makeExternalLink( $url, $this->m_wikitext );
  206. }
  207. }
  208. public function getWikiValue() {
  209. return $this->m_wikitext;
  210. }
  211. public function getURI() {
  212. return $this->m_dataitem->getURI();
  213. }
  214. protected function getServiceLinkParams() {
  215. // Create links to mapping services based on a wiki-editable message. The parameters
  216. // available to the message are:
  217. // $1: urlencoded version of URI/URL value (includes mailto: for emails)
  218. return array( rawurlencode( $this->m_dataitem->getURI() ) );
  219. }
  220. /**
  221. * Get a URL for hyperlinking this URI, or the empty string if this URI
  222. * is not hyperlinked in MediaWiki.
  223. * @return string
  224. */
  225. public function getURL() {
  226. global $wgUrlProtocols;
  227. foreach ( $wgUrlProtocols as $prot ) {
  228. if ( ( $prot == $this->m_dataitem->getScheme() . ':' ) || ( $prot == $this->m_dataitem->getScheme() . '://' ) ) {
  229. return $this->m_dataitem->getURI();
  230. }
  231. }
  232. return '';
  233. }
  234. /**
  235. * This is a copy of
  236. * @see Sanitizer::validateEmail
  237. * which was introduced in MW 1.18, and is thus used for compatibility with earlier versions.
  238. */
  239. public static function validateEmail( $addr ) {
  240. $result = null;
  241. if ( !wfRunHooks( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
  242. return $result;
  243. }
  244. // Please note strings below are enclosed in brackets [], this make the
  245. // hyphen "-" a range indicator. Hence it is double backslashed below.
  246. // See bug 26948
  247. $rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~" ;
  248. $rfc1034_ldh_str = "a-z0-9\\-" ;
  249. $HTML5_email_regexp = "/
  250. ^ # start of string
  251. [$rfc5322_atext\\.]+ # user part which is liberal :p
  252. @ # 'apostrophe'
  253. [$rfc1034_ldh_str]+ # First domain part
  254. (\\.[$rfc1034_ldh_str]+)* # Following part prefixed with a dot
  255. $ # End of string
  256. /ix" ; // case Insensitive, eXtended
  257. return (bool) preg_match( $HTML5_email_regexp, $addr );
  258. }
  259. }