PageRenderTime 45ms CodeModel.GetById 27ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/system/classes/multibyte.php

https://github.com/HabariMag/habarimag-old
PHP | 785 lines | 353 code | 142 blank | 290 comment | 87 complexity | 39527fcfc81ab99c85b7363e0425946c MD5 | raw file
  1<?php
  2/*
  3 * @package Habari
  4 *
  5 */
  6
  7/*
  8 * Habari MultiByte Class
  9 *
 10 * Provides multibyte character set services,
 11 * a necessity since all of Habari's internal string
 12 * manipulations are done in UTF-8. Currently
 13 * this class is a wrapper around mbstring functions.
 14 *
 15 */
 16class MultiByte
 17{
 18
 19	const USE_MBSTRING = 1;
 20
 21	/*
 22	* @var $hab_enc String holding the current encoding the class is using
 23	*/
 24	static $hab_enc = 'UTF-8';
 25	/*
 26	* @var $use_library Integer denoting the current multibyte
 27	* library the class is using
 28	*/
 29	private static $use_library = self::USE_MBSTRING;
 30
 31	/**
 32	* function __construct
 33	*
 34	* An empty constructor since all functions are static
 35	*/
 36	private function __construct()
 37	{
 38	}
 39
 40	/*
 41	* function hab_encoding
 42	*
 43	* Sets and returns the internal encoding.
 44	*
 45	* @param $use_enc string. The encoding to be used
 46	*
 47	* @return string. If $enc is null, returns the current
 48	* encoding. If $enc is not null, returns the old encoding
 49	*/
 50	public static function hab_encoding( $use_enc = null )
 51	{
 52		if ( $use_enc === null ) {
 53			return self::$hab_enc;
 54		}
 55		else {
 56			$old_enc = self::$hab_enc;
 57			self::$hab_enc = $use_enc;
 58			return $old_enc;
 59		}
 60	}
 61
 62	/*
 63	* function library
 64	*
 65	* Sets and returns the multibyte library being used internally
 66	*
 67	* @param $int The new library to use. One of the self::USE_* constants, null to simply return, or false to disable and use native non-multibyte-safe PHP methods.
 68	*
 69	* @return mixed  If $new_library is null, returns the current library
 70	* being used. If $new_library has a valid value, returns the old library,
 71	* else returns false.
 72	*/
 73	public static function library( $new_library = null )
 74	{
 75		if ( $new_library === null ) {
 76			return self::$use_library;
 77		}
 78		else if ( $new_library === self::USE_MBSTRING ) {
 79			$old_library = self::$use_library;
 80			self::$use_library = $new_library;
 81			return $old_library;
 82
 83		}
 84		else if ( $new_library === false ) {
 85			$old_library = self::$use_library;
 86			self::$use_library = $new_library;
 87			return $old_library;
 88		}
 89		else {
 90			return false;
 91		}
 92	}
 93
 94	/*
 95	* function convert_encoding
 96	*
 97	* Converts a string's encoding to a new encoding
 98	*
 99	* @param $str string. The string who's encoding is being changed.
100	* @param $use_enc string. The encoding to convert to. If not set,
101	* the internal encoding will be used.
102	* @param $from_enc string. encoding before conversion. If not set,
103 	* encoding is detected automatically.
104	*
105	* @return mixed  The  source string in the new encoding or boolean false.
106	*/
107	public static function convert_encoding( $str, $use_enc = null, $from_enc = null )
108	{
109		$ret = false;
110
111		$enc = self::$hab_enc;
112		if ( $use_enc !== null ) {
113			$enc = $use_enc;
114		}
115
116		if ( self::$use_library == self::USE_MBSTRING ) {
117			if ( $from_enc == null ) {
118				$from_enc = MultiByte::detect_encoding( $str );
119			}
120			$ret = mb_convert_encoding( $str, $enc, $from_enc );
121		}
122
123		return $ret;
124	}
125
126	/*
127	* function detect_encoding
128	*
129	* Detects the encoding being used for a string
130	*
131	* @param $str string. The string who's encoding is being detected
132	*
133	* @return mixed The  source string's detected encoding, or boolean false.
134	*/
135	public static function detect_encoding( $str )
136	{
137		$enc = false;
138
139		if ( self::$use_library == self::USE_MBSTRING ) {
140			// get original detection order
141			$old_order = mb_detect_order();
142			// make sure  ISO-8859-1 is included
143			mb_detect_order( array( 'ASCII', 'JIS', 'UTF-8', 'ISO-8859-1', 'EUC-JP', 'SJIS' ) );
144			//detect the encoding . the detected encoding may be wrong, but it's better than guessing
145			$enc = mb_detect_encoding( $str );
146			// reset detection order
147			mb_detect_order( $old_order );
148		}
149
150		return $enc;
151	}
152
153	/*
154	* function substr
155	*
156	* Get a section of a string
157	*
158	* @param $str string. The original string
159	* @param $begin. integer. The beginning character of the string to return.
160	* @param $len integer. How long the returned string should be. If $len is
161	* not set, the section of the string from $begin to the end of the string is
162	* returned.
163	* @param $use_enc string. The encoding to be used. If not set,
164	* the internal encoding will be used.
165	*
166	* @return mixed The  section of the source string requested in the encoding requested or false.
167	* If $len is not set, returns substring from $begin to end of string.
168	*
169	*/
170	public static function substr( $str, $begin, $len = null, $use_enc = null )
171	{
172		$ret = false;
173
174		$enc = self::$hab_enc;
175		if ( $use_enc !== null ) {
176			$enc = $use_enc;
177		}
178
179		if ( self::$use_library == self::USE_MBSTRING ) {
180			if ( ! isset( $len ) ) {
181				$len = MultiByte::strlen( $str ) - $begin;
182			}
183			$ret = mb_substr( $str, $begin, $len, $enc );
184		}
185		else {
186			$ret = substr( $str, $begin, $len );
187		}
188		return $ret;
189	}
190
191	/*
192	* function strlen
193	*
194	* Gets the length of a string in characters
195	*
196	* @param $str string. The string who's length is being returned.
197	* @param $use_enc string. The encoding to be used. If not set,
198	* the internal encoding will be used.
199	*
200	* @return integer. The length in characters of the string, or the length in bytes if a valid
201	* multibyte library isn't loaded.
202	*/
203	public static function strlen( $str, $use_enc = null )
204	{
205		$len = 0;
206
207		$enc = self::$hab_enc;
208		if ( $use_enc !== null ) {
209			$enc = $use_enc;
210		}
211
212		if ( self::$use_library == self::USE_MBSTRING ) {
213			$len = mb_strlen( $str, $enc );
214		}
215		else {
216			$len = strlen( $str );
217		}
218
219		return $len;
220	}
221
222	/*
223	* function strpos
224	*
225	* Find position of first occurrence of string in a string
226	*
227	* @param $haysack string. The string being checked.
228	* @param $needle. string. The position counted from the beginning of haystack .
229	* @param $offset integer. The search offset. If it is not specified, 0 is used.
230	* @param $use_enc string. The encoding to be used. If not set,
231	* the internal encoding will be used.
232	*
233	* @return mixed The  section of the source string requested in the encoding requested or false.
234	* If $len is not set, returns substring from $begin to end of string.
235	*
236	*/
237	public static function strpos( $haysack, $needle, $offset = 0, $use_enc = null )
238	{
239		$enc = self::$hab_enc;
240		if ( $use_enc !== null ) {
241			$enc = $use_enc;
242		}
243
244		if ( self::$use_library == self::USE_MBSTRING ) {
245			$ret = mb_strpos( $haysack, $needle, $offset, $enc );
246		}
247		else {
248			$ret = strpos( $haysack, $needle, $offset );
249		}
250		return $ret;
251	}
252
253	/*
254	* function stripos
255	*
256	* Find position of first occurrence of string in a string. Case insensitive.
257	*
258	* @param $haysack string. The string being checked.
259	* @param $needle. string. The position counted from the beginning of haystack .
260	* @param $offset integer. The search offset. If it is not specified, 0 is used.
261	* @param $use_enc string. The encoding to be used. If not set,
262	* the internal encoding will be used.
263	*
264	* @return mixed The  section of the source string requested in the encoding requested or false.
265	* If $len is not set, returns substring from $begin to end of string.
266	*
267	*/
268	public static function stripos( $haysack, $needle, $offset = 0, $use_enc = null )
269	{
270		$enc = self::$hab_enc;
271		if ( $use_enc !== null ) {
272			$enc = $use_enc;
273		}
274
275		if ( self::$use_library == self::USE_MBSTRING ) {
276			$ret = mb_stripos( $haysack, $needle, $offset, $enc );
277		}
278		else {
279			$ret = stripos( $haysack, $needle, $offset );
280		}
281		return $ret;
282	}
283
284	/*
285	* function strrpos
286	*
287	* Find position of last occurrence of string in a string.
288	*
289	* @param $haysack string. The string being checked.
290	* @param $needle. string. The position counted from the beginning of haystack .
291	* @param $offset integer. The search offset. If it is not specified, 0 is used.
292	* @param $use_enc string. The encoding to be used. If not set,
293	* the internal encoding will be used.
294	*
295	* @return mixed The  section of the source string requested in the encoding requested or false.
296	* If $len is not set, returns substring from $begin to end of string.
297	*
298	*/
299	public static function strrpos( $haysack, $needle, $offset = 0, $use_enc = null )
300	{
301		$enc = self::$hab_enc;
302		if ( $use_enc !== null ) {
303			$enc = $use_enc;
304		}
305
306		if ( self::$use_library == self::USE_MBSTRING ) {
307			$ret = mb_strrpos( $haysack, $needle, $offset, $enc );
308		}
309		else {
310			$ret = strrpos( $haysack, $needle, $offset );
311		}
312		return $ret;
313	}
314
315	/*
316	* function strripos
317	*
318	* Find position of last occurrence of string in a string. Case insensitive.
319	*
320	* @param $haysack string. The string being checked.
321	* @param $needle. string. The position counted from the beginning of haystack .
322	* @param $offset integer. The search offset. If it is not specified, 0 is used.
323	* @param $use_enc string. The encoding to be used. If not set,
324	* the internal encoding will be used.
325	*
326	* @return mixed The  section of the source string requested in the encoding requested or false.
327	* If $len is not set, returns substring from $begin to end of string.
328	*
329	*/
330	public static function strripos( $haysack, $needle, $offset = 0, $use_enc = null )
331	{
332		$enc = self::$hab_enc;
333		if ( $use_enc !== null ) {
334			$enc = $use_enc;
335		}
336
337		if ( self::$use_library == self::USE_MBSTRING ) {
338			$ret = mb_strripos( $haysack, $needle, $offset, $enc );
339		}
340		else {
341			$ret = strripos( $haysack, $needle, $offset );
342		}
343		return $ret;
344	}
345
346	/*
347	 * function strtolower
348	 *
349	 * Converts a multibyte string to lowercase. If a valid multibyte library
350	* isn't loaded, strtolower() will be used, which can lead to unexpected results.
351	 *
352	 * @param $str string. The string to lowercase
353	* @param $use_enc string. The encoding to be used. If not set,
354	* the internal encoding will be used.
355	 *
356	 * @return string. The lowercased string.
357	*/
358	public static function strtolower( $str, $use_enc = null )
359	{
360		$enc = self::$hab_enc;
361		if ( $use_enc !== null ) {
362			$enc = $use_enc;
363		}
364
365		if ( self::$use_library == self::USE_MBSTRING ) {
366			$ret = mb_strtolower( $str, $enc );
367		}
368		else {
369			$ret = strtolower( $str );
370		}
371
372		return $ret;
373	}
374
375	/*
376	* function strtoupper
377	*
378	* Converts a multibyte string to uppercase. If a valid multibyte library
379	* isn't loaded, strtoupper() will be used, which can lead to unexpected results.
380	*
381	* @param $str string. The string to uppercase
382	* @param $use_enc string. The encoding to be used. If not set,
383	* the internal encoding will be used.
384	*
385	* @return string. The uppercased string.
386	*/
387	public static function strtoupper( $str, $use_enc = null )
388	{
389		$enc = self::$hab_enc;
390		if ( $use_enc !== null ) {
391			$enc = $use_enc;
392		}
393
394		if ( self::$use_library == self::USE_MBSTRING ) {
395			$ret = mb_strtoupper( $str, $enc );
396		}
397		else {
398			$ret = strtoupper( $str );
399		}
400
401		return $ret;
402	}
403
404	/**
405	 * Determines if the passed string is valid character data (according to mbstring)
406	 *
407	 * @param string $str the string to check
408	 * @return bool
409	 */
410	public static function valid_data( $str )
411	{
412		return mb_check_encoding( $str, self::$hab_enc );
413	}
414	
415	/**
416	 * Makes a string's first character uppercase
417	 * 
418	 * @see http://php.net/ucfirst
419	 * @param string $str The string to capitalize.
420	 * @param string $use_enc The encoding to be used. If null, the internal encoding will be used.
421	 * @return string The capitalized string.
422	 */
423	public static function ucfirst ( $str, $use_enc = null )
424	{
425		
426		$enc = self::$hab_enc;
427		if ( $use_enc !== null ) {
428			$enc = $use_enc;
429		}
430		
431		if ( self::$use_library == self::USE_MBSTRING ) {
432			
433			// get the first character
434			$first = self::substr( $str, 0, 1, $enc );
435			
436			// uppercase it
437			$first = self::strtoupper( $first, $enc );
438			
439			// get the rest of the characters
440			$last = self::substr( $str, 1, null, $enc );
441			
442			// put them back together
443			$ret = $first . $last;
444			
445		}
446		else {
447			$ret = ucfirst( $str );
448		}
449		
450		return $ret;
451		
452	}
453	
454	/**
455	 * Makes a string's first character lowercase
456	 * 
457	 * @see http://php.net/ucfirst
458	 * @param string $str The string to lowercase.
459	 * @param string $use_enc The encoding to be used. If null, the internal encoding will be used.
460	 * @return string The lowercased string.
461	 */
462	public static function lcfirst ( $str, $use_enc = null )
463	{
464		
465		$enc = self::$hab_enc;
466		if ( $use_enc !== null ) {
467			$enc = $use_enc;
468		}
469		
470		if ( self::$use_library == self::USE_MBSTRING ) {
471			
472			// get the first character
473			$first = self::substr( $str, 0, 1, $enc );
474			
475			// lowercase it
476			$first = self::strtolower( $first, $enc );
477			
478			// get the rest of the characters
479			$last = self::substr( $str, 1, null, $enc );
480			
481			// put them back together
482			$ret = $first . $last;
483			
484		}
485		else {
486			
487			// lcfirst() is php 5.3+ so we'll emulate it
488			$first = substr( $str, 0, 1 );
489			$first = strtolower( $first );
490			
491			$last = substr( $str, 1 );
492			
493			$ret = $first . $last;
494			
495		}
496		
497		return $ret;
498		
499	}
500	
501
502	/**
503	 * Replace all occurrences of the search string with the replacement string.
504	 * 
505	 * @see http://php.net/str_replace
506	 * @param mixed $search A string or an array of strings to search for.
507	 * @param mixed $replace A string or an array of strings to replace search values with.
508	 * @param string $subject The string to perform the search and replace on.
509	 * @param int $count If passed, this value will hold the number of matched and replaced needles.
510	 * @param string $use_enc The encoding to be used. If null, the internal encoding will be used.
511	 * @return string The subject with replaced values.
512	 */
513	public static function str_replace ( $search, $replace, $subject, &$count = 0, $use_enc = null )
514	{
515		
516		$enc = self::$hab_enc;
517		if ( $use_enc !== null ) {
518			$enc = $use_enc;
519		}
520		
521		if ( self::$use_library == self::USE_MBSTRING ) {
522		
523			// if search is an array and replace is not, we need to make replace an array and pad it to the same number of values as search
524			if ( is_array( $search ) && !is_array( $replace ) ) {
525				$replace = array_fill( 0, count( $search ), $replace );
526			}
527			
528			// if search is an array and replace is as well, we need to make sure replace has the same number of values - pad it with empty strings
529			if ( is_array( $search ) && is_array( $replace ) ) {
530				$replace = array_pad( $replace, count( $search ), '' );
531			}
532			
533			// if search is not an array, make it one
534			if ( !is_array( $search ) ) {
535				$search = array( $search );
536			}
537			
538			// if replace is not an array, make it one
539			if ( !is_array( $replace ) ) {
540				$replace = array( $replace );
541			}
542			
543			// if subject is an array, recursively call ourselves on each element of it
544			if ( is_array( $subject ) ) {
545				foreach ( $subject as $k => $v ) {
546					$subject[ $k ] = self::str_replace( $search, $replace, $v, $count, $use_enc );
547				}
548				
549				return $subject;
550			}
551						
552			
553			
554			// now we've got an array of characters and arrays of search / replace characters with the same values - loop and replace them!
555			$search_count = count( $search );	// we modify $search, so we can't include it in the condition next
556			for ( $i = 0; $i < $search_count; $i++ ) {
557				
558				// the values we'll match
559				$s = array_shift( $search );
560				$r = array_shift( $replace );
561				
562				// to avoid an infinite loop if you're replacing with a value that contains the subject we get the position of each instance first
563				$positions = array();
564				
565				$offset = 0;
566				while ( self::strpos( $subject, $s, $offset, $enc ) !== false ) {
567					
568					// get the position
569					$pos = self::strpos( $subject, $s, $offset, $enc );
570					
571					// add it to the list
572					$positions[] = $pos;
573					
574					// and set the offset to skip over this value
575					$offset = $pos + self::strlen( $s, $enc );
576					
577				}
578				
579				// if we pick through from the beginning, our positions will change if the replacement string is longer
580				// instead, we pick through from the last place
581				$positions = array_reverse( $positions );
582				
583				// now that we've got the position of each one, just loop through that and replace them
584				foreach ( $positions as $pos ) {
585					
586					// pull out the part before the string
587					$before = self::substr( $subject, 0, $pos, $enc );
588					
589					// pull out the part after
590					$after = self::substr( $subject, $pos + self::strlen( $s, $enc ), null, $enc );
591					
592					// now we have the string in two parts without the string we're searching for
593					// put it back together with the replacement
594					$subject = $before . $r . $after;
595					
596					// increment our count, a replacement was made
597					$count++;
598					
599				}
600				
601			}
602			
603		}
604		else {
605			
606			$subject = str_replace( $search, $replace, $subject, $count );
607			
608		}
609		
610		return $subject;
611		
612	}
613	
614	/**
615	 * Replace all occurrences of the search string with the replacement string.
616	 * 
617	 * @see http://php.net/str_ireplace
618	 * @param mixed $search A string or an array of strings to search for.
619	 * @param mixed $replace A string or an array of strings to replace search values with.
620	 * @param string $subject The string to perform the search and replace on.
621	 * @param int $count If passed, this value will hold the number of matched and replaced needles.
622	 * @param string $use_enc The encoding to be used. If null, the internal encoding will be used.
623	 * @return string The subject with replaced values.
624	 */
625	public static function str_ireplace( $search, $replace, $subject, &$count = 0, $use_enc = null )
626	{
627		
628		$enc = self::$hab_enc;
629		if ( $use_enc !== null ) {
630			$enc = $use_enc;
631		}
632		
633		if ( self::$use_library == self::USE_MBSTRING ) {
634		
635			// if search is an array and replace is not, we need to make replace an array and pad it to the same number of values as search
636			if ( is_array( $search ) && !is_array( $replace ) ) {
637				$replace = array_fill( 0, count( $search ), $replace );
638			}
639			
640			// if search is an array and replace is as well, we need to make sure replace has the same number of values - pad it with empty strings
641			if ( is_array( $search ) && is_array( $replace ) ) {
642				$replace = array_pad( $replace, count( $search ), '' );
643			}
644			
645			// if search is not an array, make it one
646			if ( !is_array( $search ) ) {
647				$search = array( $search );
648			}
649			
650			// if replace is not an array, make it one
651			if ( !is_array( $replace ) ) {
652				$replace = array( $replace );
653			}
654			
655			// if subject is an array, recursively call ourselves on each element of it
656			if ( is_array( $subject ) ) {
657				foreach ( $subject as $k => $v ) {
658					$subject[ $k ] = self::str_ireplace( $search, $replace, $v, $count, $use_enc );
659				}
660				
661				return $subject;
662			}
663						
664			
665			
666			$search_count = count( $search );	// we modify $search, so we can't include it in the condition next
667			for ( $i = 0; $i < $search_count; $i++ ) {
668				
669				// the values we'll match
670				$s = array_shift( $search );
671				$r = array_shift( $replace );
672				
673				
674				// to avoid an infinite loop if you're replacing with a value that contains the subject we get the position of each instance first
675				$positions = array();
676				
677				$offset = 0;
678				while ( self::stripos( $subject, $s, $offset, $enc ) !== false ) {
679					
680					// get the position
681					$pos = self::stripos( $subject, $s, $offset, $enc );
682					
683					// add it to the list
684					$positions[] = $pos;
685					
686					// and set the offset to skip over this value
687					$offset = $pos + self::strlen( $s, $enc );
688					
689				}
690				
691				// if we pick through from the beginning, our positions will change if the replacement string is longer
692				// instead, we pick through from the last place
693				$positions = array_reverse( $positions );
694				
695				// now that we've got the position of each one, just loop through that and replace them
696				foreach ( $positions as $pos ) {
697					
698					// pull out the part before the string
699					$before = self::substr( $subject, 0, $pos, $enc );
700					
701					// pull out the part after
702					$after = self::substr( $subject, $pos + self::strlen( $s, $enc ), null, $enc );
703					
704					// now we have the string in two parts without the string we're searching for
705					// put it back together with the replacement
706					$subject = $before . $r . $after;
707					
708					// increment our count, a replacement was made
709					$count++;
710					
711				}
712				
713			}
714			
715		}
716		else {
717			
718			$subject = str_ireplace( $search, $replace, $subject, $count );
719			
720		}
721		
722		return $subject;
723		
724	}
725	
726	/**
727	 * Uppercase the first character of each word in a string.
728	 * 
729	 * From php.net/ucwords:
730	 * 	The definition of a word is any string of characters that is immediately after a whitespace
731	 * 	(These are: space, form-feed, newline, carriage return, horizontal tab, and vertical tab).
732	 * 
733	 * @see http://php.net/ucwords
734	 * @param string $str The input string.
735	 * @param string $use_enc The encoding to be used. If null, the internal encoding will be used.
736	 * @return string The modified string.
737	 */
738	public static function ucwords ( $str, $use_enc = null )
739	{
740		
741		$enc = self::$hab_enc;
742		if ( $use_enc !== null ) {
743			$enc = $use_enc;
744		}
745		
746		if ( self::$use_library == self::USE_MBSTRING ) {
747		
748			$delimiters = array(
749				chr( 32 ),	// space
750				chr( 12 ),	// form-feed
751				chr( 10 ),	// newline
752				chr( 13 ),	// carriage return
753				chr( 9 ),	// horizontal tab
754				chr( 11 ),	// vertical tab
755			);
756			
757			// loop through the delimiters and explode the string by each one
758			foreach ( $delimiters as $d ) {
759				
760				$pieces = explode( $d, $str );
761				
762				for ( $i = 0; $i < count( $pieces ); $i++ ) {
763					
764					// capitalize each word
765					$pieces[ $i ] = self::ucfirst( $pieces[ $i ], $enc );
766					
767				}
768				
769				// put the string back together
770				$str = implode( $d, $pieces );
771				
772			}
773		
774		}
775		else {
776			$str = ucwords( $str );
777		}
778		
779		
780		return $str;
781	}
782
783}
784
785?>