PageRenderTime 61ms CodeModel.GetById 19ms app.highlight 36ms RepoModel.GetById 1ms app.codeStats 0ms

/src/ftk_util.c

http://ftk.googlecode.com/
C | 852 lines | 685 code | 125 blank | 42 comment | 259 complexity | ec86dbcc7755acfe23541f385ab970f6 MD5 | raw file
  1/*
  2 * File: ftk_util.c    
  3 * Author:  Li XianJing <xianjimli@hotmail.com>
  4 * Brief:   commonly used functions.
  5 *
  6 * Copyright (c) 2009 - 2010  Li XianJing <xianjimli@hotmail.com>
  7 *
  8 * Licensed under the Academic Free License version 2.1
  9 *
 10 * This program is free software; you can redistribute it and/or modify
 11 * it under the terms of the GNU General Public License as published by
 12 * the Free Software Foundation; either version 2 of the License, or
 13 * (at your option) any later version.
 14 *
 15 * This program is distributed in the hope that it will be useful,
 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 * GNU General Public License for more details.
 19 *
 20 * You should have received a copy of the GNU General Public License
 21 * along with this program; if not, write to the Free Software
 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 23 */
 24
 25/*
 26 * History:
 27 * ================================================================
 28 * 2009-10-03 Li XianJing <xianjimli@hotmail.com> created
 29 *
 30 */
 31
 32#include "ftk_log.h"
 33#include "ftk_util.h"
 34
 35/*UTF8-related functions are copied from glib.*/
 36
 37#define UTF8_COMPUTE(Char, Mask, Len)					      \
 38  if (Char < 128)							      \
 39    {									      \
 40      Len = 1;								      \
 41      Mask = 0x7f;							      \
 42    }									      \
 43  else if ((Char & 0xe0) == 0xc0)					      \
 44    {									      \
 45      Len = 2;								      \
 46      Mask = 0x1f;							      \
 47    }									      \
 48  else if ((Char & 0xf0) == 0xe0)					      \
 49    {									      \
 50      Len = 3;								      \
 51      Mask = 0x0f;							      \
 52    }									      \
 53  else if ((Char & 0xf8) == 0xf0)					      \
 54    {									      \
 55      Len = 4;								      \
 56      Mask = 0x07;							      \
 57    }									      \
 58  else if ((Char & 0xfc) == 0xf8)					      \
 59    {									      \
 60      Len = 5;								      \
 61      Mask = 0x03;							      \
 62    }									      \
 63  else if ((Char & 0xfe) == 0xfc)					      \
 64    {									      \
 65      Len = 6;								      \
 66      Mask = 0x01;							      \
 67    }									      \
 68  else									      \
 69    Len = -1;
 70
 71#define UTF8_LENGTH(Char)              \
 72  ((Char) < 0x80 ? 1 :                 \
 73   ((Char) < 0x800 ? 2 :               \
 74    ((Char) < 0x10000 ? 3 :            \
 75     ((Char) < 0x200000 ? 4 :          \
 76      ((Char) < 0x4000000 ? 5 : 6)))))
 77   
 78
 79#define UTF8_GET(Result, Chars, Count, Mask, Len)			      \
 80  (Result) = (Chars)[0] & (Mask);					      \
 81  for ((Count) = 1; (Count) < (Len); ++(Count))				      \
 82    {									      \
 83      if (((Chars)[(Count)] & 0xc0) != 0x80)				      \
 84	{								      \
 85	  (Result) = -1;						      \
 86	  break;							      \
 87	}								      \
 88      (Result) <<= 6;							      \
 89      (Result) |= ((Chars)[(Count)] & 0x3f);				      \
 90    }
 91
 92#define UNICODE_VALID(Char)                   \
 93    ((Char) < 0x110000 &&                     \
 94     (((Char) & 0xFFFFF800) != 0xD800) &&     \
 95     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
 96     ((Char) & 0xFFFE) != 0xFFFE)
 97static const char utf8_skip_data[256] = {
 98  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 99  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
100  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
101  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
102  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
103  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
104  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
105  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
106};
107
108const char * const g_utf8_skip = utf8_skip_data;
109#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const unsigned char *)(p)])
110
/*
 * Decode the UTF-8 sequence starting at p.
 *
 * p    - first byte of the sequence.
 * next - optional; receives the position following the bytes consumed.
 *
 * Returns the decoded value truncated to 16 bits, 0 for the NUL byte, or
 * (unsigned short)-1 when p is NULL, the lead byte is invalid, or a
 * continuation byte is missing.
 *
 * Unlike a plain table-driven skip, *next is always advanced by at least one
 * byte and never past a byte that failed validation. That keeps callers that
 * loop on *next from stalling on an invalid lead byte and from stepping over
 * the NUL terminator of a truncated sequence.
 */
unsigned short utf8_get_char (const char *p, const char** next)
{
	const unsigned char* s = (const unsigned char*)p;
	unsigned int result = 0;
	unsigned int lead = 0;
	int mask = 0;
	int len = 0;
	int i = 0;

	if(s == NULL)
	{
		return (unsigned short)-1;
	}

	lead = s[0];
	if(lead < 0x80)
	{
		len = 1;
		mask = 0x7f;
	}
	else if((lead & 0xe0) == 0xc0)
	{
		len = 2;
		mask = 0x1f;
	}
	else if((lead & 0xf0) == 0xe0)
	{
		len = 3;
		mask = 0x0f;
	}
	else if((lead & 0xf8) == 0xf0)
	{
		len = 4;
		mask = 0x07;
	}
	else if((lead & 0xfc) == 0xf8)
	{
		len = 5;
		mask = 0x03;
	}
	else if((lead & 0xfe) == 0xfc)
	{
		len = 6;
		mask = 0x01;
	}
	else
	{
		/* Stray continuation byte or 0xFE/0xFF: skip exactly one byte. */
		if(next != NULL)
		{
			*next = p + 1;
		}

		return (unsigned short)-1;
	}

	result = lead & (unsigned int)mask;
	for(i = 1; i < len; i++)
	{
		if((s[i] & 0xc0) != 0x80)
		{
			/* Not a continuation byte (possibly the terminator): stop here. */
			result = (unsigned int)-1;
			break;
		}
		result = (result << 6) | (s[i] & 0x3fu);
	}

	if(next != NULL)
	{
		/* i == len on success, index of the offending byte on failure. */
		*next = p + i;
	}

	return (unsigned short)result;
}
129
/*
 * Decode the character that ends immediately before p.
 *
 * Steps backwards over continuation bytes (10xxxxxx), looking at no more
 * than 7 bytes, and decodes from the first non-continuation byte found.
 * If prev is non-NULL it receives the start of that character, or p itself
 * when all 7 bytes were continuation bytes (in which case 0 is returned).
 *
 * The caller must guarantee that the bytes before p are readable; the
 * function has no notion of where the buffer begins.
 */
unsigned short utf8_get_prev_char (const char *p, const char** prev)
{
	int back = 0;

	for(back = 1; back < 8; back++)
	{
		const char* candidate = p - back;
		unsigned char byte = (unsigned char)*candidate;

		if((byte & 0xc0) == 0x80)
		{
			/* continuation byte, keep searching for the lead byte */
			continue;
		}

		if(prev != NULL)
		{
			*prev = candidate;
		}

		return utf8_get_char(candidate, NULL);
	}

	if(prev != NULL)
	{
		*prev = p;
	}

	return 0;
}
157
/*
 * Count the UTF-8 characters that lie completely within the first `length`
 * bytes of str, stopping early at a NUL character.
 *
 * The bound is checked before each read, so a buffer of exactly `length`
 * bytes without a terminator is never read past its end. If the decoder
 * makes no progress on a byte (invalid lead byte), that byte is counted as
 * one character and skipped, so the loop always terminates.
 */
int utf8_count_char(const char *str, int length)
{
	int nr = 0;
	const char* iter = str;
	return_val_if_fail(str != NULL, 0);

	while((iter - str) < length)
	{
		const char* next = iter;

		if(utf8_get_char(iter, &next) == 0)
		{
			break;
		}

		if(next <= iter)
		{
			next = iter + 1;
		}

		if((next - str) > length)
		{
			/* the character straddles the end of the range */
			break;
		}

		nr++;
		iter = next;
	}

	return nr;
}
171
/*
 * Encode the 16-bit code unit c as UTF-8.
 *
 * c      - value to encode (0..0xFFFF, so at most three bytes are produced).
 * outbuf - destination with room for at least 3 bytes, or NULL to only
 *          compute the length. No terminator is written.
 *
 * Returns the number of bytes the encoding occupies (1, 2 or 3).
 */
int unichar_to_utf8 (unsigned short c, char* outbuf)
{
	int len = 0;
	int first = 0;
	int i = 0;

	if(c < 0x80)
	{
		first = 0;
		len = 1;
	}
	else if(c < 0x800)
	{
		first = 0xc0;
		len = 2;
	}
	else
	{
		/* an unsigned short never needs more than three bytes */
		first = 0xe0;
		len = 3;
	}

	if(outbuf != NULL)
	{
		/* fill continuation bytes from the end, six payload bits each */
		for(i = len - 1; i > 0; --i)
		{
			outbuf[i] = (char)((c & 0x3f) | 0x80);
			c >>= 6;
		}
		outbuf[0] = (char)(c | first);
	}

	return len;
}
222
/* Combine a high/low surrogate pair into the code point it represents. */
#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)

/*
 * Encode code point wc (up to 0x10FFFF) as UTF-8 into out, or only compute
 * the length when out is NULL. Returns the number of bytes (1..4).
 */
static int utf16_put_utf8(unsigned int wc, char* out)
{
	static const unsigned char lead[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};
	int len = wc < 0x80 ? 1 : (wc < 0x800 ? 2 : (wc < 0x10000 ? 3 : 4));
	int i = 0;

	if(out != NULL)
	{
		for(i = len - 1; i > 0; i--)
		{
			out[i] = (char)((wc & 0x3f) | 0x80);
			wc >>= 6;
		}
		out[0] = (char)(wc | lead[len]);
	}

	return len;
}

/*
 * Convert a UTF-16 string to NUL-terminated UTF-8.
 *
 * str     - UTF-16 input.
 * len     - maximum number of 16-bit units to read, or negative to read up
 *           to the terminating 0 unit. Conversion always stops at a 0 unit.
 * utf8    - caller-supplied output buffer.
 * out_len - capacity of utf8 in bytes, including the terminator.
 *
 * Returns utf8 on success. Returns NULL (after logging) when the input
 * contains an unpaired surrogate or when the output, plus terminator, does
 * not fit in out_len bytes. Nothing is written to utf8 on failure.
 *
 * Surrogate pairs are decoded into a full 32-bit code point and emitted as
 * four-byte sequences; the buffer size is checked at run time rather than
 * with assert(), so the check is not lost in NDEBUG builds.
 */
char* utf16_to_utf8 (const unsigned short  *str, long len, char* utf8, int out_len)
{
	const unsigned short *in = NULL;
	char *out = NULL;
	int n_bytes = 0;
	unsigned short high_surrogate = 0;

	return_val_if_fail (str != NULL && utf8 != NULL, NULL);

	/* Pass 1: validate surrogate pairing and compute the output size. */
	for(in = str; (len < 0 || in - str < len) && *in; in++)
	{
		unsigned short c = *in;
		unsigned int wc = 0;

		if(c >= 0xdc00 && c < 0xe000) /* low surrogate */
		{
			if(!high_surrogate)
			{
				ftk_loge("Invalid sequence in conversion input");
				return NULL;
			}
			wc = SURROGATE_VALUE (high_surrogate, c);
			high_surrogate = 0;
		}
		else
		{
			if(high_surrogate)
			{
				ftk_loge("Invalid sequence in conversion input");
				return NULL;
			}

			if(c >= 0xd800 && c < 0xdc00) /* high surrogate */
			{
				high_surrogate = c;
				continue;
			}
			wc = c;
		}

		n_bytes += utf16_put_utf8(wc, NULL);
	}

	if(high_surrogate)
	{
		ftk_loge("Partial character sequence at end of input");
		return NULL;
	}

	if(n_bytes >= out_len)
	{
		ftk_loge("Output buffer too small for conversion");
		return NULL;
	}

	/* Pass 2: the input is known to be valid, so just encode it. */
	high_surrogate = 0;
	out = utf8;
	for(in = str; out < utf8 + n_bytes; in++)
	{
		unsigned short c = *in;
		unsigned int wc = 0;

		if(c >= 0xd800 && c < 0xdc00) /* high surrogate */
		{
			high_surrogate = c;
			continue;
		}

		if(c >= 0xdc00 && c < 0xe000) /* low surrogate */
		{
			wc = SURROGATE_VALUE (high_surrogate, c);
			high_surrogate = 0;
		}
		else
		{
			wc = c;
		}

		out += utf16_put_utf8(wc, out);
	}

	*out = '\0';

	return utf8;
}
330
/*
 * Value of a single hexadecimal digit character ('0'-'9', 'a'-'f',
 * 'A'-'F'). Any other character yields 0.
 */
static int ftk_hex_to_int(char c)
{
	int digit = 0;

	if('0' <= c && c <= '9')
	{
		digit = c - '0';
	}
	else if('A' <= c && c <= 'F')
	{
		digit = 10 + (c - 'A');
	}
	else if('a' <= c && c <= 'f')
	{
		digit = 10 + (c - 'a');
	}

	return digit;
}
348
/*
 * Parse one colour channel: the two hex digits at value[0] and value[1]
 * are combined into a value in 0..255. Both characters must be readable.
 */
static int ftk_parse_color_1(const char* value)
{
	int high = ftk_hex_to_int(value[0]);
	int low = ftk_hex_to_int(value[1]);

	return (high << 4) + low;
}
353
354FtkColor ftk_parse_color( const char* value)
355{
356	FtkColor color = {0};
357	return_val_if_fail(value != NULL && strlen(value) >= 8, color);
358
359	color.a = ftk_parse_color_1(value);
360	color.r = ftk_parse_color_1(value + 2);
361	color.g = ftk_parse_color_1(value + 4);
362	color.b = ftk_parse_color_1(value + 6);
363
364	return color;
365}
366
/* True when ch ends a path component: '/', '\\' or the string terminator. */
#define FTK_IS_SEP_OR_END(ch) ((ch) == '/' || (ch) == '\\' || (ch) == '\0')

/* path starts with the component "."  */
#define IS_CURRENT(path) (((path)[0] == '.') && FTK_IS_SEP_OR_END((path)[1]))
/* path starts with the component "~"  */
#define IS_HOME(path) (((path)[0] == '~') && FTK_IS_SEP_OR_END((path)[1]))
/* path starts with the component ".." */
#define IS_PARENT(path) (((path)[0] == '.') && ((path)[1] == '.') && \
	FTK_IS_SEP_OR_END((path)[2]))

/* Leave the enclosing loop when str points at the terminator. */
#define BREAK_IF_LAST(str) if((str)[0] == '\0') break;
375
376char* normalize_path(const char* path_in, char path_out[FTK_MAX_PATH+1])
377{
378	int i = 0;
379	int in_index = 0;
380	int out_index = 0;
381
382	return_val_if_fail(path_in != NULL && path_out != NULL, NULL);
383	
384	path_out[0] = '\0';
385	for(in_index = 0; path_in[in_index] != '\0'; in_index++)
386	{
387		if(in_index == 0)
388		{
389			if(IS_CURRENT(path_in)) 
390			{
391				ftk_getcwd(path_out, FTK_MAX_PATH);
392				out_index = strlen(path_out);
393				continue;
394			}
395#ifdef LINUX			
396			else if(IS_HOME(path_in))
397			{
398				const char* home = getenv("HOME");
399				if(home != NULL)
400				{
401					ftk_strcpy(path_out, home);
402					out_index = strlen(path_out);
403				}
404				continue;
405			}	
406			else if(path_in[0] != '/')
407			{
408				ftk_getcwd(path_out, FTK_MAX_PATH);
409				out_index = strlen(path_out);
410				path_out[out_index++] = '/';
411				path_out[out_index++] = path_in[in_index];
412				continue;
413			}
414#endif		
415		}
416
417		if(path_in[in_index] == '\\' || path_in[in_index] == '/')
418		{
419			if(out_index == 0 || path_out[out_index - 1] != '/')
420			{
421				path_out[out_index++] = '/';
422			}
423		}
424		else if(IS_CURRENT(path_in+in_index) || IS_HOME(path_in+in_index))
425		{
426			in_index++;
427			BREAK_IF_LAST(path_in+in_index);
428		}
429		else if(IS_PARENT(path_in+in_index))
430		{
431			if(out_index > 1)
432			{
433				if(path_out[out_index - 1] == '/')
434				{
435					for(--out_index; path_out[out_index - 1] != '/'; out_index--);
436				}
437				else
438				{
439					ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
440					in_index += 2;
441				}
442			}
443			else
444			{
445				ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
446				in_index += 2;
447			}
448			BREAK_IF_LAST(path_in+in_index);
449		}
450		else 
451		{
452			path_out[out_index++] = path_in[in_index];
453		}
454
455		if(out_index >= FTK_MAX_PATH)
456		{
457			break;
458		}
459	}
460
461	path_out[out_index] = '\0';
462
463	for(i = 0; i < out_index; i++)
464	{
465		if(path_out[i] == '\\' || path_out[i] == '/')
466		{
467			path_out[i] = FTK_PATH_DELIM;
468		}
469	}
470
471	return path_out;
472}
473
474const char* ftk_normalize_path(char path[FTK_MAX_PATH+1])
475{
476	char path_out[FTK_MAX_PATH+1] = {0};
477	return_val_if_fail(path != NULL, NULL);
478
479	normalize_path(path, path_out);
480	ftk_strncpy(path, path_out, FTK_MAX_PATH);
481
482	return path;
483}
484
/*
 * Advance nr characters in the UTF-8 string str and return the new
 * position. The walk stops at the NUL terminator, so asking for more
 * characters than the string contains yields a pointer to the terminator
 * instead of a pointer beyond the buffer. A byte on which the decoder makes
 * no progress is skipped as a single character.
 * Returns NULL when str is NULL.
 */
const char* utf8_move_forward(const char* str, int nr)
{
	int i = 0;
	const char* next = str;
	return_val_if_fail(str != NULL, NULL);

	for(i = 0; i < nr && *next != '\0'; i++)
	{
		const char* current = next;

		utf8_get_char(current, &next);
		if(next <= current)
		{
			next = current + 1;
		}
	}

	return next;
}
497
498#ifdef USE_LINEBREAK
499#include "linebreak/linebreak.h"
500const char* ftk_line_break(const char* start, const char* end)
501{
502	const char* p = end;
503	const char* next = NULL;
504	unsigned short c1 = 0;
505	unsigned short c2 = 0;
506	static int linebreak_inited = 0;
507
508	if(linebreak_inited == 0)
509	{
510		init_linebreak();
511		linebreak_inited = 1;
512	}
513
514	c2 = utf8_get_char(p, &next);
515	c1 = utf8_get_prev_char(p, NULL);
516
517	if(c1 != '\n' && c1 != '\r' && c2 != '\0' && c2 != '\n' && c2 != '\r')
518	{
519		size_t i = 0;
520		char brks[256] = {0};
521		size_t len = end - start + 1;
522		assert(len < sizeof(brks));
523	
524		set_linebreaks_utf8((const utf8_t*)start, len, "zh", brks);
525		
526		i = len - 2;
527		for(; i > 0; i--)
528		{
529			if(brks[i] == LINEBREAK_ALLOWBREAK || brks[i] == LINEBREAK_MUSTBREAK)
530			{
531				end = start + i + 1;
532				break;
533			}
534		}
535
536//		while((unsigned char)(*end) >= 0x80) end--;
537	}
538
539	return end;
540}
541#else
/*
 * Decide whether a line may be broken between characters c1 and c2.
 * Returns 1 when either character is above 0x80; otherwise 0 when both are
 * digits or both are letters, and 1 in every other case.
 */
int ftk_can_break(unsigned short c1, unsigned short c2)
{
	int both_low = !(c1 > 0x80 || c2 > 0x80);

	if(both_low)
	{
		if((isdigit(c1) && isdigit(c2)) || (isalpha(c1) && isalpha(c2)))
		{
			/* never split a number or a word */
			return 0;
		}
	}

	return 1;
}
561
/*
 * Choose where to break the text [start, end] when a line must end at or
 * before `end` (built-in variant).
 *
 * If the characters around `end` are not newline characters and `end` is
 * not at the end of the string, the position is moved backwards one
 * character at a time until ftk_can_break() allows a break or `start` is
 * reached. The resulting position is returned; it is never before `start`.
 *
 * An empty range (end <= start) is returned unchanged without looking at
 * the byte before `start`.
 */
const char* ftk_line_break(const char* start, const char* end)
{
	const char* p = end;
	const char* next = NULL;
	unsigned short c1 = 0;
	unsigned short c2 = 0;

	if(end <= start)
	{
		return end;
	}

	c2 = utf8_get_char(p, NULL);
	c1 = utf8_get_prev_char(p, NULL);

	if(c1 != '\n' && c1 != '\r' && c2 != '\0' && c2 != '\n' && c2 != '\r')
	{
		while(!ftk_can_break(c1, c2) && p > start)
		{
			next = p;
			c2 = c1;
			c1 = utf8_get_prev_char(next, &p);
		}

		if(p < start)
		{
			/* malformed input made the backwards scan overshoot */
			p = start;
		}
	
		end = p;
	}

	return end;
}
585#endif
586
/*
 * Interpret a string as a boolean. NULL, any string starting with '0',
 * and the exact words "false" and "no" give 0; everything else
 * (including the empty string) gives 1.
 */
int ftk_str2bool(const char* str)
{
	static const char* const false_words[] = {"false", "no"};
	size_t k = 0;

	if(str == NULL)
	{
		return 0;
	}

	if(str[0] == '0')
	{
		return 0;
	}

	for(k = 0; k < sizeof(false_words) / sizeof(false_words[0]); k++)
	{
		if(strcmp(str, false_words[k]) == 0)
		{
			return 0;
		}
	}

	return 1;
}
596
/*
 * Concatenate a NULL-terminated list of strings into str.
 *
 * str   - destination buffer.
 * len   - capacity of str in bytes; must be positive.
 * first - first string to copy, followed by further const char* arguments
 *         and a terminating NULL.
 *
 * The result is truncated to len - 1 characters if necessary and is always
 * NUL-terminated. Returns str, or NULL when str is NULL or len <= 0.
 */
char* ftk_strs_cat(char* str, int len, const char* first, ...)
{
	va_list ap;
	size_t used = 0;
	const char* piece = first;
	return_val_if_fail(str != NULL && len > 0, NULL);

	va_start(ap, first); 
	for(; piece != NULL && used < len; piece = va_arg(ap, char*))
	{
		while(used < len && *piece)
		{
			str[used++] = *piece++;
		}
	}
	va_end(ap); 

	/* when the buffer filled up completely, the last byte becomes the terminator */
	str[used < len ? used : len - 1] = '\0';

	return str;
}
627
/*
 * Parse an unsigned run of digits in base 8, 10 or 16.
 *
 * str  - text to parse; no sign, whitespace or "0x" prefix is accepted.
 * end  - optional; receives the position of the first character that was
 *        not consumed.
 * base - 8, 10 or 16; any other base makes the function return 0.
 *
 * At most 10 digits are consumed. If the value does not fit in a long the
 * result saturates at the largest long instead of overflowing.
 */
static long  ftk_strtol_internal(const char* str, const char **end, int base)
{
	int i = 0;
	long n = 0;
	const long max_value = (long)(~0UL >> 1);
	return_val_if_fail(str != NULL && (base == 10 || base == 8 || base == 16), 0);

	for(i = 0; str[i] && i < 10; i++)
	{
		char c = str[i];
		int digit = -1;

		if(c >= '0' && c <= '9')
		{
			digit = c - '0';
		}
		else if(c >= 'a' && c <= 'f')
		{
			digit = c - 'a' + 10;
		}
		else if(c >= 'A' && c <= 'F')
		{
			digit = c - 'A' + 10;
		}

		if(digit < 0 || digit >= base)
		{
			/* not a digit of this base */
			break;
		}

		if(n > (max_value - digit) / base)
		{
			n = max_value;
		}
		else
		{
			n = n * base + digit;
		}
	}

	if(end != NULL)
	{
		*end = str+i;
	}

	return n;
}
697
/*
 * Convert text to a long.
 *
 * Leading spaces and tabs are skipped and one optional '+' or '-' sign is
 * accepted before the digits, which are parsed by ftk_strtol_internal()
 * in the given base. If end is non-NULL it receives the position after
 * the last digit consumed. Returns 0 when str is NULL.
 */
long  ftk_strtol(const char* str, const char **end, int base)
{
	const char* cursor = str;
	int negative = 0;
	long magnitude = 0;
	return_val_if_fail(str != NULL, 0);

	for(; *cursor == ' ' || *cursor == '\t'; cursor++)
	{
	}

	switch(*cursor)
	{
		case '-':
			negative = 1;
			/* fall through */
		case '+':
			cursor++;
			break;
		default:
			break;
	}

	magnitude = ftk_strtol_internal(cursor, end, base);

	if(negative)
	{
		return -magnitude;
	}

	return magnitude;
}
716
/* Convert decimal text to int via ftk_strtol(); the result is narrowed from long. */
int   ftk_atoi(const char* str)
{
	long parsed = ftk_strtol(str, NULL, 10);

	return parsed;
}
721
/*
 * Convert text of the form [+|-]digits[.digits] to a double.
 *
 * No leading whitespace and no exponent are accepted. The fractional part
 * is scaled by the number of digits actually read, so leading zeros after
 * the decimal point are honoured ("1.05" yields 1.05, not 1.5).
 * Returns 0 when str is NULL.
 *
 * NOTE: both parts are parsed by ftk_strtol_internal(), which consumes at
 * most 10 digits per call.
 */
double ftk_atof(const char* str)
{
	long n = 0;
	int neg = 0;
	double result = 0;
	const char* p = NULL;
	return_val_if_fail(str != NULL, 0);

	if(str[0] == '+' || str[0] == '-')
	{
		neg = str[0] == '-';
		str++;
	}

	n = ftk_strtol_internal(str, &p, 10);
	result = n;

	if(p != NULL && *p == '.')
	{
		const char* frac_begin = p + 1;
		const char* frac_end = frac_begin;
		double fraction = ftk_strtol_internal(frac_begin, &frac_end, 10);

		/* divide once per digit consumed, leading zeros included */
		for(; frac_end > frac_begin; frac_end--)
		{
			fraction = fraction / 10;
		}

		result = result + fraction;
	}

	return neg ? -result : result;
}
754
/*
 * Write the decimal representation of n into str.
 *
 * str - destination buffer.
 * len - capacity of str; must be greater than 2 and greater than the
 *       number of bytes needed (sign + digits + terminator).
 * end - optional; receives the position of the terminator that was written.
 *
 * Returns str, or NULL when the arguments are invalid or the buffer is too
 * small; str is not modified in that case.
 *
 * The magnitude is computed in unsigned arithmetic so that the most
 * negative int is converted correctly instead of overflowing on negation.
 */
static const char* ftk_itoa_simple(char* str, int len, int n, const char** end)
{
	int i = 0;
	int need_len = 0;
	unsigned int magnitude = 0;
	unsigned int value = 0;

	return_val_if_fail(str != NULL && len > 2, NULL);

	if(n == 0)
	{
		str[0] = '0';
		str[1] = '\0';
		
		if(end != NULL)
		{
			*end = str + 1;
		}

		return str;
	}

	magnitude = n < 0 ? 0u - (unsigned int)n : (unsigned int)n;

	if(n < 0)
	{
		need_len++; /*for the sign*/
	}

	for(value = magnitude; value > 0; value = value / 10)
	{
		need_len++;
	}

	need_len++; /*for null char*/
	return_val_if_fail(len > (need_len), NULL);

	if(n < 0)
	{
		str[0] = '-';
	}

	/* digits are produced least significant first, so fill from the end */
	i = need_len - 2;
	for(value = magnitude; value > 0; value = value / 10)
	{
		str[i--] = (char)((value % 10) + '0');
	}
	str[need_len - 1] = '\0';

	if(end != NULL)
	{
		*end = str + need_len - 1;
	}

	return str;
}
808
/*
 * Write the decimal representation of n into str (capacity len) using
 * ftk_itoa_simple(). Returns whatever that helper returns.
 */
const char* ftk_itoa(char* str, int len, int n)
{
	const char* text = ftk_itoa_simple(str, len, n, NULL);

	return text;
}
813
/*
 * Format value as decimal text with at most six fractional digits.
 *
 * str - destination buffer.
 * len - capacity of str in bytes.
 *
 * The fraction is truncated (not rounded) to six digits, trailing zeros are
 * removed, and the decimal point is omitted when no fractional digit
 * remains. Leading and interior zeros of the fraction are preserved
 * (1.05 -> "1.05", 1.5001 -> "1.5001") and the sign is kept for values
 * between -1 and 0.
 *
 * Returns str, or NULL when str is NULL, len is not positive, the value is
 * NaN or its magnitude exceeds 2147483647, or a result with a fractional
 * part does not fit. An integer-only result is truncated to fit and is
 * always terminated.
 */
const char* ftk_ftoa(char* str, int len, double value)
{
	int i = 0;
	int n = 0;
	int f = 0;
	int neg = 0;
	double magnitude = 0;
	const char* sign = "";
	char str_n[32] = {0};
	char str_f[8] = {0};

	return_val_if_fail(str != NULL && len > 0, NULL);

	neg = value < 0;
	magnitude = neg ? -value : value;
	/* also rejects NaN, for which the comparison is false */
	return_val_if_fail(magnitude <= 2147483647.0, NULL);

	n = (int)magnitude;
	/* nine digits are computed and the top six of them are kept */
	f = (int)((magnitude - n) * 1000000000) / 1000;
	if(f > 999999)
	{
		f = 999999;
	}

	/* fixed-width conversion keeps the leading zeros of the fraction */
	for(i = 5; i >= 0; i--)
	{
		str_f[i] = (char)('0' + (f % 10));
		f = f / 10;
	}
	str_f[6] = '\0';

	/* strip trailing zeros only; i ends as the index of the last digit kept */
	for(i = 5; i >= 0 && str_f[i] == '0'; i--)
	{
		str_f[i] = '\0';
	}

	ftk_itoa(str_n, sizeof(str_n), n);

	if(neg && (n != 0 || i >= 0))
	{
		sign = "-";
	}

	if(i < 0)
	{
		return ftk_strs_cat(str, len, sign, str_n, NULL);
	}

	return_val_if_fail(len > (int)(strlen(sign) + strlen(str_n) + 1 + (i + 1)), NULL);
	
	return ftk_strs_cat(str, len, sign, str_n, ".", str_f, NULL);
}
848
/*
 * Copy the NUL-terminated string src into dst with strcpy() and return dst.
 * No bounds checking is performed; dst must be large enough.
 */
char* ftk_strcpy(char* dst, const char* src)
{
	char* copied = strcpy(dst, src);

	return copied;
}