ftk_util.c - This C code provides a set of string manipulat…

/src/ftk_util.c

http://ftk.googlecode.com/ · C · 852 lines · 685 code · 125 blank · 42 comment · 267 complexity · ec86dbcc7755acfe23541f385ab970f6 MD5 · raw file

/*
 * File: ftk_util.c    
 * Author:  Li XianJing <xianjimli@hotmail.com>
 * Brief:   common used functions.
 *
 * Copyright (c) 2009 - 2010  Li XianJing <xianjimli@hotmail.com>
 *
 * Licensed under the Academic Free License version 2.1
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * History:
 * ================================================================
 * 2009-10-03 Li XianJing <xianjimli@hotmail.com> created
 *
 */

#include "ftk_log.h"
#include "ftk_util.h"

/*UTF8-related functions are copied from glib.*/

/*
 * UTF8_COMPUTE: classify the lead byte of a UTF-8 sequence.
 *
 *   Char - value of the lead byte (callers pass an unsigned char).
 *   Mask - receives the mask that selects the payload bits of the lead byte.
 *   Len  - receives the sequence length in bytes (1..6), or -1 when Char
 *          matches none of the lead-byte patterns below.
 *
 * Mask is left untouched when Len is set to -1.  The body is wrapped in
 * do { } while (0) and every argument is parenthesized so the macro expands
 * to exactly one statement and is safe inside an unbraced if/else.
 */
#define UTF8_COMPUTE(Char, Mask, Len)					      \
  do									      \
    {									      \
      if ((Char) < 128)							      \
	{								      \
	  (Len) = 1;							      \
	  (Mask) = 0x7f;						      \
	}								      \
      else if (((Char) & 0xe0) == 0xc0)					      \
	{								      \
	  (Len) = 2;							      \
	  (Mask) = 0x1f;						      \
	}								      \
      else if (((Char) & 0xf0) == 0xe0)					      \
	{								      \
	  (Len) = 3;							      \
	  (Mask) = 0x0f;						      \
	}								      \
      else if (((Char) & 0xf8) == 0xf0)					      \
	{								      \
	  (Len) = 4;							      \
	  (Mask) = 0x07;						      \
	}								      \
      else if (((Char) & 0xfc) == 0xf8)					      \
	{								      \
	  (Len) = 5;							      \
	  (Mask) = 0x03;						      \
	}								      \
      else if (((Char) & 0xfe) == 0xfc)					      \
	{								      \
	  (Len) = 6;							      \
	  (Mask) = 0x01;						      \
	}								      \
      else								      \
	{								      \
	  (Len) = -1;							      \
	}								      \
    }									      \
  while (0)

/*
 * UTF8_LENGTH: number of bytes (1..6) used to encode the code point Char
 * in UTF-8.  The thresholds are tested from the largest down; Char is
 * evaluated more than once, so it must not have side effects.
 */
#define UTF8_LENGTH(Char)              \
  ((Char) >= 0x4000000 ? 6 :           \
   (Char) >= 0x200000  ? 5 :           \
   (Char) >= 0x10000   ? 4 :           \
   (Char) >= 0x800     ? 3 :           \
   (Char) >= 0x80      ? 2 : 1)
   

/*
 * UTF8_GET: decode a UTF-8 sequence whose length and lead-byte mask are
 * already known (see UTF8_COMPUTE).
 *
 *   Result - receives the decoded value, or -1 when one of the trailing
 *            bytes is not a continuation byte (10xxxxxx).
 *   Chars  - pointer/array giving access to the bytes of the sequence.
 *   Count  - scratch loop counter; on a bad trailing byte it holds the index
 *            of that byte, otherwise it ends up equal to Len.
 *   Mask   - payload mask for the lead byte.
 *   Len    - total length of the sequence in bytes.
 *
 * Wrapped in do { } while (0) so that the assignment and the loop form a
 * single statement; the `break` below leaves the for loop only.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)			      \
  do									      \
    {									      \
      (Result) = (Chars)[0] & (Mask);					      \
      for ((Count) = 1; (Count) < (Len); ++(Count))			      \
	{								      \
	  if (((Chars)[(Count)] & 0xc0) != 0x80)			      \
	    {								      \
	      (Result) = -1;						      \
	      break;							      \
	    }								      \
	  (Result) <<= 6;						      \
	  (Result) |= ((Chars)[(Count)] & 0x3f);			      \
	}								      \
    }									      \
  while (0)

/*
 * UNICODE_VALID: non-zero when Char passes every check below, i.e. it is
 * none of:
 *   - a value of 0x110000 or above,
 *   - a value in the surrogate block 0xD800..0xDFFF,
 *   - a value in 0xFDD0..0xFDEF,
 *   - a value whose low 16 bits are 0xFFFE or 0xFFFF.
 * Char is evaluated several times and must not have side effects.
 */
#define UNICODE_VALID(Char)                          \
    (!((Char) >= 0x110000 ||                         \
       (((Char) & 0xFFFFF800) == 0xD800) ||          \
       ((Char) >= 0xFDD0 && (Char) <= 0xFDEF) ||     \
       (((Char) & 0xFFFE) == 0xFFFE)))
/*
 * Skip table indexed by the first byte of a UTF-8 sequence; each entry is
 * the number of bytes to step over to reach the next sequence.
 *
 *   0x00..0xBF -> 1   (ASCII and continuation bytes)
 *   0xC0..0xDF -> 2
 *   0xE0..0xEF -> 3
 *   0xF0..0xF7 -> 4
 *   0xF8..0xFB -> 5
 *   0xFC..0xFD -> 6
 *   0xFE..0xFF -> 1
 */
static const char utf8_skip_data[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

/* Externally visible pointer to the skip table above. */
const char * const g_utf8_skip = utf8_skip_data;
/*
 * Pointer to the sequence following the one that starts at p, according to
 * the skip table.  The bytes in between are not inspected, so for a
 * truncated sequence the result may lie beyond the end of the string.
 */
#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const unsigned char *)(p)])

/*
 * Decode the UTF-8 sequence that starts at p.
 *
 *   p    - pointer to the first byte of the sequence (must not be NULL).
 *   next - optional; when not NULL it receives the position at which
 *          decoding should continue.
 *
 * Returns the decoded value truncated to 16 bits (the return type cannot
 * hold code points above U+FFFF), 0 for the NUL terminator, or
 * (unsigned short)-1 when the bytes at p are not a well-formed sequence.
 *
 * The cursor stored in *next always moves forward by at least one byte and
 * never steps over a byte that was rejected: after an invalid lead byte it
 * points at p + 1, and after a bad continuation byte it points at that
 * byte.  Callers that iterate with this function therefore cannot stall on
 * malformed input or jump past the terminating NUL of a truncated sequence.
 */
unsigned short utf8_get_char (const char *p, const char** next)
{
  const unsigned char *bytes = (const unsigned char *) p;
  unsigned char lead = bytes[0];
  unsigned int value = 0;
  int len = 0;
  int i = 0;

  /* Classify the lead byte: sequence length and payload bits. */
  if (lead < 0x80)
    {
      len = 1;
      value = lead;
    }
  else if ((lead & 0xe0) == 0xc0)
    {
      len = 2;
      value = lead & 0x1f;
    }
  else if ((lead & 0xf0) == 0xe0)
    {
      len = 3;
      value = lead & 0x0f;
    }
  else if ((lead & 0xf8) == 0xf0)
    {
      len = 4;
      value = lead & 0x07;
    }
  else if ((lead & 0xfc) == 0xf8)
    {
      len = 5;
      value = lead & 0x03;
    }
  else if ((lead & 0xfe) == 0xfc)
    {
      len = 6;
      value = lead & 0x01;
    }
  else
    {
      /* Stray continuation byte, 0xfe or 0xff: skip just this byte. */
      if (next != NULL)
        {
          *next = p + 1;
        }
      return (unsigned short) -1;
    }

  for (i = 1; i < len; i++)
    {
      if ((bytes[i] & 0xc0) != 0x80)
        {
          /* Resume at the offending byte; it may be the NUL terminator. */
          if (next != NULL)
            {
              *next = p + i;
            }
          return (unsigned short) -1;
        }
      value = (value << 6) | (bytes[i] & 0x3f);
    }

  if (next != NULL)
    {
      *next = p + len;
    }

  return (unsigned short) value;
}

/*
 * Decode the character that ends immediately before p.
 *
 * Walks backwards from p over at most seven bytes until a byte that is not
 * a UTF-8 continuation byte (10xxxxxx) is found, and decodes from there
 * with utf8_get_char().  When prev is not NULL it receives the start of
 * that character.  If all seven bytes are continuation bytes, *prev is set
 * to p and 0 is returned.
 *
 * The caller must guarantee that the bytes before p are readable; no lower
 * bound is checked here.
 */
unsigned short utf8_get_prev_char (const char *p, const char** prev)
{
	int back = 1;

	while(back < 8)
	{
		const char* candidate = p - back;
		unsigned char byte = (unsigned char)*candidate;

		if((byte & 0xC0) != 0x80)
		{
			/* Not a continuation byte: this is where the character starts. */
			if(prev != NULL)
			{
				*prev = candidate;
			}

			return utf8_get_char(candidate, NULL);
		}

		back++;
	}

	if(prev != NULL)
	{
		*prev = p;
	}

	return 0;
}

/*
 * Count the UTF-8 characters in the first `length` bytes of str.
 *
 * Counting stops at the NUL terminator or at the first character that
 * would end beyond `length` bytes.  A byte that utf8_get_char() rejects
 * without moving its cursor is counted as one character and skipped, so
 * malformed input can no longer make this loop spin forever.
 *
 * Returns the number of characters, or 0 when str is NULL.
 */
int utf8_count_char(const char *str, int length)
{
	int nr = 0;
	const char* iter = str;
	return_val_if_fail(str != NULL, 0);

	for(;;)
	{
		const char* next = iter;

		if(utf8_get_char(iter, &next) == 0)
		{
			break;
		}

		if(next <= iter)
		{
			/* The decoder made no progress: step over the bad byte. */
			next = iter + 1;
		}

		if((next - str) > length)
		{
			break;
		}

		iter = next;
		nr++;
	}

	return nr;
}

/*
 * Encode the code unit c as UTF-8.
 *
 *   c      - value to encode.
 *   outbuf - destination for the encoded bytes, or NULL to only compute the
 *            length.  No terminating NUL is written.
 *
 * Returns the number of bytes the encoding occupies.
 */
int unichar_to_utf8 (unsigned short c, char* outbuf)
{
  /* Marker bits of the lead byte, indexed by sequence length. */
  static const int lead_bits[] = { 0, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  size_t nbytes = 0;
  int pos = 0;

  if (c < 0x80)
    nbytes = 1;
  else if (c < 0x800)
    nbytes = 2;
  else if (c < 0x10000)
    nbytes = 3;
  else if (c < 0x200000)
    nbytes = 4;
  else if (c < 0x4000000)
    nbytes = 5;
  else
    nbytes = 6;

  if (outbuf)
    {
      /* Fill the continuation bytes from the end, six payload bits each. */
      pos = (int) nbytes;
      while (--pos > 0)
	{
	  outbuf[pos] = (c & 0x3f) | 0x80;
	  c >>= 6;
	}
      /* Whatever is left of c goes into the lead byte. */
      outbuf[0] = c | lead_bits[nbytes];
    }

  return (int) nbytes;
}

/*
 * Combine a UTF-16 surrogate pair (h = high, l = low) into the code point
 * it encodes.  The arithmetic is done in unsigned long so the result, which
 * is always >= 0x10000, is not truncated.
 */
#define SURROGATE_VALUE(h,l) \
  ((((unsigned long)(h)) - 0xd800UL) * 0x400UL + (((unsigned long)(l)) - 0xdc00UL) + 0x10000UL)

/*
 * Convert a UTF-16 string to UTF-8 in a caller-supplied buffer.
 *
 *   str     - UTF-16 input; conversion stops at the first 0 code unit.
 *   len     - maximum number of code units to read, or a negative value to
 *             read up to the terminating 0.
 *   utf8    - output buffer.
 *   out_len - size of the output buffer in bytes; it must be larger than the
 *             encoded length so that the terminating NUL fits.
 *
 * Returns utf8 on success.  Returns NULL, leaving the buffer untouched, when
 * an argument is NULL, the input contains an unpaired surrogate, or the
 * buffer is too small.
 *
 * Surrogate pairs are decoded into a wide code point and written as a
 * four-byte sequence; unichar_to_utf8() only takes 16-bit values and is
 * used for everything else.
 */
char* utf16_to_utf8 (const unsigned short  *str, long len, char* utf8, int out_len)
{
  const unsigned short *in;
  char *out;
  int n_bytes = 0;
  unsigned short high_surrogate = 0;

  return_val_if_fail (str != NULL && utf8 != NULL, NULL);

  /* Pass 1: validate the input and compute the encoded length. */
  for (in = str; (len < 0 || in - str < len) && *in; in++)
    {
      unsigned short c = *in;
      unsigned long wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
	{
	  if (!high_surrogate)
	    {
	      ftk_loge("Invalid sequence in conversion input");
	      return NULL;
	    }
	  wc = SURROGATE_VALUE (high_surrogate, c);
	  high_surrogate = 0;
	}
      else
	{
	  if (high_surrogate)
	    {
	      ftk_loge("Invalid sequence in conversion input");
	      return NULL;
	    }

	  if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	    {
	      /* Remember it; the length is added once the pair is complete. */
	      high_surrogate = c;
	      continue;
	    }

	  wc = c;
	}

      n_bytes += UTF8_LENGTH (wc);
    }

  if (high_surrogate)
    {
      ftk_loge("Partial character sequence at end of input");
      return NULL;
    }

  /* Checked at run time: an assert would vanish in NDEBUG builds. */
  if (out_len <= n_bytes)
    {
      ftk_loge("Output buffer too small for conversion");
      return NULL;
    }

  /* Pass 2: the input is known to be valid, so just encode it. */
  out = utf8;
  for (in = str; out < utf8 + n_bytes; in++)
    {
      unsigned short c = *in;

      if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	{
	  high_surrogate = c;
	  continue;
	}

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate completes the pair */
	{
	  unsigned long wc = SURROGATE_VALUE (high_surrogate, c);

	  high_surrogate = 0;
	  out[0] = (char) (0xf0 | (wc >> 18));
	  out[1] = (char) (0x80 | ((wc >> 12) & 0x3f));
	  out[2] = (char) (0x80 | ((wc >> 6) & 0x3f));
	  out[3] = (char) (0x80 | (wc & 0x3f));
	  out += 4;
	}
      else
	{
	  out += unichar_to_utf8 (c, out);
	}
    }

  *out = '\0';

  return utf8;
}

/*
 * Value of a single hexadecimal digit.
 *
 * Accepts '0'..'9', 'A'..'F' and 'a'..'f'; any other character yields 0.
 */
static int ftk_hex_to_int(char c)
{
	int value = 0;

	if(c >= '0' && c <= '9')
	{
		value = c - '0';
	}
	else if(c >= 'a' && c <= 'f')
	{
		value = 10 + (c - 'a');
	}
	else if(c >= 'A' && c <= 'F')
	{
		value = 10 + (c - 'A');
	}

	return value;
}

/*
 * Parse the two hexadecimal digits at value[0] and value[1] into one
 * number (first digit is the high nibble).  The caller must make sure both
 * characters are readable; characters that are not hex digits count as 0.
 */
static int ftk_parse_color_1(const char* value)
{
	return ftk_hex_to_int(value[0]) * 16 + ftk_hex_to_int(value[1]);	
}

/*
 * Parse a color written as eight hexadecimal digits in the order
 * alpha, red, green, blue (two digits per channel).
 *
 * Returns the parsed color.  When value is NULL or shorter than eight
 * characters the zero-initialized color is passed to return_val_if_fail
 * as the failure value.
 */
FtkColor ftk_parse_color( const char* value)
{
	FtkColor parsed = {0};
	const char* pair = value;

	return_val_if_fail(value != NULL && strlen(value) >= 8, parsed);

	/* Consume one two-digit channel at a time. */
	parsed.a = ftk_parse_color_1(pair);
	pair += 2;
	parsed.r = ftk_parse_color_1(pair);
	pair += 2;
	parsed.g = ftk_parse_color_1(pair);
	pair += 2;
	parsed.b = ftk_parse_color_1(pair);

	return parsed;
}

/* True when ch ends a path component: '/', '\\' or the terminating NUL. */
#define IS_COMPONENT_END(ch) ((ch) == '/' || (ch) == '\\' || (ch) == '\0')

/* path starts with the component "." */
#define IS_CURRENT(path) ((path)[0] == '.' && IS_COMPONENT_END((path)[1]))
/* path starts with the component "~" */
#define IS_HOME(path) ((path)[0] == '~' && IS_COMPONENT_END((path)[1]))
/* path starts with the component ".." */
#define IS_PARENT(path) ((path)[0] == '.' && (path)[1] == '.' && \
	IS_COMPONENT_END((path)[2]))

/*
 * Leave the enclosing loop when str points at the terminating NUL.
 * Expands to a bare `if ... break;`, so it is only usable directly inside
 * a loop body.
 */
#define BREAK_IF_LAST(str) if(*(str) == '\0') break;

/*
 * Write a normalized copy of path_in into path_out and return path_out.
 *
 * Visible effects of the loop below:
 *   - a leading "." is replaced by what ftk_getcwd() stores in path_out
 *     (presumably the current working directory);
 *   - when LINUX is defined, a leading "~" is replaced by $HOME and any
 *     other path that does not start with '/' is prefixed with the
 *     ftk_getcwd() result and a '/';
 *   - runs of '/' and '\\' collapse into a single '/';
 *   - "." and "~" components inside the path are dropped;
 *   - ".." removes the previously emitted component when the output
 *     currently ends with '/', otherwise the path is logged as invalid;
 *   - finally every separator is rewritten to FTK_PATH_DELIM.
 *
 * return_val_if_fail is given NULL as the failure value when either
 * argument is NULL.
 *
 * NOTE(review): the output bound is only tested at the bottom of each
 * iteration (out_index >= FTK_MAX_PATH); the seeding branches (ftk_strcpy of
 * $HOME, the two extra bytes appended after ftk_getcwd) are not bounded
 * here - confirm that callers cannot overflow path_out.
 */
char* normalize_path(const char* path_in, char path_out[FTK_MAX_PATH+1])
{
	int i = 0;
	int in_index = 0;
	int out_index = 0;

	return_val_if_fail(path_in != NULL && path_out != NULL, NULL);
	
	path_out[0] = '\0';
	for(in_index = 0; path_in[in_index] != '\0'; in_index++)
	{
		/* The first character decides how the output is seeded. */
		if(in_index == 0)
		{
			if(IS_CURRENT(path_in)) 
			{
				/* Leading ".": start from the ftk_getcwd() result; the '.' itself is not copied. */
				ftk_getcwd(path_out, FTK_MAX_PATH);
				out_index = strlen(path_out);
				continue;
			}
#ifdef LINUX			
			else if(IS_HOME(path_in))
			{
				/* Leading "~": start from $HOME; if it is unset the output stays empty. */
				const char* home = getenv("HOME");
				if(home != NULL)
				{
					ftk_strcpy(path_out, home);
					out_index = strlen(path_out);
				}
				continue;
			}	
			else if(path_in[0] != '/')
			{
				/* Relative path: ftk_getcwd() result, a '/', then the first character. */
				ftk_getcwd(path_out, FTK_MAX_PATH);
				out_index = strlen(path_out);
				path_out[out_index++] = '/';
				path_out[out_index++] = path_in[in_index];
				continue;
			}
#endif		
		}

		if(path_in[in_index] == '\\' || path_in[in_index] == '/')
		{
			/* Emit one '/' per run of separators. */
			if(out_index == 0 || path_out[out_index - 1] != '/')
			{
				path_out[out_index++] = '/';
			}
		}
		else if(IS_CURRENT(path_in+in_index) || IS_HOME(path_in+in_index))
		{
			/*
			 * Skip the "." / "~" component; in_index now sits on the byte
			 * after it and the loop increment steps over that byte as well.
			 */
			in_index++;
			BREAK_IF_LAST(path_in+in_index);
		}
		else if(IS_PARENT(path_in+in_index))
		{
			if(out_index > 1)
			{
				if(path_out[out_index - 1] == '/')
				{
					/*
					 * Drop the trailing '/' and scan back to just after the
					 * previous '/'.  in_index is not advanced here: the
					 * second '.' is consumed on the next iteration by the
					 * IS_CURRENT branch above.
					 * NOTE(review): the scan has no lower bound; if the
					 * output holds no earlier '/', out_index - 1 appears
					 * to go negative - confirm.
					 */
					for(--out_index; path_out[out_index - 1] != '/'; out_index--);
				}
				else
				{
					ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
					in_index += 2;
				}
			}
			else
			{
				/* Nothing to go up from. */
				ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
				in_index += 2;
			}
			BREAK_IF_LAST(path_in+in_index);
		}
		else 
		{
			/* Ordinary character: copy it through. */
			path_out[out_index++] = path_in[in_index];
		}

		/* Stop copying once the output has reached FTK_MAX_PATH bytes. */
		if(out_index >= FTK_MAX_PATH)
		{
			break;
		}
	}

	path_out[out_index] = '\0';

	/* Rewrite every separator to FTK_PATH_DELIM. */
	for(i = 0; i < out_index; i++)
	{
		if(path_out[i] == '\\' || path_out[i] == '/')
		{
			path_out[i] = FTK_PATH_DELIM;
		}
	}

	return path_out;
}

/*
 * Normalize path in place.
 *
 * The path is normalized into a temporary buffer with normalize_path() and
 * then copied back over the input with ftk_strncpy(), limited to
 * FTK_MAX_PATH bytes.  Returns path; NULL is the failure value handed to
 * return_val_if_fail when path is NULL.
 */
const char* ftk_normalize_path(char path[FTK_MAX_PATH+1])
{
	char normalized[FTK_MAX_PATH+1] = {0};

	return_val_if_fail(path != NULL, NULL);

	/* Work in a scratch buffer so the input is not overwritten while it is read. */
	normalize_path(path, normalized);
	ftk_strncpy(path, normalized, FTK_MAX_PATH);

	return path;
}

/*
 * Advance nr UTF-8 characters from str.
 *
 * Returns a pointer to the character nr positions after str, or a pointer
 * to the terminating NUL when the string has fewer than nr characters.
 * NULL is the failure value handed to return_val_if_fail when str is NULL.
 *
 * The walk never crosses the NUL terminator: it stops once the terminator
 * is reached, and the step taken for each character is clamped so that a
 * truncated multi-byte sequence at the end of the string cannot carry the
 * cursor past it.  Every step moves at least one byte, so malformed bytes
 * cannot stall the loop.
 */
const char* utf8_move_forward(const char* str, int nr)
{
	int i = 0;
	const char* next = str;

	return_val_if_fail(str != NULL, NULL);

	for(i = 0; i < nr && *next != '\0'; i++)
	{
		const char* cur = next;
		const char* limit = cur;

		utf8_get_char(cur, &limit);

		/* Move at least one byte, at most to limit, and never over a NUL. */
		next = cur + 1;
		while(next < limit && *next != '\0')
		{
			next++;
		}
	}

	return next;
}

#ifdef USE_LINEBREAK
#include "linebreak/linebreak.h"
/*
 * Pick the position at which the line [start, end] should be broken, using
 * the linebreak library.
 *
 *   start - first byte of the line.
 *   end   - proposed break position.
 *
 * Returns end unchanged when the characters around it already form a line
 * ending (or end is the end of the text), when the span is too short to
 * contain an earlier break, or when it does not fit the local break
 * buffer.  Otherwise returns the position after the last allowed or
 * mandatory break found by set_linebreaks_utf8(), or end if none exists.
 */
const char* ftk_line_break(const char* start, const char* end)
{
	unsigned short c1 = 0;
	unsigned short c2 = 0;
	static int linebreak_inited = 0;

	if(linebreak_inited == 0)
	{
		init_linebreak();
		linebreak_inited = 1;
	}

	c2 = utf8_get_char(end, NULL);
	c1 = utf8_get_prev_char(end, NULL);

	if(c1 != '\n' && c1 != '\r' && c2 != '\0' && c2 != '\n' && c2 != '\r')
	{
		size_t i = 0;
		size_t len = 0;
		char brks[256] = {0};

		if(end <= start)
		{
			/* Fewer than two bytes: there is no earlier break to find. */
			return end;
		}

		len = (size_t)(end - start) + 1;
		if(len >= sizeof(brks))
		{
			/* Checked at run time; an assert would vanish in NDEBUG builds. */
			return end;
		}

		set_linebreaks_utf8((const utf8_t*)start, len, "zh", brks);

		/* len >= 2 here, so len - 2 cannot wrap around. */
		for(i = len - 2; i > 0; i--)
		{
			if(brks[i] == LINEBREAK_ALLOWBREAK || brks[i] == LINEBREAK_MUSTBREAK)
			{
				end = start + i + 1;
				break;
			}
		}
	}

	return end;
}
#else
/*
 * Decide whether a line may be broken between the characters c1 and c2.
 *
 * Returns 1 when either character is above 0x80.  Otherwise returns 0 when
 * both are digits or both are letters (so numbers and words stay in one
 * piece), and 1 in every other case.
 */
int ftk_can_break(unsigned short c1, unsigned short c2)
{
	int both_digits = 0;
	int both_letters = 0;

	if(c1 > 0x80 || c2 > 0x80)
	{
		return 1;
	}

	both_digits = isdigit(c1) && isdigit(c2);
	both_letters = isalpha(c1) && isalpha(c2);

	return !(both_digits || both_letters);
}

/*
 * Pick the position at which the line [start, end] should be broken.
 *
 * If the character before end is a line ending, or the character at end is
 * a line ending or the terminating NUL, end is returned unchanged.
 * Otherwise the position is moved backwards one character at a time until
 * ftk_can_break() allows a break between the neighbouring characters or
 * start is reached, and that position is returned.
 */
const char* ftk_line_break(const char* start, const char* end)
{
	const char* cursor = end;
	const char* after = NULL;
	unsigned short before_ch = 0;
	unsigned short after_ch = 0;

	after_ch = utf8_get_char(cursor, &after);
	before_ch = utf8_get_prev_char(cursor, NULL);

	if(before_ch == '\n' || before_ch == '\r'
		|| after_ch == '\0' || after_ch == '\n' || after_ch == '\r')
	{
		return end;
	}

	while(!ftk_can_break(before_ch, after_ch) && cursor > start)
	{
		/* Slide the (before, after) window one character to the left. */
		after = cursor;
		after_ch = before_ch;
		before_ch = utf8_get_prev_char(after, &cursor);
	}

	return cursor;
}
#endif

/*
 * Interpret a string as a boolean.
 *
 * Returns 0 for NULL, for any string starting with '0', and for the exact
 * strings "false" and "no"; returns 1 for everything else (including the
 * empty string).
 */
int ftk_str2bool(const char* str)
{
	static const char* const false_words[] = { "false", "no" };
	size_t k = 0;

	if(str == NULL || str[0] == '0')
	{
		return 0;
	}

	for(k = 0; k < sizeof(false_words) / sizeof(false_words[0]); k++)
	{
		if(strcmp(str, false_words[k]) == 0)
		{
			return 0;
		}
	}

	return 1;
}

/*
 * Concatenate a NULL-terminated list of strings into str.
 *
 *   str   - destination buffer.
 *   len   - size of the destination buffer in bytes.
 *   first - first string to copy; the variable arguments that follow are
 *           further strings, ended by a NULL pointer.
 *
 * The result is always NUL-terminated; input that does not fit is cut off
 * after len - 1 bytes.  Returns str; NULL is the failure value handed to
 * return_val_if_fail when str is NULL or len is not positive.
 */
char* ftk_strs_cat(char* str, int len, const char* first, ...)
{
	va_list args;
	size_t used = 0;
	const char* piece = first;
	return_val_if_fail(str != NULL && len > 0, NULL);

	va_start(args, first);
	while(piece != NULL && used < len)
	{
		/* Copy the current piece until it ends or the buffer is full. */
		while(used < len && *piece)
		{
			str[used] = *piece;
			piece++;
			used++;
		}

		piece = va_arg(args, char*);
	}
	va_end(args);

	if(used < len)
	{
		str[used] = '\0';
	}
	else
	{
		/* Buffer full: sacrifice the last byte for the terminator. */
		str[len-1] = '\0';
	}

	return str;
}

/*
 * Parse an unsigned run of digits in base 8, 10 or 16.
 *
 *   str  - text to parse; no sign, prefix or leading blanks are handled.
 *   end  - optional; receives the position of the first character that was
 *          not consumed.
 *   base - 8, 10 or 16.
 *
 * At most ten characters are examined.  Parsing stops at the first
 * character that is not a digit of the requested base.  Returns the parsed
 * value; 0 is the failure value handed to return_val_if_fail for a NULL
 * str or an unsupported base.
 */
static long  ftk_strtol_internal(const char* str, const char **end, int base)
{
	int pos = 0;
	long value = 0;
	return_val_if_fail(str != NULL && (base == 10 || base == 8 || base == 16), 0);

	for(pos = 0; str[pos] && pos < 10; pos++)
	{
		char ch = str[pos];
		int digit = 0;

		if(ch >= '0' && ch <= '9')
		{
			digit = ch - '0';
		}
		else if(ch >= 'a' && ch <= 'f')
		{
			digit = ch - 'a' + 10;
		}
		else if(ch >= 'A' && ch <= 'F')
		{
			digit = ch - 'A' + 10;
		}
		else
		{
			break;
		}

		/* A hex letter in base 10/8, or '8'/'9' in base 8, ends the number. */
		if(digit >= base)
		{
			break;
		}

		value = value * base + digit;
	}

	if(end != NULL)
	{
		*end = str + pos;
	}

	return value;
}

/*
 * Convert text to a long.
 *
 * Leading spaces and tabs are skipped and one optional '+' or '-' sign is
 * accepted; the digits are parsed by ftk_strtol_internal(), which also
 * sets *end when end is not NULL.  0 is the failure value handed to
 * return_val_if_fail when str is NULL.
 */
long  ftk_strtol(const char* str, const char **end, int base)
{
	long magnitude = 0;
	int negative = 0;
	return_val_if_fail(str != NULL, 0);

	for(; *str == ' ' || *str == '\t'; str++)
	{
	}

	switch(*str)
	{
		case '-':
			negative = 1;
			/* fallthrough */
		case '+':
			str++;
			break;
		default:
			break;
	}

	magnitude = ftk_strtol_internal(str, end, base);

	if(negative)
	{
		return -magnitude;
	}

	return magnitude;
}

/*
 * Convert decimal text to an int by calling ftk_strtol() with base 10.
 * The long result is narrowed to int by the return statement.
 */
int   ftk_atoi(const char* str)
{
	return  ftk_strtol(str, NULL, 10);
}

/*
 * Convert text of the form [+|-]digits[.digits] to a double.
 *
 * The integer part is parsed by ftk_strtol_internal().  The fraction is
 * accumulated digit by digit together with its scale, so leading zeros are
 * kept ("1.05" yields 1.05, not 1.5).  Only the first 15 fractional digits
 * contribute to the value; further digits are skipped.
 *
 * No leading blanks and no exponent are handled.  0 is the failure value
 * handed to return_val_if_fail when str is NULL.
 */
double ftk_atof(const char* str)
{
	long n = 0;
	int neg = 0;
	int digits = 0;
	double frac = 0;
	double scale = 1;
	double result = 0;
	const char* p = NULL;
	return_val_if_fail(str != NULL, 0);

	if(str[0] == '+' || str[0] == '-')
	{
		neg = str[0] == '-';
		str++;
	}

	n = ftk_strtol_internal(str, &p, 10);

	if(p != NULL && *p == '.')
	{
		for(p++; *p >= '0' && *p <= '9'; p++)
		{
			/* A double cannot represent more than about 15 decimal digits. */
			if(digits < 15)
			{
				frac = frac * 10 + (*p - '0');
				scale = scale * 10;
				digits++;
			}
		}
	}

	result = n + frac / scale;

	return neg ? -result : result;
}

/*
 * Write the decimal representation of n into str.
 *
 *   str - destination buffer.
 *   len - size of the destination in bytes; must be greater than 2 and
 *         greater than the number of bytes needed including the NUL.
 *   n   - value to convert.
 *   end - optional; receives a pointer to the terminating NUL.
 *
 * Returns str.  NULL is the failure value handed to return_val_if_fail when
 * str is NULL or the buffer is too small; nothing is written to str in that
 * case.
 *
 * The magnitude is computed in unsigned arithmetic so that INT_MIN, whose
 * negation does not fit in an int, is converted correctly.
 */
static const char* ftk_itoa_simple(char* str, int len, int n, const char** end)
{
	int i = 0;
	int need_len = 0;
	unsigned int magnitude = 0;
	unsigned int rest = 0;

	return_val_if_fail(str != NULL && len > 2, NULL);

	if(n == 0)
	{
		str[0] = '0';
		str[1] = '\0';

		if(end != NULL)
		{
			*end = str + 1;
		}

		return str;
	}

	if(n < 0)
	{
		/* 0u - (unsigned)n is well defined for every n, including INT_MIN. */
		magnitude = 0u - (unsigned int)n;
		need_len++; /*for the sign*/
	}
	else
	{
		magnitude = (unsigned int)n;
	}

	for(rest = magnitude; rest > 0; rest /= 10)
	{
		need_len++;
	}

	need_len++; /*for null char*/
	return_val_if_fail(len > (need_len), NULL);

	if(n < 0)
	{
		str[0] = '-';
	}

	/* Digits are produced least significant first, so fill from the back. */
	i = need_len - 2;
	for(rest = magnitude; rest > 0; rest /= 10)
	{
		str[i--] = (char)('0' + (rest % 10));
	}
	str[need_len - 1] = '\0';

	if(end != NULL)
	{
		*end = str + need_len - 1;
	}

	return str;
}

/*
 * Write the decimal representation of n into the len-byte buffer str.
 * Forwards to ftk_itoa_simple() without requesting the end pointer and
 * returns whatever that function returns.
 */
const char* ftk_itoa(char* str, int len, int n)
{
	return ftk_itoa_simple(str, len, n, NULL);
}

/*
 * Format value as decimal text with at most six fractional digits.
 *
 *   str   - destination buffer.
 *   len   - size of the destination in bytes.
 *   value - number to format; its integer part must fit in an int.
 *
 * The fraction is truncated (not rounded) to six digits and trailing zeros
 * are removed; when nothing remains, no decimal point is written.  Leading
 * zeros of the fraction are preserved (1.05 -> "1.05") and the sign is kept
 * for values between -1 and 0 (-0.5 -> "-0.5").
 *
 * The result is always NUL-terminated.  Returns str, or NULL when str is
 * NULL, len is not positive, or the buffer cannot hold the sign, the
 * integer part and the decimal point.
 */
const char* ftk_ftoa(char* str, int len, double value)
{
	int i = 0;
	int n = 0;
	int f = 0;
	int neg = value < 0;
	double magnitude = neg ? -value : value;
	char str_n[32] = {0};
	char str_f[10] = {0};

	return_val_if_fail(str != NULL && len > 0, NULL);

	/* Work on the magnitude; the sign is added back when concatenating. */
	n = (int)magnitude;
	f = (int)((magnitude - n) * 1000000000);
	if(f < 0)
	{
		f = 0;
	}
	else if(f > 999999999)
	{
		f = 999999999;
	}

	ftk_itoa(str_n, sizeof(str_n), n);

	/* Render the fraction as exactly nine digits so leading zeros survive. */
	for(i = 8; i >= 0; i--)
	{
		str_f[i] = (char)('0' + (f % 10));
		f = f / 10;
	}

	/* Keep six digits, then strip zeros from the end only. */
	str_f[6] = '\0';
	for(i = 5; i >= 0 && str_f[i] == '0'; i--)
	{
		str_f[i] = '\0';
	}

	if(str_f[0] == '\0')
	{
		/* No fraction left: print the integer part (no "-0"). */
		return ftk_strs_cat(str, len, (neg && n != 0) ? "-" : "", str_n, NULL);
	}

	return_val_if_fail(len > (int)(strlen(str_n) + 1 + neg), NULL);

	return ftk_strs_cat(str, len, neg ? "-" : "", str_n, ".", str_f, NULL);
}

/*
 * Copy the NUL-terminated string src into dst; a direct wrapper around
 * strcpy().  No bounds are checked, so dst must be large enough for src
 * including its terminator.  Returns dst.
 */
char* ftk_strcpy(char* dst, const char* src)
{
	return strcpy(dst, src);
}
Summary ✨

This C file collects common utility functions: UTF-8 and UTF-16 decoding and encoding helpers, hexadecimal color parsing, path normalization, line-break detection, and conversions between strings and numbers (ftk_strtol, ftk_atoi, ftk_atof, ftk_itoa, ftk_ftoa), along with small string helpers such as ftk_strs_cat and ftk_str2bool. It is likely part of a larger library or framework.
Alerts (6)

Complexity hotspot; lines 367 to 372 (total complexity: 23)
367 368 369 370 371 372