/tags/V_33_62/pfe-33/pfe/zchar-ext.c
C | 361 lines | 315 code | 9 blank | 37 comment | 2 complexity | 60632d9980aab737c1e4bcb6f3cdac7c MD5 | raw file
Possible License(s): Zlib, LGPL-2.0, AGPL-3.0, LGPL-2.1
- /**
- *
- * Copyright (C) 2000 - 2004 Guido U. Draheim <guidod@gmx.de>
- * Copyright (C) 2005 - 2006 Guido U. Draheim <guidod@gmx.de>
- *
- * @see GNU LGPL
- * @author Guido U. Draheim (modified by $Author: guidod $)
- * @version $Revision: 1.2 $
- * (modified $Date: 2006-08-11 22:56:05 $)
- *
- * @description:
- * ZCHAR-EXT wordset - defines words for zero-terminated strings,
- * the datatype prefix is called "Z" which is usually a simple CHAR.
- * And CHAR can be either BCHAR or WCHAR depending on your platform.
- * Anyway, the words in this wordset should be largely modelled
- * after the examples found in other forth implementations - most
- * prominently MPE's forths.
- */
-
- #define _P4_SOURCE 1
-
- #include <pfe/pfe-base.h>
-
- #include <pfe/os-ctype.h>
- #include <pfe/os-string.h>
- #include <pfe/def-words.h>
-
- /* --------------------------------------------------------------------- */
- /** 'Z"' ( [chars<">] -- z* )
- * scan the input to the next doublequote and create a buffer
- * that holds the chars - return the address of that zero-terminated
- * string-buffer, either =>"POCKET-PAD" or =>"ALLOT"ed into the dictionary.
- */
- FCode(p4_z_quote)
- {
- register p4_byte_t* p;
- register p4ucell n;
- p4_word_parse ('"'); *DP=0; /* PARSE-NOHERE */
- n = PFE.word.len;
- if (STATE)
- {
- FX_COMPILE (p4_z_quote);
- DP += sizeof(short);
- p = DP;
- }else{
- p = p4_pocket ();
- n = PFE.word.len < P4_POCKET_SIZE ?
- PFE.word.len : P4_POCKET_SIZE;
- }
- p4_memcpy (p, PFE.word.ptr, n); p[PFE.word.len] = '\0';
- if (STATE)
- {
- DP += n+1;
- FX (p4_align);
- ((short*)p)[-1] = (DP - p);
- }else{
- FX_PUSH(p);
- }
- }
- FCode_XE (p4_z_quote_XT)
- { FX_USE_CODE_ADDR {
- short skip = *P4_INC(IP, short);
- FX_PUSH(IP);
- P4_ADD_(IP,skip,char);
- FX_USE_CODE_EXIT;
- }}
- p4xcode* p4_z_quote_SEE(p4xcode* ip, char* p, p4_Semant* s)
- {
- int skip = *P4_INC(ip,short);
- sprintf (p, "%.*s %.*s\" ",
- NFACNT(*s->name), s->name + 1,
- (int) skip, (char*) ip);
- P4_ADD_(ip,skip,char);
- return ip;
- }
- P4COMPILES(p4_z_quote, p4_z_quote_XT, p4_z_quote_SEE, 0);
- /** ZCOUNT ( z* -- z* len )
- * push length of z-string, additionally to the string addr itself.
- : ZSTRLEN ZCOUNT NIP ;
- * (see libc strlen(3)) / compare with => COUNT / => ZSTRLEN
- */
- FCode (p4_zcount)
- {
- /* FX_PUSH (p4_strlen ((char*)(*SP))) is wrong,
- * gcc may leave unintended behaviour
- */
- register int i = p4_strlen ((char*)(*SP));
- FX_PUSH(i);
- }
- /** ZSTRLEN ( z* -- len )
- * push length of z-string.
- : ZSTRLEN ZCOUNT NIP ;
- * (see libc strlen(3)) / compare with => ZMOVE / => CMOVE
- */
- FCode (p4_zstrlen)
- {
- *SP = p4_strlen ((char*)(*SP));
- }
- /** ZMOVE ( zsrc* zdest* -- )
- * copy a zero terminated string
- * (see libc strcpy(3)) / compare with => ZSTRLEN / => COUNT
- */
- FCode (p4_zmove)
- {
- p4_strcpy ((char*)(SP[0]), (char*)(SP[1]));
- FX_2DROP;
- }
- /** APPENDZ ( caddr* u zdest* -- )
- * Add the string defined by CADDR LEN to the zero terminated string
- * at ZDEST - actually a => SYNONYM of => +ZPLACE of the => ZPLACE family
- * (see strncat(3)) / compare with => ZPLACE / => +PLACE
- */
- /** +ZPLACE ( caddr* u zdest* -- )
- * Add the string defined by CADDR LEN to the zero terminated string
- * at ZDEST - (for older scripts the => SYNONYM named => APPENDZ exists)
- * (see libc strncat(3)) / compare with => ZPLACE / => +PLACE
- */
- FCode (p4_appendz)
- {
- p4_strncat ((char*)(SP[0]), (char*)(SP[2]), (int)(SP[1]));
- FX_3DROP;
- }
- /** ZPLACE ( addr* len zaddr* -- )
- * copy string and place as 0 terminated
- * (see libc strncpy(3)) / see also => +ZPLACE / => Z+PLACE
- */
- FCode (p4_zplace)
- {
- p4_strncpy ((char*)(SP[0]), (char*)(SP[2]), (int)(SP[1]));
- FX_3DROP;
- }
- /* ------------------------------------------------------------------- */
- /*
- * helper function used by all backslash-lit-strings
- * copies a string from input buffer to output buffer
- * thereby interpreting backlash-sequences. Returns
- * the number of chars copied.
- */
- p4ucell p4_backslash_parse_into (p4char delim, p4char* dst, int max,
- int refills)
- {
- register int i, j = 0;
- register const p4char* src; p4ucell len;
- parse:
- p4_word_parse (delim); *DP=0; /* PARSE-NOHERE */
- src = PFE.word.ptr; len = PFE.word.len;
- if (! len && refills--) { if (p4_refill ()) goto parse; }
- i = 0;
- while (i < len && j < max)
- {
- if (src[i] != '\\')
- {
- dst[j++] = src[i++];
- }else{
- if (++i == len) goto parse;
- switch (src[i])
- {
- case 'n': dst[j++] = '\n'; i++; break;
- case 'r': dst[j++] = '\r'; i++; break;
- case 'b': dst[j++] = '\b'; i++; break;
- case 'a': dst[j++] = '\a'; i++; break;
- case 'f': dst[j++] = '\f'; i++; break;
- case 'v': dst[j++] = '\v'; i++; break;
- case 'e': dst[j++] = '\33'; i++; break;
- case 'i': dst[j++] = '\''; i++; break; /* extra feature */
- case 'q': dst[j++] = '\"'; i++; break; /* extra feature */
- case 'x': i++;
- if (i < len && isxdigit(src[i]))
- {
- register p4char a = src[i++]-'0';
- if (a > '9') a -= 'A'-'9'+1;
- if (i < len && isxdigit (src[i]))
- {
- a <<= 4;
- if (src[i] <= '9') a |= src[i] - '0';
- else a |= src[i] - 'A' + 10;
- }
- dst[j++] = a;
- }else{
- p4_throw (P4_ON_INVALID_NUMBER);
- }
- break;
- default:
- if (! p4_isalnum (src[i]))
- dst[j++] = src[i++];
- else if (isdigit (src[i]))
- {
- register p4char a = src[i++]-'0';
- if (i < len && isdigit (src[i]))
- { a <<= 3; a |= src[i++]-'0'; }
- if (i < len && isdigit (src[i]))
- { a <<= 3; a |= src[i++]-'0'; }
- dst[j++] = a;
- }
- else if ('A' <= src[i] && src[i] <= 'Z')
- {
- dst[j++] = src[i++] & 31;
- }else{
- p4_throw (P4_ON_INVALID_NUMBER);
- }
- }
- }
- }
- dst[j] = '\0'; return j;
- }
-
-
- /** 'C\\\"' ( [backslashed-strings_<">] -- bstr* )
- * scan the following text to create a literal just
- * like =>'C"' does, but backslashes can be used to
- * escape special chars. The rules for the backslashes
- * follow C literals, implemented techniques are
- * \n \r \b \a \f \v \e \777
- * and all non-alnum chars represent themselves, esp.
- * \" \' \ \? \! \% \( \) \[ \] \{ \} etcetera.
- * most importantly the doublequote itself can be escaped.
- * but be also informed that the usage of \' and \" is not
- * portable as some systems preferred to map [\'] into ["].
- * Here I use the experimental addition to map [\q] to ["] and [\i] to [']
- */
- FCode (p4_c_backslash_quote)
- {
- p4char* p;
- p4ucell l;
-
- if (STATE)
- {
- FX_COMPILE(p4_c_backslash_quote);
- p = DP;
- }else{
- p = p4_pocket ();
- }
- p[0] = l = p4_backslash_parse_into ('"', p+1, 255, 127);
- if (STATE)
- {
- DP += l+1;
- FX (p4_align);
- }
- FX_PUSH (p);
- }
- P4COMPILES (p4_c_backslash_quote, p4_c_quote_execution,
- P4_SKIPS_STRING, P4_DEFAULT_STYLE);
- /** 'S\\\"' ( [backslashed-strings_<">] -- str cnt )
- * scan the following text to create a literal just
- * like =>'S"' does, but backslashes can be used to
- * escape special chars. The rules for the backslashes
- * follow C literals, implemented techniques are
- * \n \r \b \a \f \v \e \777
- * and all non-alnum chars represent themselves, esp.
- * \" \' \ \? \! \% \( \) \[ \] \{ \} etcetera.
- * most importantly the doublequote itself can be escaped.
- * but be also informed that the usage of \' and \" is not
- * portable as some systems preferred to map [\'] into ["].
- * Here I use the experimental addition to map [\q] to ["] and [\i] to [']
- */
- FCode (p4_s_backslash_quote)
- {
- p4char* p;
- p4ucell l;
-
- if (STATE)
- {
- FX_COMPILE(p4_s_backslash_quote);
- p = DP;
- }else{
- p = p4_pocket ();
- }
- p[0] = l = p4_backslash_parse_into ('"', p+1, 255, 127);
- if (STATE)
- {
- DP += l+1;
- FX (p4_align);
- }
- FX_PUSH (p+1);
- FX_PUSH (l);
- }
- P4COMPILES(p4_s_backslash_quote, p4_s_quote_execution,
- P4_SKIPS_STRING, P4_DEFAULT_STYLE);
- /** 'Z\\\"' ( [backslashed-strings_<">] -- zstr* )
- * scan the following text to create a literal just
- * like =>'Z"' does, but backslashes can be used to
- * escape special chars. The rules for the backslashes
- * follow C literals, implemented techniques are
- * \n \r \b \a \f \v \e \777
- * and all non-alnum chars represent themselves, esp.
- * \" \' \ \? \! \% \( \) \[ \] \{ \} etcetera.
- * most importantly the doublequote itself can be escaped
- * but be also informed that the usage of \' and \" is not
- * portable as some systems preferred to map [\'] into ["].
- * Here I use the experimental addition to map [\q] to ["] and [\i] to [']
- */
- FCode (p4_z_backslash_quote)
- {
- p4char* p;
- p4ucell l;
-
- if (STATE)
- {
- FX_COMPILE(p4_z_backslash_quote);
- p = DP;
- l = p4_backslash_parse_into ('"', p+sizeof(short), 65535, 32767);
- }else{
- p = p4_pocket ();
- l = p4_backslash_parse_into ('"', p+sizeof(short), 254, 126);
- }
- if (STATE)
- {
- DP += l+sizeof(short);
- FX (p4_align);
- (*(short*)p) = ((p4char*)DP - p);
- }
- FX_PUSH (p+sizeof(short));
- }
- P4COMPILES(p4_z_backslash_quote, p4_z_quote_XT,
- p4_z_quote_SEE, P4_DEFAULT_STYLE);
- P4_LISTWORDS(zchar) =
- {
- P4_INTO ("FORTH", 0 ),
- P4_SXco ("Z\"", p4_z_quote),
- P4_FXco ("ZCOUNT", p4_zcount),
- P4_FXco ("ZSTRLEN", p4_zstrlen),
- P4_FXco ("ZMOVE", p4_zmove),
- P4_FXco ("ZPLACE", p4_zplace),
- P4_FXco ("+ZPLACE", p4_appendz),
- P4_FNYM ("APPENDZ", "+ZPLACE"),
- P4_SXco ("S\\\"", p4_s_backslash_quote),
- P4_SXco ("C\\\"", p4_c_backslash_quote),
- P4_SXco ("Z\\\"", p4_z_backslash_quote),
- P4_INTO ("ENVIRONMENT", 0 ),
- P4_OCON ("ZCHAR-EXT", 2000 ),
- };
- P4_COUNTWORDS(zchar, "ZCHAR-EXT - zero-terminated C-like charstrings");
- /*
- * Local variables:
- * c-file-style: "stroustrup"
- * End:
- */