/googleclient/third_party/icu38/files/source/common/ucnvhz.c
C | 591 lines | 445 code | 73 blank | 73 comment | 81 complexity | 3c65c6943143f32bea2ae2b3e133b088 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.0, LGPL-3.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, BSD-3-Clause, GPL-2.0, Apache-2.0, MIT, CPL-1.0
- /*
- **********************************************************************
- * Copyright (C) 2000-2006, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- * file name: ucnvhz.c
- * encoding: US-ASCII
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2000oct16
- * created by: Ram Viswanadha
- * 10/31/2000 Ram Implemented offsets logic function
- *
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
- #include "cmemory.h"
- #include "unicode/ucnv.h"
- #include "unicode/ucnv_cb.h"
- #include "unicode/uset.h"
- #include "ucnv_bld.h"
- #include "ucnv_cnv.h"
/*
 * Byte values and two-byte escape sequences used by the HZ converter.
 * They are spelled as hex codes rather than character literals, so the
 * values do not depend on the compiler's execution character set.
 */
#define UCNV_TILDE       0x7E /* ~ : escape introducer */
#define UCNV_OPEN_BRACE  0x7B /* { : second byte of the escape into double-byte mode */
#define UCNV_CLOSE_BRACE 0x7D /* } : second byte of the escape back to single-byte mode */

#define SB_ESCAPE    "\x7E\x7D" /* "~}" : switch to single-byte mode */
#define DB_ESCAPE    "\x7E\x7B" /* "~{" : switch to double-byte mode */
#define TILDE_ESCAPE "\x7E\x7E" /* "~~" : a literal tilde */
#define ESC_LEN      2          /* number of bytes in each escape sequence above */
/*
 * Appends `len` bytes starting at `strToAppend` to the converter output.
 *
 * Bytes that fit are stored at (args)->target[targetIndex]; once
 * targetIndex reaches targetLength the remaining bytes are queued in
 * (args)->converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * Usage notes:
 *  - targetIndex, strToAppend and len are modified in place, so they must
 *    be plain lvalues; len is left at -1 afterwards.
 *  - When (args)->offsets is non-NULL, one entry (sourceIndex - 1) is
 *    written per byte stored in the target through a variable named
 *    `offsets`, which is NOT a macro parameter: the invoking scope must
 *    declare an `int32_t *offsets` cursor itself.
 *  - No bounds check is made on charErrorBuffer.
 *  - The body is wrapped in do { } while (0) so that an invocation followed
 *    by ';' is exactly one statement (safe inside an unbraced if/else).
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
    do { \
        while ((len)-- > 0) { \
            if ((targetIndex) < (targetLength)) { \
                (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
                if ((args)->offsets != NULL) { \
                    *(offsets++) = (sourceIndex) - 1; \
                } \
                (targetIndex)++; \
            } \
            else { \
                (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = \
                    (unsigned char) *(strToAppend); \
                *(err) = U_BUFFER_OVERFLOW_ERROR; \
            } \
            (strToAppend)++; \
        } \
    } while (0)
- typedef struct{
- UConverter* gbConverter;
- int32_t targetIndex;
- int32_t sourceIndex;
- UBool isEscapeAppended;
- UBool isStateDBCS;
- UBool isTargetUCharDBCS;
- }UConverterDataHZ;
- static void
- _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
- cnv->toUnicodeStatus = 0;
- cnv->fromUnicodeStatus= 0;
- cnv->mode=0;
- cnv->fromUChar32=0x0000;
- cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
- if(cnv->extraInfo != NULL){
- uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
- ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
- }
- else {
- *errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
- static void
- _HZClose(UConverter *cnv){
- if(cnv->extraInfo != NULL) {
- ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
- if(!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo = NULL;
- }
- }
- static void
- _HZReset(UConverter *cnv, UConverterResetChoice choice){
- if(choice<=UCNV_RESET_TO_UNICODE) {
- cnv->toUnicodeStatus = 0;
- cnv->mode=0;
- if(cnv->extraInfo != NULL){
- ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
- }
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- cnv->fromUnicodeStatus= 0;
- cnv->fromUChar32=0x0000;
- if(cnv->extraInfo != NULL){
- ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
- ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
- ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
- ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
- }
- }
- }
- /**************************************HZ Encoding*************************************************
- * Rules for HZ encoding
- *
- * In ASCII mode, a byte is interpreted as an ASCII character, unless a
- * '~' is encountered. The character '~' is an escape character. By
- * convention, it must be immediately followed ONLY by '~', '{' or '\n'
- * (<LF>), with the following special meaning.
- * 1. The escape sequence '~~' is interpreted as a '~'.
- * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
- * 3. The escape sequence '~\n' is a line-continuation marker to be
- * consumed with no output produced.
- * In GB mode, characters are interpreted two bytes at a time as (pure)
- * GB codes until the escape-from-GB code '~}' is read. This code
- * switches the mode from GB back to ASCII. (Note that the escape-
- * from-GB code '~}' ($7E7D) is outside the defined GB range.)
- *
- * Source: RFC 1843 (the HZ data format); see also RFC 1842
- */
- static void
- UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
- UErrorCode* err){
- char tempBuf[2];
- const char *mySource = ( char *) args->source;
- UChar *myTarget = args->target;
- const char *mySourceLimit = args->sourceLimit;
- UChar32 targetUniChar = 0x0000;
- UChar mySourceChar = 0x0000;
- UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
- tempBuf[0]=0;
- tempBuf[1]=0;
- if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- while(mySource< mySourceLimit){
-
- if(myTarget < args->targetLimit){
-
- mySourceChar= (unsigned char) *mySource++;
- switch(mySourceChar){
- case 0x0A:
- if(args->converter->mode ==UCNV_TILDE){
- args->converter->mode=0;
-
- }
- *(myTarget++)=(UChar)mySourceChar;
- continue;
-
- case UCNV_TILDE:
- if(args->converter->mode ==UCNV_TILDE){
- *(myTarget++)=(UChar)mySourceChar;
- args->converter->mode=0;
- continue;
-
- }
- else if(args->converter->toUnicodeStatus !=0){
- args->converter->mode=0;
- break;
- }
- else{
- args->converter->mode = UCNV_TILDE;
- continue;
- }
-
-
- case UCNV_OPEN_BRACE:
- if(args->converter->mode == UCNV_TILDE){
- args->converter->mode=0;
- myData->isStateDBCS = TRUE;
- continue;
- }
- else{
- break;
- }
-
-
- case UCNV_CLOSE_BRACE:
- if(args->converter->mode == UCNV_TILDE){
- args->converter->mode=0;
- myData->isStateDBCS = FALSE;
- continue;
- }
- else{
- break;
- }
-
- default:
- /* if the first byte is equal to TILDE and the trail byte
- * is not a valid byte then it is an error condition
- */
- if(args->converter->mode == UCNV_TILDE){
- args->converter->mode=0;
- mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
- goto SAVE_STATE;
- }
-
- break;
- }
-
- if(myData->isStateDBCS){
- if(args->converter->toUnicodeStatus == 0x00){
- args->converter->toUnicodeStatus = (UChar) mySourceChar;
- continue;
- }
- else{
- tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
- tempBuf[1] = (char) (mySourceChar+0x80);
- mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
- args->converter->toUnicodeStatus =0x00;
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
- tempBuf, 2, args->converter->useFallback);
- }
- }
- else{
- if(args->converter->fromUnicodeStatus == 0x00){
- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
- mySource - 1, 1, args->converter->useFallback);
- }
- else{
- goto SAVE_STATE;
- }
- }
- if(targetUniChar < 0xfffe){
- if(args->offsets) {
- args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
- }
- *(myTarget++)=(UChar)targetUniChar;
- }
- else if(targetUniChar>=0xfffe){
- SAVE_STATE:
- if(targetUniChar == 0xfffe){
- *err = U_INVALID_CHAR_FOUND;
- }
- else{
- *err = U_ILLEGAL_CHAR_FOUND;
- }
- if(myData->isStateDBCS){
- /* this should never occur since isStateDBCS is set to true
- * only after tempBuf[0] and tempBuf[1]
- * are set to the input .. just to please BEAM
- */
- if(tempBuf[0]==0 || tempBuf[1]==0){
- *err = U_INTERNAL_PROGRAM_ERROR;
- }else{
- args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
- args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
- args->converter->toULength=2;
- }
- }
- else{
- args->converter->toUBytes[0] = (uint8_t)mySourceChar;
- args->converter->toULength=1;
- }
- break;
- }
- }
- else{
- *err =U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- args->target = myTarget;
- args->source = mySource;
- }
- static void
- UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
- UErrorCode * err){
- const UChar *mySource = args->source;
- char *myTarget = args->target;
- int32_t* offsets = args->offsets;
- int32_t mySourceIndex = 0;
- int32_t myTargetIndex = 0;
- int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
- int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
- int32_t length=0;
- uint32_t targetUniChar = 0x0000;
- UChar32 mySourceChar = 0x0000;
- UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
- UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
- UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
- int len =0;
- const char* escSeq=NULL;
-
- if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
- goto getTrail;
- }
- /*writing the char to the output stream */
- while (mySourceIndex < mySourceLength){
- targetUniChar = missingCharMarker;
- if (myTargetIndex < targetLength){
-
- mySourceChar = (UChar) mySource[mySourceIndex++];
-
- oldIsTargetUCharDBCS = isTargetUCharDBCS;
- if(mySourceChar ==UCNV_TILDE){
- /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
- len = ESC_LEN;
- escSeq = TILDE_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- continue;
- }
- else{
- length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
- mySourceChar,&targetUniChar,args->converter->useFallback);
- }
- /* only DBCS or SBCS characters are expected*/
- /* DB haracters with high bit set to 1 are expected */
- if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
- targetUniChar= missingCharMarker;
- }
- if (targetUniChar != missingCharMarker){
- myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
- if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
- /*Shifting from a double byte to single byte mode*/
- if(!isTargetUCharDBCS){
- len =ESC_LEN;
- escSeq = SB_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- myConverterData->isEscapeAppended = TRUE;
- }
- else{ /* Shifting from a single byte to double byte mode*/
- len =ESC_LEN;
- escSeq = DB_ESCAPE;
- CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
- myConverterData->isEscapeAppended = TRUE;
-
- }
- }
-
- if(isTargetUCharDBCS){
- if( myTargetIndex <targetLength){
- myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
- if(myTargetIndex < targetLength){
- myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }else{
- if( myTargetIndex <targetLength){
- myTarget[myTargetIndex++] = (char) (targetUniChar );
- if(offsets){
- *(offsets++) = mySourceIndex-1;
- }
-
- }else{
- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
- *err = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- }
- else{
- /* oops.. the code point is unassigned */
- /*Handle surrogates */
- /*check if the char is a First surrogate*/
- if(UTF_IS_SURROGATE(mySourceChar)) {
- if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
- args->converter->fromUChar32=mySourceChar;
- getTrail:
- /*look ahead to find the trail surrogate*/
- if(mySourceIndex < mySourceLength) {
- /* test the following code unit */
- UChar trail=(UChar) args->source[mySourceIndex];
- if(UTF_IS_SECOND_SURROGATE(trail)) {
- ++mySourceIndex;
- mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
- args->converter->fromUChar32=0x00;
- /* there are no surrogates in GB2312*/
- *err = U_INVALID_CHAR_FOUND;
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* no more input */
- *err = U_ZERO_ERROR;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *err=U_ILLEGAL_CHAR_FOUND;
- }
- } else {
- /* callback(unassigned) for a BMP code point */
- *err = U_INVALID_CHAR_FOUND;
- }
- args->converter->fromUChar32=mySourceChar;
- break;
- }
- }
- else{
- *err = U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- targetUniChar=missingCharMarker;
- }
- args->target += myTargetIndex;
- args->source += mySourceIndex;
- myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
- }
- static void
- _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
- UConverter *cnv = args->converter;
- UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
- char *p;
- char buffer[4];
- p = buffer;
-
- if( convData->isTargetUCharDBCS){
- *p++= UCNV_TILDE;
- *p++= UCNV_CLOSE_BRACE;
- convData->isTargetUCharDBCS=FALSE;
- }
- *p++= (char)cnv->subChars[0];
- ucnv_cbFromUWriteBytes(args,
- buffer, (int32_t)(p - buffer),
- offsetIndex, err);
- }
- /*
- * Structure for cloning an HZ converter into a single memory block.
- * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
- * and then ucnv_safeClone() of the sub-converter may additionally align
- * subCnv inside the cloneHZStruct, for which we need the deadSpace after
- * subCnv. This is because UAlignedMemory may be larger than the actually
- * necessary alignment size for the platform.
- * The other cloneHZStruct fields will not be moved around,
- * and are aligned properly with cloneHZStruct's alignment.
- */
- struct cloneHZStruct
- {
- UConverter cnv;
- UConverter subCnv;
- UAlignedMemory deadSpace;
- UConverterDataHZ mydata;
- };
- static UConverter *
- _HZ_SafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
- {
- struct cloneHZStruct * localClone;
- int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
- if (U_FAILURE(*status)){
- return 0;
- }
- if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return 0;
- }
- localClone = (struct cloneHZStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = TRUE;
- /* deep-clone the sub-converter */
- size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
- ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
- ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
- return &localClone->cnv;
- }
- static void
- _HZ_GetUnicodeSet(const UConverter *cnv,
- const USetAdder *sa,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- /* the tilde '~' is hardcoded in the converter */
- sa->add(sa->set, 0x7e);
- /* add all of the code points that the sub-converter handles */
- ((UConverterDataHZ*)cnv->extraInfo)->
- gbConverter->sharedData->impl->
- getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
- sa, which, pErrorCode);
- }
- static const UConverterImpl _HZImpl={
- UCNV_HZ,
-
- NULL,
- NULL,
-
- _HZOpen,
- _HZClose,
- _HZReset,
-
- UConverter_toUnicode_HZ_OFFSETS_LOGIC,
- UConverter_toUnicode_HZ_OFFSETS_LOGIC,
- UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
- UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
- NULL,
-
- NULL,
- NULL,
- _HZ_WriteSub,
- _HZ_SafeClone,
- _HZ_GetUnicodeSet
- };
- static const UConverterStaticData _HZStaticData={
- sizeof(UConverterStaticData),
- "HZ",
- 0,
- UCNV_IBM,
- UCNV_HZ,
- 1,
- 4,
- { 0x1a, 0, 0, 0 },
- 1,
- FALSE,
- FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
- };
-
-
- const UConverterSharedData _HZData={
- sizeof(UConverterSharedData),
- ~((uint32_t) 0),
- NULL,
- NULL,
- &_HZStaticData,
- FALSE,
- &_HZImpl,
- 0
- };
- #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */