| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #include <stdio.h> |
| #include <libxml/parser.h> |
| #include <unicode/ucnv.h> |
|
|
| #define ICU_PIVOT_BUF_SIZE 1024 |
|
|
| typedef struct { |
| UConverter *uconv; |
| UConverter *utf8; |
| UChar *pivot_source; |
| UChar *pivot_target; |
| int isInput; |
| UChar pivot_buf[ICU_PIVOT_BUF_SIZE]; |
| } myConvCtxt; |
|
|
| static xmlCharEncError |
| icuConvert(void *vctxt, unsigned char *out, int *outlen, |
| const unsigned char *in, int *inlen, int flush) { |
| myConvCtxt *cd = vctxt; |
| const char *ucv_in = (const char *) in; |
| char *ucv_out = (char *) out; |
| UConverter *target, *source; |
| UErrorCode err = U_ZERO_ERROR; |
| int ret; |
|
|
| if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
| if (outlen != NULL) |
| *outlen = 0; |
| return XML_ENC_ERR_INTERNAL; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| if (cd->isInput) { |
| source = cd->uconv; |
| target = cd->utf8; |
| } else { |
| source = cd->utf8; |
| target = cd->uconv; |
| } |
|
|
| ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen, |
| &ucv_in, ucv_in + *inlen, cd->pivot_buf, |
| &cd->pivot_source, &cd->pivot_target, |
| cd->pivot_buf + ICU_PIVOT_BUF_SIZE, |
| 0, flush, &err); |
|
|
| *inlen = ucv_in - (const char*) in; |
| *outlen = ucv_out - (char *) out; |
|
|
| if (U_SUCCESS(err)) { |
| ret = XML_ENC_ERR_SUCCESS; |
| } else { |
| switch (err) { |
| case U_TRUNCATED_CHAR_FOUND: |
| |
| ret = XML_ENC_ERR_INPUT; |
| break; |
|
|
| case U_BUFFER_OVERFLOW_ERROR: |
| ret = XML_ENC_ERR_SPACE; |
| break; |
|
|
| case U_INVALID_CHAR_FOUND: |
| case U_ILLEGAL_CHAR_FOUND: |
| case U_ILLEGAL_ESCAPE_SEQUENCE: |
| case U_UNSUPPORTED_ESCAPE_SEQUENCE: |
| ret = XML_ENC_ERR_INPUT; |
| break; |
|
|
| case U_MEMORY_ALLOCATION_ERROR: |
| ret = XML_ENC_ERR_MEMORY; |
| break; |
|
|
| default: |
| ret = XML_ENC_ERR_INTERNAL; |
| break; |
| } |
| } |
|
|
| return ret; |
| } |
|
|
| static xmlParserErrors |
| icuOpen(const char* name, int isInput, myConvCtxt **out) |
| { |
| UErrorCode status; |
| myConvCtxt *cd; |
|
|
| *out = NULL; |
|
|
| cd = xmlMalloc(sizeof(myConvCtxt)); |
| if (cd == NULL) |
| return XML_ERR_NO_MEMORY; |
|
|
| cd->isInput = isInput; |
| cd->pivot_source = cd->pivot_buf; |
| cd->pivot_target = cd->pivot_buf; |
|
|
| status = U_ZERO_ERROR; |
| cd->uconv = ucnv_open(name, &status); |
| if (U_FAILURE(status)) |
| goto error; |
|
|
| status = U_ZERO_ERROR; |
| if (isInput) { |
| ucnv_setToUCallBack(cd->uconv, UCNV_TO_U_CALLBACK_STOP, |
| NULL, NULL, NULL, &status); |
| } |
| else { |
| ucnv_setFromUCallBack(cd->uconv, UCNV_FROM_U_CALLBACK_STOP, |
| NULL, NULL, NULL, &status); |
| } |
| if (U_FAILURE(status)) |
| goto error; |
|
|
| status = U_ZERO_ERROR; |
| cd->utf8 = ucnv_open("UTF-8", &status); |
| if (U_FAILURE(status)) |
| goto error; |
|
|
| *out = cd; |
| return 0; |
|
|
| error: |
| if (cd->uconv) |
| ucnv_close(cd->uconv); |
| xmlFree(cd); |
|
|
| if (status == U_FILE_ACCESS_ERROR) |
| return XML_ERR_UNSUPPORTED_ENCODING; |
| if (status == U_MEMORY_ALLOCATION_ERROR) |
| return XML_ERR_NO_MEMORY; |
| return XML_ERR_SYSTEM; |
| } |
|
|
| static void |
| icuClose(myConvCtxt *cd) |
| { |
| if (cd == NULL) |
| return; |
| ucnv_close(cd->uconv); |
| ucnv_close(cd->utf8); |
| xmlFree(cd); |
| } |
|
|
| static void |
| icuConvCtxtDtor(void *vctxt) { |
| icuClose(vctxt); |
| } |
|
|
| static xmlParserErrors |
| icuConvImpl(void *vctxt, const char *name, xmlCharEncFlags flags, |
| xmlCharEncodingHandler **result) { |
| xmlCharEncConvFunc inFunc = NULL, outFunc = NULL; |
| myConvCtxt *inputCtxt = NULL; |
| myConvCtxt *outputCtxt = NULL; |
| xmlParserErrors ret; |
|
|
| if (flags & XML_ENC_INPUT) { |
| ret = icuOpen(name, 1, &inputCtxt); |
| if (ret != 0) |
| goto error; |
| inFunc = icuConvert; |
| } |
|
|
| if (flags & XML_ENC_OUTPUT) { |
| ret = icuOpen(name, 0, &outputCtxt); |
| if (ret != 0) |
| goto error; |
| outFunc = icuConvert; |
| } |
|
|
| return xmlCharEncNewCustomHandler(name, inFunc, outFunc, icuConvCtxtDtor, |
| inputCtxt, outputCtxt, result); |
|
|
| error: |
| if (inputCtxt != NULL) |
| icuClose(inputCtxt); |
| if (outputCtxt != NULL) |
| icuClose(outputCtxt); |
| return ret; |
| } |
|
|
| int |
| main(void) { |
| xmlParserCtxtPtr ctxt; |
| xmlDocPtr doc; |
| const char *xml; |
| xmlChar *content; |
| int ret = 0; |
|
|
| |
| |
| |
| |
| |
| |
| xml = "<doc>\xDE</doc>"; |
|
|
| ctxt = xmlNewParserCtxt(); |
| xmlCtxtSetCharEncConvImpl(ctxt, icuConvImpl, NULL); |
| doc = xmlCtxtReadDoc(ctxt, BAD_CAST xml, NULL, "IBM-1051", 0); |
| xmlFreeParserCtxt(ctxt); |
|
|
| content = xmlNodeGetContent((xmlNodePtr) doc); |
|
|
| printf("content: %s\n", content); |
|
|
| if (!xmlStrEqual(content, BAD_CAST "\xC3\x9F")) { |
| fprintf(stderr, "conversion failed\n"); |
| ret = 1; |
| } |
|
|
| xmlFree(content); |
| xmlFreeDoc(doc); |
|
|
| return ret; |
| } |
|
|
|
|