|
|
#include <config.h>
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "ascii.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"
Go to the source code of this file.
Data Structures | |
struct | normal_encoding |
struct | unknown_encoding |
Defines | |
#define | IGNORE_SECTION_TOK_VTABLE |
#define | VTABLE1 |
#define | VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
#define | UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) |
#define | UTF8_GET_NAMING2(pages, byte) |
#define | UTF8_GET_NAMING3(pages, byte) |
#define | UTF8_GET_NAMING(pages, p, n) |
#define | UTF8_INVALID3(p) |
#define | UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) |
#define | utf8_isName4 isNever |
#define | utf8_isNmstrt4 isNever |
#define | utf8_isInvalid2 isNever |
#define | STANDARD_VTABLE(E) |
#define | NORMAL_VTABLE(E) |
#define | MINBPC(enc) 1 |
#define | SB_BYTE_TYPE(enc, p) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) |
#define | BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) (*(p)) |
#define | IS_NAME_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) |
#define | IS_NMSTRT_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) |
#define | IS_INVALID_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) |
#define | IS_NAME_CHAR_MINBPC(enc, p) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) (0) |
#define | CHAR_MATCHES(enc, p, c) (*(p) == c) |
#define | PREFIX(ident) normal_ ## ident |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | DEFINE_UTF16_TO_UTF8(E) |
#define | DEFINE_UTF16_TO_UTF16(E) |
#define | SET2(ptr, ch) (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
#define | GET_LO(ptr) ((unsigned char)(ptr)[0]) |
#define | GET_HI(ptr) ((unsigned char)(ptr)[1]) |
#define | SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) |
#define | GET_LO(ptr) ((unsigned char)(ptr)[1]) |
#define | GET_HI(ptr) ((unsigned char)(ptr)[0]) |
#define | LITTLE2_BYTE_TYPE(enc, p) |
#define | LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) |
#define | LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) |
#define | LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) |
#define | LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) |
#define | PREFIX(ident) little2_ ## ident |
#define | MINBPC(enc) 2 |
#define | BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) |
#define | CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) |
#define | IS_NAME_CHAR(enc, p, n) 0 |
#define | IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) |
#define | IS_NMSTRT_CHAR(enc, p, n) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | BIG2_BYTE_TYPE(enc, p) |
#define | BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) |
#define | BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) |
#define | BIG2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) |
#define | BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) |
#define | PREFIX(ident) big2_ ## ident |
#define | MINBPC(enc) 2 |
#define | BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) |
#define | BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) |
#define | CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) |
#define | IS_NAME_CHAR(enc, p, n) 0 |
#define | IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) |
#define | IS_NMSTRT_CHAR(enc, p, n) (0) |
#define | IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
#define | BT_COLON BT_NMSTRT |
#define | BT_COLON BT_NMSTRT |
#define | INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) |
#define | SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) |
#define | NS(x) x |
#define | ns(x) x |
Enumerations | |
enum | { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 } |
enum | { UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC } |
Functions | |
int | XmlUtf8Encode (int c, char *buf) |
int | XmlUtf16Encode (int charNum, unsigned short *buf) |
int | XmlSizeOfUnknownEncoding (void) |
ENCODING * | XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData) |
|
|
|
#define BIG2_BYTE_TYPE(enc, p)
Value:
((p)[0] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
 : unicode_byte_type((p)[0], (p)[1]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define DEFINE_UTF16_TO_UTF16(E)
Value:
static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define LITTLE2_BYTE_TYPE(enc, p)
Value:
((p)[1] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
 : unicode_byte_type((p)[1], (p)[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
#define NORMAL_VTABLE(E)
Value:
E ## isName2, \
E ## isName3, \
E ## isName4, \
E ## isNmstrt2, \
E ## isNmstrt3, \
E ## isNmstrt4, \
E ## isInvalid2, \
E ## isInvalid3, \
E ## isInvalid4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define UCS2_GET_NAMING(pages, hi, lo)
Definition at line 38 of file xmltok.c.
Referenced by XmlInitUnknownEncoding().
|
#define UTF8_GET_NAMING(pages, p, n)
Value:
((n) == 2 \
 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
 : ((n) == 3 \
    ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
    : 0))
|
#define UTF8_GET_NAMING2(pages, byte)
Value:
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
              + ((((byte)[0]) & 3) << 1) \
              + ((((byte)[1]) >> 5) & 1)] \
 & (1 << (((byte)[1]) & 0x1F)))
|
#define UTF8_GET_NAMING3(pages, byte)
Value:
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                       + ((((byte)[1]) >> 2) & 0xF)] \
               << 3) \
              + ((((byte)[1]) & 3) << 1) \
              + ((((byte)[2]) >> 5) & 1)] \
 & (1 << (((byte)[2]) & 0x1F)))
|
#define UTF8_INVALID3(p)
Value:
((*p) == 0xED \
 ? (((p)[1] & 0x20) != 0) \
 : ((*p) == 0xEF \
    ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
    : 0))
|
|
|
|
|
|
|
|
|
|
|
#define VTABLE1
Value:
{ PREFIX(prologTok), PREFIX(contentTok), \
  PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
|
anonymous enum { UTF8_cval1, UTF8_cval2, UTF8_cval3, UTF8_cval4 }
Definition at line 261 of file xmltok.c.
00261 { /* UTF8_cvalN is value of masked first byte of N byte sequence */
00262   UTF8_cval1 = 0x00,
00263   UTF8_cval2 = 0xc0,
00264   UTF8_cval3 = 0xe0,
00265   UTF8_cval4 = 0xf0
00266 };
|
|
anonymous enum { UNKNOWN_ENC, ISO_8859_1_ENC, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC }
Definition at line 1356 of file xmltok.c.
01356 {
01357   UNKNOWN_ENC = -1,
01358   ISO_8859_1_ENC = 0,
01359   US_ASCII_ENC,
01360   UTF_8_ENC,
01361   UTF_16_ENC,
01362   UTF_16BE_ENC,
01363   UTF_16LE_ENC,
01364   /* must match encodingNames up to here */
01365   NO_ENC
01366 };
|
|
ENCODING * XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)
Definition at line 1276 of file xmltok.c.
References BT_LEAD2, BT_NONXML, BT_OTHER, unknown_encoding::convert, normal_encoding::enc, normal_encoding::isInvalid2, normal_encoding::isInvalid3, normal_encoding::isInvalid4, normal_encoding::isName2, normal_encoding::isName3, normal_encoding::isName4, normal_encoding::isNmstrt2, normal_encoding::isNmstrt3, normal_encoding::isNmstrt4, unknown_encoding::normal, normal_encoding::type, UCS2_GET_NAMING, unknown_encoding::userData, unknown_encoding::utf16, encoding::utf16Convert, unknown_encoding::utf8, encoding::utf8Convert, and XmlUtf8Encode().
01280 {
01281   int i;
01282   struct unknown_encoding *e = mem;
01283   for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
01284     ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
01285   for (i = 0; i < 128; i++)
01286     if (latin1_encoding.type[i] != BT_OTHER
01287         && latin1_encoding.type[i] != BT_NONXML
01288         && table[i] != i)
01289       return 0;
01290   for (i = 0; i < 256; i++) {
01291     int c = table[i];
01292     if (c == -1) {
01293       e->normal.type[i] = BT_MALFORM;
01294       /* This shouldn't really get used. */
01295       e->utf16[i] = 0xFFFF;
01296       e->utf8[i][0] = 1;
01297       e->utf8[i][1] = 0;
01298     }
01299     else if (c < 0) {
01300       if (c < -4)
01301         return 0;
01302       e->normal.type[i] = BT_LEAD2 - (c + 2);
01303       e->utf8[i][0] = 0;
01304       e->utf16[i] = 0;
01305     }
01306     else if (c < 0x80) {
01307       if (latin1_encoding.type[c] != BT_OTHER
01308           && latin1_encoding.type[c] != BT_NONXML
01309           && c != i)
01310         return 0;
01311       e->normal.type[i] = latin1_encoding.type[c];
01312       e->utf8[i][0] = 1;
01313       e->utf8[i][1] = (char)c;
01314       e->utf16[i] = c == 0 ? 0xFFFF : c;
01315     }
01316     else if (checkCharRefNumber(c) < 0) {
01317       e->normal.type[i] = BT_NONXML;
01318       /* This shouldn't really get used. */
01319       e->utf16[i] = 0xFFFF;
01320       e->utf8[i][0] = 1;
01321       e->utf8[i][1] = 0;
01322     }
01323     else {
01324       if (c > 0xFFFF)
01325         return 0;
01326       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
01327         e->normal.type[i] = BT_NMSTRT;
01328       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
01329         e->normal.type[i] = BT_NAME;
01330       else
01331         e->normal.type[i] = BT_OTHER;
01332       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
01333       e->utf16[i] = c;
01334     }
01335   }
01336   e->userData = userData;
01337   e->convert = convert;
01338   if (convert) {
01339     e->normal.isName2 = unknown_isName;
01340     e->normal.isName3 = unknown_isName;
01341     e->normal.isName4 = unknown_isName;
01342     e->normal.isNmstrt2 = unknown_isNmstrt;
01343     e->normal.isNmstrt3 = unknown_isNmstrt;
01344     e->normal.isNmstrt4 = unknown_isNmstrt;
01345     e->normal.isInvalid2 = unknown_isInvalid;
01346     e->normal.isInvalid3 = unknown_isInvalid;
01347     e->normal.isInvalid4 = unknown_isInvalid;
01348   }
01349   e->normal.enc.utf8Convert = unknown_toUtf8;
01350   e->normal.enc.utf16Convert = unknown_toUtf16;
01351   return &(e->normal.enc);
01352 }
|
|
int XmlSizeOfUnknownEncoding (void)
Definition at line 1188 of file xmltok.c.
01189 {
01190   return sizeof(struct unknown_encoding);
01191 }
|
|
int XmlUtf16Encode (int charNum, unsigned short *buf)
Definition at line 1163 of file xmltok.c.
01164 {
01165   if (charNum < 0)
01166     return 0;
01167   if (charNum < 0x10000) {
01168     buf[0] = charNum;
01169     return 1;
01170   }
01171   if (charNum < 0x110000) {
01172     charNum -= 0x10000;
01173     buf[0] = (charNum >> 10) + 0xD800;
01174     buf[1] = (charNum & 0x3FF) + 0xDC00;
01175     return 2;
01176   }
01177   return 0;
01178 }
|
|
int XmlUtf8Encode (int c, char *buf)
Definition at line 1127 of file xmltok.c.
Referenced by XmlInitUnknownEncoding().
01128 {
01129   enum {
01130     /* minN is minimum legal resulting value for N byte sequence */
01131     min2 = 0x80,
01132     min3 = 0x800,
01133     min4 = 0x10000
01134   };
01135
01136   if (c < 0)
01137     return 0;
01138   if (c < min2) {
01139     buf[0] = (c | UTF8_cval1);
01140     return 1;
01141   }
01142   if (c < min3) {
01143     buf[0] = ((c >> 6) | UTF8_cval2);
01144     buf[1] = ((c & 0x3f) | 0x80);
01145     return 2;
01146   }
01147   if (c < min4) {
01148     buf[0] = ((c >> 12) | UTF8_cval3);
01149     buf[1] = (((c >> 6) & 0x3f) | 0x80);
01150     buf[2] = ((c & 0x3f) | 0x80);
01151     return 3;
01152   }
01153   if (c < 0x110000) {
01154     buf[0] = ((c >> 18) | UTF8_cval4);
01155     buf[1] = (((c >> 12) & 0x3f) | 0x80);
01156     buf[2] = (((c >> 6) & 0x3f) | 0x80);
01157     buf[3] = ((c & 0x3f) | 0x80);
01158     return 4;
01159   }
01160   return 0;
01161 }
|
Last updated at Tue Dec 18 21:07:42 PST 2007. | This site and project hosted by... |