|
| |||||||||||||
#include <config.h>
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "ascii.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"
Go to the source code of this file.
Data Structures | |
| struct | normal_encoding |
| struct | unknown_encoding |
Defines | |
| #define | IGNORE_SECTION_TOK_VTABLE |
| #define | VTABLE1 |
| #define | VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
| #define | UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) |
| #define | UTF8_GET_NAMING2(pages, byte) |
| #define | UTF8_GET_NAMING3(pages, byte) |
| #define | UTF8_GET_NAMING(pages, p, n) |
| #define | UTF8_INVALID3(p) |
| #define | UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) |
| #define | utf8_isName4 isNever |
| #define | utf8_isNmstrt4 isNever |
| #define | utf8_isInvalid2 isNever |
| #define | STANDARD_VTABLE(E) |
| #define | NORMAL_VTABLE(E) |
| #define | MINBPC(enc) 1 |
| #define | SB_BYTE_TYPE(enc, p) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) |
| #define | BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) |
| #define | BYTE_TO_ASCII(enc, p) (*(p)) |
| #define | IS_NAME_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) |
| #define | IS_NMSTRT_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) |
| #define | IS_INVALID_CHAR(enc, p, n) (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) |
| #define | IS_NAME_CHAR_MINBPC(enc, p) (0) |
| #define | IS_NMSTRT_CHAR_MINBPC(enc, p) (0) |
| #define | CHAR_MATCHES(enc, p, c) (*(p) == c) |
| #define | PREFIX(ident) normal_ ## ident |
| #define | BT_COLON BT_NMSTRT |
| #define | BT_COLON BT_NMSTRT |
| #define | BT_COLON BT_NMSTRT |
| #define | BT_COLON BT_NMSTRT |
| #define | DEFINE_UTF16_TO_UTF8(E) |
| #define | DEFINE_UTF16_TO_UTF16(E) |
| #define | SET2(ptr, ch) (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
| #define | GET_LO(ptr) ((unsigned char)(ptr)[0]) |
| #define | GET_HI(ptr) ((unsigned char)(ptr)[1]) |
| #define | SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) |
| #define | GET_LO(ptr) ((unsigned char)(ptr)[1]) |
| #define | GET_HI(ptr) ((unsigned char)(ptr)[0]) |
| #define | LITTLE2_BYTE_TYPE(enc, p) |
| #define | LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) |
| #define | LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) |
| #define | LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) |
| #define | LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) |
| #define | PREFIX(ident) little2_ ## ident |
| #define | MINBPC(enc) 2 |
| #define | BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) |
| #define | BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) |
| #define | CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) |
| #define | IS_NAME_CHAR(enc, p, n) 0 |
| #define | IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) |
| #define | IS_NMSTRT_CHAR(enc, p, n) (0) |
| #define | IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
| #define | BT_COLON BT_NMSTRT |
| #define | BT_COLON BT_NMSTRT |
| #define | BIG2_BYTE_TYPE(enc, p) |
| #define | BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) |
| #define | BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) |
| #define | BIG2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) |
| #define | BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) |
| #define | PREFIX(ident) big2_ ## ident |
| #define | MINBPC(enc) 2 |
| #define | BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) |
| #define | BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) |
| #define | CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) |
| #define | IS_NAME_CHAR(enc, p, n) 0 |
| #define | IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) |
| #define | IS_NMSTRT_CHAR(enc, p, n) (0) |
| #define | IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) |
| #define | BT_COLON BT_NMSTRT |
| #define | BT_COLON BT_NMSTRT |
| #define | INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) |
| #define | SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) |
| #define | NS(x) x |
| #define | ns(x) x |
Enumerations | |
| enum | { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 } |
| enum | { UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC } |
Functions | |
| int | XmlUtf8Encode (int c, char *buf) |
| int | XmlUtf16Encode (int charNum, unsigned short *buf) |
| int | XmlSizeOfUnknownEncoding (void) |
| ENCODING * | XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData) |
|
|
|
|
|
#define BIG2_BYTE_TYPE(enc, p)
Value:
((p)[0] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
 : unicode_byte_type((p)[0], (p)[1]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define DEFINE_UTF16_TO_UTF16(E)
Value:
static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define LITTLE2_BYTE_TYPE(enc, p)
Value:
((p)[1] == 0 \
 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
 : unicode_byte_type((p)[1], (p)[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define NORMAL_VTABLE(E)
Value:
E ## isName2, \
E ## isName3, \
E ## isName4, \
E ## isNmstrt2, \
E ## isNmstrt3, \
E ## isNmstrt4, \
E ## isInvalid2, \
E ## isInvalid3, \
E ## isInvalid4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Definition at line 38 of file xmltok.c. Referenced by XmlInitUnknownEncoding(). |
|
|
#define UTF8_GET_NAMING(pages, p, n)
Value:
((n) == 2 \
 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
 : ((n) == 3 \
    ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
    : 0))
|
|
|
#define UTF8_GET_NAMING2(pages, byte)
Value:
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
              + ((((byte)[0]) & 3) << 1) \
              + ((((byte)[1]) >> 5) & 1)] \
 & (1 << (((byte)[1]) & 0x1F)))
|
|
|
#define UTF8_GET_NAMING3(pages, byte)
Value:
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                       + ((((byte)[1]) >> 2) & 0xF)] \
               << 3) \
              + ((((byte)[1]) & 3) << 1) \
              + ((((byte)[2]) >> 5) & 1)] \
 & (1 << (((byte)[2]) & 0x1F)))
|
|
|
#define UTF8_INVALID3(p)
Value:
((*p) == 0xED \
 ? (((p)[1] & 0x20) != 0) \
 : ((*p) == 0xEF \
    ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
    : 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define VTABLE1
Value:
{ PREFIX(prologTok), PREFIX(contentTok), \
  PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
|
|
|
Definition at line 261 of file xmltok.c.
00261 { /* UTF8_cvalN is value of masked first byte of N byte sequence */
00262 UTF8_cval1 = 0x00,
00263 UTF8_cval2 = 0xc0,
00264 UTF8_cval3 = 0xe0,
00265 UTF8_cval4 = 0xf0
00266 };
|
|
|
Definition at line 1356 of file xmltok.c.
01356 {
01357 UNKNOWN_ENC = -1,
01358 ISO_8859_1_ENC = 0,
01359 US_ASCII_ENC,
01360 UTF_8_ENC,
01361 UTF_16_ENC,
01362 UTF_16BE_ENC,
01363 UTF_16LE_ENC,
01364 /* must match encodingNames up to here */
01365 NO_ENC
01366 };
|
|
ENCODING * XmlInitUnknownEncoding (void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)
|
Definition at line 1276 of file xmltok.c.
References BT_LEAD2, BT_NONXML, BT_OTHER, unknown_encoding::convert, normal_encoding::enc, normal_encoding::isInvalid2, normal_encoding::isInvalid3, normal_encoding::isInvalid4, normal_encoding::isName2, normal_encoding::isName3, normal_encoding::isName4, normal_encoding::isNmstrt2, normal_encoding::isNmstrt3, normal_encoding::isNmstrt4, unknown_encoding::normal, normal_encoding::type, UCS2_GET_NAMING, unknown_encoding::userData, unknown_encoding::utf16, encoding::utf16Convert, unknown_encoding::utf8, encoding::utf8Convert, and XmlUtf8Encode().
01280 {
01281 int i;
01282 struct unknown_encoding *e = mem;
01283 for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
01284 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
01285 for (i = 0; i < 128; i++)
01286 if (latin1_encoding.type[i] != BT_OTHER
01287 && latin1_encoding.type[i] != BT_NONXML
01288 && table[i] != i)
01289 return 0;
01290 for (i = 0; i < 256; i++) {
01291 int c = table[i];
01292 if (c == -1) {
01293 e->normal.type[i] = BT_MALFORM;
01294 /* This shouldn't really get used. */
01295 e->utf16[i] = 0xFFFF;
01296 e->utf8[i][0] = 1;
01297 e->utf8[i][1] = 0;
01298 }
01299 else if (c < 0) {
01300 if (c < -4)
01301 return 0;
01302 e->normal.type[i] = BT_LEAD2 - (c + 2);
01303 e->utf8[i][0] = 0;
01304 e->utf16[i] = 0;
01305 }
01306 else if (c < 0x80) {
01307 if (latin1_encoding.type[c] != BT_OTHER
01308 && latin1_encoding.type[c] != BT_NONXML
01309 && c != i)
01310 return 0;
01311 e->normal.type[i] = latin1_encoding.type[c];
01312 e->utf8[i][0] = 1;
01313 e->utf8[i][1] = (char)c;
01314 e->utf16[i] = c == 0 ? 0xFFFF : c;
01315 }
01316 else if (checkCharRefNumber(c) < 0) {
01317 e->normal.type[i] = BT_NONXML;
01318 /* This shouldn't really get used. */
01319 e->utf16[i] = 0xFFFF;
01320 e->utf8[i][0] = 1;
01321 e->utf8[i][1] = 0;
01322 }
01323 else {
01324 if (c > 0xFFFF)
01325 return 0;
01326 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
01327 e->normal.type[i] = BT_NMSTRT;
01328 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
01329 e->normal.type[i] = BT_NAME;
01330 else
01331 e->normal.type[i] = BT_OTHER;
01332 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
01333 e->utf16[i] = c;
01334 }
01335 }
01336 e->userData = userData;
01337 e->convert = convert;
01338 if (convert) {
01339 e->normal.isName2 = unknown_isName;
01340 e->normal.isName3 = unknown_isName;
01341 e->normal.isName4 = unknown_isName;
01342 e->normal.isNmstrt2 = unknown_isNmstrt;
01343 e->normal.isNmstrt3 = unknown_isNmstrt;
01344 e->normal.isNmstrt4 = unknown_isNmstrt;
01345 e->normal.isInvalid2 = unknown_isInvalid;
01346 e->normal.isInvalid3 = unknown_isInvalid;
01347 e->normal.isInvalid4 = unknown_isInvalid;
01348 }
01349 e->normal.enc.utf8Convert = unknown_toUtf8;
01350 e->normal.enc.utf16Convert = unknown_toUtf16;
01351 return &(e->normal.enc);
01352 }
|
|
|
int XmlSizeOfUnknownEncoding (void)
Definition at line 1188 of file xmltok.c.
01189 {
01190 return sizeof(struct unknown_encoding);
01191 }
|
|
int XmlUtf16Encode (int charNum, unsigned short *buf)
|
Definition at line 1163 of file xmltok.c.
01164 {
01165 if (charNum < 0)
01166 return 0;
01167 if (charNum < 0x10000) {
01168 buf[0] = charNum;
01169 return 1;
01170 }
01171 if (charNum < 0x110000) {
01172 charNum -= 0x10000;
01173 buf[0] = (charNum >> 10) + 0xD800;
01174 buf[1] = (charNum & 0x3FF) + 0xDC00;
01175 return 2;
01176 }
01177 return 0;
01178 }
|
|
int XmlUtf8Encode (int c, char *buf)
|
Definition at line 1127 of file xmltok.c.
Referenced by XmlInitUnknownEncoding().
01128 {
01129 enum {
01130 /* minN is minimum legal resulting value for N byte sequence */
01131 min2 = 0x80,
01132 min3 = 0x800,
01133 min4 = 0x10000
01134 };
01135
01136 if (c < 0)
01137 return 0;
01138 if (c < min2) {
01139 buf[0] = (c | UTF8_cval1);
01140 return 1;
01141 }
01142 if (c < min3) {
01143 buf[0] = ((c >> 6) | UTF8_cval2);
01144 buf[1] = ((c & 0x3f) | 0x80);
01145 return 2;
01146 }
01147 if (c < min4) {
01148 buf[0] = ((c >> 12) | UTF8_cval3);
01149 buf[1] = (((c >> 6) & 0x3f) | 0x80);
01150 buf[2] = ((c & 0x3f) | 0x80);
01151 return 3;
01152 }
01153 if (c < 0x110000) {
01154 buf[0] = ((c >> 18) | UTF8_cval4);
01155 buf[1] = (((c >> 12) & 0x3f) | 0x80);
01156 buf[2] = (((c >> 6) & 0x3f) | 0x80);
01157 buf[3] = ((c & 0x3f) | 0x80);
01158 return 4;
01159 }
01160 return 0;
01161 }
|
| Last updated at Tue Dec 18 21:07:42 PST 2007. | This site and project hosted by... |