[BACK]Return to xmltok_impl.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / external / mit / expat / dist / lib

Annotation of src/external/mit/expat/dist/lib/xmltok_impl.c, Revision 1.5

1.1       tron        1: /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
                      2:    See the file COPYING for copying permission.
                      3: */
                      4:
                      5: /* This file is included! */
                      6: #ifdef XML_TOK_IMPL_C
                      7:
                         /* Default IS_INVALID_CHAR, used only when no definition is already
                            in effect.  It always evaluates to 0, so with this default the
                            invalid-character branches in the code below are never taken. */
                      8: #ifndef IS_INVALID_CHAR
                      9: #define IS_INVALID_CHAR(enc, ptr, n) (0)
                     10: #endif
                     11:
                         /* Case label for an n-byte lead byte (BT_LEAD<n>), meant to be
                            expanded inside a switch on the byte type at ptr.  The expansion
                            refers to enc and end, which are not macro parameters and must be
                            in scope where the macro is used.
                            - fewer than n bytes before end: return XML_TOK_PARTIAL_CHAR
                            - IS_INVALID_CHAR(enc, ptr, n) is true: store ptr in *nextTokPtr
                              and return XML_TOK_INVALID
                            - otherwise: advance ptr by n bytes and break out of the switch */
                     12: #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
                     13:     case BT_LEAD ## n: \
                     14:       if (end - ptr < n) \
                     15:         return XML_TOK_PARTIAL_CHAR; \
                     16:       if (IS_INVALID_CHAR(enc, ptr, n)) { \
                     17:         *(nextTokPtr) = (ptr); \
                     18:         return XML_TOK_INVALID; \
                     19:       } \
                     20:       ptr += n; \
                     21:       break;
                     22:
                         /* Group of case labels shared by the scanners below: the 2-, 3- and
                            4-byte INVALID_LEAD_CASE expansions, followed by BT_NONXML,
                            BT_MALFORM and BT_TRAIL, which all store ptr in *nextTokPtr and
                            return XML_TOK_INVALID. */
                     23: #define INVALID_CASES(ptr, nextTokPtr) \
                     24:   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
                     25:   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
                     26:   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
                     27:   case BT_NONXML: \
                     28:   case BT_MALFORM: \
                     29:   case BT_TRAIL: \
                     30:     *(nextTokPtr) = (ptr); \
                     31:     return XML_TOK_INVALID;
                     32:
                         /* Case label for an n-byte lead byte where a name character is
                            expected.
                            - fewer than n bytes before end: return XML_TOK_PARTIAL_CHAR
                            - IS_NAME_CHAR(enc, ptr, n) is false: store ptr in *nextTokPtr and
                              return XML_TOK_INVALID
                            - otherwise: advance ptr by n bytes and break out of the switch */
                     33: #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
                     34:    case BT_LEAD ## n: \
                     35:      if (end - ptr < n) \
                     36:        return XML_TOK_PARTIAL_CHAR; \
                     37:      if (!IS_NAME_CHAR(enc, ptr, n)) { \
                     38:        *nextTokPtr = ptr; \
                     39:        return XML_TOK_INVALID; \
                     40:      } \
                     41:      ptr += n; \
                     42:      break;
                     43:
                         /* Case labels that accept one name character at ptr.
                            BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC and rejected
                            with XML_TOK_INVALID when that fails; when it passes, control
                            deliberately falls through into the BT_NMSTRT / BT_HEX / BT_DIGIT /
                            BT_NAME / BT_MINUS group, which advances ptr by MINBPC(enc) and
                            breaks.  Multi-byte lead bytes are handled by the 2-, 3- and
                            4-byte CHECK_NAME_CASE expansions. */
                     44: #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
                     45:   case BT_NONASCII: \
                     46:     if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
                     47:       *nextTokPtr = ptr; \
                     48:       return XML_TOK_INVALID; \
                     49:     } \
                     50:   case BT_NMSTRT: \
                     51:   case BT_HEX: \
                     52:   case BT_DIGIT: \
                     53:   case BT_NAME: \
                     54:   case BT_MINUS: \
                     55:     ptr += MINBPC(enc); \
                     56:     break; \
                     57:   CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
                     58:   CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
                     59:   CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
                     60:
                         /* Case label for an n-byte lead byte where a name-start character is
                            expected.  Same shape as CHECK_NAME_CASE, but the character is
                            tested with IS_NMSTRT_CHAR:
                            - fewer than n bytes before end: return XML_TOK_PARTIAL_CHAR
                            - IS_NMSTRT_CHAR(enc, ptr, n) is false: store ptr in *nextTokPtr
                              and return XML_TOK_INVALID
                            - otherwise: advance ptr by n bytes and break out of the switch */
                     61: #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
                     62:    case BT_LEAD ## n: \
                     63:      if (end - ptr < n) \
                     64:        return XML_TOK_PARTIAL_CHAR; \
                     65:      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
                     66:        *nextTokPtr = ptr; \
                     67:        return XML_TOK_INVALID; \
                     68:      } \
                     69:      ptr += n; \
                     70:      break;
                     71:
                         /* Case labels that accept one name-start character at ptr.
                            BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC and
                            rejected with XML_TOK_INVALID when that fails; when it passes,
                            control deliberately falls through into the BT_NMSTRT / BT_HEX
                            group, which advances ptr by MINBPC(enc) and breaks.  Multi-byte
                            lead bytes are handled by the 2-, 3- and 4-byte CHECK_NMSTRT_CASE
                            expansions. */
                     72: #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
                     73:   case BT_NONASCII: \
                     74:     if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
                     75:       *nextTokPtr = ptr; \
                     76:       return XML_TOK_INVALID; \
                     77:     } \
                     78:   case BT_NMSTRT: \
                     79:   case BT_HEX: \
                     80:     ptr += MINBPC(enc); \
                     81:     break; \
                     82:   CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
                     83:   CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
                     84:   CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
                     85:
                         /* Default PREFIX, used only when no definition is already in effect:
                            PREFIX(ident) expands to ident unchanged.  Every function below
                            is named through PREFIX(...). */
                     86: #ifndef PREFIX
                     87: #define PREFIX(ident) ident
                     88: #endif
                     89:
1.5     ! spz        90:
                         /* True when at least count * MINBPC(enc) bytes lie between ptr and
                            end.
                            NOTE(review): ptr, end and count are not parenthesized in the
                            expansion.  Every use visible in this file passes plain
                            identifiers or integer literals, so this is harmless here, but an
                            expression argument could change the meaning of the comparison. */
        !            91: #define HAS_CHARS(enc, ptr, end, count) \
        !            92:     (end - ptr >= count * MINBPC(enc))
        !            93:
                         /* HAS_CHARS for a count of one: true when at least MINBPC(enc) bytes
                            lie between ptr and end. */
        !            94: #define HAS_CHAR(enc, ptr, end) \
        !            95:     HAS_CHARS(enc, ptr, end, 1)
        !            96:
                         /* Guard that makes the ENCLOSING function return XML_TOK_PARTIAL
                            when HAS_CHARS(enc, ptr, end, count) is false.  Note the hidden
                            return: nothing is written to any output pointer before it.
                            NOTE(review): the expansion is a bare brace block rather than
                            do { } while (0), so writing it as the body of an if that has an
                            else branch would not compile.  No such use is visible in this
                            file. */
        !            97: #define REQUIRE_CHARS(enc, ptr, end, count) \
        !            98:     { \
        !            99:       if (! HAS_CHARS(enc, ptr, end, count)) { \
        !           100:         return XML_TOK_PARTIAL; \
        !           101:       } \
        !           102:     }
        !           103:
                         /* REQUIRE_CHARS for a count of one: returns XML_TOK_PARTIAL from the
                            enclosing function unless one whole character is available at
                            ptr. */
        !           104: #define REQUIRE_CHAR(enc, ptr, end) \
        !           105:     REQUIRE_CHARS(enc, ptr, end, 1)
        !           106:
        !           107:
1.1       tron      108: /* ptr points to character following "<!-" */
                    109:
                         /* Scans the remainder of a comment.
                            The first character must match ASCII_MINUS (the second "-" of
                            "<!--"); otherwise XML_TOK_INVALID is returned with
                            *nextTokPtr = ptr.  On reaching the closing "-->" the function
                            returns XML_TOK_COMMENT with *nextTokPtr just past the ">".
                            A "--" that is not followed by ">" yields XML_TOK_INVALID.
                            Characters are classified through INVALID_CASES, which can also
                            return XML_TOK_INVALID or XML_TOK_PARTIAL_CHAR.
                            Returns XML_TOK_PARTIAL, without writing *nextTokPtr, when the
                            input ends before the comment is closed. */
                    110: static int PTRCALL
                    111: PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
                    112:                     const char *end, const char **nextTokPtr)
                    113: {
1.5     ! spz       114:   if (HAS_CHAR(enc, ptr, end)) {
1.1       tron      115:     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
                    116:       *nextTokPtr = ptr;
                    117:       return XML_TOK_INVALID;
                    118:     }
                    119:     ptr += MINBPC(enc);
1.5     ! spz       120:     while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      121:       switch (BYTE_TYPE(enc, ptr)) {
                    122:       INVALID_CASES(ptr, nextTokPtr)
                    123:       case BT_MINUS:
1.5     ! spz       124:         ptr += MINBPC(enc);
        !           125:         REQUIRE_CHAR(enc, ptr, end);
                                 /* a second "-" is only accepted as part of the closing "-->" */
1.1       tron      126:         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
1.5     ! spz       127:           ptr += MINBPC(enc);
        !           128:           REQUIRE_CHAR(enc, ptr, end);
1.1       tron      129:           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                    130:             *nextTokPtr = ptr;
                    131:             return XML_TOK_INVALID;
                    132:           }
                    133:           *nextTokPtr = ptr + MINBPC(enc);
                    134:           return XML_TOK_COMMENT;
                    135:         }
                                 /* single "-": the character after it is examined again by the
                                    loop, since ptr was not moved past it */
                    136:         break;
                    137:       default:
                    138:         ptr += MINBPC(enc);
                    139:         break;
                    140:       }
                    141:     }
                    142:   }
                    143:   return XML_TOK_PARTIAL;
                    144: }
                    145:
                    146: /* ptr points to character following "<!" */
                    147:
                         /* Scans the start of a markup declaration and dispatches on the
                            first character:
                            - BT_MINUS: hands off to scanComment, starting one character
                              further on
                            - BT_LSQB: returns XML_TOK_COND_SECT_OPEN with *nextTokPtr just
                              past it
                            - BT_NMSTRT or BT_HEX: start of a keyword, scanned by the loop
                            - anything else: XML_TOK_INVALID with *nextTokPtr = ptr
                            The keyword loop accepts only BT_NMSTRT and BT_HEX characters.
                            It ends with XML_TOK_DECL_OPEN on whitespace, or on a percent
                            sign that is not followed by whitespace or another percent sign;
                            *nextTokPtr is then left ON that terminating character.
                            Returns XML_TOK_PARTIAL, without writing *nextTokPtr, when the
                            input runs out first. */
                    148: static int PTRCALL
                    149: PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
                    150:                  const char *end, const char **nextTokPtr)
                    151: {
1.5     ! spz       152:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      153:   switch (BYTE_TYPE(enc, ptr)) {
                    154:   case BT_MINUS:
                    155:     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    156:   case BT_LSQB:
                    157:     *nextTokPtr = ptr + MINBPC(enc);
                    158:     return XML_TOK_COND_SECT_OPEN;
                    159:   case BT_NMSTRT:
                    160:   case BT_HEX:
                    161:     ptr += MINBPC(enc);
                    162:     break;
                    163:   default:
                    164:     *nextTokPtr = ptr;
                    165:     return XML_TOK_INVALID;
                    166:   }
1.5     ! spz       167:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      168:     switch (BYTE_TYPE(enc, ptr)) {
                    169:     case BT_PERCNT:
                               /* the character after the percent sign is read just below, so
                                  two whole characters must be available from ptr */
1.5     ! spz       170:       REQUIRE_CHARS(enc, ptr, end, 2);
1.1       tron      171:       /* don't allow <!ENTITY% foo "whatever"> */
                    172:       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
                    173:       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
                    174:         *nextTokPtr = ptr;
                    175:         return XML_TOK_INVALID;
                    176:       }
                    177:       /* fall through */
                    178:     case BT_S: case BT_CR: case BT_LF:
                    179:       *nextTokPtr = ptr;
                    180:       return XML_TOK_DECL_OPEN;
                    181:     case BT_NMSTRT:
                    182:     case BT_HEX:
                    183:       ptr += MINBPC(enc);
                    184:       break;
                    185:     default:
                    186:       *nextTokPtr = ptr;
                    187:       return XML_TOK_INVALID;
                    188:     }
                    189:   }
                    190:   return XML_TOK_PARTIAL;
                    191: }
                    192:
                         /* Classifies the processing-instruction target spanning [ptr, end).
                            *tokPtr is set to XML_TOK_PI first and only changed to
                            XML_TOK_XML_DECL when the target is exactly lowercase "xml".
                            Return value:
                            - 1 when the target is not three characters long, or does not
                              spell x, m, l in either letter case (*tokPtr == XML_TOK_PI)
                            - 1 when the target is exactly "xml"
                              (*tokPtr == XML_TOK_XML_DECL)
                            - 0 when the target spells "xml" with at least one uppercase
                              letter; the caller scanPi turns 0 into XML_TOK_INVALID
                            NOTE(review): enc is declared through UNUSED_P yet is still
                            passed to MINBPC and BYTE_TO_ASCII below; presumably those macros
                            do not evaluate it in every configuration.  Confirm against their
                            definitions. */
                    193: static int PTRCALL
1.5     ! spz       194: PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
1.1       tron      195:                       const char *end, int *tokPtr)
                    196: {
                    197:   int upper = 0;
                    198:   *tokPtr = XML_TOK_PI;
                    199:   if (end - ptr != MINBPC(enc)*3)
                    200:     return 1;
                    201:   switch (BYTE_TO_ASCII(enc, ptr)) {
                    202:   case ASCII_x:
                    203:     break;
                    204:   case ASCII_X:
                    205:     upper = 1;
                    206:     break;
                    207:   default:
                    208:     return 1;
                    209:   }
                    210:   ptr += MINBPC(enc);
                    211:   switch (BYTE_TO_ASCII(enc, ptr)) {
                    212:   case ASCII_m:
                    213:     break;
                    214:   case ASCII_M:
                    215:     upper = 1;
                    216:     break;
                    217:   default:
                    218:     return 1;
                    219:   }
                    220:   ptr += MINBPC(enc);
                    221:   switch (BYTE_TO_ASCII(enc, ptr)) {
                    222:   case ASCII_l:
                    223:     break;
                    224:   case ASCII_L:
                    225:     upper = 1;
                    226:     break;
                    227:   default:
                    228:     return 1;
                    229:   }
                           /* all three letters matched; any uppercase letter rejects the target */
                    230:   if (upper)
                    231:     return 0;
                    232:   *tokPtr = XML_TOK_XML_DECL;
                    233:   return 1;
                    234: }
                    235:
                    236: /* ptr points to character following "<?" */
                    237:
                         /* Scans a processing instruction.  target remembers where the PI
                            target name starts so that checkPiTarget can classify it once the
                            name has ended.
                            - the first character must pass CHECK_NMSTRT_CASES and the rest
                              of the name CHECK_NAME_CASES; anything else is XML_TOK_INVALID
                            - whitespace after the name: the body is skipped up to "?>" and
                              the token chosen by checkPiTarget is returned with *nextTokPtr
                              just past the ">"
                            - a question mark right after the name must be followed by ">",
                              otherwise the result is XML_TOK_INVALID
                            - checkPiTarget returning 0 gives XML_TOK_INVALID with
                              *nextTokPtr at the end of the name
                            Returns XML_TOK_PARTIAL when the input ends first. */
                    238: static int PTRCALL
                    239: PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
                    240:                const char *end, const char **nextTokPtr)
                    241: {
                    242:   int tok;
                    243:   const char *target = ptr;
1.5     ! spz       244:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      245:   switch (BYTE_TYPE(enc, ptr)) {
                    246:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    247:   default:
                    248:     *nextTokPtr = ptr;
                    249:     return XML_TOK_INVALID;
                    250:   }
1.5     ! spz       251:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      252:     switch (BYTE_TYPE(enc, ptr)) {
                    253:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    254:     case BT_S: case BT_CR: case BT_LF:
                               /* whitespace ends the target name; [target, ptr) is the name */
                    255:       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
                    256:         *nextTokPtr = ptr;
                    257:         return XML_TOK_INVALID;
                    258:       }
                    259:       ptr += MINBPC(enc);
1.5     ! spz       260:       while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      261:         switch (BYTE_TYPE(enc, ptr)) {
                    262:         INVALID_CASES(ptr, nextTokPtr)
                    263:         case BT_QUEST:
                    264:           ptr += MINBPC(enc);
1.5     ! spz       265:           REQUIRE_CHAR(enc, ptr, end);
1.1       tron      266:           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                    267:             *nextTokPtr = ptr + MINBPC(enc);
                    268:             return tok;
                    269:           }
                                   /* not "?>": the character after the question mark is
                                      examined again by the loop */
                    270:           break;
                    271:         default:
                    272:           ptr += MINBPC(enc);
                    273:           break;
                    274:         }
                    275:       }
                    276:       return XML_TOK_PARTIAL;
                    277:     case BT_QUEST:
                    278:       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
                    279:         *nextTokPtr = ptr;
                    280:         return XML_TOK_INVALID;
                    281:       }
                    282:       ptr += MINBPC(enc);
1.5     ! spz       283:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron      284:       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                    285:         *nextTokPtr = ptr + MINBPC(enc);
                    286:         return tok;
                    287:       }
                               /* when reached from the case above, ptr already points at the
                                  character after the question mark */
                    288:       /* fall through */
                    289:     default:
                    290:       *nextTokPtr = ptr;
                    291:       return XML_TOK_INVALID;
                    292:     }
                    293:   }
                    294:   return XML_TOK_PARTIAL;
                    295: }
                    296:
                         /* Matches the six characters "CDATA[" starting at ptr.
                            - fewer than six characters available: XML_TOK_PARTIAL
                            - a character differs: XML_TOK_INVALID with *nextTokPtr on the
                              first mismatching character
                            - all six match: XML_TOK_CDATA_SECT_OPEN with *nextTokPtr just
                              past the "["
                            NOTE(review): enc is declared through UNUSED_P yet is still
                            passed to the macros below; presumably they do not evaluate it in
                            every configuration.  Confirm against their definitions. */
                    297: static int PTRCALL
1.5     ! spz       298: PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
1.1       tron      299:                          const char *end, const char **nextTokPtr)
                    300: {
                    301:   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
                    302:                                      ASCII_T, ASCII_A, ASCII_LSQB };
                    303:   int i;
                    304:   /* CDATA[ */
1.5     ! spz       305:   REQUIRE_CHARS(enc, ptr, end, 6);
1.1       tron      306:   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
                    307:     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
                    308:       *nextTokPtr = ptr;
                    309:       return XML_TOK_INVALID;
                    310:     }
                    311:   }
                    312:   *nextTokPtr = ptr;
                    313:   return XML_TOK_CDATA_SECT_OPEN;
                    314: }
                    315:
                         /* Returns the next token found in [ptr, end) while inside a CDATA
                            section:
                            - XML_TOK_NONE when ptr is already at or past end
                            - XML_TOK_CDATA_SECT_CLOSE for "]]>", with *nextTokPtr past it
                            - XML_TOK_DATA_NEWLINE for LF, CR, or CR followed by LF
                            - XML_TOK_DATA_CHARS for a run of other characters; the run stops
                              before CR, LF, a right square bracket, an invalid byte, or a
                              multi-byte character that is incomplete or invalid
                            - XML_TOK_INVALID / XML_TOK_PARTIAL_CHAR from INVALID_CASES when
                              the very first character is bad or incomplete
                            - XML_TOK_PARTIAL when more input is needed to decide */
                    316: static int PTRCALL
                    317: PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
                    318:                         const char *end, const char **nextTokPtr)
                    319: {
1.4       spz       320:   if (ptr >= end)
1.1       tron      321:     return XML_TOK_NONE;
                           /* With code units wider than one byte, ignore trailing bytes that
                              do not form a whole unit.  The mask arithmetic presumably relies
                              on MINBPC(enc) being a power of two; confirm. */
                    322:   if (MINBPC(enc) > 1) {
                    323:     size_t n = end - ptr;
                    324:     if (n & (MINBPC(enc) - 1)) {
                    325:       n &= ~(MINBPC(enc) - 1);
                    326:       if (n == 0)
                    327:         return XML_TOK_PARTIAL;
                    328:       end = ptr + n;
                    329:     }
                    330:   }
                    331:   switch (BYTE_TYPE(enc, ptr)) {
                    332:   case BT_RSQB:
                    333:     ptr += MINBPC(enc);
1.5     ! spz       334:     REQUIRE_CHAR(enc, ptr, end);
                             /* a lone "]" is ordinary data; ptr is already past it */
1.1       tron      335:     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
                    336:       break;
                    337:     ptr += MINBPC(enc);
1.5     ! spz       338:     REQUIRE_CHAR(enc, ptr, end);
1.1       tron      339:     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                               /* "]]" without ">": step back onto the second "]" so that only
                                  the first one is reported as data by the loop below */
                    340:       ptr -= MINBPC(enc);
                    341:       break;
                    342:     }
                    343:     *nextTokPtr = ptr + MINBPC(enc);
                    344:     return XML_TOK_CDATA_SECT_CLOSE;
                    345:   case BT_CR:
                    346:     ptr += MINBPC(enc);
                             /* the character after CR is needed to recognise CR LF, so a CR
                                at the very end of the input yields XML_TOK_PARTIAL */
1.5     ! spz       347:     REQUIRE_CHAR(enc, ptr, end);
1.1       tron      348:     if (BYTE_TYPE(enc, ptr) == BT_LF)
                    349:       ptr += MINBPC(enc);
                    350:     *nextTokPtr = ptr;
                    351:     return XML_TOK_DATA_NEWLINE;
                    352:   case BT_LF:
                    353:     *nextTokPtr = ptr + MINBPC(enc);
                    354:     return XML_TOK_DATA_NEWLINE;
                    355:   INVALID_CASES(ptr, nextTokPtr)
                    356:   default:
                    357:     ptr += MINBPC(enc);
                    358:     break;
                    359:   }
                           /* At least one data character has been consumed; extend the run.
                              Anything that cannot be part of it ends the token without an
                              error, leaving that character for the next call. */
1.5     ! spz       360:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      361:     switch (BYTE_TYPE(enc, ptr)) {
                    362: #define LEAD_CASE(n) \
                    363:     case BT_LEAD ## n: \
                    364:       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
                    365:         *nextTokPtr = ptr; \
                    366:         return XML_TOK_DATA_CHARS; \
                    367:       } \
                    368:       ptr += n; \
                    369:       break;
                    370:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                    371: #undef LEAD_CASE
                    372:     case BT_NONXML:
                    373:     case BT_MALFORM:
                    374:     case BT_TRAIL:
                    375:     case BT_CR:
                    376:     case BT_LF:
                    377:     case BT_RSQB:
                    378:       *nextTokPtr = ptr;
                    379:       return XML_TOK_DATA_CHARS;
                    380:     default:
                    381:       ptr += MINBPC(enc);
                    382:       break;
                    383:     }
                    384:   }
                    385:   *nextTokPtr = ptr;
                    386:   return XML_TOK_DATA_CHARS;
                    387: }
                    388:
                    389: /* ptr points to character following "</" */
                    390:
                         /* Scans an end tag.
                            - the first character must pass CHECK_NMSTRT_CASES and the rest
                              of the name CHECK_NAME_CASES
                            - after the name, only whitespace may appear before the closing
                              BT_GT character
                            - with XML_NS defined, a colon inside the name is accepted
                              without further qualified-name checks
                            Returns XML_TOK_END_TAG with *nextTokPtr just past the closing
                            character, XML_TOK_INVALID with *nextTokPtr on the offending
                            character, or XML_TOK_PARTIAL when the input ends first. */
                    391: static int PTRCALL
                    392: PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
                    393:                    const char *end, const char **nextTokPtr)
                    394: {
1.5     ! spz       395:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      396:   switch (BYTE_TYPE(enc, ptr)) {
                    397:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    398:   default:
                    399:     *nextTokPtr = ptr;
                    400:     return XML_TOK_INVALID;
                    401:   }
1.5     ! spz       402:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      403:     switch (BYTE_TYPE(enc, ptr)) {
                    404:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    405:     case BT_S: case BT_CR: case BT_LF:
                               /* the name is over: skip trailing whitespace up to the closing
                                  character */
1.5     ! spz       406:       for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1.1       tron      407:         switch (BYTE_TYPE(enc, ptr)) {
                    408:         case BT_S: case BT_CR: case BT_LF:
                    409:           break;
                    410:         case BT_GT:
                    411:           *nextTokPtr = ptr + MINBPC(enc);
                    412:           return XML_TOK_END_TAG;
                    413:         default:
                    414:           *nextTokPtr = ptr;
                    415:           return XML_TOK_INVALID;
                    416:         }
                    417:       }
                    418:       return XML_TOK_PARTIAL;
                    419: #ifdef XML_NS
                    420:     case BT_COLON:
                    421:       /* no need to check qname syntax here,
                    422:          since end-tag must match exactly */
                    423:       ptr += MINBPC(enc);
                    424:       break;
                    425: #endif
                    426:     case BT_GT:
                    427:       *nextTokPtr = ptr + MINBPC(enc);
                    428:       return XML_TOK_END_TAG;
                    429:     default:
                    430:       *nextTokPtr = ptr;
                    431:       return XML_TOK_INVALID;
                    432:     }
                    433:   }
                    434:   return XML_TOK_PARTIAL;
                    435: }
                    436:
                    437: /* ptr points to character following "&#x" */
                    438:
                         /* Scans the digits of a hexadecimal character reference.
                            The first character must be BT_DIGIT or BT_HEX; further BT_DIGIT
                            and BT_HEX characters are skipped until a BT_SEMI character ends
                            the reference.
                            Returns XML_TOK_CHAR_REF with *nextTokPtr just past the
                            terminator, XML_TOK_INVALID with *nextTokPtr on the first
                            character of any other type, or XML_TOK_PARTIAL (without writing
                            *nextTokPtr) when the input ends first.
                            The numeric value is not computed here. */
                    439: static int PTRCALL
                    440: PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
                    441:                        const char *end, const char **nextTokPtr)
                    442: {
1.5     ! spz       443:   if (HAS_CHAR(enc, ptr, end)) {
1.1       tron      444:     switch (BYTE_TYPE(enc, ptr)) {
                    445:     case BT_DIGIT:
                    446:     case BT_HEX:
                    447:       break;
                    448:     default:
                    449:       *nextTokPtr = ptr;
                    450:       return XML_TOK_INVALID;
                    451:     }
1.5     ! spz       452:     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1.1       tron      453:       switch (BYTE_TYPE(enc, ptr)) {
                    454:       case BT_DIGIT:
                    455:       case BT_HEX:
                    456:         break;
                    457:       case BT_SEMI:
                    458:         *nextTokPtr = ptr + MINBPC(enc);
                    459:         return XML_TOK_CHAR_REF;
                    460:       default:
                    461:         *nextTokPtr = ptr;
                    462:         return XML_TOK_INVALID;
                    463:       }
                    464:     }
                    465:   }
                    466:   return XML_TOK_PARTIAL;
                    467: }
                    468:
                    469: /* ptr points to character following "&#" */
                    470:
                         /* Scans a numeric character reference.
                            When the first character matches ASCII_x (lowercase only) the
                            rest is handed to scanHexCharRef, starting one character further
                            on.  Otherwise the first character must be BT_DIGIT; further
                            BT_DIGIT characters are skipped until a BT_SEMI character ends
                            the reference.
                            Returns XML_TOK_CHAR_REF with *nextTokPtr just past the
                            terminator, XML_TOK_INVALID with *nextTokPtr on the first
                            character of any other type, or XML_TOK_PARTIAL (without writing
                            *nextTokPtr) when the input ends first. */
                    471: static int PTRCALL
                    472: PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
                    473:                     const char *end, const char **nextTokPtr)
                    474: {
1.5     ! spz       475:   if (HAS_CHAR(enc, ptr, end)) {
1.1       tron      476:     if (CHAR_MATCHES(enc, ptr, ASCII_x))
                    477:       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    478:     switch (BYTE_TYPE(enc, ptr)) {
                    479:     case BT_DIGIT:
                    480:       break;
                    481:     default:
                    482:       *nextTokPtr = ptr;
                    483:       return XML_TOK_INVALID;
                    484:     }
1.5     ! spz       485:     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1.1       tron      486:       switch (BYTE_TYPE(enc, ptr)) {
                    487:       case BT_DIGIT:
                    488:         break;
                    489:       case BT_SEMI:
                    490:         *nextTokPtr = ptr + MINBPC(enc);
                    491:         return XML_TOK_CHAR_REF;
                    492:       default:
                    493:         *nextTokPtr = ptr;
                    494:         return XML_TOK_INVALID;
                    495:       }
                    496:     }
                    497:   }
                    498:   return XML_TOK_PARTIAL;
                    499: }
                    500:
                    501: /* ptr points to character following "&" */
                    502:
                         /* Scans a reference and dispatches on its first character:
                            - BT_NUM: a character reference, handed to scanCharRef starting
                              one character further on
                            - a character accepted by CHECK_NMSTRT_CASES: an entity
                              reference; name characters are then skipped until a BT_SEMI
                              character, giving XML_TOK_ENTITY_REF with *nextTokPtr just past
                              the terminator
                            - anything else: XML_TOK_INVALID with *nextTokPtr on the
                              offending character
                            Returns XML_TOK_PARTIAL when the input ends first. */
                    503: static int PTRCALL
                    504: PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
                    505:                 const char **nextTokPtr)
                    506: {
1.5     ! spz       507:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      508:   switch (BYTE_TYPE(enc, ptr)) {
                    509:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    510:   case BT_NUM:
                    511:     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    512:   default:
                    513:     *nextTokPtr = ptr;
                    514:     return XML_TOK_INVALID;
                    515:   }
1.5     ! spz       516:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      517:     switch (BYTE_TYPE(enc, ptr)) {
                    518:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    519:     case BT_SEMI:
                    520:       *nextTokPtr = ptr + MINBPC(enc);
                    521:       return XML_TOK_ENTITY_REF;
                    522:     default:
                    523:       *nextTokPtr = ptr;
                    524:       return XML_TOK_INVALID;
                    525:     }
                    526:   }
                    527:   return XML_TOK_PARTIAL;
                    528: }
                    529:
                    530: /* ptr points to character following first character of attribute name */
                    531:
                    532: static int PTRCALL
                    533: PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
                    534:                  const char **nextTokPtr)
                    535: {
                    536: #ifdef XML_NS
                    537:   int hadColon = 0;
                    538: #endif
1.5     ! spz       539:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      540:     switch (BYTE_TYPE(enc, ptr)) {
                    541:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    542: #ifdef XML_NS
                    543:     case BT_COLON:
                    544:       if (hadColon) {
                    545:         *nextTokPtr = ptr;
                    546:         return XML_TOK_INVALID;
                    547:       }
                    548:       hadColon = 1;
                    549:       ptr += MINBPC(enc);
1.5     ! spz       550:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron      551:       switch (BYTE_TYPE(enc, ptr)) {
                    552:       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    553:       default:
                    554:         *nextTokPtr = ptr;
                    555:         return XML_TOK_INVALID;
                    556:       }
                    557:       break;
                    558: #endif
                    559:     case BT_S: case BT_CR: case BT_LF:
                    560:       for (;;) {
                    561:         int t;
                    562:
                    563:         ptr += MINBPC(enc);
1.5     ! spz       564:         REQUIRE_CHAR(enc, ptr, end);
1.1       tron      565:         t = BYTE_TYPE(enc, ptr);
                    566:         if (t == BT_EQUALS)
                    567:           break;
                    568:         switch (t) {
                    569:         case BT_S:
                    570:         case BT_LF:
                    571:         case BT_CR:
                    572:           break;
                    573:         default:
                    574:           *nextTokPtr = ptr;
                    575:           return XML_TOK_INVALID;
                    576:         }
                    577:       }
                    578:     /* fall through */
                    579:     case BT_EQUALS:
                    580:       {
                    581:         int open;
                    582: #ifdef XML_NS
                    583:         hadColon = 0;
                    584: #endif
                    585:         for (;;) {
                    586:           ptr += MINBPC(enc);
1.5     ! spz       587:           REQUIRE_CHAR(enc, ptr, end);
1.1       tron      588:           open = BYTE_TYPE(enc, ptr);
                    589:           if (open == BT_QUOT || open == BT_APOS)
                    590:             break;
                    591:           switch (open) {
                    592:           case BT_S:
                    593:           case BT_LF:
                    594:           case BT_CR:
                    595:             break;
                    596:           default:
                    597:             *nextTokPtr = ptr;
                    598:             return XML_TOK_INVALID;
                    599:           }
                    600:         }
                    601:         ptr += MINBPC(enc);
                    602:         /* in attribute value */
                    603:         for (;;) {
                    604:           int t;
1.5     ! spz       605:           REQUIRE_CHAR(enc, ptr, end);
1.1       tron      606:           t = BYTE_TYPE(enc, ptr);
                    607:           if (t == open)
                    608:             break;
                    609:           switch (t) {
                    610:           INVALID_CASES(ptr, nextTokPtr)
                    611:           case BT_AMP:
                    612:             {
                    613:               int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
                    614:               if (tok <= 0) {
                    615:                 if (tok == XML_TOK_INVALID)
                    616:                   *nextTokPtr = ptr;
                    617:                 return tok;
                    618:               }
                    619:               break;
                    620:             }
                    621:           case BT_LT:
                    622:             *nextTokPtr = ptr;
                    623:             return XML_TOK_INVALID;
                    624:           default:
                    625:             ptr += MINBPC(enc);
                    626:             break;
                    627:           }
                    628:         }
                    629:         ptr += MINBPC(enc);
1.5     ! spz       630:         REQUIRE_CHAR(enc, ptr, end);
1.1       tron      631:         switch (BYTE_TYPE(enc, ptr)) {
                    632:         case BT_S:
                    633:         case BT_CR:
                    634:         case BT_LF:
                    635:           break;
                    636:         case BT_SOL:
                    637:           goto sol;
                    638:         case BT_GT:
                    639:           goto gt;
                    640:         default:
                    641:           *nextTokPtr = ptr;
                    642:           return XML_TOK_INVALID;
                    643:         }
                    644:         /* ptr points to closing quote */
                    645:         for (;;) {
                    646:           ptr += MINBPC(enc);
1.5     ! spz       647:           REQUIRE_CHAR(enc, ptr, end);
1.1       tron      648:           switch (BYTE_TYPE(enc, ptr)) {
                    649:           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    650:           case BT_S: case BT_CR: case BT_LF:
                    651:             continue;
                    652:           case BT_GT:
                    653:           gt:
                    654:             *nextTokPtr = ptr + MINBPC(enc);
                    655:             return XML_TOK_START_TAG_WITH_ATTS;
                    656:           case BT_SOL:
                    657:           sol:
                    658:             ptr += MINBPC(enc);
1.5     ! spz       659:             REQUIRE_CHAR(enc, ptr, end);
1.1       tron      660:             if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                    661:               *nextTokPtr = ptr;
                    662:               return XML_TOK_INVALID;
                    663:             }
                    664:             *nextTokPtr = ptr + MINBPC(enc);
                    665:             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
                    666:           default:
                    667:             *nextTokPtr = ptr;
                    668:             return XML_TOK_INVALID;
                    669:           }
                    670:           break;
                    671:         }
                    672:         break;
                    673:       }
                    674:     default:
                    675:       *nextTokPtr = ptr;
                    676:       return XML_TOK_INVALID;
                    677:     }
                    678:   }
                    679:   return XML_TOK_PARTIAL;
                    680: }
                    681:
                    682: /* ptr points to character following "<" */
                    683:
                    684: static int PTRCALL
                    685: PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
                    686:                const char **nextTokPtr)
                    687: {
                           /* Classifies the markup that begins after a "<".
                              - "!" followed by "-" is handed to scanComment, "!" followed by
                                "[" to scanCdataSection; anything else after "!" is
                                XML_TOK_INVALID.
                              - "?" is handed to scanPi and "/" to scanEndTag.
                              - Otherwise a start-tag name is scanned: ">" ends it with
                                XML_TOK_START_TAG_NO_ATTS, "/" followed by ">" gives
                                XML_TOK_EMPTY_ELEMENT_NO_ATTS, and whitespace followed by a
                                further name hands over to scanAtts.
                              Returns XML_TOK_PARTIAL when the input ends inside the name or
                              in the whitespace after it.  On XML_TOK_INVALID returned from
                              this function, *nextTokPtr is the offending position.
                              NOTE(review): REQUIRE_CHAR, HAS_CHAR, CHECK_NMSTRT_CASES and
                              CHECK_NAME_CASES are defined outside this excerpt; presumably
                              REQUIRE_CHAR returns early when no complete character is left
                              and the CHECK_* macros advance ptr past a valid name character
                              or return on a bad/partial one -- confirm against the macros. */
                    688: #ifdef XML_NS
                    689:   int hadColon;
                    690: #endif
1.5     ! spz       691:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      692:   switch (BYTE_TYPE(enc, ptr)) {
                    693:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    694:   case BT_EXCL:
1.5     ! spz       695:     ptr += MINBPC(enc);
        !           696:     REQUIRE_CHAR(enc, ptr, end);
1.1       tron      697:     switch (BYTE_TYPE(enc, ptr)) {
                    698:     case BT_MINUS:
                    699:       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    700:     case BT_LSQB:
                    701:       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
                    702:                                       end, nextTokPtr);
                    703:     }
                             /* "<!" followed by anything other than "-" or "[" */
                    704:     *nextTokPtr = ptr;
                    705:     return XML_TOK_INVALID;
                    706:   case BT_QUEST:
                    707:     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    708:   case BT_SOL:
                    709:     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    710:   default:
                    711:     *nextTokPtr = ptr;
                    712:     return XML_TOK_INVALID;
                    713:   }
                    714: #ifdef XML_NS
                    715:   hadColon = 0;
                    716: #endif
                    717:   /* we have a start-tag */
1.5     ! spz       718:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      719:     switch (BYTE_TYPE(enc, ptr)) {
                    720:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    721: #ifdef XML_NS
                    722:     case BT_COLON:
                               /* at most one colon is accepted in the tag name */
                    723:       if (hadColon) {
                    724:         *nextTokPtr = ptr;
                    725:         return XML_TOK_INVALID;
                    726:       }
                    727:       hadColon = 1;
                    728:       ptr += MINBPC(enc);
1.5     ! spz       729:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron      730:       switch (BYTE_TYPE(enc, ptr)) {
                    731:       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    732:       default:
                    733:         *nextTokPtr = ptr;
                    734:         return XML_TOK_INVALID;
                    735:       }
                    736:       break;
                    737: #endif
                    738:     case BT_S: case BT_CR: case BT_LF:
                    739:       {
                    740:         ptr += MINBPC(enc);
                                 /* skip the whitespace run that follows the tag name */
1.5     ! spz       741:         while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      742:           switch (BYTE_TYPE(enc, ptr)) {
                    743:           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    744:           case BT_GT:
                    745:             goto gt;
                    746:           case BT_SOL:
                    747:             goto sol;
                    748:           case BT_S: case BT_CR: case BT_LF:
                    749:             ptr += MINBPC(enc);
                    750:             continue;
                    751:           default:
                    752:             *nextTokPtr = ptr;
                    753:             return XML_TOK_INVALID;
                    754:           }
                                   /* Every explicit case above returns, jumps or continues, so
                                      this point is reached only through CHECK_NMSTRT_CASES:
                                      an attribute name has started. */
                    755:           return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
                    756:         }
                    757:         return XML_TOK_PARTIAL;
                    758:       }
                    759:     case BT_GT:
                    760:     gt:
                    761:       *nextTokPtr = ptr + MINBPC(enc);
                    762:       return XML_TOK_START_TAG_NO_ATTS;
                    763:     case BT_SOL:
                    764:     sol:
                    765:       ptr += MINBPC(enc);
1.5     ! spz       766:       REQUIRE_CHAR(enc, ptr, end);
                               /* "/" must be followed immediately by ">" */
1.1       tron      767:       if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                    768:         *nextTokPtr = ptr;
                    769:         return XML_TOK_INVALID;
                    770:       }
                    771:       *nextTokPtr = ptr + MINBPC(enc);
                    772:       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
                    773:     default:
                    774:       *nextTokPtr = ptr;
                    775:       return XML_TOK_INVALID;
                    776:     }
                    777:   }
                    778:   return XML_TOK_PARTIAL;
                    779: }
                    780:
                    781: static int PTRCALL
                    782: PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
                    783:                    const char **nextTokPtr)
                    784: {
                           /* Returns the next token of element content in [ptr, end).
                              - XML_TOK_NONE when the range is empty.
                              - "<" and "&" are delegated to scanLt and scanRef.
                              - A line break (CR, LF or CR followed by LF) yields
                                XML_TOK_DATA_NEWLINE; a CR that is the last character
                                yields XML_TOK_TRAILING_CR.
                              - "]]>" yields XML_TOK_INVALID with *nextTokPtr on the ">";
                                a leading "]" or "]]" at the end of input yields
                                XML_TOK_TRAILING_RSQB.
                              - Anything else is collected into XML_TOK_DATA_CHARS, with
                                *nextTokPtr at the first character not included.
                              NOTE(review): HAS_CHAR and HAS_CHARS are defined outside this
                              excerpt; presumably they test that one / N complete characters
                              remain before end -- confirm against the macros. */
1.4       spz       785:   if (ptr >= end)
1.1       tron      786:     return XML_TOK_NONE;
                    787:   if (MINBPC(enc) > 1) {
                             /* Cut off a trailing fragment shorter than one code unit so
                                the scan only sees whole units.  The mask arithmetic
                                assumes MINBPC(enc) is a power of two. */
                    788:     size_t n = end - ptr;
                    789:     if (n & (MINBPC(enc) - 1)) {
                    790:       n &= ~(MINBPC(enc) - 1);
                    791:       if (n == 0)
                    792:         return XML_TOK_PARTIAL;
                    793:       end = ptr + n;
                    794:     }
                    795:   }
                    796:   switch (BYTE_TYPE(enc, ptr)) {
                    797:   case BT_LT:
                    798:     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    799:   case BT_AMP:
                    800:     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    801:   case BT_CR:
                    802:     ptr += MINBPC(enc);
1.5     ! spz       803:     if (! HAS_CHAR(enc, ptr, end))
1.1       tron      804:       return XML_TOK_TRAILING_CR;
                             /* a LF directly after the CR belongs to the same newline */
                    805:     if (BYTE_TYPE(enc, ptr) == BT_LF)
                    806:       ptr += MINBPC(enc);
                    807:     *nextTokPtr = ptr;
                    808:     return XML_TOK_DATA_NEWLINE;
                    809:   case BT_LF:
                    810:     *nextTokPtr = ptr + MINBPC(enc);
                    811:     return XML_TOK_DATA_NEWLINE;
                    812:   case BT_RSQB:
                    813:     ptr += MINBPC(enc);
1.5     ! spz       814:     if (! HAS_CHAR(enc, ptr, end))
1.1       tron      815:       return XML_TOK_TRAILING_RSQB;
                    816:     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
                    817:       break;
                    818:     ptr += MINBPC(enc);
1.5     ! spz       819:     if (! HAS_CHAR(enc, ptr, end))
1.1       tron      820:       return XML_TOK_TRAILING_RSQB;
                    821:     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                               /* "]]" not followed by ">": step back onto the second "]"
                                  so the loop below examines it again */
                    822:       ptr -= MINBPC(enc);
                    823:       break;
                    824:     }
                             /* "]]>" found; ptr is on the ">" */
                    825:     *nextTokPtr = ptr;
                    826:     return XML_TOK_INVALID;
                    827:   INVALID_CASES(ptr, nextTokPtr)
                    828:   default:
                    829:     ptr += MINBPC(enc);
                    830:     break;
                    831:   }
                           /* Extend the run of character data until a character that must
                              start its own token, or the end of input. */
1.5     ! spz       832:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      833:     switch (BYTE_TYPE(enc, ptr)) {
                    834: #define LEAD_CASE(n) \
                    835:     case BT_LEAD ## n: \
                    836:       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
                    837:         *nextTokPtr = ptr; \
                    838:         return XML_TOK_DATA_CHARS; \
                    839:       } \
                    840:       ptr += n; \
                    841:       break;
                    842:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                    843: #undef LEAD_CASE
                    844:     case BT_RSQB:
                               /* Look ahead for "]]>".  The "]" is absorbed into the data
                                  only when the lookahead rules that sequence out; when
                                  there is too little input to decide, the data run ends
                                  just before this "]". */
1.5     ! spz       845:       if (HAS_CHARS(enc, ptr, end, 2)) {
1.1       tron      846:          if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
                    847:            ptr += MINBPC(enc);
                    848:            break;
                    849:          }
1.5     ! spz       850:          if (HAS_CHARS(enc, ptr, end, 3)) {
1.1       tron      851:            if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
                    852:              ptr += MINBPC(enc);
                    853:              break;
                    854:            }
                    855:            *nextTokPtr = ptr + 2*MINBPC(enc);
                    856:            return XML_TOK_INVALID;
                    857:          }
                    858:       }
                    859:       /* fall through */
                    860:     case BT_AMP:
                    861:     case BT_LT:
                    862:     case BT_NONXML:
                    863:     case BT_MALFORM:
                    864:     case BT_TRAIL:
                    865:     case BT_CR:
                    866:     case BT_LF:
                    867:       *nextTokPtr = ptr;
                    868:       return XML_TOK_DATA_CHARS;
                    869:     default:
                    870:       ptr += MINBPC(enc);
                    871:       break;
                    872:     }
                    873:   }
                    874:   *nextTokPtr = ptr;
                    875:   return XML_TOK_DATA_CHARS;
                    876: }
                    877:
                    878: /* ptr points to character following "%" */
                    879:
                    880: static int PTRCALL
                    881: PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
                    882:                     const char **nextTokPtr)
                    883: {
                           /* Distinguishes a bare "%" from a parameter entity reference.
                              - Whitespace or another "%" right after the "%" yields
                                XML_TOK_PERCENT, with *nextTokPtr left on that character.
                              - A name terminated by ";" yields XML_TOK_PARAM_ENTITY_REF,
                                with *nextTokPtr just past the ";".
                              - Any other character yields XML_TOK_INVALID at that position.
                              - XML_TOK_PARTIAL when the input ends before the ";".
                              NOTE(review): REQUIRE_CHAR, HAS_CHAR and the CHECK_*_CASES
                              macros are defined outside this excerpt; their early-return
                              behaviour should be confirmed there. */
1.5     ! spz       884:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      885:   switch (BYTE_TYPE(enc, ptr)) {
                    886:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    887:   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
                    888:     *nextTokPtr = ptr;
                    889:     return XML_TOK_PERCENT;
                    890:   default:
                    891:     *nextTokPtr = ptr;
                    892:     return XML_TOK_INVALID;
                    893:   }
                           /* remaining characters of the entity name, up to the ";" */
1.5     ! spz       894:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      895:     switch (BYTE_TYPE(enc, ptr)) {
                    896:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    897:     case BT_SEMI:
                    898:       *nextTokPtr = ptr + MINBPC(enc);
                    899:       return XML_TOK_PARAM_ENTITY_REF;
                    900:     default:
                    901:       *nextTokPtr = ptr;
                    902:       return XML_TOK_INVALID;
                    903:     }
                    904:   }
                    905:   return XML_TOK_PARTIAL;
                    906: }
                    907:
                    908: static int PTRCALL
                    909: PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
                    910:                       const char **nextTokPtr)
                    911: {
                           /* Scans the name that follows a "#" (prologTok calls this for
                              BT_NUM with ptr just past that character).
                              - The first character must be accepted by CHECK_NMSTRT_CASES,
                                otherwise XML_TOK_INVALID.
                              - The name ends at whitespace, ")", ">", "%" or "|": returns
                                XML_TOK_POUND_NAME with *nextTokPtr on that delimiter.
                              - Any other non-name character yields XML_TOK_INVALID.
                              - If the input ends while still inside the name, the negated
                                code -XML_TOK_POUND_NAME is returned and *nextTokPtr is not
                                written on that path.
                              NOTE(review): REQUIRE_CHAR, HAS_CHAR and the CHECK_*_CASES
                              macros are defined outside this excerpt. */
1.5     ! spz       912:   REQUIRE_CHAR(enc, ptr, end);
1.1       tron      913:   switch (BYTE_TYPE(enc, ptr)) {
                    914:   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
                    915:   default:
                    916:     *nextTokPtr = ptr;
                    917:     return XML_TOK_INVALID;
                    918:   }
1.5     ! spz       919:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      920:     switch (BYTE_TYPE(enc, ptr)) {
                    921:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                    922:     case BT_CR: case BT_LF: case BT_S:
                    923:     case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
                    924:       *nextTokPtr = ptr;
                    925:       return XML_TOK_POUND_NAME;
                    926:     default:
                    927:       *nextTokPtr = ptr;
                    928:       return XML_TOK_INVALID;
                    929:     }
                    930:   }
                    931:   return -XML_TOK_POUND_NAME;
                    932: }
                    933:
                    934: static int PTRCALL
                    935: PREFIX(scanLit)(int open, const ENCODING *enc,
                    936:                 const char *ptr, const char *end,
                    937:                 const char **nextTokPtr)
                    938: {
                           /* Scans a quoted literal; ptr is just past the opening quote
                              and open is the byte type of that quote (prologTok passes
                              BT_QUOT or BT_APOS).
                              - A quote of the other kind is ordinary literal content.
                              - After the matching quote the next character must be
                                whitespace, ">", "%" or "[": XML_TOK_LITERAL, otherwise
                                XML_TOK_INVALID; in both cases *nextTokPtr is just past
                                the closing quote.
                              - If the input ends right after the closing quote,
                                -XML_TOK_LITERAL is returned and *nextTokPtr is not
                                written on that path.
                              - XML_TOK_PARTIAL when no closing quote is found.
                              INVALID_CASES handles multi-byte lead bytes and rejects
                              BT_NONXML / BT_MALFORM / BT_TRAIL characters. */
1.5     ! spz       939:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron      940:     int t = BYTE_TYPE(enc, ptr);
                    941:     switch (t) {
                    942:     INVALID_CASES(ptr, nextTokPtr)
                    943:     case BT_QUOT:
                    944:     case BT_APOS:
                    945:       ptr += MINBPC(enc);
                               /* the other kind of quote does not end the literal */
                    946:       if (t != open)
                    947:         break;
1.5     ! spz       948:       if (! HAS_CHAR(enc, ptr, end))
1.1       tron      949:         return -XML_TOK_LITERAL;
                    950:       *nextTokPtr = ptr;
                    951:       switch (BYTE_TYPE(enc, ptr)) {
                    952:       case BT_S: case BT_CR: case BT_LF:
                    953:       case BT_GT: case BT_PERCNT: case BT_LSQB:
                    954:         return XML_TOK_LITERAL;
                    955:       default:
                    956:         return XML_TOK_INVALID;
                    957:       }
                    958:     default:
                    959:       ptr += MINBPC(enc);
                    960:       break;
                    961:     }
                    962:   }
                    963:   return XML_TOK_PARTIAL;
                    964: }
                    965:
                    966: static int PTRCALL
                    967: PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
                    968:                   const char **nextTokPtr)
                    969: {
                           /* Returns the next token of the prolog / DTD in [ptr, end).
                              The first character selects the token:
                              - a quote starts a literal (scanLit);
                              - "<" followed by "!" or "?" is delegated to scanDecl / scanPi,
                                "<" followed by a name-start type is XML_TOK_INSTANCE_START
                                with *nextTokPtr set back onto the "<";
                              - whitespace is collected into XML_TOK_PROLOG_S;
                              - "%" goes to scanPercent and "#" (BT_NUM) to scanPoundName;
                              - the punctuation cases map to their XML_TOK_* codes;
                              - name and name-token characters start the scan in the final
                                loop, which returns XML_TOK_NAME, XML_TOK_NMTOKEN,
                                XML_TOK_PREFIXED_NAME or one of the XML_TOK_NAME_* suffix
                                tokens.
                              XML_TOK_NONE is returned for an empty range.  Several paths
                              return a negated token code when the input ends directly
                              after the token; presumably the caller treats that as "this
                              token, unless more input changes it" -- verify against the
                              callers.
                              NOTE(review): REQUIRE_CHAR, REQUIRE_CHARS, HAS_CHAR and
                              CHECK_NAME_CASES are defined outside this excerpt. */
                    970:   int tok;
1.4       spz       971:   if (ptr >= end)
1.1       tron      972:     return XML_TOK_NONE;
                    973:   if (MINBPC(enc) > 1) {
                             /* Cut off a trailing fragment shorter than one code unit; the
                                mask arithmetic assumes MINBPC(enc) is a power of two. */
                    974:     size_t n = end - ptr;
                    975:     if (n & (MINBPC(enc) - 1)) {
                    976:       n &= ~(MINBPC(enc) - 1);
                    977:       if (n == 0)
                    978:         return XML_TOK_PARTIAL;
                    979:       end = ptr + n;
                    980:     }
                    981:   }
                    982:   switch (BYTE_TYPE(enc, ptr)) {
                    983:   case BT_QUOT:
                    984:     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
                    985:   case BT_APOS:
                    986:     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
                    987:   case BT_LT:
                    988:     {
                    989:       ptr += MINBPC(enc);
1.5     ! spz       990:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron      991:       switch (BYTE_TYPE(enc, ptr)) {
                    992:       case BT_EXCL:
                    993:         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    994:       case BT_QUEST:
                    995:         return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                    996:       case BT_NMSTRT:
                    997:       case BT_HEX:
                    998:       case BT_NONASCII:
                    999:       case BT_LEAD2:
                   1000:       case BT_LEAD3:
                   1001:       case BT_LEAD4:
                                 /* a start-tag begins here: report the position of the "<"
                                    itself rather than consuming it */
                   1002:         *nextTokPtr = ptr - MINBPC(enc);
                   1003:         return XML_TOK_INSTANCE_START;
                   1004:       }
                   1005:       *nextTokPtr = ptr;
                   1006:       return XML_TOK_INVALID;
                   1007:     }
                   1008:   case BT_CR:
                   1009:     if (ptr + MINBPC(enc) == end) {
                   1010:       *nextTokPtr = end;
                   1011:       /* indicate that this might be part of a CR/LF pair */
                   1012:       return -XML_TOK_PROLOG_S;
                   1013:     }
                   1014:     /* fall through */
                   1015:   case BT_S: case BT_LF:
                             /* consume the rest of the whitespace run */
                   1016:     for (;;) {
                   1017:       ptr += MINBPC(enc);
1.5     ! spz      1018:       if (! HAS_CHAR(enc, ptr, end))
1.1       tron     1019:         break;
                   1020:       switch (BYTE_TYPE(enc, ptr)) {
                   1021:       case BT_S: case BT_LF:
                   1022:         break;
                   1023:       case BT_CR:
                   1024:         /* don't split CR/LF pair */
                   1025:         if (ptr + MINBPC(enc) != end)
                   1026:           break;
                   1027:         /* fall through */
                   1028:       default:
                   1029:         *nextTokPtr = ptr;
                   1030:         return XML_TOK_PROLOG_S;
                   1031:       }
                   1032:     }
                   1033:     *nextTokPtr = ptr;
                   1034:     return XML_TOK_PROLOG_S;
                   1035:   case BT_PERCNT:
                   1036:     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                   1037:   case BT_COMMA:
                   1038:     *nextTokPtr = ptr + MINBPC(enc);
                   1039:     return XML_TOK_COMMA;
                   1040:   case BT_LSQB:
                   1041:     *nextTokPtr = ptr + MINBPC(enc);
                   1042:     return XML_TOK_OPEN_BRACKET;
                   1043:   case BT_RSQB:
                   1044:     ptr += MINBPC(enc);
1.5     ! spz      1045:     if (! HAS_CHAR(enc, ptr, end))
1.1       tron     1046:       return -XML_TOK_CLOSE_BRACKET;
                             /* "]]>" closes a conditional section; any other "]" is a
                                plain close bracket ending just after the first "]" */
                   1047:     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1.5     ! spz      1048:       REQUIRE_CHARS(enc, ptr, end, 2);
1.1       tron     1049:       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
                   1050:         *nextTokPtr = ptr + 2*MINBPC(enc);
                   1051:         return XML_TOK_COND_SECT_CLOSE;
                   1052:       }
                   1053:     }
                   1054:     *nextTokPtr = ptr;
                   1055:     return XML_TOK_CLOSE_BRACKET;
                   1056:   case BT_LPAR:
                   1057:     *nextTokPtr = ptr + MINBPC(enc);
                   1058:     return XML_TOK_OPEN_PAREN;
                   1059:   case BT_RPAR:
                   1060:     ptr += MINBPC(enc);
1.5     ! spz      1061:     if (! HAS_CHAR(enc, ptr, end))
1.1       tron     1062:       return -XML_TOK_CLOSE_PAREN;
                             /* ")" may carry a "*", "?" or "+" suffix, which is consumed
                                as part of the token; the other accepted followers are
                                left for the next call */
                   1063:     switch (BYTE_TYPE(enc, ptr)) {
                   1064:     case BT_AST:
                   1065:       *nextTokPtr = ptr + MINBPC(enc);
                   1066:       return XML_TOK_CLOSE_PAREN_ASTERISK;
                   1067:     case BT_QUEST:
                   1068:       *nextTokPtr = ptr + MINBPC(enc);
                   1069:       return XML_TOK_CLOSE_PAREN_QUESTION;
                   1070:     case BT_PLUS:
                   1071:       *nextTokPtr = ptr + MINBPC(enc);
                   1072:       return XML_TOK_CLOSE_PAREN_PLUS;
                   1073:     case BT_CR: case BT_LF: case BT_S:
                   1074:     case BT_GT: case BT_COMMA: case BT_VERBAR:
                   1075:     case BT_RPAR:
                   1076:       *nextTokPtr = ptr;
                   1077:       return XML_TOK_CLOSE_PAREN;
                   1078:     }
                   1079:     *nextTokPtr = ptr;
                   1080:     return XML_TOK_INVALID;
                   1081:   case BT_VERBAR:
                   1082:     *nextTokPtr = ptr + MINBPC(enc);
                   1083:     return XML_TOK_OR;
                   1084:   case BT_GT:
                   1085:     *nextTokPtr = ptr + MINBPC(enc);
                   1086:     return XML_TOK_DECL_CLOSE;
                   1087:   case BT_NUM:
                   1088:     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                   1089: #define LEAD_CASE(n) \
                   1090:   case BT_LEAD ## n: \
                   1091:     if (end - ptr < n) \
                   1092:       return XML_TOK_PARTIAL_CHAR; \
                   1093:     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
                   1094:       ptr += n; \
                   1095:       tok = XML_TOK_NAME; \
                   1096:       break; \
                   1097:     } \
                   1098:     if (IS_NAME_CHAR(enc, ptr, n)) { \
                   1099:       ptr += n; \
                   1100:       tok = XML_TOK_NMTOKEN; \
                   1101:       break; \
                   1102:     } \
                   1103:     *nextTokPtr = ptr; \
                   1104:     return XML_TOK_INVALID;
                   1105:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1106: #undef LEAD_CASE
                   1107:   case BT_NMSTRT:
                   1108:   case BT_HEX:
                   1109:     tok = XML_TOK_NAME;
                   1110:     ptr += MINBPC(enc);
                   1111:     break;
                   1112:   case BT_DIGIT:
                   1113:   case BT_NAME:
                   1114:   case BT_MINUS:
                   1115: #ifdef XML_NS
                   1116:   case BT_COLON:
                   1117: #endif
                   1118:     tok = XML_TOK_NMTOKEN;
                   1119:     ptr += MINBPC(enc);
                   1120:     break;
                   1121:   case BT_NONASCII:
                   1122:     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
                   1123:       ptr += MINBPC(enc);
                   1124:       tok = XML_TOK_NAME;
                   1125:       break;
                   1126:     }
                   1127:     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
                   1128:       ptr += MINBPC(enc);
                   1129:       tok = XML_TOK_NMTOKEN;
                   1130:       break;
                   1131:     }
                   1132:     /* fall through */
                   1133:   default:
                   1134:     *nextTokPtr = ptr;
                   1135:     return XML_TOK_INVALID;
                   1136:   }
                           /* Only the name / name-token cases break out of the switch
                              above; tok holds the classification of the first character
                              and the loop consumes the rest of the token. */
1.5     ! spz      1137:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron     1138:     switch (BYTE_TYPE(enc, ptr)) {
                   1139:     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                   1140:     case BT_GT: case BT_RPAR: case BT_COMMA:
                   1141:     case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
                   1142:     case BT_S: case BT_CR: case BT_LF:
                   1143:       *nextTokPtr = ptr;
                   1144:       return tok;
                   1145: #ifdef XML_NS
                   1146:     case BT_COLON:
                   1147:       ptr += MINBPC(enc);
                               /* The first colon turns a NAME into a PREFIXED_NAME when a
                                  name character follows; a colon followed by anything
                                  else, or a second colon, downgrades the token to
                                  NMTOKEN. */
                   1148:       switch (tok) {
                   1149:       case XML_TOK_NAME:
1.5     ! spz      1150:         REQUIRE_CHAR(enc, ptr, end);
1.1       tron     1151:         tok = XML_TOK_PREFIXED_NAME;
                   1152:         switch (BYTE_TYPE(enc, ptr)) {
                   1153:         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
                   1154:         default:
                   1155:           tok = XML_TOK_NMTOKEN;
                   1156:           break;
                   1157:         }
                   1158:         break;
                   1159:       case XML_TOK_PREFIXED_NAME:
                   1160:         tok = XML_TOK_NMTOKEN;
                   1161:         break;
                   1162:       }
                   1163:       break;
                   1164: #endif
                             /* "+", "*" and "?" suffixes are rejected after an NMTOKEN;
                                otherwise the suffix is consumed as part of the token */
                   1165:     case BT_PLUS:
                   1166:       if (tok == XML_TOK_NMTOKEN)  {
                   1167:         *nextTokPtr = ptr;
                   1168:         return XML_TOK_INVALID;
                   1169:       }
                   1170:       *nextTokPtr = ptr + MINBPC(enc);
                   1171:       return XML_TOK_NAME_PLUS;
                   1172:     case BT_AST:
                   1173:       if (tok == XML_TOK_NMTOKEN)  {
                   1174:         *nextTokPtr = ptr;
                   1175:         return XML_TOK_INVALID;
                   1176:       }
                   1177:       *nextTokPtr = ptr + MINBPC(enc);
                   1178:       return XML_TOK_NAME_ASTERISK;
                   1179:     case BT_QUEST:
                   1180:       if (tok == XML_TOK_NMTOKEN)  {
                   1181:         *nextTokPtr = ptr;
                   1182:         return XML_TOK_INVALID;
                   1183:       }
                   1184:       *nextTokPtr = ptr + MINBPC(enc);
                   1185:       return XML_TOK_NAME_QUESTION;
                   1186:     default:
                   1187:       *nextTokPtr = ptr;
                   1188:       return XML_TOK_INVALID;
                   1189:     }
                   1190:   }
                           /* input ended inside the token: report it negated; *nextTokPtr
                              is not written on this path */
                   1191:   return -tok;
                   1192: }
                   1193:
                   1194: static int PTRCALL
                   1195: PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
                   1196:                           const char *end, const char **nextTokPtr)
                   1197: {
                           /* Returns the next token inside an attribute value in [ptr, end).
                              A special character is tokenized on its own only when it is
                              the first character of the range; otherwise the data before
                              it is returned first as XML_TOK_DATA_CHARS, with *nextTokPtr
                              on the special character.
                              - "&" at the start is delegated to scanRef.
                              - "<" yields XML_TOK_INVALID.
                              - LF, or CR optionally followed by LF, at the start yields
                                XML_TOK_DATA_NEWLINE; a CR that is the last character
                                yields XML_TOK_TRAILING_CR.
                              - BT_S at the start yields XML_TOK_ATTRIBUTE_VALUE_S.
                              XML_TOK_NONE is returned for an empty range and
                              XML_TOK_PARTIAL when HAS_CHAR rejects the first position.
                              NOTE(review): HAS_CHAR is defined outside this excerpt;
                              presumably it tests that one complete character remains. */
                   1198:   const char *start;
1.4       spz      1199:   if (ptr >= end)
1.1       tron     1200:     return XML_TOK_NONE;
1.5     ! spz      1201:   else if (! HAS_CHAR(enc, ptr, end))
        !          1202:     return XML_TOK_PARTIAL;
1.1       tron     1203:   start = ptr;
1.5     ! spz      1204:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron     1205:     switch (BYTE_TYPE(enc, ptr)) {
                             /* multi-byte lead bytes are skipped by their length with no
                                validity or bounds check in this function */
                   1206: #define LEAD_CASE(n) \
                   1207:     case BT_LEAD ## n: ptr += n; break;
                   1208:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1209: #undef LEAD_CASE
                   1210:     case BT_AMP:
                   1211:       if (ptr == start)
                   1212:         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                   1213:       *nextTokPtr = ptr;
                   1214:       return XML_TOK_DATA_CHARS;
                   1215:     case BT_LT:
                   1216:       /* this is for inside entity references */
                   1217:       *nextTokPtr = ptr;
                   1218:       return XML_TOK_INVALID;
                   1219:     case BT_LF:
                   1220:       if (ptr == start) {
                   1221:         *nextTokPtr = ptr + MINBPC(enc);
                   1222:         return XML_TOK_DATA_NEWLINE;
                   1223:       }
                   1224:       *nextTokPtr = ptr;
                   1225:       return XML_TOK_DATA_CHARS;
                   1226:     case BT_CR:
                   1227:       if (ptr == start) {
                   1228:         ptr += MINBPC(enc);
1.5     ! spz      1229:         if (! HAS_CHAR(enc, ptr, end))
1.1       tron     1230:           return XML_TOK_TRAILING_CR;
                                 /* a LF directly after the CR belongs to the same newline */
                   1231:         if (BYTE_TYPE(enc, ptr) == BT_LF)
                   1232:           ptr += MINBPC(enc);
                   1233:         *nextTokPtr = ptr;
                   1234:         return XML_TOK_DATA_NEWLINE;
                   1235:       }
                   1236:       *nextTokPtr = ptr;
                   1237:       return XML_TOK_DATA_CHARS;
                   1238:     case BT_S:
                   1239:       if (ptr == start) {
                   1240:         *nextTokPtr = ptr + MINBPC(enc);
                   1241:         return XML_TOK_ATTRIBUTE_VALUE_S;
                   1242:       }
                   1243:       *nextTokPtr = ptr;
                   1244:       return XML_TOK_DATA_CHARS;
                   1245:     default:
                   1246:       ptr += MINBPC(enc);
                   1247:       break;
                   1248:     }
                   1249:   }
                   1250:   *nextTokPtr = ptr;
                   1251:   return XML_TOK_DATA_CHARS;
                   1252: }
                   1253:
                   1254: static int PTRCALL
                   1255: PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
                   1256:                        const char *end, const char **nextTokPtr)
                   1257: {
                   1258:   const char *start;
1.4       spz      1259:   if (ptr >= end)
1.1       tron     1260:     return XML_TOK_NONE;
1.5     ! spz      1261:   else if (! HAS_CHAR(enc, ptr, end))
        !          1262:     return XML_TOK_PARTIAL;
1.1       tron     1263:   start = ptr;
1.5     ! spz      1264:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron     1265:     switch (BYTE_TYPE(enc, ptr)) {
                   1266: #define LEAD_CASE(n) \
                   1267:     case BT_LEAD ## n: ptr += n; break;
                   1268:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1269: #undef LEAD_CASE
                   1270:     case BT_AMP:
                   1271:       if (ptr == start)
                   1272:         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
                   1273:       *nextTokPtr = ptr;
                   1274:       return XML_TOK_DATA_CHARS;
                   1275:     case BT_PERCNT:
                   1276:       if (ptr == start) {
                   1277:         int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
                   1278:                                        end, nextTokPtr);
                   1279:         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
                   1280:       }
                   1281:       *nextTokPtr = ptr;
                   1282:       return XML_TOK_DATA_CHARS;
                   1283:     case BT_LF:
                   1284:       if (ptr == start) {
                   1285:         *nextTokPtr = ptr + MINBPC(enc);
                   1286:         return XML_TOK_DATA_NEWLINE;
                   1287:       }
                   1288:       *nextTokPtr = ptr;
                   1289:       return XML_TOK_DATA_CHARS;
                   1290:     case BT_CR:
                   1291:       if (ptr == start) {
                   1292:         ptr += MINBPC(enc);
1.5     ! spz      1293:         if (! HAS_CHAR(enc, ptr, end))
1.1       tron     1294:           return XML_TOK_TRAILING_CR;
                   1295:         if (BYTE_TYPE(enc, ptr) == BT_LF)
                   1296:           ptr += MINBPC(enc);
                   1297:         *nextTokPtr = ptr;
                   1298:         return XML_TOK_DATA_NEWLINE;
                   1299:       }
                   1300:       *nextTokPtr = ptr;
                   1301:       return XML_TOK_DATA_CHARS;
                   1302:     default:
                   1303:       ptr += MINBPC(enc);
                   1304:       break;
                   1305:     }
                   1306:   }
                   1307:   *nextTokPtr = ptr;
                   1308:   return XML_TOK_DATA_CHARS;
                   1309: }
                   1310:
                   1311: #ifdef XML_DTD
                   1312:
                   1313: static int PTRCALL
                   1314: PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
                   1315:                          const char *end, const char **nextTokPtr)
                   1316: {
                   1317:   int level = 0;
                   1318:   if (MINBPC(enc) > 1) {
                   1319:     size_t n = end - ptr;
                   1320:     if (n & (MINBPC(enc) - 1)) {
                   1321:       n &= ~(MINBPC(enc) - 1);
                   1322:       end = ptr + n;
                   1323:     }
                   1324:   }
1.5     ! spz      1325:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron     1326:     switch (BYTE_TYPE(enc, ptr)) {
                   1327:     INVALID_CASES(ptr, nextTokPtr)
                   1328:     case BT_LT:
1.5     ! spz      1329:       ptr += MINBPC(enc);
        !          1330:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron     1331:       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1.5     ! spz      1332:         ptr += MINBPC(enc);
        !          1333:         REQUIRE_CHAR(enc, ptr, end);
1.1       tron     1334:         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
                   1335:           ++level;
                   1336:           ptr += MINBPC(enc);
                   1337:         }
                   1338:       }
                   1339:       break;
                   1340:     case BT_RSQB:
1.5     ! spz      1341:       ptr += MINBPC(enc);
        !          1342:       REQUIRE_CHAR(enc, ptr, end);
1.1       tron     1343:       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1.5     ! spz      1344:         ptr += MINBPC(enc);
        !          1345:         REQUIRE_CHAR(enc, ptr, end);
1.1       tron     1346:         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
                   1347:           ptr += MINBPC(enc);
                   1348:           if (level == 0) {
                   1349:             *nextTokPtr = ptr;
                   1350:             return XML_TOK_IGNORE_SECT;
                   1351:           }
                   1352:           --level;
                   1353:         }
                   1354:       }
                   1355:       break;
                   1356:     default:
                   1357:       ptr += MINBPC(enc);
                   1358:       break;
                   1359:     }
                   1360:   }
                   1361:   return XML_TOK_PARTIAL;
                   1362: }
                   1363:
                   1364: #endif /* XML_DTD */
                   1365:
                   1366: static int PTRCALL
                   1367: PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
                   1368:                    const char **badPtr)
                   1369: {
                   1370:   ptr += MINBPC(enc);
                   1371:   end -= MINBPC(enc);
1.5     ! spz      1372:   for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1.1       tron     1373:     switch (BYTE_TYPE(enc, ptr)) {
                   1374:     case BT_DIGIT:
                   1375:     case BT_HEX:
                   1376:     case BT_MINUS:
                   1377:     case BT_APOS:
                   1378:     case BT_LPAR:
                   1379:     case BT_RPAR:
                   1380:     case BT_PLUS:
                   1381:     case BT_COMMA:
                   1382:     case BT_SOL:
                   1383:     case BT_EQUALS:
                   1384:     case BT_QUEST:
                   1385:     case BT_CR:
                   1386:     case BT_LF:
                   1387:     case BT_SEMI:
                   1388:     case BT_EXCL:
                   1389:     case BT_AST:
                   1390:     case BT_PERCNT:
                   1391:     case BT_NUM:
                   1392: #ifdef XML_NS
                   1393:     case BT_COLON:
                   1394: #endif
                   1395:       break;
                   1396:     case BT_S:
                   1397:       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
                   1398:         *badPtr = ptr;
                   1399:         return 0;
                   1400:       }
                   1401:       break;
                   1402:     case BT_NAME:
                   1403:     case BT_NMSTRT:
                   1404:       if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
                   1405:         break;
                   1406:     default:
                   1407:       switch (BYTE_TO_ASCII(enc, ptr)) {
                   1408:       case 0x24: /* $ */
                   1409:       case 0x40: /* @ */
                   1410:         break;
                   1411:       default:
                   1412:         *badPtr = ptr;
                   1413:         return 0;
                   1414:       }
                   1415:       break;
                   1416:     }
                   1417:   }
                   1418:   return 1;
                   1419: }
                   1420:
                   1421: /* This must only be called for a well-formed start-tag or empty
                   1422:    element tag.  Returns the number of attributes.  Pointers to the
                   1423:    first attsMax attributes are stored in atts.
                   1424: */
                   1425:
                   1426: static int PTRCALL
                   1427: PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
                   1428:                 int attsMax, ATTRIBUTE *atts)
                   1429: {
                   1430:   enum { other, inName, inValue } state = inName;
                   1431:   int nAtts = 0;
                   1432:   int open = 0; /* defined when state == inValue;
                   1433:                    initialization just to shut up compilers */
                   1434:
                   1435:   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
                   1436:     switch (BYTE_TYPE(enc, ptr)) {
                   1437: #define START_NAME \
                   1438:       if (state == other) { \
                   1439:         if (nAtts < attsMax) { \
                   1440:           atts[nAtts].name = ptr; \
                   1441:           atts[nAtts].normalized = 1; \
                   1442:         } \
                   1443:         state = inName; \
                   1444:       }
                   1445: #define LEAD_CASE(n) \
                   1446:     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
                   1447:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1448: #undef LEAD_CASE
                   1449:     case BT_NONASCII:
                   1450:     case BT_NMSTRT:
                   1451:     case BT_HEX:
                   1452:       START_NAME
                   1453:       break;
                   1454: #undef START_NAME
                   1455:     case BT_QUOT:
                   1456:       if (state != inValue) {
                   1457:         if (nAtts < attsMax)
                   1458:           atts[nAtts].valuePtr = ptr + MINBPC(enc);
                   1459:         state = inValue;
                   1460:         open = BT_QUOT;
                   1461:       }
                   1462:       else if (open == BT_QUOT) {
                   1463:         state = other;
                   1464:         if (nAtts < attsMax)
                   1465:           atts[nAtts].valueEnd = ptr;
                   1466:         nAtts++;
                   1467:       }
                   1468:       break;
                   1469:     case BT_APOS:
                   1470:       if (state != inValue) {
                   1471:         if (nAtts < attsMax)
                   1472:           atts[nAtts].valuePtr = ptr + MINBPC(enc);
                   1473:         state = inValue;
                   1474:         open = BT_APOS;
                   1475:       }
                   1476:       else if (open == BT_APOS) {
                   1477:         state = other;
                   1478:         if (nAtts < attsMax)
                   1479:           atts[nAtts].valueEnd = ptr;
                   1480:         nAtts++;
                   1481:       }
                   1482:       break;
                   1483:     case BT_AMP:
                   1484:       if (nAtts < attsMax)
                   1485:         atts[nAtts].normalized = 0;
                   1486:       break;
                   1487:     case BT_S:
                   1488:       if (state == inName)
                   1489:         state = other;
                   1490:       else if (state == inValue
                   1491:                && nAtts < attsMax
                   1492:                && atts[nAtts].normalized
                   1493:                && (ptr == atts[nAtts].valuePtr
                   1494:                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
                   1495:                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
                   1496:                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
                   1497:         atts[nAtts].normalized = 0;
                   1498:       break;
                   1499:     case BT_CR: case BT_LF:
                   1500:       /* This case ensures that the first attribute name is counted
                   1501:          Apart from that we could just change state on the quote. */
                   1502:       if (state == inName)
                   1503:         state = other;
                   1504:       else if (state == inValue && nAtts < attsMax)
                   1505:         atts[nAtts].normalized = 0;
                   1506:       break;
                   1507:     case BT_GT:
                   1508:     case BT_SOL:
                   1509:       if (state != inValue)
                   1510:         return nAtts;
                   1511:       break;
                   1512:     default:
                   1513:       break;
                   1514:     }
                   1515:   }
                   1516:   /* not reached */
                   1517: }
                   1518:
                   1519: static int PTRFASTCALL
1.5     ! spz      1520: PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
1.1       tron     1521: {
                   1522:   int result = 0;
                   1523:   /* skip &# */
                   1524:   ptr += 2*MINBPC(enc);
                   1525:   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
                   1526:     for (ptr += MINBPC(enc);
                   1527:          !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
                   1528:          ptr += MINBPC(enc)) {
                   1529:       int c = BYTE_TO_ASCII(enc, ptr);
                   1530:       switch (c) {
                   1531:       case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
                   1532:       case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
                   1533:         result <<= 4;
                   1534:         result |= (c - ASCII_0);
                   1535:         break;
                   1536:       case ASCII_A: case ASCII_B: case ASCII_C:
                   1537:       case ASCII_D: case ASCII_E: case ASCII_F:
                   1538:         result <<= 4;
                   1539:         result += 10 + (c - ASCII_A);
                   1540:         break;
                   1541:       case ASCII_a: case ASCII_b: case ASCII_c:
                   1542:       case ASCII_d: case ASCII_e: case ASCII_f:
                   1543:         result <<= 4;
                   1544:         result += 10 + (c - ASCII_a);
                   1545:         break;
                   1546:       }
                   1547:       if (result >= 0x110000)
                   1548:         return -1;
                   1549:     }
                   1550:   }
                   1551:   else {
                   1552:     for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
                   1553:       int c = BYTE_TO_ASCII(enc, ptr);
                   1554:       result *= 10;
                   1555:       result += (c - ASCII_0);
                   1556:       if (result >= 0x110000)
                   1557:         return -1;
                   1558:     }
                   1559:   }
                   1560:   return checkCharRefNumber(result);
                   1561: }
                   1562:
                   1563: static int PTRCALL
1.5     ! spz      1564: PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
1.1       tron     1565:                              const char *end)
                   1566: {
                   1567:   switch ((end - ptr)/MINBPC(enc)) {
                   1568:   case 2:
                   1569:     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
                   1570:       switch (BYTE_TO_ASCII(enc, ptr)) {
                   1571:       case ASCII_l:
                   1572:         return ASCII_LT;
                   1573:       case ASCII_g:
                   1574:         return ASCII_GT;
                   1575:       }
                   1576:     }
                   1577:     break;
                   1578:   case 3:
                   1579:     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
                   1580:       ptr += MINBPC(enc);
                   1581:       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
                   1582:         ptr += MINBPC(enc);
                   1583:         if (CHAR_MATCHES(enc, ptr, ASCII_p))
                   1584:           return ASCII_AMP;
                   1585:       }
                   1586:     }
                   1587:     break;
                   1588:   case 4:
                   1589:     switch (BYTE_TO_ASCII(enc, ptr)) {
                   1590:     case ASCII_q:
                   1591:       ptr += MINBPC(enc);
                   1592:       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
                   1593:         ptr += MINBPC(enc);
                   1594:         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
                   1595:           ptr += MINBPC(enc);
                   1596:           if (CHAR_MATCHES(enc, ptr, ASCII_t))
                   1597:             return ASCII_QUOT;
                   1598:         }
                   1599:       }
                   1600:       break;
                   1601:     case ASCII_a:
                   1602:       ptr += MINBPC(enc);
                   1603:       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
                   1604:         ptr += MINBPC(enc);
                   1605:         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
                   1606:           ptr += MINBPC(enc);
                   1607:           if (CHAR_MATCHES(enc, ptr, ASCII_s))
                   1608:             return ASCII_APOS;
                   1609:         }
                   1610:       }
                   1611:       break;
                   1612:     }
                   1613:   }
                   1614:   return 0;
                   1615: }
                   1616:
                   1617: static int PTRCALL
                   1618: PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
                   1619: {
                   1620:   for (;;) {
                   1621:     switch (BYTE_TYPE(enc, ptr1)) {
                   1622: #define LEAD_CASE(n) \
                   1623:     case BT_LEAD ## n: \
                   1624:       if (*ptr1++ != *ptr2++) \
                   1625:         return 0;
                   1626:     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
                   1627: #undef LEAD_CASE
                   1628:       /* fall through */
                   1629:       if (*ptr1++ != *ptr2++)
                   1630:         return 0;
                   1631:       break;
                   1632:     case BT_NONASCII:
                   1633:     case BT_NMSTRT:
                   1634: #ifdef XML_NS
                   1635:     case BT_COLON:
                   1636: #endif
                   1637:     case BT_HEX:
                   1638:     case BT_DIGIT:
                   1639:     case BT_NAME:
                   1640:     case BT_MINUS:
                   1641:       if (*ptr2++ != *ptr1++)
                   1642:         return 0;
                   1643:       if (MINBPC(enc) > 1) {
                   1644:         if (*ptr2++ != *ptr1++)
                   1645:           return 0;
                   1646:         if (MINBPC(enc) > 2) {
                   1647:           if (*ptr2++ != *ptr1++)
                   1648:             return 0;
                   1649:           if (MINBPC(enc) > 3) {
                   1650:             if (*ptr2++ != *ptr1++)
                   1651:               return 0;
                   1652:           }
                   1653:         }
                   1654:       }
                   1655:       break;
                   1656:     default:
                   1657:       if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
                   1658:         return 1;
                   1659:       switch (BYTE_TYPE(enc, ptr2)) {
                   1660:       case BT_LEAD2:
                   1661:       case BT_LEAD3:
                   1662:       case BT_LEAD4:
                   1663:       case BT_NONASCII:
                   1664:       case BT_NMSTRT:
                   1665: #ifdef XML_NS
                   1666:       case BT_COLON:
                   1667: #endif
                   1668:       case BT_HEX:
                   1669:       case BT_DIGIT:
                   1670:       case BT_NAME:
                   1671:       case BT_MINUS:
                   1672:         return 0;
                   1673:       default:
                   1674:         return 1;
                   1675:       }
                   1676:     }
                   1677:   }
                   1678:   /* not reached */
                   1679: }
                   1680:
                   1681: static int PTRCALL
1.5     ! spz      1682: PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
1.1       tron     1683:                          const char *end1, const char *ptr2)
                   1684: {
                   1685:   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1.5     ! spz      1686:     if (end1 - ptr1 < MINBPC(enc))
1.1       tron     1687:       return 0;
                   1688:     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
                   1689:       return 0;
                   1690:   }
                   1691:   return ptr1 == end1;
                   1692: }
                   1693:
                   1694: static int PTRFASTCALL
                   1695: PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
                   1696: {
                   1697:   const char *start = ptr;
                   1698:   for (;;) {
                   1699:     switch (BYTE_TYPE(enc, ptr)) {
                   1700: #define LEAD_CASE(n) \
                   1701:     case BT_LEAD ## n: ptr += n; break;
                   1702:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1703: #undef LEAD_CASE
                   1704:     case BT_NONASCII:
                   1705:     case BT_NMSTRT:
                   1706: #ifdef XML_NS
                   1707:     case BT_COLON:
                   1708: #endif
                   1709:     case BT_HEX:
                   1710:     case BT_DIGIT:
                   1711:     case BT_NAME:
                   1712:     case BT_MINUS:
                   1713:       ptr += MINBPC(enc);
                   1714:       break;
                   1715:     default:
                   1716:       return (int)(ptr - start);
                   1717:     }
                   1718:   }
                   1719: }
                   1720:
                   1721: static const char * PTRFASTCALL
                   1722: PREFIX(skipS)(const ENCODING *enc, const char *ptr)
                   1723: {
                   1724:   for (;;) {
                   1725:     switch (BYTE_TYPE(enc, ptr)) {
                   1726:     case BT_LF:
                   1727:     case BT_CR:
                   1728:     case BT_S:
                   1729:       ptr += MINBPC(enc);
                   1730:       break;
                   1731:     default:
                   1732:       return ptr;
                   1733:     }
                   1734:   }
                   1735: }
                   1736:
                   1737: static void PTRCALL
                   1738: PREFIX(updatePosition)(const ENCODING *enc,
                   1739:                        const char *ptr,
                   1740:                        const char *end,
                   1741:                        POSITION *pos)
                   1742: {
1.5     ! spz      1743:   while (HAS_CHAR(enc, ptr, end)) {
1.1       tron     1744:     switch (BYTE_TYPE(enc, ptr)) {
                   1745: #define LEAD_CASE(n) \
                   1746:     case BT_LEAD ## n: \
                   1747:       ptr += n; \
                   1748:       break;
                   1749:     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
                   1750: #undef LEAD_CASE
                   1751:     case BT_LF:
                   1752:       pos->columnNumber = (XML_Size)-1;
                   1753:       pos->lineNumber++;
                   1754:       ptr += MINBPC(enc);
                   1755:       break;
                   1756:     case BT_CR:
                   1757:       pos->lineNumber++;
                   1758:       ptr += MINBPC(enc);
1.5     ! spz      1759:       if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1.1       tron     1760:         ptr += MINBPC(enc);
                   1761:       pos->columnNumber = (XML_Size)-1;
                   1762:       break;
                   1763:     default:
                   1764:       ptr += MINBPC(enc);
                   1765:       break;
                   1766:     }
                   1767:     pos->columnNumber++;
                   1768:   }
                   1769: }
                   1770:
                   1771: #undef DO_LEAD_CASE
                   1772: #undef MULTIBYTE_CASES
                   1773: #undef INVALID_CASES
                   1774: #undef CHECK_NAME_CASE
                   1775: #undef CHECK_NAME_CASES
                   1776: #undef CHECK_NMSTRT_CASE
                   1777: #undef CHECK_NMSTRT_CASES
                   1778:
                   1779: #endif /* XML_TOK_IMPL_C */

CVSweb <webmaster@jp.NetBSD.org>