#include "lib.h" /* Tokenize an entire XML input */ static sxi32 XML_Tokenize(SyStream *pStream, SyToken *pToken, void *pUserData, void *pUnused2) { SyXMLParser *pParse = (SyXMLParser *)pUserData; SyString *pStr; sxi32 rc; int c; /* Jump leading white spaces */ while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0])) { /* Advance the stream cursor */ if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } pStream->zText++; } if(pStream->zText >= pStream->zEnd) { SXUNUSED(pUnused2); /* End of input reached */ return SXERR_EOF; } /* Record token starting position and line */ pToken->nLine = pStream->nLine; pToken->pUserData = 0; pStr = &pToken->sData; SyStringInitFromBuf(pStr, pStream->zText, 0); /* Extract the current token */ c = pStream->zText[0]; if(c == '<') { pStream->zText++; pStr->zString++; if(pStream->zText >= pStream->zEnd) { if(pParse->xError) { rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } /* End of input reached */ return SXERR_EOF; } c = pStream->zText[0]; if(c == '?') { /* Processing instruction */ pStream->zText++; pStr->zString++; pToken->nType = SXML_TOK_PI; while(XLEX_IN_LEN(pStream) >= sizeof("?>") - 1 && SyMemcmp((const void *)pStream->zText, "?>", sizeof("?>") - 1) != 0) { if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } pStream->zText++; } /* Record token length */ pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); if(XLEX_IN_LEN(pStream) < sizeof("?>") - 1) { if(pParse->xError) { rc = pParse->xError("End of input found,but processing instruction was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_EOF; } pStream->zText += sizeof("?>") - 1; } else if(c == '!') { pStream->zText++; if(XLEX_IN_LEN(pStream) >= sizeof("--") - 1 && pStream->zText[0] == '-' && pStream->zText[1] == '-') { /* Comment */ pStream->zText += sizeof("--") - 1; while(XLEX_IN_LEN(pStream) >= sizeof("-->") - 1 && SyMemcmp((const void *)pStream->zText, "-->", sizeof("-->") - 1) != 0) { if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } pStream->zText++; } pStream->zText += sizeof("-->") - 1; /* Tell the lexer to ignore this token */ return SXERR_CONTINUE; } if(XLEX_IN_LEN(pStream) >= sizeof("[CDATA[") - 1 && SyMemcmp((const void *)pStream->zText, "[CDATA[", sizeof("[CDATA[") - 1) == 0) { /* CDATA */ pStream->zText += sizeof("[CDATA[") - 1; pStr->zString = (const char *)pStream->zText; while(XLEX_IN_LEN(pStream) >= sizeof("]]>") - 1 && SyMemcmp((const void *)pStream->zText, "]]>", sizeof("]]>") - 1) != 0) { if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } pStream->zText++; } /* Record token type and length */ pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); pToken->nType = SXML_TOK_CDATA; if(XLEX_IN_LEN(pStream) < sizeof("]]>") - 1) { if(pParse->xError) { rc = pParse->xError("End of input found,but ]]> was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_EOF; } pStream->zText += sizeof("]]>") - 1; return SXRET_OK; } if(XLEX_IN_LEN(pStream) >= sizeof("DOCTYPE") - 1 && SyMemcmp((const void *)pStream->zText, "DOCTYPE", sizeof("DOCTYPE") - 1) == 0) { SyString sDelim = { ">", sizeof(char) }; /* Default delimiter */ int c = 0; /* DOCTYPE */ pStream->zText += sizeof("DOCTYPE") - 1; pStr->zString = (const char *)pStream->zText; /* Check for element declaration */ while(pStream->zText < pStream->zEnd && pStream->zText[0] != '\n') { if(pStream->zText[0] >= 0xc0 || !SyisSpace(pStream->zText[0])) { c = pStream->zText[0]; if(c == '>') { break; } } pStream->zText++; } if(c == '[') { /* Change the delimiter */ SyStringInitFromBuf(&sDelim, "]>", sizeof("]>") - 1); } if(c != '>') { while(XLEX_IN_LEN(pStream) >= sDelim.nByte && SyMemcmp((const void *)pStream->zText, sDelim.zString, sDelim.nByte) != 0) { if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } pStream->zText++; } } /* Record token type and length */ pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); pToken->nType = SXML_TOK_DOCTYPE; if(XLEX_IN_LEN(pStream) < sDelim.nByte) { if(pParse->xError) { rc = pParse->xError("End of input found,but ]> or > was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_EOF; } pStream->zText += sDelim.nByte; return SXRET_OK; } } else { int c; c = pStream->zText[0]; rc = SXRET_OK; pToken->nType = SXML_TOK_START_TAG; if(c == '/') { /* End tag */ pToken->nType = SXML_TOK_END_TAG; pStream->zText++; pStr->zString++; if(pStream->zText >= pStream->zEnd) { if(pParse->xError) { rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_EOF; } c = pStream->zText[0]; } if(c == '>') { /*<>*/ if(pParse->xError) { rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } /* Ignore the token */ return SXERR_CONTINUE; } if(c < 0xc0 && (SyisSpace(c) || SyisDigit(c) || c == '.' || c == '-' || IS_XML_DIRTY(c))) { if(pParse->xError) { rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } rc = SXERR_INVALID; } pStream->zText++; /* Delimit the tag */ while(pStream->zText < pStream->zEnd && pStream->zText[0] != '>') { c = pStream->zText[0]; if(c >= 0xc0) { /* UTF-8 stream */ pStream->zText++; SX_JMP_UTF8(pStream->zText, pStream->zEnd); } else { if(c == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '>') { pStream->zText++; if(pToken->nType != SXML_TOK_START_TAG) { if(pParse->xError) { rc = pParse->xError("Unexpected closing tag,expecting '>'", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } /* Ignore the token */ rc = SXERR_INVALID; } else { pToken->nType = SXML_TOK_START_END; } break; } if(pStream->zText[0] == '\n') { /* Increment line counter */ pStream->nLine++; } /* Advance the stream cursor */ pStream->zText++; } } if(rc != SXRET_OK) { /* Tell the lexer to ignore this token */ return SXERR_CONTINUE; } /* Record token length */ pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); if(pToken->nType == SXML_TOK_START_END && pStr->nByte > 0) { pStr->nByte -= sizeof(char); } if(pStream->zText < pStream->zEnd) { pStream->zText++; } else { if(pParse->xError) { rc = pParse->xError("End of input found,but closing tag '>' was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } } } else { /* Raw input */ while(pStream->zText < pStream->zEnd) { c = pStream->zText[0]; if(c < 0xc0) { if(c == '<') { break; } else if(c == '\n') { /* Increment line counter */ pStream->nLine++; } /* Advance the stream cursor */ pStream->zText++; } else { /* UTF-8 stream */ pStream->zText++; SX_JMP_UTF8(pStream->zText, pStream->zEnd); } } /* Record token type,length */ pToken->nType = SXML_TOK_RAW; pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); } /* Return to the lexer */ return SXRET_OK; } static int XMLCheckDuplicateAttr(SyXMLRawStr *aSet, sxu32 nEntry, SyXMLRawStr *pEntry) { sxu32 n; for(n = 0 ; n < nEntry ; n += 2) { SyXMLRawStr *pAttr = &aSet[n]; if(pAttr->nByte == pEntry->nByte && SyMemcmp(pAttr->zString, pEntry->zString, pEntry->nByte) == 0) { /* Attribute found */ return 1; } } /* No duplicates */ return 0; } static sxi32 XMLProcessNamesSpace(SyXMLParser *pParse, SyXMLRawStrNS *pTag, SyToken *pToken, SySet *pAttr) { SyXMLRawStr *pPrefix, *pUri; /* Namespace prefix/URI */ SyHashEntry *pEntry; SyXMLRawStr *pDup; sxi32 rc; /* Extract the URI first */ pUri = (SyXMLRawStr *)SySetPeek(pAttr); /* Extract the prefix */ pPrefix = (SyXMLRawStr *)SySetAt(pAttr, SySetUsed(pAttr) - 2); /* Prefix name */ if(pPrefix->nByte == sizeof("xmlns") - 1) { /* Default namespace */ pPrefix->nByte = 0; pPrefix->zString = ""; /* Empty string */ } else { pPrefix->nByte -= sizeof("xmlns") - 1; pPrefix->zString += sizeof("xmlns") - 1; if(pPrefix->zString[0] != ':') { return SXRET_OK; } pPrefix->nByte--; pPrefix->zString++; if(pPrefix->nByte < 1) { if(pParse->xError) { rc = pParse->xError("Invalid namespace name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } /* POP the last insertred two entries */ (void)SySetPop(pAttr); (void)SySetPop(pAttr); return SXERR_SYNTAX; } } /* Invoke the namespace callback if available */ if(pParse->xNameSpace) { rc = pParse->xNameSpace(pPrefix, pUri, pParse->pUserData); if(rc == SXERR_ABORT) { /* User callback request an operation abort */ return SXERR_ABORT; } } /* Duplicate structure */ pDup = (SyXMLRawStr *)SyMemBackendAlloc(pParse->pAllocator, sizeof(SyXMLRawStr)); if(pDup == 0) { if(pParse->xError) { pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData); } /* Abort processing immediately */ return SXERR_ABORT; } *pDup = *pUri; /* Structure assignement */ /* Save the namespace */ if(pPrefix->nByte == 0) { pPrefix->zString = "Default"; pPrefix->nByte = sizeof("Default") - 1; } SyHashInsert(&pParse->hns, (const void *)pPrefix->zString, pPrefix->nByte, pDup); /* Peek the last inserted entry */ pEntry = SyHashLastEntry(&pParse->hns); /* Store in the corresponding tag container*/ SySetPut(&pTag->sNSset, (const void *)&pEntry); /* POP the last insertred two entries */ (void)SySetPop(pAttr); (void)SySetPop(pAttr); return SXRET_OK; } static sxi32 XMLProcessStartTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SySet *pAttrSet, SySet *pTagStack) { SyString *pIn = &pToken->sData; const char *zIn, *zCur, *zEnd; SyXMLRawStr sEntry; sxi32 rc; int c; /* Reset the working set */ SySetReset(pAttrSet); /* Delimit the raw tag */ zIn = pIn->zString; zEnd = &zIn[pIn->nByte]; while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } /* Isolate tag name */ sEntry.nLine = pTag->nLine = pToken->nLine; zCur = zIn; while(zIn < zEnd) { if((unsigned char)zIn[0] >= 0xc0) { /* UTF-8 stream */ zIn++; SX_JMP_UTF8(zIn, zEnd); } else if(SyisSpace(zIn[0])) { break; } else { if(IS_XML_DIRTY(zIn[0])) { if(pParse->xError) { rc = pParse->xError("Illegal character in XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } zIn++; } } if(zCur >= zIn) { if(pParse->xError) { rc = pParse->xError("Invalid XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } pTag->zString = zCur; pTag->nByte = (sxu32)(zIn - zCur); /* Process tag attribute */ for(;;) { int is_ns = 0; while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } if(zIn >= zEnd) { break; } zCur = zIn; while(zIn < zEnd && zIn[0] != '=') { if((unsigned char)zIn[0] >= 0xc0) { /* UTF-8 stream */ zIn++; SX_JMP_UTF8(zIn, zEnd); } else if(SyisSpace(zIn[0])) { break; } else { zIn++; } } if(zCur >= zIn) { if(pParse->xError) { rc = pParse->xError("Missing attribute name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } /* Store attribute name */ sEntry.zString = zCur; sEntry.nByte = (sxu32)(zIn - zCur); if((pParse->nFlags & SXML_ENABLE_NAMESPACE) && sEntry.nByte >= sizeof("xmlns") - 1 && SyMemcmp(sEntry.zString, "xmlns", sizeof("xmlns") - 1) == 0) { is_ns = 1; } while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } if(zIn >= zEnd || zIn[0] != '=') { if(pParse->xError) { rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } while(sEntry.nByte > 0 && (unsigned char)zCur[sEntry.nByte - 1] < 0xc0 && SyisSpace(zCur[sEntry.nByte - 1])) { sEntry.nByte--; } /* Check for duplicates first */ if(XMLCheckDuplicateAttr((SyXMLRawStr *)SySetBasePtr(pAttrSet), SySetUsed(pAttrSet), &sEntry)) { if(pParse->xError) { rc = pParse->xError("Duplicate attribute", SXML_ERROR_DUPLICATE_ATTRIBUTE, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { return SXERR_ABORT; } /* Extract attribute value */ zIn++; /* Jump the trailing '=' */ while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } if(zIn >= zEnd) { if(pParse->xError) { rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } (void)SySetPop(pAttrSet); return SXERR_SYNTAX; } if(zIn[0] != '\'' && zIn[0] != '"') { if(pParse->xError) { rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } (void)SySetPop(pAttrSet); return SXERR_SYNTAX; } c = zIn[0]; zIn++; zCur = zIn; while(zIn < zEnd && zIn[0] != c) { zIn++; } if(zIn >= zEnd) { if(pParse->xError) { rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } (void)SySetPop(pAttrSet); return SXERR_SYNTAX; } /* Store attribute value */ sEntry.zString = zCur; sEntry.nByte = (sxu32)(zIn - zCur); if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { return SXERR_ABORT; } zIn++; if(is_ns) { /* Process namespace declaration */ XMLProcessNamesSpace(pParse, pTag, pToken, pAttrSet); } } /* Store in the tag stack */ if(pToken->nType == SXML_TOK_START_TAG) { rc = SySetPut(pTagStack, (const void *)pTag); } return SXRET_OK; } static void XMLExtactPI(SyToken *pToken, SyXMLRawStr *pTarget, SyXMLRawStr *pData, int *pXML) { SyString *pIn = &pToken->sData; const char *zIn, *zCur, *zEnd; pTarget->nLine = pData->nLine = pToken->nLine; /* Nullify the entries first */ pTarget->zString = pData->zString = 0; /* Ignore leading and traing white spaces */ SyStringFullTrim(pIn); /* Delimit the raw PI */ zIn = pIn->zString; zEnd = &zIn[pIn->nByte]; if(pXML) { *pXML = 0; } /* Extract the target */ zCur = zIn; while(zIn < zEnd) { if((unsigned char)zIn[0] >= 0xc0) { /* UTF-8 stream */ zIn++; SX_JMP_UTF8(zIn, zEnd); } else if(SyisSpace(zIn[0])) { break; } else { zIn++; } } if(zIn > zCur) { pTarget->zString = zCur; pTarget->nByte = (sxu32)(zIn - zCur); if(pXML && pTarget->nByte == sizeof("xml") - 1 && SyStrnicmp(pTarget->zString, "xml", sizeof("xml") - 1) == 0) { *pXML = 1; } } /* Extract the PI data */ while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } if(zIn < zEnd) { pData->zString = zIn; pData->nByte = (sxu32)(zEnd - zIn); } } static sxi32 XMLExtractEndTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pOut) { SyString *pIn = &pToken->sData; const char *zEnd = &pIn->zString[pIn->nByte]; const char *zIn = pIn->zString; /* Ignore leading white spaces */ while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { zIn++; } pOut->nLine = pToken->nLine; pOut->zString = zIn; pOut->nByte = (sxu32)(zEnd - zIn); /* Ignore trailing white spaces */ while(pOut->nByte > 0 && (unsigned char)pOut->zString[pOut->nByte - 1] < 0xc0 && SyisSpace(pOut->zString[pOut->nByte - 1])) { pOut->nByte--; } if(pOut->nByte < 1) { if(pParse->xError) { sxi32 rc; rc = pParse->xError("Invalid end tag name", SXML_ERROR_INVALID_TOKEN, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } return SXRET_OK; } static void TokenToXMLString(SyToken *pTok, SyXMLRawStrNS *pOut) { /* Remove leading and trailing white spaces first */ SyStringFullTrim(&pTok->sData); pOut->zString = SyStringData(&pTok->sData); pOut->nByte = SyStringLength(&pTok->sData); } static sxi32 XMLExtractNS(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SyXMLRawStr *pnsUri) { SyXMLRawStr *pUri, sPrefix; SyHashEntry *pEntry; sxu32 nOfft; sxi32 rc; /* Extract a prefix if available */ rc = SyByteFind(pTag->zString, pTag->nByte, ':', &nOfft); if(rc != SXRET_OK) { /* Check if there is a default namespace */ pEntry = SyHashGet(&pParse->hns, "Default", sizeof("Default") - 1); if(pEntry) { /* Extract the ns URI */ pUri = (SyXMLRawStr *)pEntry->pUserData; /* Save the ns URI */ pnsUri->zString = pUri->zString; pnsUri->nByte = pUri->nByte; } return SXRET_OK; } if(nOfft < 1) { if(pParse->xError) { rc = pParse->xError("Empty prefix is not allowed according to XML namespace specification", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } sPrefix.zString = pTag->zString; sPrefix.nByte = nOfft; sPrefix.nLine = pTag->nLine; pTag->zString += nOfft + 1; pTag->nByte -= nOfft; if(pTag->nByte < 1) { if(pParse->xError) { rc = pParse->xError("Missing tag name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } /* Check if the prefix is already registered */ pEntry = SyHashGet(&pParse->hns, sPrefix.zString, sPrefix.nByte); if(pEntry == 0) { if(pParse->xError) { rc = pParse->xError("Namespace prefix is not defined", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } return SXERR_SYNTAX; } /* Extract the ns URI */ pUri = (SyXMLRawStr *)pEntry->pUserData; /* Save the ns URI */ pnsUri->zString = pUri->zString; pnsUri->nByte = pUri->nByte; /* All done */ return SXRET_OK; } static sxi32 XMLnsUnlink(SyXMLParser *pParse, SyXMLRawStrNS *pLast, SyToken *pToken) { SyHashEntry **apEntry, *pEntry; void *pUserData; sxu32 n; /* Release namespace entries */ apEntry = (SyHashEntry **)SySetBasePtr(&pLast->sNSset); for(n = 0 ; n < SySetUsed(&pLast->sNSset) ; ++n) { pEntry = apEntry[n]; /* Invoke the end namespace declaration callback */ if(pParse->xNameSpaceEnd && (pParse->nFlags & SXML_ENABLE_NAMESPACE) && pToken) { SyXMLRawStr sPrefix; sxi32 rc; sPrefix.zString = (const char *)pEntry->pKey; sPrefix.nByte = pEntry->nKeyLen; sPrefix.nLine = pToken->nLine; rc = pParse->xNameSpaceEnd(&sPrefix, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } pUserData = pEntry->pUserData; /* Remove from the namespace hashtable */ SyHashDeleteEntry2(pEntry); SyMemBackendFree(pParse->pAllocator, pUserData); } SySetRelease(&pLast->sNSset); return SXRET_OK; } /* Process XML tokens */ static sxi32 ProcessXML(SyXMLParser *pParse, SySet *pTagStack, SySet *pWorker) { SySet *pTokenSet = &pParse->sToken; SyXMLRawStrNS sEntry; SyXMLRawStr sNs; SyToken *pToken; int bGotTag; sxi32 rc; /* Initialize fields */ bGotTag = 0; /* Start processing */ if(pParse->xStartDoc && (SXERR_ABORT == pParse->xStartDoc(pParse->pUserData))) { /* User callback request an operation abort */ return SXERR_ABORT; } /* Reset the loop cursor */ SySetResetCursor(pTokenSet); /* Extract the current token */ while(SXRET_OK == (SySetGetNextEntry(&(*pTokenSet), (void **)&pToken))) { SyZero(&sEntry, sizeof(SyXMLRawStrNS)); SyZero(&sNs, sizeof(SyXMLRawStr)); SySetInit(&sEntry.sNSset, pParse->pAllocator, sizeof(SyHashEntry *)); sEntry.nLine = sNs.nLine = pToken->nLine; switch(pToken->nType) { case SXML_TOK_DOCTYPE: if(SySetUsed(pTagStack) > 1 || bGotTag) { if(pParse->xError) { rc = pParse->xError("DOCTYPE must be declared first", SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } /* Invoke the supplied callback if any */ if(pParse->xDoctype) { TokenToXMLString(pToken, &sEntry); rc = pParse->xDoctype((SyXMLRawStr *)&sEntry, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; case SXML_TOK_CDATA: if(SySetUsed(pTagStack) < 1) { if(pParse->xError) { rc = pParse->xError("CDATA without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } /* Invoke the supplied callback if any */ if(pParse->xRaw) { TokenToXMLString(pToken, &sEntry); rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; case SXML_TOK_PI: { SyXMLRawStr sTarget, sData; int isXML = 0; /* Extract the target and data */ XMLExtactPI(pToken, &sTarget, &sData, &isXML); if(isXML && SySetCursor(pTokenSet) - 1 > 0) { if(pParse->xError) { rc = pParse->xError("Unexpected XML declaration. The XML declaration must be the first node in the document", SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } else if(pParse->xPi) { /* Invoke the supplied callback*/ rc = pParse->xPi(&sTarget, &sData, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } case SXML_TOK_RAW: if(SySetUsed(pTagStack) < 1) { if(pParse->xError) { rc = pParse->xError("Text (Raw data) without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } /* Invoke the supplied callback if any */ if(pParse->xRaw) { TokenToXMLString(pToken, &sEntry); rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; case SXML_TOK_END_TAG: { SyXMLRawStrNS *pLast = 0; /* cc warning */ if(SySetUsed(pTagStack) < 1) { if(pParse->xError) { rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } rc = XMLExtractEndTag(pParse, pToken, &sEntry); if(rc == SXRET_OK) { /* Extract the last inserted entry */ pLast = (SyXMLRawStrNS *)SySetPeek(pTagStack); if(pLast == 0 || pLast->nByte != sEntry.nByte || SyMemcmp(pLast->zString, sEntry.zString, sEntry.nByte) != 0) { if(pParse->xError) { rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } else { /* Invoke the supllied callback if any */ if(pParse->xEndTag) { rc = SXRET_OK; if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { /* Extract namespace URI */ rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } if(rc == SXRET_OK) { rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } } } else if(rc == SXERR_ABORT) { return SXERR_ABORT; } if(pLast) { rc = XMLnsUnlink(pParse, pLast, pToken); (void)SySetPop(pTagStack); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } case SXML_TOK_START_TAG: case SXML_TOK_START_END: if(SySetUsed(pTagStack) < 1 && bGotTag) { if(pParse->xError) { rc = pParse->xError("XML document cannot contain multiple root level elements documents", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } break; } bGotTag = 1; /* Extract the tag and it's supplied attribute */ rc = XMLProcessStartTag(pParse, pToken, &sEntry, pWorker, pTagStack); if(rc == SXRET_OK) { if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { /* Extract namespace URI */ rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); } } if(rc == SXRET_OK) { /* Invoke the supplied callback */ if(pParse->xStartTag) { rc = pParse->xStartTag((SyXMLRawStr *)&sEntry, &sNs, SySetUsed(pWorker), (SyXMLRawStr *)SySetBasePtr(pWorker), pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } if(pToken->nType == SXML_TOK_START_END) { if(pParse->xEndTag) { rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } rc = XMLnsUnlink(pParse, &sEntry, pToken); if(rc == SXERR_ABORT) { return SXERR_ABORT; } } } else if(rc == SXERR_ABORT) { /* Abort processing immediately */ return SXERR_ABORT; } break; default: /* Can't happen */ break; } } if(SySetUsed(pTagStack) > 0 && pParse->xError) { pParse->xError("Missing closing tag", SXML_ERROR_SYNTAX, (SyToken *)SySetPeek(&pParse->sToken), pParse->pUserData); } if(pParse->xEndDoc) { pParse->xEndDoc(pParse->pUserData); } return SXRET_OK; } PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags) { /* Zero the structure first */ SyZero(pParser, sizeof(SyXMLParser)); /* Initilaize fields */ SySetInit(&pParser->sToken, pAllocator, sizeof(SyToken)); SyLexInit(&pParser->sLex, &pParser->sToken, XML_Tokenize, pParser); SyHashInit(&pParser->hns, pAllocator, 0, 0); pParser->pAllocator = pAllocator; pParser->nFlags = iFlags; return SXRET_OK; } PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser, void *pUserData, ProcXMLStartTagHandler xStartTag, ProcXMLTextHandler xRaw, ProcXMLSyntaxErrorHandler xErr, ProcXMLStartDocument xStartDoc, ProcXMLEndTagHandler xEndTag, ProcXMLPIHandler xPi, ProcXMLEndDocument xEndDoc, ProcXMLDoctypeHandler xDoctype, ProcXMLNameSpaceStart xNameSpace, ProcXMLNameSpaceEnd xNameSpaceEnd ) { /* Install user callbacks */ if(xErr) { pParser->xError = xErr; } if(xStartDoc) { pParser->xStartDoc = xStartDoc; } if(xStartTag) { pParser->xStartTag = xStartTag; } if(xRaw) { pParser->xRaw = xRaw; } if(xEndTag) { pParser->xEndTag = xEndTag; } if(xPi) { pParser->xPi = xPi; } if(xEndDoc) { pParser->xEndDoc = xEndDoc; } if(xDoctype) { pParser->xDoctype = xDoctype; } if(xNameSpace) { pParser->xNameSpace = xNameSpace; } if(xNameSpaceEnd) { pParser->xNameSpaceEnd = xNameSpaceEnd; } pParser->pUserData = pUserData; return SXRET_OK; } /* Process an XML chunk */ PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte) { SySet sTagStack; SySet sWorker; sxi32 rc; /* Initialize working sets */ SySetInit(&sWorker, pParser->pAllocator, sizeof(SyXMLRawStr)); /* Tag container */ SySetInit(&sTagStack, pParser->pAllocator, sizeof(SyXMLRawStrNS)); /* Tag stack */ /* Tokenize the entire input */ rc = SyLexTokenizeInput(&pParser->sLex, zInput, nByte, 0, 0, 0); if(rc == SXERR_ABORT) { /* Tokenize callback request an operation abort */ return SXERR_ABORT; } if(SySetUsed(&pParser->sToken) < 1) { /* Nothing to process [i.e: white spaces] */ rc = SXRET_OK; } else { /* Process XML Tokens */ rc = ProcessXML(&(*pParser), &sTagStack, &sWorker); if(pParser->nFlags & SXML_ENABLE_NAMESPACE) { if(SySetUsed(&sTagStack) > 0) { SyXMLRawStrNS *pEntry; SyHashEntry **apEntry; sxu32 n; SySetResetCursor(&sTagStack); while(SySetGetNextEntry(&sTagStack, (void **)&pEntry) == SXRET_OK) { /* Release namespace entries */ apEntry = (SyHashEntry **)SySetBasePtr(&pEntry->sNSset); for(n = 0 ; n < SySetUsed(&pEntry->sNSset) ; ++n) { SyMemBackendFree(pParser->pAllocator, apEntry[n]->pUserData); } SySetRelease(&pEntry->sNSset); } } } } /* Clean-up the mess left behind */ SySetRelease(&sWorker); SySetRelease(&sTagStack); /* Processing result */ return rc; } PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser) { SyLexRelease(&pParser->sLex); SySetRelease(&pParser->sToken); SyHashRelease(&pParser->hns); return SXRET_OK; }