diff --git a/Makefile b/Makefile index 8fdcf25..cc77789 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,7 @@ ASTYLE_FLAGS =\ --lineend=linux -all: psharp dummy.lib json.lib +all: psharp dummy.lib json.lib xml.lib clean: rm -f psharp $(ENGINE_OBJS) *.lib @@ -63,3 +63,6 @@ dummy.lib: ext/dummy/dummy.c json.lib: ext/json/json.c $(CC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o json.lib ext/json/json.c + +xml.lib: ext/xml/xml.c + $(CC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o xml.lib ext/xml/lib.c ext/xml/xml.c diff --git a/constant.c b/constant.c index 384a867..87c31da 100644 --- a/constant.c +++ b/constant.c @@ -1258,225 +1258,6 @@ static void PH7_EXTR_PREFIX_IF_EXISTS_Const(ph7_value *pVal, void *pUserData) { SXUNUSED(pUserData); /* cc warning */ ph7_value_int(pVal, 0x40); } -#ifndef PH7_DISABLE_BUILTIN_FUNC -/* - * XML_ERROR_NONE - * Expand the value of SXML_ERROR_NO_MEMORY defined in ph7Int.h - */ -static void PH7_XML_ERROR_NONE_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_NO_MEMORY); -} -/* - * XML_ERROR_NO_MEMORY - * Expand the value of SXML_ERROR_NONE defined in ph7Int.h - */ -static void PH7_XML_ERROR_NO_MEMORY_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_NO_MEMORY); -} -/* - * XML_ERROR_SYNTAX - * Expand the value of SXML_ERROR_SYNTAX defined in ph7Int.h - */ -static void PH7_XML_ERROR_SYNTAX_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_SYNTAX); -} -/* - * XML_ERROR_NO_ELEMENTS - * Expand the value of SXML_ERROR_NO_ELEMENTS defined in ph7Int.h - */ -static void PH7_XML_ERROR_NO_ELEMENTS_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_NO_ELEMENTS); -} -/* - * XML_ERROR_INVALID_TOKEN - * Expand the value of SXML_ERROR_INVALID_TOKEN defined in ph7Int.h - */ -static void 
PH7_XML_ERROR_INVALID_TOKEN_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_INVALID_TOKEN); -} -/* - * XML_ERROR_UNCLOSED_TOKEN - * Expand the value of SXML_ERROR_UNCLOSED_TOKEN defined in ph7Int.h - */ -static void PH7_XML_ERROR_UNCLOSED_TOKEN_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_UNCLOSED_TOKEN); -} -/* - * XML_ERROR_PARTIAL_CHAR - * Expand the value of SXML_ERROR_PARTIAL_CHAR defined in ph7Int.h - */ -static void PH7_XML_ERROR_PARTIAL_CHAR_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_PARTIAL_CHAR); -} -/* - * XML_ERROR_TAG_MISMATCH - * Expand the value of SXML_ERROR_TAG_MISMATCH defined in ph7Int.h - */ -static void PH7_XML_ERROR_TAG_MISMATCH_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_TAG_MISMATCH); -} -/* - * XML_ERROR_DUPLICATE_ATTRIBUTE - * Expand the value of SXML_ERROR_DUPLICATE_ATTRIBUTE defined in ph7Int.h - */ -static void PH7_XML_ERROR_DUPLICATE_ATTRIBUTE_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_DUPLICATE_ATTRIBUTE); -} -/* - * XML_ERROR_JUNK_AFTER_DOC_ELEMENT - * Expand the value of SXML_ERROR_JUNK_AFTER_DOC_ELEMENT defined in ph7Int.h - */ -static void PH7_XML_ERROR_JUNK_AFTER_DOC_ELEMENT_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_JUNK_AFTER_DOC_ELEMENT); -} -/* - * XML_ERROR_PARAM_ENTITY_REF - * Expand the value of SXML_ERROR_PARAM_ENTITY_REF defined in ph7Int.h - */ -static void PH7_XML_ERROR_PARAM_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_PARAM_ENTITY_REF); -} -/* - * XML_ERROR_UNDEFINED_ENTITY - * Expand the value of 
SXML_ERROR_UNDEFINED_ENTITY defined in ph7Int.h - */ -static void PH7_XML_ERROR_UNDEFINED_ENTITY_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_UNDEFINED_ENTITY); -} -/* - * XML_ERROR_RECURSIVE_ENTITY_REF - * Expand the value of SXML_ERROR_RECURSIVE_ENTITY_REF defined in ph7Int.h - */ -static void PH7_XML_ERROR_RECURSIVE_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_RECURSIVE_ENTITY_REF); -} -/* - * XML_ERROR_ASYNC_ENTITY - * Expand the value of SXML_ERROR_ASYNC_ENTITY defined in ph7Int.h - */ -static void PH7_XML_ERROR_ASYNC_ENTITY_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_ASYNC_ENTITY); -} -/* - * XML_ERROR_BAD_CHAR_REF - * Expand the value of SXML_ERROR_BAD_CHAR_REF defined in ph7Int.h - */ -static void PH7_XML_ERROR_BAD_CHAR_REF_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_BAD_CHAR_REF); -} -/* - * XML_ERROR_BINARY_ENTITY_REF - * Expand the value of SXML_ERROR_BINARY_ENTITY_REF defined in ph7Int.h - */ -static void PH7_XML_ERROR_BINARY_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_BINARY_ENTITY_REF); -} -/* - * XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF - * Expand the value of SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF defined in ph7Int.h - */ -static void PH7_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); -} -/* - * XML_ERROR_MISPLACED_XML_PI - * Expand the value of SXML_ERROR_MISPLACED_XML_PI defined in ph7Int.h - */ -static void PH7_XML_ERROR_MISPLACED_XML_PI_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, 
SXML_ERROR_MISPLACED_XML_PI); -} -/* - * XML_ERROR_UNKNOWN_ENCODING - * Expand the value of SXML_ERROR_UNKNOWN_ENCODING defined in ph7Int.h - */ -static void PH7_XML_ERROR_UNKNOWN_ENCODING_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_UNKNOWN_ENCODING); -} -/* - * XML_ERROR_INCORRECT_ENCODING - * Expand the value of SXML_ERROR_INCORRECT_ENCODING defined in ph7Int.h - */ -static void PH7_XML_ERROR_INCORRECT_ENCODING_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_INCORRECT_ENCODING); -} -/* - * XML_ERROR_UNCLOSED_CDATA_SECTION - * Expand the value of SXML_ERROR_UNCLOSED_CDATA_SECTION defined in ph7Int.h - */ -static void PH7_XML_ERROR_UNCLOSED_CDATA_SECTION_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_UNCLOSED_CDATA_SECTION); -} -/* - * XML_ERROR_EXTERNAL_ENTITY_HANDLING - * Expand the value of SXML_ERROR_EXTERNAL_ENTITY_HANDLING defined in ph7Int.h - */ -static void PH7_XML_ERROR_EXTERNAL_ENTITY_HANDLING_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_ERROR_EXTERNAL_ENTITY_HANDLING); -} -/* - * XML_OPTION_CASE_FOLDING - * Expand the value of SXML_OPTION_CASE_FOLDING defined in ph7Int.h. - */ -static void PH7_XML_OPTION_CASE_FOLDING_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_OPTION_CASE_FOLDING); -} -/* - * XML_OPTION_TARGET_ENCODING - * Expand the value of SXML_OPTION_TARGET_ENCODING defined in ph7Int.h. - */ -static void PH7_XML_OPTION_TARGET_ENCODING_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_OPTION_TARGET_ENCODING); -} -/* - * XML_OPTION_SKIP_TAGSTART - * Expand the value of SXML_OPTION_SKIP_TAGSTART defined in ph7Int.h. 
- */ -static void PH7_XML_OPTION_SKIP_TAGSTART_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_OPTION_SKIP_TAGSTART); -} -/* - * XML_OPTION_SKIP_WHITE - * Expand the value of SXML_OPTION_SKIP_TAGSTART defined in ph7Int.h. - */ -static void PH7_XML_OPTION_SKIP_WHITE_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_int(pVal, SXML_OPTION_SKIP_WHITE); -} -/* - * XML_SAX_IMPL. - * Expand the name of the underlying XML engine. - */ -static void PH7_XML_SAX_IMP_Const(ph7_value *pVal, void *pUserData) { - SXUNUSED(pUserData); /* cc warning */ - ph7_value_string(pVal, "Symisc XML engine", (int)sizeof("Symisc XML engine") - 1); -} -#endif /* PH7_DISABLE_BUILTIN_FUNC */ - /* * static * Expand the name of the current class. 'static' otherwise. @@ -1690,35 +1471,6 @@ static const ph7_builtin_constant aBuiltIn[] = { {"EXTR_PREFIX_INVALID", PH7_EXTR_PREFIX_INVALID_Const }, {"EXTR_IF_EXISTS", PH7_EXTR_IF_EXISTS_Const }, {"EXTR_PREFIX_IF_EXISTS", PH7_EXTR_PREFIX_IF_EXISTS_Const}, -#ifndef PH7_DISABLE_BUILTIN_FUNC - {"XML_ERROR_NONE", PH7_XML_ERROR_NONE_Const}, - {"XML_ERROR_NO_MEMORY", PH7_XML_ERROR_NO_MEMORY_Const}, - {"XML_ERROR_SYNTAX", PH7_XML_ERROR_SYNTAX_Const}, - {"XML_ERROR_NO_ELEMENTS", PH7_XML_ERROR_NO_ELEMENTS_Const}, - {"XML_ERROR_INVALID_TOKEN", PH7_XML_ERROR_INVALID_TOKEN_Const}, - {"XML_ERROR_UNCLOSED_TOKEN", PH7_XML_ERROR_UNCLOSED_TOKEN_Const}, - {"XML_ERROR_PARTIAL_CHAR", PH7_XML_ERROR_PARTIAL_CHAR_Const}, - {"XML_ERROR_TAG_MISMATCH", PH7_XML_ERROR_TAG_MISMATCH_Const}, - {"XML_ERROR_DUPLICATE_ATTRIBUTE", PH7_XML_ERROR_DUPLICATE_ATTRIBUTE_Const}, - {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", PH7_XML_ERROR_JUNK_AFTER_DOC_ELEMENT_Const}, - {"XML_ERROR_PARAM_ENTITY_REF", PH7_XML_ERROR_PARAM_ENTITY_REF_Const}, - {"XML_ERROR_UNDEFINED_ENTITY", PH7_XML_ERROR_UNDEFINED_ENTITY_Const}, - {"XML_ERROR_RECURSIVE_ENTITY_REF", PH7_XML_ERROR_RECURSIVE_ENTITY_REF_Const}, - 
{"XML_ERROR_ASYNC_ENTITY", PH7_XML_ERROR_ASYNC_ENTITY_Const}, - {"XML_ERROR_BAD_CHAR_REF", PH7_XML_ERROR_BAD_CHAR_REF_Const}, - {"XML_ERROR_BINARY_ENTITY_REF", PH7_XML_ERROR_BINARY_ENTITY_REF_Const}, - {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", PH7_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF_Const}, - {"XML_ERROR_MISPLACED_XML_PI", PH7_XML_ERROR_MISPLACED_XML_PI_Const}, - {"XML_ERROR_UNKNOWN_ENCODING", PH7_XML_ERROR_UNKNOWN_ENCODING_Const}, - {"XML_ERROR_INCORRECT_ENCODING", PH7_XML_ERROR_INCORRECT_ENCODING_Const}, - {"XML_ERROR_UNCLOSED_CDATA_SECTION", PH7_XML_ERROR_UNCLOSED_CDATA_SECTION_Const}, - {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", PH7_XML_ERROR_EXTERNAL_ENTITY_HANDLING_Const}, - {"XML_OPTION_CASE_FOLDING", PH7_XML_OPTION_CASE_FOLDING_Const}, - {"XML_OPTION_TARGET_ENCODING", PH7_XML_OPTION_TARGET_ENCODING_Const}, - {"XML_OPTION_SKIP_TAGSTART", PH7_XML_OPTION_SKIP_TAGSTART_Const}, - {"XML_OPTION_SKIP_WHITE", PH7_XML_OPTION_SKIP_WHITE_Const}, - {"XML_SAX_IMPL", PH7_XML_SAX_IMP_Const}, -#endif /* PH7_DISABLE_BUILTIN_FUNC */ {"static", PH7_static_Const }, {"self", PH7_self_Const }, {"__CLASS__", PH7_self_Const }, @@ -1737,4 +1489,3 @@ PH7_PRIVATE void PH7_RegisterBuiltInConstant(ph7_vm *pVm) { ph7_create_constant(&(*pVm), aBuiltIn[n].zName, aBuiltIn[n].xExpand, &(*pVm)); } } -- \ No newline at end of file diff --git a/ext/xml/lib.c b/ext/xml/lib.c new file mode 100644 index 0000000..6cd3f51 --- /dev/null +++ b/ext/xml/lib.c @@ -0,0 +1,1022 @@ +#include "lib.h" + +/* Tokenize an entire XML input */ +static sxi32 XML_Tokenize(SyStream *pStream, SyToken *pToken, void *pUserData, void *pUnused2) { + SyXMLParser *pParse = (SyXMLParser *)pUserData; + SyString *pStr; + sxi32 rc; + int c; + /* Jump leading white spaces */ + while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0])) { + /* Advance the stream cursor */ + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + pStream->zText++; + } + 
if(pStream->zText >= pStream->zEnd) { + SXUNUSED(pUnused2); + /* End of input reached */ + return SXERR_EOF; + } + /* Record token starting position and line */ + pToken->nLine = pStream->nLine; + pToken->pUserData = 0; + pStr = &pToken->sData; + SyStringInitFromBuf(pStr, pStream->zText, 0); + /* Extract the current token */ + c = pStream->zText[0]; + if(c == '<') { + pStream->zText++; + pStr->zString++; + if(pStream->zText >= pStream->zEnd) { + if(pParse->xError) { + rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + /* End of input reached */ + return SXERR_EOF; + } + c = pStream->zText[0]; + if(c == '?') { + /* Processing instruction */ + pStream->zText++; + pStr->zString++; + pToken->nType = SXML_TOK_PI; + while(XLEX_IN_LEN(pStream) >= sizeof("?>") - 1 && + SyMemcmp((const void *)pStream->zText, "?>", sizeof("?>") - 1) != 0) { + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + pStream->zText++; + } + /* Record token length */ + pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); + if(XLEX_IN_LEN(pStream) < sizeof("?>") - 1) { + if(pParse->xError) { + rc = pParse->xError("End of input found,but processing instruction was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_EOF; + } + pStream->zText += sizeof("?>") - 1; + } else if(c == '!') { + pStream->zText++; + if(XLEX_IN_LEN(pStream) >= sizeof("--") - 1 && pStream->zText[0] == '-' && pStream->zText[1] == '-') { + /* Comment */ + pStream->zText += sizeof("--") - 1; + while(XLEX_IN_LEN(pStream) >= sizeof("-->") - 1 && + SyMemcmp((const void *)pStream->zText, "-->", sizeof("-->") - 1) != 0) { + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + pStream->zText++; + } + pStream->zText = (XLEX_IN_LEN(pStream) >= sizeof("-->") - 1) ? pStream->zText + (sizeof("-->") - 1) : pStream->zEnd; /* Jump the closing "-->"; if the comment is unterminated, stop at the end of input instead of stepping past zEnd */ + 
/* Tell the lexer to ignore this token */ + return SXERR_CONTINUE; + } + if(XLEX_IN_LEN(pStream) >= sizeof("[CDATA[") - 1 && SyMemcmp((const void *)pStream->zText, "[CDATA[", sizeof("[CDATA[") - 1) == 0) { + /* CDATA */ + pStream->zText += sizeof("[CDATA[") - 1; + pStr->zString = (const char *)pStream->zText; + while(XLEX_IN_LEN(pStream) >= sizeof("]]>") - 1 && + SyMemcmp((const void *)pStream->zText, "]]>", sizeof("]]>") - 1) != 0) { + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + pStream->zText++; + } + /* Record token type and length */ + pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); + pToken->nType = SXML_TOK_CDATA; + if(XLEX_IN_LEN(pStream) < sizeof("]]>") - 1) { + if(pParse->xError) { + rc = pParse->xError("End of input found,but ]]> was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_EOF; + } + pStream->zText += sizeof("]]>") - 1; + return SXRET_OK; + } + if(XLEX_IN_LEN(pStream) >= sizeof("DOCTYPE") - 1 && SyMemcmp((const void *)pStream->zText, "DOCTYPE", sizeof("DOCTYPE") - 1) == 0) { + SyString sDelim = { ">", sizeof(char) }; /* Default delimiter */ + int c = 0; + /* DOCTYPE */ + pStream->zText += sizeof("DOCTYPE") - 1; + pStr->zString = (const char *)pStream->zText; + /* Check for element declaration */ + while(pStream->zText < pStream->zEnd && pStream->zText[0] != '\n') { + if(pStream->zText[0] >= 0xc0 || !SyisSpace(pStream->zText[0])) { + c = pStream->zText[0]; + if(c == '>') { + break; + } + } + pStream->zText++; + } + if(c == '[') { + /* Change the delimiter */ + SyStringInitFromBuf(&sDelim, "]>", sizeof("]>") - 1); + } + if(c != '>') { + while(XLEX_IN_LEN(pStream) >= sDelim.nByte && + SyMemcmp((const void *)pStream->zText, sDelim.zString, sDelim.nByte) != 0) { + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + pStream->zText++; + } + } + /* Record token type 
and length */ + pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); + pToken->nType = SXML_TOK_DOCTYPE; + if(XLEX_IN_LEN(pStream) < sDelim.nByte) { + if(pParse->xError) { + rc = pParse->xError("End of input found,but ]> or > was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_EOF; + } + pStream->zText += sDelim.nByte; + return SXRET_OK; + } + } else { + int c; + c = pStream->zText[0]; + rc = SXRET_OK; + pToken->nType = SXML_TOK_START_TAG; + if(c == '/') { + /* End tag */ + pToken->nType = SXML_TOK_END_TAG; + pStream->zText++; + pStr->zString++; + if(pStream->zText >= pStream->zEnd) { + if(pParse->xError) { + rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_EOF; + } + c = pStream->zText[0]; + } + if(c == '>') { + /*<>*/ + if(pParse->xError) { + rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + /* Ignore the token */ + return SXERR_CONTINUE; + } + if(c < 0xc0 && (SyisSpace(c) || SyisDigit(c) || c == '.' 
|| c == '-' || IS_XML_DIRTY(c))) { + if(pParse->xError) { + rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + rc = SXERR_INVALID; + } + pStream->zText++; + /* Delimit the tag */ + while(pStream->zText < pStream->zEnd && pStream->zText[0] != '>') { + c = pStream->zText[0]; + if(c >= 0xc0) { + /* UTF-8 stream */ + pStream->zText++; + SX_JMP_UTF8(pStream->zText, pStream->zEnd); + } else { + if(c == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '>') { + pStream->zText++; + if(pToken->nType != SXML_TOK_START_TAG) { + if(pParse->xError) { + rc = pParse->xError("Unexpected closing tag,expecting '>'", + SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + /* Ignore the token */ + rc = SXERR_INVALID; + } else { + pToken->nType = SXML_TOK_START_END; + } + break; + } + if(pStream->zText[0] == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + /* Advance the stream cursor */ + pStream->zText++; + } + } + if(rc != SXRET_OK) { + /* Tell the lexer to ignore this token */ + return SXERR_CONTINUE; + } + /* Record token length */ + pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); + if(pToken->nType == SXML_TOK_START_END && pStr->nByte > 0) { + pStr->nByte -= sizeof(char); + } + if(pStream->zText < pStream->zEnd) { + pStream->zText++; + } else { + if(pParse->xError) { + rc = pParse->xError("End of input found,but closing tag '>' was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } + } + } else { + /* Raw input */ + while(pStream->zText < pStream->zEnd) { + c = pStream->zText[0]; + if(c < 0xc0) { + if(c == '<') { + break; + } else if(c == '\n') { + /* Increment line counter */ + pStream->nLine++; + } + /* Advance the stream cursor */ + pStream->zText++; + } else { + /* UTF-8 
stream */ + pStream->zText++; + SX_JMP_UTF8(pStream->zText, pStream->zEnd); + } + } + /* Record token type,length */ + pToken->nType = SXML_TOK_RAW; + pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); + } + /* Return to the lexer */ + return SXRET_OK; +} +static int XMLCheckDuplicateAttr(SyXMLRawStr *aSet, sxu32 nEntry, SyXMLRawStr *pEntry) { + sxu32 n; + for(n = 0 ; n < nEntry ; n += 2) { + SyXMLRawStr *pAttr = &aSet[n]; + if(pAttr->nByte == pEntry->nByte && SyMemcmp(pAttr->zString, pEntry->zString, pEntry->nByte) == 0) { + /* Attribute found */ + return 1; + } + } + /* No duplicates */ + return 0; +} +static sxi32 XMLProcessNamesSpace(SyXMLParser *pParse, SyXMLRawStrNS *pTag, SyToken *pToken, SySet *pAttr) { + SyXMLRawStr *pPrefix, *pUri; /* Namespace prefix/URI */ + SyHashEntry *pEntry; + SyXMLRawStr *pDup; + sxi32 rc; + /* Extract the URI first */ + pUri = (SyXMLRawStr *)SySetPeek(pAttr); + /* Extract the prefix */ + pPrefix = (SyXMLRawStr *)SySetAt(pAttr, SySetUsed(pAttr) - 2); + /* Prefix name */ + if(pPrefix->nByte == sizeof("xmlns") - 1) { + /* Default namespace */ + pPrefix->nByte = 0; + pPrefix->zString = ""; /* Empty string */ + } else { + pPrefix->nByte -= sizeof("xmlns") - 1; + pPrefix->zString += sizeof("xmlns") - 1; + if(pPrefix->zString[0] != ':') { + return SXRET_OK; + } + pPrefix->nByte--; + pPrefix->zString++; + if(pPrefix->nByte < 1) { + if(pParse->xError) { + rc = pParse->xError("Invalid namespace name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + /* POP the last insertred two entries */ + (void)SySetPop(pAttr); + (void)SySetPop(pAttr); + return SXERR_SYNTAX; + } + } + /* Invoke the namespace callback if available */ + if(pParse->xNameSpace) { + rc = pParse->xNameSpace(pPrefix, pUri, pParse->pUserData); + if(rc == SXERR_ABORT) { + /* User callback request an operation abort */ + return SXERR_ABORT; + } + } + /* Duplicate structure */ + pDup = (SyXMLRawStr 
*)SyMemBackendAlloc(pParse->pAllocator, sizeof(SyXMLRawStr)); + if(pDup == 0) { + if(pParse->xError) { + pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData); + } + /* Abort processing immediately */ + return SXERR_ABORT; + } + *pDup = *pUri; /* Structure assignement */ + /* Save the namespace */ + if(pPrefix->nByte == 0) { + pPrefix->zString = "Default"; + pPrefix->nByte = sizeof("Default") - 1; + } + SyHashInsert(&pParse->hns, (const void *)pPrefix->zString, pPrefix->nByte, pDup); + /* Peek the last inserted entry */ + pEntry = SyHashLastEntry(&pParse->hns); + /* Store in the corresponding tag container*/ + SySetPut(&pTag->sNSset, (const void *)&pEntry); + /* POP the last insertred two entries */ + (void)SySetPop(pAttr); + (void)SySetPop(pAttr); + return SXRET_OK; +} +static sxi32 XMLProcessStartTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SySet *pAttrSet, SySet *pTagStack) { + SyString *pIn = &pToken->sData; + const char *zIn, *zCur, *zEnd; + SyXMLRawStr sEntry; + sxi32 rc; + int c; + /* Reset the working set */ + SySetReset(pAttrSet); + /* Delimit the raw tag */ + zIn = pIn->zString; + zEnd = &zIn[pIn->nByte]; + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + /* Isolate tag name */ + sEntry.nLine = pTag->nLine = pToken->nLine; + zCur = zIn; + while(zIn < zEnd) { + if((unsigned char)zIn[0] >= 0xc0) { + /* UTF-8 stream */ + zIn++; + SX_JMP_UTF8(zIn, zEnd); + } else if(SyisSpace(zIn[0])) { + break; + } else { + if(IS_XML_DIRTY(zIn[0])) { + if(pParse->xError) { + rc = pParse->xError("Illegal character in XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } + zIn++; + } + } + if(zCur >= zIn) { + if(pParse->xError) { + rc = pParse->xError("Invalid XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + pTag->zString = zCur; + 
pTag->nByte = (sxu32)(zIn - zCur); + /* Process tag attribute */ + for(;;) { + int is_ns = 0; + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + if(zIn >= zEnd) { + break; + } + zCur = zIn; + while(zIn < zEnd && zIn[0] != '=') { + if((unsigned char)zIn[0] >= 0xc0) { + /* UTF-8 stream */ + zIn++; + SX_JMP_UTF8(zIn, zEnd); + } else if(SyisSpace(zIn[0])) { + break; + } else { + zIn++; + } + } + if(zCur >= zIn) { + if(pParse->xError) { + rc = pParse->xError("Missing attribute name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + /* Store attribute name */ + sEntry.zString = zCur; + sEntry.nByte = (sxu32)(zIn - zCur); + if((pParse->nFlags & SXML_ENABLE_NAMESPACE) && sEntry.nByte >= sizeof("xmlns") - 1 && + SyMemcmp(sEntry.zString, "xmlns", sizeof("xmlns") - 1) == 0) { + is_ns = 1; + } + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + if(zIn >= zEnd || zIn[0] != '=') { + if(pParse->xError) { + rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + while(sEntry.nByte > 0 && (unsigned char)zCur[sEntry.nByte - 1] < 0xc0 + && SyisSpace(zCur[sEntry.nByte - 1])) { + sEntry.nByte--; + } + /* Check for duplicates first */ + if(XMLCheckDuplicateAttr((SyXMLRawStr *)SySetBasePtr(pAttrSet), SySetUsed(pAttrSet), &sEntry)) { + if(pParse->xError) { + rc = pParse->xError("Duplicate attribute", SXML_ERROR_DUPLICATE_ATTRIBUTE, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { + return SXERR_ABORT; + } + /* Extract attribute value */ + zIn++; /* Jump the trailing '=' */ + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + if(zIn >= zEnd) { + 
if(pParse->xError) { + rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + (void)SySetPop(pAttrSet); + return SXERR_SYNTAX; + } + if(zIn[0] != '\'' && zIn[0] != '"') { + if(pParse->xError) { + rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + (void)SySetPop(pAttrSet); + return SXERR_SYNTAX; + } + c = zIn[0]; + zIn++; + zCur = zIn; + while(zIn < zEnd && zIn[0] != c) { + zIn++; + } + if(zIn >= zEnd) { + if(pParse->xError) { + rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + (void)SySetPop(pAttrSet); + return SXERR_SYNTAX; + } + /* Store attribute value */ + sEntry.zString = zCur; + sEntry.nByte = (sxu32)(zIn - zCur); + if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { + return SXERR_ABORT; + } + zIn++; + if(is_ns) { + /* Process namespace declaration */ + XMLProcessNamesSpace(pParse, pTag, pToken, pAttrSet); + } + } + /* Store in the tag stack */ + if(pToken->nType == SXML_TOK_START_TAG) { + rc = SySetPut(pTagStack, (const void *)pTag); + } + return SXRET_OK; +} +static void XMLExtactPI(SyToken *pToken, SyXMLRawStr *pTarget, SyXMLRawStr *pData, int *pXML) { + SyString *pIn = &pToken->sData; + const char *zIn, *zCur, *zEnd; + pTarget->nLine = pData->nLine = pToken->nLine; + /* Nullify the entries first */ + pTarget->zString = pData->zString = 0; + /* Ignore leading and traing white spaces */ + SyStringFullTrim(pIn); + /* Delimit the raw PI */ + zIn = pIn->zString; + zEnd = &zIn[pIn->nByte]; + if(pXML) { + *pXML = 0; + } + /* Extract the target */ + zCur = zIn; + while(zIn < zEnd) { + if((unsigned char)zIn[0] >= 0xc0) { + /* UTF-8 stream */ + zIn++; + SX_JMP_UTF8(zIn, zEnd); + } else if(SyisSpace(zIn[0])) { + break; + } else { + zIn++; + } 
+ } + if(zIn > zCur) { + pTarget->zString = zCur; + pTarget->nByte = (sxu32)(zIn - zCur); + if(pXML && pTarget->nByte == sizeof("xml") - 1 && SyStrnicmp(pTarget->zString, "xml", sizeof("xml") - 1) == 0) { + *pXML = 1; + } + } + /* Extract the PI data */ + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + if(zIn < zEnd) { + pData->zString = zIn; + pData->nByte = (sxu32)(zEnd - zIn); + } +} +static sxi32 XMLExtractEndTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pOut) { + SyString *pIn = &pToken->sData; + const char *zEnd = &pIn->zString[pIn->nByte]; + const char *zIn = pIn->zString; + /* Ignore leading white spaces */ + while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { + zIn++; + } + pOut->nLine = pToken->nLine; + pOut->zString = zIn; + pOut->nByte = (sxu32)(zEnd - zIn); + /* Ignore trailing white spaces */ + while(pOut->nByte > 0 && (unsigned char)pOut->zString[pOut->nByte - 1] < 0xc0 + && SyisSpace(pOut->zString[pOut->nByte - 1])) { + pOut->nByte--; + } + if(pOut->nByte < 1) { + if(pParse->xError) { + sxi32 rc; + rc = pParse->xError("Invalid end tag name", SXML_ERROR_INVALID_TOKEN, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + return SXRET_OK; +} +static void TokenToXMLString(SyToken *pTok, SyXMLRawStrNS *pOut) { + /* Remove leading and trailing white spaces first */ + SyStringFullTrim(&pTok->sData); + pOut->zString = SyStringData(&pTok->sData); + pOut->nByte = SyStringLength(&pTok->sData); +} +static sxi32 XMLExtractNS(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SyXMLRawStr *pnsUri) { + SyXMLRawStr *pUri, sPrefix; + SyHashEntry *pEntry; + sxu32 nOfft; + sxi32 rc; + /* Extract a prefix if available */ + rc = SyByteFind(pTag->zString, pTag->nByte, ':', &nOfft); + if(rc != SXRET_OK) { + /* Check if there is a default namespace */ + pEntry = SyHashGet(&pParse->hns, "Default", sizeof("Default") - 1); + if(pEntry) { + 
/* Extract the ns URI */ + pUri = (SyXMLRawStr *)pEntry->pUserData; + /* Save the ns URI */ + pnsUri->zString = pUri->zString; + pnsUri->nByte = pUri->nByte; + } + return SXRET_OK; + } + if(nOfft < 1) { + if(pParse->xError) { + rc = pParse->xError("Empty prefix is not allowed according to XML namespace specification", + SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + sPrefix.zString = pTag->zString; + sPrefix.nByte = nOfft; + sPrefix.nLine = pTag->nLine; + pTag->zString += nOfft + 1; /* Jump the prefix and the ':' separator */ + pTag->nByte -= nOfft + 1; /* Drop the same nOfft + 1 bytes so only the local name is counted */ + if(pTag->nByte < 1) { + if(pParse->xError) { + rc = pParse->xError("Missing tag name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + /* Check if the prefix is already registered */ + pEntry = SyHashGet(&pParse->hns, sPrefix.zString, sPrefix.nByte); + if(pEntry == 0) { + if(pParse->xError) { + rc = pParse->xError("Namespace prefix is not defined", SXML_ERROR_SYNTAX, + pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + return SXERR_SYNTAX; + } + /* Extract the ns URI */ + pUri = (SyXMLRawStr *)pEntry->pUserData; + /* Save the ns URI */ + pnsUri->zString = pUri->zString; + pnsUri->nByte = pUri->nByte; + /* All done */ + return SXRET_OK; +} +static sxi32 XMLnsUnlink(SyXMLParser *pParse, SyXMLRawStrNS *pLast, SyToken *pToken) { + SyHashEntry **apEntry, *pEntry; + void *pUserData; + sxu32 n; + /* Release namespace entries */ + apEntry = (SyHashEntry **)SySetBasePtr(&pLast->sNSset); + for(n = 0 ; n < SySetUsed(&pLast->sNSset) ; ++n) { + pEntry = apEntry[n]; + /* Invoke the end namespace declaration callback */ + if(pParse->xNameSpaceEnd && (pParse->nFlags & SXML_ENABLE_NAMESPACE) && pToken) { + SyXMLRawStr sPrefix; + sxi32 rc; + sPrefix.zString = (const char *)pEntry->pKey; + sPrefix.nByte = pEntry->nKeyLen; + sPrefix.nLine = pToken->nLine; + rc = 
pParse->xNameSpaceEnd(&sPrefix, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + pUserData = pEntry->pUserData; + /* Remove from the namespace hashtable */ + SyHashDeleteEntry2(pEntry); + SyMemBackendFree(pParse->pAllocator, pUserData); + } + SySetRelease(&pLast->sNSset); + return SXRET_OK; +} +/* Process XML tokens: walk pParse->sToken and dispatch each token to the registered callbacks. Returns SXERR_ABORT as soon as a callback or helper requests an abort during dispatch, SXRET_OK otherwise. */ +static sxi32 ProcessXML(SyXMLParser *pParse, SySet *pTagStack, SySet *pWorker) { + SySet *pTokenSet = &pParse->sToken; + SyXMLRawStrNS sEntry; + SyXMLRawStr sNs; + SyToken *pToken; + int bGotTag; + sxi32 rc; + /* Initialize fields */ + bGotTag = 0; + /* Start processing */ + if(pParse->xStartDoc && (SXERR_ABORT == pParse->xStartDoc(pParse->pUserData))) { + /* User callback requested an operation abort */ + return SXERR_ABORT; + } + /* Reset the loop cursor */ + SySetResetCursor(pTokenSet); + /* Extract the current token */ + while(SXRET_OK == (SySetGetNextEntry(&(*pTokenSet), (void **)&pToken))) { + SyZero(&sEntry, sizeof(SyXMLRawStrNS)); + SyZero(&sNs, sizeof(SyXMLRawStr)); + SySetInit(&sEntry.sNSset, pParse->pAllocator, sizeof(SyHashEntry *)); + sEntry.nLine = sNs.nLine = pToken->nLine; + switch(pToken->nType) { + case SXML_TOK_DOCTYPE: + if(SySetUsed(pTagStack) > 1 || bGotTag) { + if(pParse->xError) { + rc = pParse->xError("DOCTYPE must be declared first", SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + /* Invoke the supplied callback if any */ + if(pParse->xDoctype) { + TokenToXMLString(pToken, &sEntry); + rc = pParse->xDoctype((SyXMLRawStr *)&sEntry, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + case SXML_TOK_CDATA: + if(SySetUsed(pTagStack) < 1) { + if(pParse->xError) { + rc = pParse->xError("CDATA without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } + /* Invoke the supplied callback if any. NOTE(review): unlike the SXML_TOK_RAW case there is no break after the error report above, so xRaw still runs for CDATA found outside any tag - confirm this is intended */ + if(pParse->xRaw) { + 
TokenToXMLString(pToken, &sEntry); + rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + case SXML_TOK_PI: { + SyXMLRawStr sTarget, sData; + int isXML = 0; + /* Extract the target and data */ + XMLExtactPI(pToken, &sTarget, &sData, &isXML); + if(isXML && SySetCursor(pTokenSet) - 1 > 0) { + if(pParse->xError) { + rc = pParse->xError("Unexpected XML declaration. The XML declaration must be the first node in the document", + SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } else if(pParse->xPi) { + /* Invoke the supplied callback */ + rc = pParse->xPi(&sTarget, &sData, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + case SXML_TOK_RAW: + if(SySetUsed(pTagStack) < 1) { + if(pParse->xError) { + rc = pParse->xError("Text (Raw data) without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + /* Invoke the supplied callback if any */ + if(pParse->xRaw) { + TokenToXMLString(pToken, &sEntry); + rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + case SXML_TOK_END_TAG: { + SyXMLRawStrNS *pLast = 0; /* cc warning */ + if(SySetUsed(pTagStack) < 1) { + if(pParse->xError) { + rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + rc = XMLExtractEndTag(pParse, pToken, &sEntry); + if(rc == SXRET_OK) { + /* Extract the last inserted entry */ + pLast = (SyXMLRawStrNS *)SySetPeek(pTagStack); + if(pLast == 0 || pLast->nByte != sEntry.nByte || + SyMemcmp(pLast->zString, sEntry.zString, sEntry.nByte) != 0) { + if(pParse->xError) { + rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); + 
if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } else { + /* Invoke the supplied callback if any */ + if(pParse->xEndTag) { + rc = SXRET_OK; + if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { + /* Extract namespace URI */ + rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + if(rc == SXRET_OK) { + rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } + } + } else if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + if(pLast) { + rc = XMLnsUnlink(pParse, pLast, pToken); + (void)SySetPop(pTagStack); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + case SXML_TOK_START_TAG: + case SXML_TOK_START_END: + if(SySetUsed(pTagStack) < 1 && bGotTag) { + if(pParse->xError) { + rc = pParse->xError("XML document cannot contain multiple root level elements documents", + SXML_ERROR_SYNTAX, pToken, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + break; + } + bGotTag = 1; + /* Extract the tag and its supplied attributes */ + rc = XMLProcessStartTag(pParse, pToken, &sEntry, pWorker, pTagStack); + if(rc == SXRET_OK) { + if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { + /* Extract namespace URI */ + rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); + } + } + if(rc == SXRET_OK) { + /* Invoke the supplied callback */ + if(pParse->xStartTag) { + rc = pParse->xStartTag((SyXMLRawStr *)&sEntry, &sNs, SySetUsed(pWorker), + (SyXMLRawStr *)SySetBasePtr(pWorker), pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + if(pToken->nType == SXML_TOK_START_END) { + if(pParse->xEndTag) { + rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + rc = XMLnsUnlink(pParse, &sEntry, pToken); + if(rc == SXERR_ABORT) { + return SXERR_ABORT; + } + } + } else if(rc == SXERR_ABORT) { + /* Abort processing immediately */ + return 
SXERR_ABORT; + } + break; + default: + /* Can't happen */ + break; + } + } + if(SySetUsed(pTagStack) > 0 && pParse->xError) { + pParse->xError("Missing closing tag", SXML_ERROR_SYNTAX, + (SyToken *)SySetPeek(&pParse->sToken), pParse->pUserData); + } + if(pParse->xEndDoc) { + pParse->xEndDoc(pParse->pUserData); + } + return SXRET_OK; +} +PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags) { + /* Zero the structure first */ + SyZero(pParser, sizeof(SyXMLParser)); + /* Initialize fields */ + SySetInit(&pParser->sToken, pAllocator, sizeof(SyToken)); + SyLexInit(&pParser->sLex, &pParser->sToken, XML_Tokenize, pParser); + SyHashInit(&pParser->hns, pAllocator, 0, 0); + pParser->pAllocator = pAllocator; + pParser->nFlags = iFlags; + return SXRET_OK; +} +PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser, + void *pUserData, + ProcXMLStartTagHandler xStartTag, + ProcXMLTextHandler xRaw, + ProcXMLSyntaxErrorHandler xErr, + ProcXMLStartDocument xStartDoc, + ProcXMLEndTagHandler xEndTag, + ProcXMLPIHandler xPi, + ProcXMLEndDocument xEndDoc, + ProcXMLDoctypeHandler xDoctype, + ProcXMLNameSpaceStart xNameSpace, + ProcXMLNameSpaceEnd xNameSpaceEnd + ) { + /* Install user callbacks; a NULL argument leaves the corresponding handler untouched, pUserData is always overwritten */ + if(xErr) { + pParser->xError = xErr; + } + if(xStartDoc) { + pParser->xStartDoc = xStartDoc; + } + if(xStartTag) { + pParser->xStartTag = xStartTag; + } + if(xRaw) { + pParser->xRaw = xRaw; + } + if(xEndTag) { + pParser->xEndTag = xEndTag; + } + if(xPi) { + pParser->xPi = xPi; + } + if(xEndDoc) { + pParser->xEndDoc = xEndDoc; + } + if(xDoctype) { + pParser->xDoctype = xDoctype; + } + if(xNameSpace) { + pParser->xNameSpace = xNameSpace; + } + if(xNameSpaceEnd) { + pParser->xNameSpaceEnd = xNameSpaceEnd; + } + pParser->pUserData = pUserData; + return SXRET_OK; +} +/* Process an XML chunk: tokenize zInput (nByte bytes), then run ProcessXML over the tokens. Returns SXERR_ABORT if tokenization aborts, otherwise the ProcessXML result (SXRET_OK when no token was produced). */ +PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte) { + SySet sTagStack; + SySet sWorker; + sxi32 rc; + /* Initialize 
working sets */ + SySetInit(&sWorker, pParser->pAllocator, sizeof(SyXMLRawStr)); /* Tag container */ + SySetInit(&sTagStack, pParser->pAllocator, sizeof(SyXMLRawStrNS)); /* Tag stack */ + /* Tokenize the entire input */ + rc = SyLexTokenizeInput(&pParser->sLex, zInput, nByte, 0, 0, 0); + if(rc == SXERR_ABORT) { + /* Tokenize callback requested an operation abort */ + return SXERR_ABORT; + } + if(SySetUsed(&pParser->sToken) < 1) { + /* Nothing to process [i.e: white spaces] */ + rc = SXRET_OK; + } else { + /* Process XML Tokens */ + rc = ProcessXML(&(*pParser), &sTagStack, &sWorker); + if(pParser->nFlags & SXML_ENABLE_NAMESPACE) { + if(SySetUsed(&sTagStack) > 0) { + SyXMLRawStrNS *pEntry; + SyHashEntry **apEntry; + sxu32 n; + SySetResetCursor(&sTagStack); + while(SySetGetNextEntry(&sTagStack, (void **)&pEntry) == SXRET_OK) { + /* Release namespace entries still attached to tags left on the stack */ + apEntry = (SyHashEntry **)SySetBasePtr(&pEntry->sNSset); + for(n = 0 ; n < SySetUsed(&pEntry->sNSset) ; ++n) { + SyMemBackendFree(pParser->pAllocator, apEntry[n]->pUserData); + } + SySetRelease(&pEntry->sNSset); + } + } + } + } + /* Clean-up the mess left behind */ + SySetRelease(&sWorker); + SySetRelease(&sTagStack); + /* Processing result */ + return rc; +} +PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser) { + SyLexRelease(&pParser->sLex); + SySetRelease(&pParser->sToken); + SyHashRelease(&pParser->hns); + return SXRET_OK; +} \ No newline at end of file diff --git a/ext/xml/lib.h b/ext/xml/lib.h new file mode 100644 index 0000000..6a10b15 --- /dev/null +++ b/ext/xml/lib.h @@ -0,0 +1,143 @@ +#ifndef __LIB_H__ +#define __LIB_H__ + +#include "ph7.h" +#include "ph7int.h" + +/* + * Lexer token codes + * The following set of constants are the token values recognized + * by the lexer when processing XML input. 
+ */ +#define SXML_TOK_INVALID 0xFFFF /* Invalid Token */ +#define SXML_TOK_COMMENT 0x01 /* Comment */ +#define SXML_TOK_PI 0x02 /* Processing instruction */ +#define SXML_TOK_DOCTYPE 0x04 /* Doctype directive */ +#define SXML_TOK_RAW 0x08 /* Raw text */ +#define SXML_TOK_START_TAG 0x10 /* Starting tag */ +#define SXML_TOK_CDATA 0x20 /* CDATA */ +#define SXML_TOK_END_TAG 0x40 /* Ending tag */ +#define SXML_TOK_START_END 0x80 /* Tag for which ProcessXML fires the start handler immediately followed by the end handler */ +#define SXML_TOK_SPACE 0x100 /* Spaces (including new lines) */ +#define IS_XML_DIRTY(c) \ + ( (c) == '<' || (c) == '$'|| (c) == '"' || (c) == '\''|| (c) == '&'|| (c) == '(' || (c) == ')' || (c) == '*' ||\ + (c) == '%' || (c) == '#' || (c) == '|' || (c) == '/'|| (c) == '~' || (c) == '{' || (c) == '}' ||\ + (c) == '[' || (c) == ']' || (c) == '\\'|| (c) == ';'||(c) == '^' || (c) == '`' ) /* True when c is one of the listed punctuation characters. The argument is parenthesized so any expression can be passed, but it is evaluated several times: pass side-effect free expressions only. */ + +/* XML processing control flags */ +#define SXML_ENABLE_NAMESPACE 0x01 /* Parse XML with namespace support enabled */ +#define SXML_ENABLE_QUERY 0x02 /* Not used */ +#define SXML_OPTION_CASE_FOLDING 0x04 /* Controls whether case-folding is enabled for this XML parser */ +#define SXML_OPTION_SKIP_TAGSTART 0x08 /* Specify how many characters should be skipped in the beginning of a tag name.*/ +#define SXML_OPTION_SKIP_WHITE 0x10 /* Whether to skip values consisting of whitespace characters. 
*/ +#define SXML_OPTION_TARGET_ENCODING 0x20 /* Default encoding: UTF-8 */ + +/* XML error codes (numbering starts at 1 for SXML_ERROR_NONE) */ +enum xml_err_code { + SXML_ERROR_NONE = 1, + SXML_ERROR_NO_MEMORY, + SXML_ERROR_SYNTAX, + SXML_ERROR_NO_ELEMENTS, + SXML_ERROR_INVALID_TOKEN, + SXML_ERROR_UNCLOSED_TOKEN, + SXML_ERROR_PARTIAL_CHAR, + SXML_ERROR_TAG_MISMATCH, + SXML_ERROR_DUPLICATE_ATTRIBUTE, + SXML_ERROR_JUNK_AFTER_DOC_ELEMENT, + SXML_ERROR_PARAM_ENTITY_REF, + SXML_ERROR_UNDEFINED_ENTITY, + SXML_ERROR_RECURSIVE_ENTITY_REF, + SXML_ERROR_ASYNC_ENTITY, + SXML_ERROR_BAD_CHAR_REF, + SXML_ERROR_BINARY_ENTITY_REF, + SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, + SXML_ERROR_MISPLACED_XML_PI, + SXML_ERROR_UNKNOWN_ENCODING, + SXML_ERROR_INCORRECT_ENCODING, + SXML_ERROR_UNCLOSED_CDATA_SECTION, + SXML_ERROR_EXTERNAL_ENTITY_HANDLING +}; + +/* + * An XML raw text,CDATA,tag name and so on is parsed out and stored + * in an instance of the following structure. + */ +typedef struct SyXMLRawStr SyXMLRawStr; +struct SyXMLRawStr { + const char *zString; /* Raw text [UTF-8 ENCODED EXCEPT CDATA] [NOT NULL TERMINATED] */ + sxu32 nByte; /* Text length */ + sxu32 nLine; /* Line number this text occurs */ +}; + +/* + * An XML raw text,CDATA,tag name is parsed out and stored + * in an instance of the following structure. + */ +typedef struct SyXMLRawStrNS SyXMLRawStrNS; +struct SyXMLRawStrNS { + /* Public fields [must match the SyXMLRawStr fields: ProcessXML casts SyXMLRawStrNS pointers to SyXMLRawStr pointers] */ + const char *zString; /* Raw text [UTF-8 ENCODED EXCEPT CDATA] [NOT NULL TERMINATED] */ + sxu32 nByte; /* Text length */ + sxu32 nLine; /* Line number this text occurs */ + /* Private fields */ + SySet sNSset; /* Namespace entries */ +}; + +/* + * Event callback signatures. 
+ * ProcessXML stops and returns SXERR_ABORT when a handler invoked during token dispatch returns SXERR_ABORT. */ +typedef sxi32(*ProcXMLStartTagHandler)(SyXMLRawStr *, SyXMLRawStr *, sxu32, SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLTextHandler)(SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLEndTagHandler)(SyXMLRawStr *, SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLPIHandler)(SyXMLRawStr *, SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLDoctypeHandler)(SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLSyntaxErrorHandler)(const char *, int, SyToken *, void *); +typedef sxi32(*ProcXMLStartDocument)(void *); +typedef sxi32(*ProcXMLNameSpaceStart)(SyXMLRawStr *, SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLNameSpaceEnd)(SyXMLRawStr *, void *); +typedef sxi32(*ProcXMLEndDocument)(void *); + +/* Each active XML SAX parser is represented by an instance + * of the following structure. + */ +typedef struct SyXMLParser SyXMLParser; +struct SyXMLParser { + SyMemBackend *pAllocator; /* Memory backend */ + void *pUserData; /* User private data forwarded verbatim by the XML parser + * as the last argument to the users callbacks. 
+ */ + SyHash hns; /* Namespace hashtable */ + SySet sToken; /* XML tokens */ + SyLex sLex; /* Lexical analyzer */ + sxi32 nFlags; /* Control flags */ + /* User callbacks */ + ProcXMLStartTagHandler xStartTag; /* Start element handler */ + ProcXMLEndTagHandler xEndTag; /* End element handler */ + ProcXMLTextHandler xRaw; /* Raw text/CDATA handler */ + ProcXMLDoctypeHandler xDoctype; /* DOCTYPE handler */ + ProcXMLPIHandler xPi; /* Processing instruction (PI) handler */ + ProcXMLSyntaxErrorHandler xError; /* Error handler */ + ProcXMLStartDocument xStartDoc; /* StartDoc handler */ + ProcXMLEndDocument xEndDoc; /* EndDoc handler */ + ProcXMLNameSpaceStart xNameSpace; /* Namespace declaration handler */ + ProcXMLNameSpaceEnd xNameSpaceEnd; /* End namespace declaration handler */ +}; + +PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags); +PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser, + void *pUserData, + ProcXMLStartTagHandler xStartTag, + ProcXMLTextHandler xRaw, + ProcXMLSyntaxErrorHandler xErr, + ProcXMLStartDocument xStartDoc, + ProcXMLEndTagHandler xEndTag, + ProcXMLPIHandler xPi, + ProcXMLEndDocument xEndDoc, + ProcXMLDoctypeHandler xDoctype, + ProcXMLNameSpaceStart xNameSpace, + ProcXMLNameSpaceEnd xNameSpaceEnd + ); +PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte); +PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser); + +#endif \ No newline at end of file diff --git a/ext/xml/xml.c b/ext/xml/xml.c new file mode 100644 index 0000000..15d4f17 --- /dev/null +++ b/ext/xml/xml.c @@ -0,0 +1,1483 @@ +#include "xml.h" + +/* + * XML_ERROR_NONE + * Expand the value of SXML_ERROR_NONE defined in lib.h (the "no error" code, distinct from SXML_ERROR_NO_MEMORY) + */ +static void PH7_XML_ERROR_NONE_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_NONE); +} +/* + * XML_ERROR_NO_MEMORY + * Expand the value of SXML_ERROR_NO_MEMORY defined in lib.h + */ 
+static void PH7_XML_ERROR_NO_MEMORY_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_NO_MEMORY); +} +/* + * XML_ERROR_SYNTAX + * Expand the value of SXML_ERROR_SYNTAX defined in lib.h + */ +static void PH7_XML_ERROR_SYNTAX_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_SYNTAX); +} +/* + * XML_ERROR_NO_ELEMENTS + * Expand the value of SXML_ERROR_NO_ELEMENTS defined in lib.h + */ +static void PH7_XML_ERROR_NO_ELEMENTS_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_NO_ELEMENTS); +} +/* + * XML_ERROR_INVALID_TOKEN + * Expand the value of SXML_ERROR_INVALID_TOKEN defined in lib.h + */ +static void PH7_XML_ERROR_INVALID_TOKEN_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_INVALID_TOKEN); +} +/* + * XML_ERROR_UNCLOSED_TOKEN + * Expand the value of SXML_ERROR_UNCLOSED_TOKEN defined in lib.h + */ +static void PH7_XML_ERROR_UNCLOSED_TOKEN_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_UNCLOSED_TOKEN); +} +/* + * XML_ERROR_PARTIAL_CHAR + * Expand the value of SXML_ERROR_PARTIAL_CHAR defined in lib.h + */ +static void PH7_XML_ERROR_PARTIAL_CHAR_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_PARTIAL_CHAR); +} +/* + * XML_ERROR_TAG_MISMATCH + * Expand the value of SXML_ERROR_TAG_MISMATCH defined in lib.h + */ +static void PH7_XML_ERROR_TAG_MISMATCH_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_TAG_MISMATCH); +} +/* + * XML_ERROR_DUPLICATE_ATTRIBUTE + * Expand the value of SXML_ERROR_DUPLICATE_ATTRIBUTE defined in lib.h + */ +static void PH7_XML_ERROR_DUPLICATE_ATTRIBUTE_Const(ph7_value 
*pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_DUPLICATE_ATTRIBUTE); +} +/* + * XML_ERROR_JUNK_AFTER_DOC_ELEMENT + * Expand the value of SXML_ERROR_JUNK_AFTER_DOC_ELEMENT defined in lib.h + */ +static void PH7_XML_ERROR_JUNK_AFTER_DOC_ELEMENT_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_JUNK_AFTER_DOC_ELEMENT); +} +/* + * XML_ERROR_PARAM_ENTITY_REF + * Expand the value of SXML_ERROR_PARAM_ENTITY_REF defined in lib.h + */ +static void PH7_XML_ERROR_PARAM_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_PARAM_ENTITY_REF); +} +/* + * XML_ERROR_UNDEFINED_ENTITY + * Expand the value of SXML_ERROR_UNDEFINED_ENTITY defined in lib.h + */ +static void PH7_XML_ERROR_UNDEFINED_ENTITY_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_UNDEFINED_ENTITY); +} +/* + * XML_ERROR_RECURSIVE_ENTITY_REF + * Expand the value of SXML_ERROR_RECURSIVE_ENTITY_REF defined in lib.h + */ +static void PH7_XML_ERROR_RECURSIVE_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_RECURSIVE_ENTITY_REF); +} +/* + * XML_ERROR_ASYNC_ENTITY + * Expand the value of SXML_ERROR_ASYNC_ENTITY defined in lib.h + */ +static void PH7_XML_ERROR_ASYNC_ENTITY_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_ASYNC_ENTITY); +} +/* + * XML_ERROR_BAD_CHAR_REF + * Expand the value of SXML_ERROR_BAD_CHAR_REF defined in lib.h + */ +static void PH7_XML_ERROR_BAD_CHAR_REF_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_BAD_CHAR_REF); +} +/* + * XML_ERROR_BINARY_ENTITY_REF + * Expand the value of SXML_ERROR_BINARY_ENTITY_REF defined in lib.h 
+ */ +static void PH7_XML_ERROR_BINARY_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_BINARY_ENTITY_REF); +} +/* + * XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF + * Expand the value of SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF defined in lib.h + */ +static void PH7_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); +} +/* + * XML_ERROR_MISPLACED_XML_PI + * Expand the value of SXML_ERROR_MISPLACED_XML_PI defined in lib.h + */ +static void PH7_XML_ERROR_MISPLACED_XML_PI_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_MISPLACED_XML_PI); +} +/* + * XML_ERROR_UNKNOWN_ENCODING + * Expand the value of SXML_ERROR_UNKNOWN_ENCODING defined in lib.h + */ +static void PH7_XML_ERROR_UNKNOWN_ENCODING_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_UNKNOWN_ENCODING); +} +/* + * XML_ERROR_INCORRECT_ENCODING + * Expand the value of SXML_ERROR_INCORRECT_ENCODING defined in lib.h + */ +static void PH7_XML_ERROR_INCORRECT_ENCODING_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_INCORRECT_ENCODING); +} +/* + * XML_ERROR_UNCLOSED_CDATA_SECTION + * Expand the value of SXML_ERROR_UNCLOSED_CDATA_SECTION defined in lib.h + */ +static void PH7_XML_ERROR_UNCLOSED_CDATA_SECTION_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_ERROR_UNCLOSED_CDATA_SECTION); +} +/* + * XML_ERROR_EXTERNAL_ENTITY_HANDLING + * Expand the value of SXML_ERROR_EXTERNAL_ENTITY_HANDLING defined in lib.h + */ +static void PH7_XML_ERROR_EXTERNAL_ENTITY_HANDLING_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ 
+ ph7_value_int(pVal, SXML_ERROR_EXTERNAL_ENTITY_HANDLING); +} +/* + * XML_OPTION_CASE_FOLDING + * Expand the value of SXML_OPTION_CASE_FOLDING defined in lib.h. + */ +static void PH7_XML_OPTION_CASE_FOLDING_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_OPTION_CASE_FOLDING); +} +/* + * XML_OPTION_TARGET_ENCODING + * Expand the value of SXML_OPTION_TARGET_ENCODING defined in lib.h. + */ +static void PH7_XML_OPTION_TARGET_ENCODING_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_OPTION_TARGET_ENCODING); +} +/* + * XML_OPTION_SKIP_TAGSTART + * Expand the value of SXML_OPTION_SKIP_TAGSTART defined in lib.h. + */ +static void PH7_XML_OPTION_SKIP_TAGSTART_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_OPTION_SKIP_TAGSTART); +} +/* + * XML_OPTION_SKIP_WHITE + * Expand the value of SXML_OPTION_SKIP_WHITE defined in lib.h. + */ +static void PH7_XML_OPTION_SKIP_WHITE_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_int(pVal, SXML_OPTION_SKIP_WHITE); +} +/* + * XML_SAX_IMPL. + * Expand the name of the underlying XML engine. + */ +static void PH7_XML_SAX_IMP_Const(ph7_value *pVal, void *pUserData) { + SXUNUSED(pUserData); /* cc warning */ + ph7_value_string(pVal, "Symisc XML engine", (int)sizeof("Symisc XML engine") - 1); +} + +/* + * Allocate and initialize an XML engine. 
+ * Returns the new engine, or 0 when the allocation fails; namespace processing is enabled when process_ns is non-zero. */ +static ph7_xml_engine *VmCreateXMLEngine(ph7_context *pCtx, int process_ns, int ns_sep) { + ph7_xml_engine *pEngine; + ph7_vm *pVm = pCtx->pVm; + ph7_value *pValue; + sxu32 n; + /* Allocate a new instance */ + pEngine = (ph7_xml_engine *)SyMemBackendAlloc(&pVm->sAllocator, sizeof(ph7_xml_engine)); + if(pEngine == 0) { + /* Out of memory */ + return 0; + } + /* Zero the structure */ + SyZero(pEngine, sizeof(ph7_xml_engine)); + /* Initialize fields */ + pEngine->pVm = pVm; + pEngine->pCtx = 0; + pEngine->ns_sep = ns_sep; + SyXMLParserInit(&pEngine->sParser, &pVm->sAllocator, process_ns ? SXML_ENABLE_NAMESPACE : 0); + SyBlobInit(&pEngine->sErr, &pVm->sAllocator); + PH7_MemObjInit(pVm, &pEngine->sParserValue); + for(n = 0 ; n < SX_ARRAYSIZE(pEngine->aCB) ; ++n) { + pValue = &pEngine->aCB[n]; + /* NULLIFY the array entries until someone registers an event handler */ + PH7_MemObjInit(&(*pVm), pValue); + } + ph7_value_resource(&pEngine->sParserValue, pEngine); + pEngine->iErrCode = SXML_ERROR_NONE; + /* Finally set the magic number */ + pEngine->nMagic = XML_ENGINE_MAGIC; + return pEngine; +} +/* + * Release an XML engine: its error blob, parser, callback values and finally the instance itself. + */ +static void VmReleaseXMLEngine(ph7_xml_engine *pEngine) { + ph7_vm *pVm = pEngine->pVm; + ph7_value *pValue; + sxu32 n; + /* Release fields */ + SyBlobRelease(&pEngine->sErr); + SyXMLParserRelease(&pEngine->sParser); + PH7_MemObjRelease(&pEngine->sParserValue); + for(n = 0 ; n < SX_ARRAYSIZE(pEngine->aCB) ; ++n) { + pValue = &pEngine->aCB[n]; + PH7_MemObjRelease(pValue); + } + pEngine->nMagic = 0x2621; /* overwrite the magic number before freeing; presumably so IS_INVALID_XML_ENGINE rejects stale handles (macro not visible here - confirm) */ + /* Finally,release the whole instance */ + SyMemBackendFree(&pVm->sAllocator, pEngine); +} +/* + * resource xml_parser_create([ string $encoding ]) + * Create an UTF-8 XML parser. + * Parameter + * $encoding + * (Only UTF-8 encoding is used) + * Return + * Returns a resource handle for the new XML parser. 
+ */ +static int vm_builtin_xml_parser_create(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + /* Allocate a new instance (no namespace processing, ':' as separator) */ + pEngine = VmCreateXMLEngine(&(*pCtx), 0, ':'); + if(pEngine == 0) { + ph7_context_throw_error(pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); + /* Return null */ + ph7_result_null(pCtx); + SXUNUSED(nArg); /* cc warning */ + SXUNUSED(apArg); + return PH7_OK; + } + /* Return the engine as a resource */ + ph7_result_resource(pCtx, pEngine); + return PH7_OK; +} +/* + * resource xml_parser_create_ns([ string $encoding[,string $separator = ':']]) + * Create an UTF-8 XML parser with namespace support. + * Parameter + * $encoding + * (Only UTF-8 encoding is supported) + * $separator + * Namespace separator (only the first character of the string is used) + * Return + * Returns a resource handle for the new XML parser (NULL if the engine cannot be allocated). + */ +static int vm_builtin_xml_parser_create_ns(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + int ns_sep = ':'; + if(nArg > 1 && ph7_value_is_string(apArg[1])) { + const char *zSep = ph7_value_to_string(apArg[1], 0); + if(zSep[0] != 0) { + ns_sep = zSep[0]; + } + } + /* Allocate a new instance */ + pEngine = VmCreateXMLEngine(&(*pCtx), TRUE, ns_sep); + if(pEngine == 0) { + ph7_context_throw_error(pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); + /* Return null */ + ph7_result_null(pCtx); + return PH7_OK; + } + /* Return the engine as a resource */ + ph7_result_resource(pCtx, pEngine); + return PH7_OK; +} +/* + * bool xml_parser_free(resource $parser) + * Release an XML engine. + * Parameter + * $parser + * A reference to the XML parser to free. + * Return + * This function returns FALSE if parser does not refer + * to a valid parser, or else it frees the parser and returns TRUE. 
+ */ +static int vm_builtin_xml_parser_free(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { + /* Missing/Invalid argument,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Point to the XML engine */ + pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); + if(IS_INVALID_XML_ENGINE(pEngine)) { + /* Corrupt engine,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Release the engine. NOTE(review): apArg[0] is not reset here, so the script-side resource presumably keeps the freed pointer - confirm later calls cannot dereference it */ + VmReleaseXMLEngine(pEngine); + /* Return TRUE */ + ph7_result_bool(pCtx, 1); + return PH7_OK; +} +/* + * bool xml_set_element_handler(resource $parser,callback $start_element_handler,[callback $end_element_handler]) + * Sets the element handler functions for the XML parser. start_element_handler and end_element_handler + * are strings containing the names of functions. + * Parameters + * $parser + * A reference to the XML parser to set up start and end element handler functions. + * $start_element_handler + * The function named by start_element_handler must accept three parameters: + * start_element_handler(resource $parser,string $name,array $attribs) + * $parser + * The first parameter, parser, is a reference to the XML parser calling the handler. + * $name + * The second parameter, name, contains the name of the element for which this handler + * is called.If case-folding is in effect for this parser, the element name will be in uppercase letters. + * $attribs + * The third parameter, attribs, contains an associative array with the element's attributes (if any). + * The keys of this array are the attribute names, the values are the attribute values. + * Attribute names are case-folded on the same criteria as element names.Attribute values are not case-folded. + * The original order of the attributes can be retrieved by walking through attribs the normal way, using each(). + * The first key in the array was the first attribute, and so on. 
+ * Note: Instead of a function name, an array containing an object reference and a method name can also be supplied. + * $end_element_handler + * The function named by end_element_handler must accept two parameters: + * end_element_handler(resource $parser,string $name) + * $parser + * The first parameter, parser, is a reference to the XML parser calling the handler. + * $name + * The second parameter, name, contains the name of the element for which this handler + * is called.If case-folding is in effect for this parser, the element name will be in uppercase + * letters. + * If a handler function is set to an empty string, or FALSE, the handler in question is disabled. + * Return + * TRUE on success or FALSE on failure. + */ +static int vm_builtin_xml_set_element_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { + /* Missing/Invalid argument,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Point to the XML engine */ + pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); + if(IS_INVALID_XML_ENGINE(pEngine)) { + /* Corrupt engine,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + if(nArg > 1) { + /* Save the start_element_handler callback for later invocation */ + PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_START_TAG]); + if(nArg > 2) { + /* Save the end_element_handler callback for later invocation */ + PH7_MemObjStore(apArg[2]/* User callback*/, &pEngine->aCB[PH7_XML_END_TAG]); + } + } + /* All done,return TRUE (also when no handler argument was supplied) */ + ph7_result_bool(pCtx, 1); + return PH7_OK; +} +/* + * bool xml_set_character_data_handler(resource $parser,callback $handler) + * Sets the character data handler function for the XML parser parser. + * Parameters + * $parser + * A reference to the XML parser to set up character data handler function. + * $handler + * handler is a string containing the name of the callback. 
+ * The function named by handler must accept two parameters: + * handler(resource $parser,string $data) + * $parser + * The first parameter, parser, is a reference to the XML parser calling the handler. + * $data + * The second parameter, data, contains the character data as a string. + * Character data handler is called for every piece of a text in the XML document. + * It can be called multiple times inside each fragment (e.g. for non-ASCII strings). + * If a handler function is set to an empty string, or FALSE, the handler in question is disabled. + * Note: Instead of a function name, an array containing an object reference and a method name + * can also be supplied. + * Return + * TRUE on success or FALSE on failure. + */ +static int vm_builtin_xml_set_character_data_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { + /* Missing/Invalid argument,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Point to the XML engine */ + pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); + if(IS_INVALID_XML_ENGINE(pEngine)) { + /* Corrupt engine,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + if(nArg > 1) { + /* Save the user callback for later invocation */ + PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_CDATA]); + } + /* All done,return TRUE (also when no handler argument was supplied) */ + ph7_result_bool(pCtx, 1); + return PH7_OK; +} +/* + * bool xml_set_default_handler(resource $parser,callback $handler) + * Set up default handler. + * Parameters + * $parser + * A reference to the XML parser to set up character data handler function. + * $handler + * handler is a string containing the name of the callback. + * The function named by handler must accept two parameters: + * handler(resource $parser,string $data) + * $parser + * The first parameter, parser, is a reference to the XML parser calling the handler. 
+ * $data + * The second parameter, data, contains the character data.This may be the XML declaration + * document type declaration, entities or other data for which no other handler exists. + * Note: Instead of a function name, an array containing an object reference and a method name + * can also be supplied. + * Return + * TRUE on success or FALSE on failure. + */ +static int vm_builtin_xml_set_default_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { + /* Missing/Invalid argument,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Point to the XML engine */ + pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); + if(IS_INVALID_XML_ENGINE(pEngine)) { + /* Corrupt engine,return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + if(nArg > 1) { + /* Save the user callback for later invocation */ + PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_DEF]); + } + /* All done,return TRUE (also when no handler argument was supplied) */ + ph7_result_bool(pCtx, 1); + return PH7_OK; +} +/* + * bool xml_set_end_namespace_decl_handler(resource $parser,callback $handler) + * Set up end namespace declaration handler. + * Parameters + * $parser + * A reference to the XML parser to set up character data handler function. + * $handler + * handler is a string containing the name of the callback. + * The function named by handler must accept two parameters: + * handler(resource $parser,string $prefix) + * $parser + * The first parameter, parser, is a reference to the XML parser calling the handler. + * $prefix + * The prefix is a string used to reference the namespace within an XML object. + * Note: Instead of a function name, an array containing an object reference and a method name + * can also be supplied. + * Return + * TRUE on success or FALSE on failure. 
+ */
+static int vm_builtin_xml_set_end_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the namespace-end slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_NS_END]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * bool xml_set_start_namespace_decl_handler(resource $parser,callback $handler)
+ *  Set up start namespace declaration handler.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set up the start namespace declaration handler function.
+ *  $handler
+ *   handler is a string containing the name of the callback.
+ *   The function named by handler must accept three parameters:
+ *   handler(resource $parser,string $prefix,string $uri)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $prefix
+ *   The prefix is a string used to reference the namespace within an XML object.
+ *  $uri
+ *   Uniform Resource Identifier (URI) of namespace.
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ * Return
+ *  TRUE on success or FALSE on failure.
+ */
+static int vm_builtin_xml_set_start_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the namespace-start slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_NS_START]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * bool xml_set_processing_instruction_handler(resource $parser,callback $handler)
+ *  Set up processing instruction (PI) handler.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set up the processing instruction handler function.
+ *  $handler
+ *   handler is a string containing the name of the callback.
+ *   The function named by handler must accept three parameters:
+ *   handler(resource $parser,string $target,string $data)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $target
+ *   The second parameter, target, contains the PI target.
+ *  $data
+ *   The third parameter, data, contains the PI data.
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ * Return
+ *  TRUE on success or FALSE on failure.
+ */
+static int vm_builtin_xml_set_processing_instruction_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the processing instruction slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_PI]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * bool xml_set_unparsed_entity_decl_handler(resource $parser,callback $handler)
+ *  Set up unparsed entity declaration handler.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set up the unparsed entity declaration handler function.
+ *  $handler
+ *   handler is a string containing the name of the callback.
+ *   The function named by handler must accept six parameters:
+ *   handler(resource $parser,string $entity_name,string $base,string $system_id,string $public_id,string $notation_name)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $entity_name
+ *   The name of the entity that is about to be defined.
+ *  $base
+ *   This is the base for resolving the system identifier (systemId) of the external entity.
+ *   Currently this parameter will always be set to an empty string.
+ *  $system_id
+ *   System identifier for the external entity.
+ *  $public_id
+ *   Public identifier for the external entity.
+ *  $notation_name
+ *   Name of the notation of this entity (see xml_set_notation_decl_handler()).
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ * Return
+ *  TRUE on success or FALSE on failure.
+ */
+static int vm_builtin_xml_set_unparsed_entity_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the unparsed entity slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_UNPED]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * bool xml_set_notation_decl_handler(resource $parser,callback $handler)
+ *  Set up notation declaration handler.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set up the notation declaration handler function.
+ *  $handler
+ *   handler is a string containing the name of the callback.
+ *   The function named by handler must accept five parameters:
+ *   handler(resource $parser,string $entity_name,string $base,string $system_id,string $public_id)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $entity_name
+ *   The name of the entity that is about to be defined.
+ *  $base
+ *   This is the base for resolving the system identifier (systemId) of the external entity.
+ *   Currently this parameter will always be set to an empty string.
+ *  $system_id
+ *   System identifier for the external entity.
+ *  $public_id
+ *   Public identifier for the external entity.
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ * Return
+ *  TRUE on success or FALSE on failure.
+ */
+static int vm_builtin_xml_set_notation_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the notation declaration slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_ND]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * bool xml_set_external_entity_ref_handler(resource $parser,callback $handler)
+ *  Set up external entity reference handler.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set up the external entity reference handler function.
+ *  $handler
+ *   handler is a string containing the name of the callback.
+ *   The function named by handler must accept five parameters:
+ *   handler(resource $parser,string $open_entity_names,string $base,string $system_id,string $public_id)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $open_entity_names
+ *   The second parameter, open_entity_names, is a space-separated list of the names
+ *   of the entities that are open for the parse of this entity (including the name of the referenced entity).
+ *  $base
+ *   This is the base for resolving the system identifier (system_id) of the external entity.
+ *   Currently this parameter will always be set to an empty string.
+ *  $system_id
+ *   The fourth parameter, system_id, is the system identifier as specified in the entity declaration.
+ *  $public_id
+ *   The fifth parameter, public_id, is the public identifier as specified in the entity declaration
+ *   or an empty string if none was specified; the whitespace in the public identifier will have been
+ *   normalized as required by the XML spec.
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ * Return
+ *  TRUE on success or FALSE on failure.
+ */
+static int vm_builtin_xml_set_external_entity_ref_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid && nArg >= 2) {
+		/* Keep a copy of the callback in the external entity reference slot */
+		PH7_MemObjStore(apArg[1], &pXml->aCB[PH7_XML_EER]);
+	}
+	/* TRUE for a usable parser,FALSE otherwise */
+	ph7_result_bool(pCtx, bValid);
+	return PH7_OK;
+}
+/*
+ * int xml_get_current_line_number(resource $parser)
+ *  Gets the current line number for the given XML parser.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ * Return
+ *  This function returns FALSE if parser does not refer
+ *  to a valid parser, or else it returns which line the parser
+ *  is currently at in its data buffer.
+ */
+static int vm_builtin_xml_get_current_line_number(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pEngine;
+	if(nArg < 1 || !ph7_value_is_resource(apArg[0])) {
+		/* Missing/Invalid argument,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the XML engine */
+	pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+	if(IS_INVALID_XML_ENGINE(pEngine)) {
+		/* Corrupt engine,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Return the line number recorded in the engine */
+	ph7_result_int(pCtx, (int)pEngine->nLine);
+	return PH7_OK;
+}
+/*
+ * int xml_get_current_byte_index(resource $parser)
+ *  Gets the current byte index of the given XML parser.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ * Return
+ *  This function returns FALSE if parser does not refer to a valid
+ *  parser, or else it returns which byte index the parser is currently
+ *  at in its data buffer (starting at 0).
+ *  0 is returned when no token has been processed yet.
+ */
+static int vm_builtin_xml_get_current_byte_index(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pEngine;
+	SyStream *pStream;
+	SyToken *pToken;
+	if(nArg < 1 || !ph7_value_is_resource(apArg[0])) {
+		/* Missing/Invalid argument,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the XML engine */
+	pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+	if(IS_INVALID_XML_ENGINE(pEngine)) {
+		/* Corrupt engine,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the current processed token */
+	pToken = (SyToken *)SySetPeekCurrentEntry(&pEngine->sParser.sToken);
+	if(pToken == 0) {
+		/* Stream not yet processed: report offset 0 using the same
+		 * status code as every other exit path of this function.
+		 */
+		ph7_result_int(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the input stream */
+	pStream = &pEngine->sParser.sLex.sStream;
+	/* The byte index is the distance between the token and the start of the input */
+	ph7_result_int64(pCtx, (ph7_int64)(pToken->sData.zString - (const char *)pStream->zInput));
+	return PH7_OK;
+}
+/*
+ * bool xml_set_object(resource $parser,object &$object)
+ *  Use XML Parser within an object.
+ * NOTE
+ *  This function is deprecated and is a no-op.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ *  $object
+ *   The object where to use the XML parser.
+ * Return
+ *  Always FALSE.
+ */
+static int vm_builtin_xml_set_object(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* Both a parser resource and an object are expected */
+	int bUsable = (nArg >= 2) && ph7_value_is_resource(apArg[0]) && ph7_value_is_object(apArg[1]);
+	if(bUsable) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bUsable = 0;
+		}
+	}
+	if(bUsable) {
+		/* Well formed call: tell the user that nothing is going to happen */
+		ph7_context_throw_error(pCtx, PH7_CTX_NOTICE, "This function is depreceated and is a no-op."
+								"In order to mimic this behaviour,you can supply instead of a function name an array "
+								"containing an object reference and a method name."
+							   );
+	}
+	/* FALSE in every case */
+	ph7_result_bool(pCtx, 0);
+	return PH7_OK;
+}
+/*
+ * int xml_get_current_column_number(resource $parser)
+ *  Gets the current column number of the given XML parser.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ * Return
+ *  This function returns FALSE if parser does not refer to a valid parser, or else it returns
+ *  which column on the current line (as given by xml_get_current_line_number()) the parser
+ *  is currently at.
+ */
+static int vm_builtin_xml_get_current_column_number(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pEngine;
+	SyStream *pStream;
+	SyToken *pToken;
+	if(nArg < 1 || !ph7_value_is_resource(apArg[0])) {
+		/* Missing/Invalid argument,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the XML engine */
+	pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+	if(IS_INVALID_XML_ENGINE(pEngine)) {
+		/* Corrupt engine,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the current processed token */
+	pToken = (SyToken *)SySetPeekCurrentEntry(&pEngine->sParser.sToken);
+	if(pToken == 0) {
+		/* Stream not yet processed: report 0 using the same status
+		 * code as every other exit path of this function.
+		 */
+		ph7_result_int(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Point to the input stream */
+	pStream = &pEngine->sParser.sLex.sStream;
+	/*
+	 * What is returned here is the byte offset of the current token divided by 80,
+	 * not an offset counted from the beginning of the current line.
+	 * NOTE(review): this is at best a rough stand-in for a column number. A real
+	 * column would require scanning the input back to the previous newline, which
+	 * is only safe while the input buffer is known to be alive - confirm before changing.
+	 */
+	ph7_result_int64(pCtx, (ph7_int64)(pToken->sData.zString - (const char *)pStream->zInput) / 80);
+	return PH7_OK;
+}
+/*
+ * int xml_get_error_code(resource $parser)
+ *  Get XML parser error code.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ * Return
+ *  This function returns FALSE if parser does not refer to a valid
+ *  parser, or else it returns one of the error codes listed in the error
+ *  codes section.
+ */
+static int vm_builtin_xml_get_error_code(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml = 0;
+	/* The first argument must be a parser resource */
+	int bValid = (nArg >= 1) && ph7_value_is_resource(apArg[0]);
+	if(bValid) {
+		/* Unwrap the engine and make sure it is sane */
+		pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+		if(IS_INVALID_XML_ENGINE(pXml)) {
+			bValid = 0;
+		}
+	}
+	if(bValid) {
+		/* Hand back the code recorded by the last parse */
+		ph7_result_int(pCtx, pXml->iErrCode);
+	} else {
+		/* Not a usable parser,return FALSE */
+		ph7_result_bool(pCtx, 0);
+	}
+	return PH7_OK;
+}
+/*
+ * XML parser event callbacks
+ * Each time the underlying XML parser extracts a single token
+ * from the input,one of the following callbacks is invoked.
+ * IMP-XML-ENGINE-07-07-2012 22:02 FreeBSD [chm@symisc.net]
+ */
+/*
+ * Create a scalar ph7_value holding the value
+ * of an XML tag/attribute/CDATA and so on.
+ * When a non-empty namespace URI is given, the URI followed by the
+ * engine separator character is written first.
+ * Return 0 (after throwing an error) if the scalar cannot be allocated.
+ */
+static ph7_value *VmXMLValue(ph7_xml_engine *pEngine, SyXMLRawStr *pXML, SyXMLRawStr *pNsUri) {
+	/* Obtain a fresh scalar from the calling context */
+	ph7_value *pScalar = ph7_context_new_scalar(pEngine->pCtx);
+	if(pScalar == 0) {
+		ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory");
+		return 0;
+	}
+	if(pNsUri != 0) {
+		if(pNsUri->nByte > 0) {
+			/* Namespace URI first,then the separator */
+			ph7_value_string_format(pScalar, "%.*s%c", pNsUri->nByte, pNsUri->zString, pEngine->ns_sep);
+		}
+	}
+	/* Finally the raw value itself */
+	ph7_value_string(pScalar, pXML->zString, (int)pXML->nByte);
+	return pScalar;
+}
+/*
+ * Create a 'ph7_value' of type array holding the values
+ * of an XML tag attributes.
+ * aAttr is read as consecutive name/value entries and nAttr is the
+ * total number of entries; a trailing name without a value is ignored.
+ * Return 0 (after throwing an error) when running out of memory; nothing
+ * allocated by this routine is left behind in that case.
+ */
+static ph7_value *VmXMLAttrValue(ph7_xml_engine *pEngine, SyXMLRawStr *aAttr, sxu32 nAttr) {
+	ph7_value *pArray, *pKey, *pValue;
+	sxu32 n;
+	/* Create an empty array */
+	pArray = ph7_context_new_array(pEngine->pCtx);
+	if(pArray == 0) {
+		ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory");
+		return 0;
+	}
+	if(nAttr < 1) {
+		/* No attributes,return the empty array */
+		return pArray;
+	}
+	/* Create worker variables */
+	pKey = ph7_context_new_scalar(pEngine->pCtx);
+	pValue = ph7_context_new_scalar(pEngine->pCtx);
+	if(pKey == 0 || pValue == 0) {
+		ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory");
+		/* Give back whatever was obtained so far instead of abandoning it */
+		if(pKey) {
+			ph7_context_release_value(pEngine->pCtx, pKey);
+		}
+		if(pValue) {
+			ph7_context_release_value(pEngine->pCtx, pValue);
+		}
+		ph7_context_release_value(pEngine->pCtx, pArray);
+		return 0;
+	}
+	/* Copy attributes; 'n + 1 < nAttr' keeps the value index inside the array */
+	for(n = 0 ; n + 1 < nAttr ; n += 2) {
+		/* Reset string cursors */
+		ph7_value_reset_string_cursor(pKey);
+		ph7_value_reset_string_cursor(pValue);
+		/* Copy attribute name and its associated value */
+		ph7_value_string(pKey, aAttr[n].zString, (int)aAttr[n].nByte); /* Attribute name */
+		ph7_value_string(pValue, aAttr[n + 1].zString, (int)aAttr[n + 1].nByte); /* Attribute value */
+		/* Insert in the array */
+		ph7_array_add_elem(pArray, pKey, pValue); /* Will make its own copy */
+	}
+	/* Release the worker variables */
+	ph7_context_release_value(pEngine->pCtx, pKey);
+	ph7_context_release_value(pEngine->pCtx, pValue);
+	/* Return the freshly created array */
+	return pArray;
+}
+/*
+ * Start element handler.
+ * The user defined callback must accept three parameters:
+ *  start_element_handler(resource $parser,string $name,array $attribs )
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $name
+ *   The second parameter, name, contains the name of the element for which this handler
+ *   is called.If case-folding is in effect for this parser, the element name will be in uppercase letters.
+ *  $attribs
+ *   The third parameter, attribs, contains an associative array with the element's attributes (if any).
+ *   The keys of this array are the attribute names, the values are the attribute values.
+ *   Attribute names are case-folded on the same criteria as element names.Attribute values are not case-folded.
+ *   The original order of the attributes can be retrieved by walking through attribs the normal way, using each().
+ *   The first key in the array was the first attribute, and so on.
+ * Note: Instead of a function name, an array containing an object reference and a method name can also be supplied.
+ * This routine always returns SXRET_OK; the user callback is simply skipped
+ * when it is not callable or when its arguments cannot be allocated.
+ */
+static sxi32 VmXMLStartElementHandler(SyXMLRawStr *pStart, SyXMLRawStr *pNS, sxu32 nAttr, SyXMLRawStr *aAttr, void *pUserData) {
+	ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData;
+	ph7_value *pCallback, *pTag, *pAttr;
+	/* Point to the target user defined callback */
+	pCallback = &pEngine->aCB[PH7_XML_START_TAG];
+	/* Make sure the given callback is callable */
+	if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) {
+		/* Not callable,return immediately */
+		return SXRET_OK;
+	}
+	/* Create a ph7_value holding the tag name */
+	pTag = VmXMLValue(pEngine, pStart, pNS);
+	/* Create a ph7_value holding the tag attributes */
+	pAttr = VmXMLAttrValue(pEngine, aAttr, nAttr);
+	if(pTag != 0 && pAttr != 0) {
+		/* Invoke the user callback */
+		PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pTag, pAttr, 0);
+	}
+	/* Clean-up the mess left behind. This also covers the out of memory
+	 * case where only one of the two values could be created.
+	 */
+	if(pTag) {
+		ph7_context_release_value(pEngine->pCtx, pTag);
+	}
+	if(pAttr) {
+		ph7_context_release_value(pEngine->pCtx, pAttr);
+	}
+	return SXRET_OK;
+}
+/*
+ * End element handler.
+ * The user defined callback must accept two parameters:
+ *  end_element_handler(resource $parser,string $name)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $name
+ *   The second parameter, name, contains the name of the element for which this handler is called.
+ *   If case-folding is in effect for this parser, the element name will be in uppercase letters.
+ *  Note: Instead of a function name, an array containing an object reference and a method name
+ *  can also be supplied.
+ */
+static sxi32 VmXMLEndElementHandler(SyXMLRawStr *pEnd, SyXMLRawStr *pNS, void *pUserData) {
+	ph7_xml_engine *pXml = (ph7_xml_engine *)pUserData;
+	/* Slot holding the user supplied end-tag callback */
+	ph7_value *pHandler = &pXml->aCB[PH7_XML_END_TAG];
+	ph7_value *pName;
+	if(PH7_VmIsCallable(pXml->pVm, pHandler, 0)) {
+		/* Build the (possibly namespace prefixed) tag name */
+		pName = VmXMLValue(pXml, pEnd, pNS);
+		if(pName != 0) {
+			/* Run the callback,then drop the temporary */
+			PH7_VmCallUserFunctionAp(pXml->pVm, pHandler, 0, &pXml->sParserValue, pName, 0);
+			ph7_context_release_value(pXml->pCtx, pName);
+		}
+	}
+	/* Nothing here ever interrupts the parse */
+	return SXRET_OK;
+}
+/*
+ * Character data handler.
+ * The user defined callback must accept two parameters:
+ *  handler(resource $parser,string $data)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $data
+ *   The second parameter, data, contains the character data as a string.
+ *   Character data handler is called for every piece of a text in the XML document.
+ *   It can be called multiple times inside each fragment (e.g. for non-ASCII strings).
+ *   If a handler function is set to an empty string, or FALSE, the handler in question is disabled.
+ *  Note: Instead of a function name, an array containing an object reference and a method name can also be supplied.
+ */
+static sxi32 VmXMLTextHandler(SyXMLRawStr *pText, void *pUserData) {
+	ph7_xml_engine *pXml = (ph7_xml_engine *)pUserData;
+	/* Slot holding the user supplied character data callback */
+	ph7_value *pHandler = &pXml->aCB[PH7_XML_CDATA];
+	ph7_value *pChunk;
+	if(PH7_VmIsCallable(pXml->pVm, pHandler, 0)) {
+		/* Wrap the raw text in a scalar (no namespace involved) */
+		pChunk = VmXMLValue(pXml, pText, 0);
+		if(pChunk != 0) {
+			/* Run the callback,then drop the temporary */
+			PH7_VmCallUserFunctionAp(pXml->pVm, pHandler, 0, &pXml->sParserValue, pChunk, 0);
+			ph7_context_release_value(pXml->pCtx, pChunk);
+		}
+	}
+	/* Nothing here ever interrupts the parse */
+	return SXRET_OK;
+}
+/*
+ * Processing instruction (PI) handler.
+ * The user defined callback must accept three parameters:
+ *  handler(resource $parser,string $target,string $data)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $target
+ *   The second parameter, target, contains the PI target.
+ *  $data
+ *   The third parameter, data, contains the PI data.
+ *  Note: Instead of a function name, an array containing an object reference
+ *  and a method name can also be supplied.
+ * This routine always returns SXRET_OK; the user callback is simply skipped
+ * when it is not callable or when its arguments cannot be allocated.
+ */
+static sxi32 VmXMLPIHandler(SyXMLRawStr *pTargetStr, SyXMLRawStr *pDataStr, void *pUserData) {
+	ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData;
+	ph7_value *pCallback, *pTarget, *pData;
+	/* Point to the target user defined callback */
+	pCallback = &pEngine->aCB[PH7_XML_PI];
+	/* Make sure the given callback is callable */
+	if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) {
+		/* Not callable,return immediately */
+		return SXRET_OK;
+	}
+	/* Get ph7_values holding the PI target and the PI data */
+	pTarget = VmXMLValue(pEngine, pTargetStr, 0);
+	pData = VmXMLValue(pEngine, pDataStr, 0);
+	if(pTarget != 0 && pData != 0) {
+		/* Invoke the user callback */
+		PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pTarget, pData, 0);
+	}
+	/* Clean-up the mess left behind. This also covers the out of memory
+	 * case where only one of the two values could be created.
+	 */
+	if(pTarget) {
+		ph7_context_release_value(pEngine->pCtx, pTarget);
+	}
+	if(pData) {
+		ph7_context_release_value(pEngine->pCtx, pData);
+	}
+	return SXRET_OK;
+}
+/*
+ * Namespace declaration handler.
+ * The user defined callback must accept three parameters:
+ *  handler(resource $parser,string $prefix,string $uri)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $prefix
+ *   The prefix is a string used to reference the namespace within an XML object.
+ *  $uri
+ *   Uniform Resource Identifier (URI) of namespace.
+ *  Note: Instead of a function name, an array containing an object reference
+ *  and a method name can also be supplied.
+ * This routine always returns SXRET_OK; the user callback is simply skipped
+ * when it is not callable or when its arguments cannot be allocated.
+ */
+static sxi32 VmXMLNSStartHandler(SyXMLRawStr *pUriStr, SyXMLRawStr *pPrefixStr, void *pUserData) {
+	ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData;
+	ph7_value *pCallback, *pUri, *pPrefix;
+	/* Point to the target user defined callback */
+	pCallback = &pEngine->aCB[PH7_XML_NS_START];
+	/* Make sure the given callback is callable */
+	if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) {
+		/* Not callable,return immediately */
+		return SXRET_OK;
+	}
+	/* Get ph7_values holding the PREFIX/URI */
+	pUri = VmXMLValue(pEngine, pUriStr, 0);
+	pPrefix = VmXMLValue(pEngine, pPrefixStr, 0);
+	if(pUri != 0 && pPrefix != 0) {
+		/*
+		 * Invoke the user callback.
+		 * NOTE(review): the value built from pUriStr is passed before the one built
+		 * from pPrefixStr, so the callback receives (parser, uri, prefix) - assuming the
+		 * parameter names reflect what the XML parser really passes - while the documented
+		 * signature is (parser, prefix, uri). Confirm the intended order before swapping.
+		 */
+		PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pUri, pPrefix, 0);
+	}
+	/* Clean-up the mess left behind. This also covers the out of memory
+	 * case where only one of the two values could be created.
+	 */
+	if(pUri) {
+		ph7_context_release_value(pEngine->pCtx, pUri);
+	}
+	if(pPrefix) {
+		ph7_context_release_value(pEngine->pCtx, pPrefix);
+	}
+	return SXRET_OK;
+}
+/*
+ * Namespace end declaration handler.
+ * The user defined callback must accept two parameters:
+ *  handler(resource $parser,string $prefix)
+ *  $parser
+ *   The first parameter, parser, is a reference to the XML parser calling the handler.
+ *  $prefix
+ *   The prefix is a string used to reference the namespace within an XML object.
+ *  Note: Instead of a function name, an array containing an object reference
+ *  and a method name can also be supplied.
+ */
+static sxi32 VmXMLNSEndHandler(SyXMLRawStr *pPrefixStr, void *pUserData) {
+	ph7_xml_engine *pXml = (ph7_xml_engine *)pUserData;
+	/* Slot holding the user supplied namespace-end callback */
+	ph7_value *pHandler = &pXml->aCB[PH7_XML_NS_END];
+	ph7_value *pNsPrefix;
+	if(PH7_VmIsCallable(pXml->pVm, pHandler, 0)) {
+		/* Wrap the prefix in a scalar */
+		pNsPrefix = VmXMLValue(pXml, pPrefixStr, 0);
+		if(pNsPrefix != 0) {
+			/* Run the callback,then drop the temporary */
+			PH7_VmCallUserFunctionAp(pXml->pVm, pHandler, 0, &pXml->sParserValue, pNsPrefix, 0);
+			ph7_context_release_value(pXml->pCtx, pNsPrefix);
+		}
+	}
+	/* Nothing here ever interrupts the parse */
+	return SXRET_OK;
+}
+/*
+ * Error Message consumer handler.
+ * Each time the XML parser encounters a syntax error or any other error
+ * related to XML processing,the following callback is invoked by the
+ * underlying XML parser.
+ * The error code (and the line of the offending token,when one is given)
+ * is recorded in the engine and SXERR_ABORT is returned.
+ */
+static sxi32 VmXMLErrorHandler(const char *zMessage, sxi32 iErrCode, SyToken *pToken, void *pUserData) {
+	ph7_xml_engine *pXml = (ph7_xml_engine *)pUserData;
+	SXUNUSED(zMessage); /* cc warning */
+	if(pToken != 0) {
+		/* Remember where the problem was seen */
+		pXml->nLine = pToken->nLine;
+	}
+	/* Remember what went wrong */
+	pXml->iErrCode = iErrCode;
+	/* Abort XML processing immediately */
+	return SXERR_ABORT;
+}
+/*
+ * int xml_parse(resource $parser,string $data[,bool $is_final = false ])
+ *  Parses an XML document. The handlers for the configured events are called
+ *  as many times as necessary.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser.
+ *  $data
+ *   Chunk of data to parse. A document may be parsed piece-wise by calling
+ *   xml_parse() several times with new data, as long as the is_final parameter
+ *   is set and TRUE when the last data is parsed.
+ *  $is_final
+ *   NOT USED.
+ *   This implementation requires that all the processed input be
+ *   entirely loaded in memory.
+ * Return
+ *  Returns 1 on success or 0 on failure.
+ */
+static int vm_builtin_xml_parse(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml;
+	SyXMLParser *pSax;
+	const char *zInput;
+	int nLen;
+	int iVerdict;
+	if(nArg < 2 || !ph7_value_is_resource(apArg[0]) || !ph7_value_is_string(apArg[1])) {
+		/* A parser resource and a string are mandatory,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* Unwrap the engine and make sure it is sane */
+	pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+	if(IS_INVALID_XML_ENGINE(pXml)) {
+		/* Corrupt engine,return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	if(pXml->iNest > 0) {
+		/* A user callback is calling xml_parse() on the parser
+		 * that is currently running it,which is forbidden.
+		 */
+		ph7_context_throw_error_format(pCtx, PH7_CTX_ERR,
+									   "Recursive call to %s,PH7 is returning false",
+									   ph7_function_name(pCtx)
+									  );
+		/* Return FALSE */
+		ph7_result_bool(pCtx, 0);
+		return PH7_OK;
+	}
+	/* The event callbacks allocate their temporaries from this context */
+	pXml->pCtx = pCtx;
+	/* Underlying XML parser */
+	pSax = &pXml->sParser;
+	/* Install the event callbacks; events without a callback are given 0 */
+	SyXMLParserSetEventHandler(pSax, pXml,
+							   VmXMLStartElementHandler,
+							   VmXMLTextHandler,
+							   VmXMLErrorHandler,
+							   0,
+							   VmXMLEndElementHandler,
+							   VmXMLPIHandler,
+							   0,
+							   0,
+							   VmXMLNSStartHandler,
+							   VmXMLNSEndHandler
+							  );
+	/* Start from a clean error state */
+	pXml->iErrCode = SXML_ERROR_NONE;
+	/* Fetch the raw XML input */
+	zInput = ph7_value_to_string(apArg[1], &nLen);
+	/* Run the parser; iNest guards against the recursive call rejected above */
+	pXml->iNest++;
+	SyXMLProcess(pSax, zInput, (sxu32)nLen);
+	pXml->iNest--;
+	/* 1 when the error handler was never triggered,0 otherwise */
+	if(pXml->iErrCode == SXML_ERROR_NONE) {
+		iVerdict = 1;
+	} else {
+		iVerdict = 0;
+	}
+	ph7_result_int(pCtx, iVerdict);
+	return PH7_OK;
+}
+/*
+ * bool xml_parser_set_option(resource $parser,int $option,mixed $value)
+ *  Sets an option in an XML parser.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to set an option in.
+ *  $option
+ *   Which option to set. See below.
+ *   The following options are available:
+ *   XML_OPTION_CASE_FOLDING integer Controls whether case-folding is enabled for this XML parser.
+ *   XML_OPTION_SKIP_TAGSTART integer Specify how many characters should be skipped in the beginning of a tag name.
+ *   XML_OPTION_SKIP_WHITE integer Whether to skip values consisting of whitespace characters.
+ *   XML_OPTION_TARGET_ENCODING string Sets which target encoding to use in this XML parser.
+ *  $value
+ *   The option's new value.
+ * Return
+ *  Always FALSE.
+ * Note:
+ *  Well,none of these options have meaning under the built-in XML parser so a call to this
+ *  function is a no-op.
+ */
+static int vm_builtin_xml_parser_set_option(ph7_context *pCtx, int nArg, ph7_value **apArg) {
+	ph7_xml_engine *pXml;
+	if(nArg < 2 || !ph7_value_is_resource(apArg[0])) {
+		/* A parser resource and an option are mandatory */
+		goto done;
+	}
+	/* Unwrap the engine and make sure it is sane */
+	pXml = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]);
+	if(IS_INVALID_XML_ENGINE(pXml)) {
+		/* Corrupt engine */
+		goto done;
+	}
+	/* Valid parser,but there is nothing to configure */
+done:
+	/* FALSE in every case */
+	ph7_result_bool(pCtx, 0);
+	return PH7_OK;
+}
+/*
+ * mixed xml_parser_get_option(resource $parser,int $option)
+ *  Get options from an XML parser.
+ * Parameters
+ *  $parser
+ *   A reference to the XML parser to get an option from.
+ *  $option
+ *   Which option to fetch.
+ * Return
+ *  This function returns FALSE if parser does not refer to a valid parser
+ *  or if option isn't valid.Else the option's value is returned.
+ */ +static int vm_builtin_xml_parser_get_option(ph7_context *pCtx, int nArg, ph7_value **apArg) { + ph7_xml_engine *pEngine; + int nOp; + if(nArg < 2 || !ph7_value_is_resource(apArg[0])) { + /* Missing/invalid argument, return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Point to the XML engine */ + pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); + if(IS_INVALID_XML_ENGINE(pEngine)) { + /* Corrupt engine (null pointer or wrong magic number), return FALSE */ + ph7_result_bool(pCtx, 0); + return PH7_OK; + } + /* Extract the option identifier. The engine is only validated, never read: every answer below is a fixed value */ + nOp = ph7_value_to_int(apArg[1]); + switch(nOp) { + case SXML_OPTION_SKIP_TAGSTART: + case SXML_OPTION_SKIP_WHITE: + case SXML_OPTION_CASE_FOLDING: + ph7_result_int(pCtx, 0); + break; + case SXML_OPTION_TARGET_ENCODING: + ph7_result_string(pCtx, "UTF-8", (int)sizeof("UTF-8") - 1); + break; + default: + /* Unknown option, return FALSE */ + ph7_result_bool(pCtx, 0); + break; + } + return PH7_OK; +} +/* + * string xml_error_string(int $code) + * Gets the XML parser error string associated with the given code. + * Parameters + * $code + * An error code from xml_get_error_code(). + * Return + * Returns a string with a textual description of the error + * code, or FALSE if no description was found. 
+ */ +static int vm_builtin_xml_error_string(ph7_context *pCtx, int nArg, ph7_value **apArg) { + int nErr = -1; /* Stays -1 when no argument is passed, which selects the unknown-code message below */ + if(nArg > 0) { + nErr = ph7_value_to_int(apArg[0]); + } + switch(nErr) { + case SXML_ERROR_DUPLICATE_ATTRIBUTE: + ph7_result_string(pCtx, "Duplicate attribute", -1/* -1: compute the length automatically */); + break; + case SXML_ERROR_INCORRECT_ENCODING: + ph7_result_string(pCtx, "Incorrect encoding", -1); + break; + case SXML_ERROR_INVALID_TOKEN: + ph7_result_string(pCtx, "Unexpected token", -1); + break; + case SXML_ERROR_MISPLACED_XML_PI: + ph7_result_string(pCtx, "Misplaced processing instruction", -1); + break; + case SXML_ERROR_NO_MEMORY: + ph7_result_string(pCtx, "Out of memory", -1); + break; + case SXML_ERROR_NONE: + ph7_result_string(pCtx, "Not an error", -1); + break; + case SXML_ERROR_TAG_MISMATCH: + ph7_result_string(pCtx, "Tag mismatch", -1); + break; + case -1: + ph7_result_string(pCtx, "Unknown error code", -1); + break; + default: /* Any code without a dedicated message is reported as a syntax error, so a string is always returned, never FALSE */ + ph7_result_string(pCtx, "Syntax error", -1); + break; + } + return PH7_OK; +} + +PH7_PRIVATE sxi32 initializeModule(ph7_vm *pVm, ph7_real *ver, SyString *desc) { /* Reports the module description and version through desc and ver, then registers every entry of xmlConstList and xmlFuncList with pVm. Returns SXRET_OK, or the code of the first failing registration. */ + sxi32 rc; + sxu32 n; + + desc->zString = MODULE_DESC; /* NOTE(review): desc->nByte is not set here; confirm the caller does not rely on it */ + *ver = MODULE_VER; + for(n = 0; n < SX_ARRAYSIZE(xmlConstList); ++n) { + rc = ph7_create_constant(&(*pVm), xmlConstList[n].zName, xmlConstList[n].xExpand, &(*pVm)); /* &(*pVm) is simply pVm; it is also passed as the last argument */ + if(rc != SXRET_OK) { + return rc; /* Stop at the first failure; earlier registrations are not undone here */ + } + } + for(n = 0; n < SX_ARRAYSIZE(xmlFuncList); ++n) { + rc = ph7_create_function(&(*pVm), xmlFuncList[n].zName, xmlFuncList[n].xFunc, &(*pVm)); + if(rc != SXRET_OK) { + return rc; /* Same policy as above: report the first failing registration */ + } + } + return SXRET_OK; +} \ No newline at end of file diff --git a/ext/xml/xml.h b/ext/xml/xml.h new file mode 100644 index 0000000..c2707e7 --- /dev/null +++ b/ext/xml/xml.h @@ -0,0 +1,152 @@ +#ifndef __XML_H__ +#define __XML_H__ + +#include "ph7.h" +#include "ph7int.h" +#include "lib.h" + +#define MODULE_DESC "XML Module" +#define MODULE_VER 1.0 + +#define XML_TOTAL_HANDLER (PH7_XML_NS_END + 1) +#define 
XML_ENGINE_MAGIC 0x851EFC52 +#define IS_INVALID_XML_ENGINE(XML) ((XML) == 0 || (XML)->nMagic != XML_ENGINE_MAGIC) /* True for a null engine pointer or a mismatching magic number; the argument is evaluated twice */ + +enum ph7_xml_handler_id { + PH7_XML_START_TAG = 0, /* Start element handler ID */ + PH7_XML_END_TAG, /* End element handler ID */ + PH7_XML_CDATA, /* Character data handler ID */ + PH7_XML_PI, /* Processing instruction (PI) handler ID */ + PH7_XML_DEF, /* Default handler ID */ + PH7_XML_UNPED, /* Unparsed entity declaration handler ID */ + PH7_XML_ND, /* Notation declaration handler ID */ + PH7_XML_EER, /* External entity reference handler ID */ + PH7_XML_NS_START, /* Start namespace declaration handler ID */ + PH7_XML_NS_END /* End namespace declaration handler ID; keep it last, XML_TOTAL_HANDLER is derived from it */ +}; + +/* An instance of the following structure describes a working + * XML engine instance. + */ +typedef struct ph7_xml_engine ph7_xml_engine; +struct ph7_xml_engine { + ph7_vm *pVm; /* VM that owns this instance */ + ph7_context *pCtx; /* Call context */ + SyXMLParser sParser; /* Underlying XML parser */ + ph7_value aCB[XML_TOTAL_HANDLER]; /* User-defined callbacks, one slot per ph7_xml_handler_id value */ + ph7_value sParserValue; /* ph7_value holding this instance which is forwarded + * as the first argument to the user callbacks. 
+ */ + int ns_sep; /* Namespace separator */ + SyBlob sErr; /* Error message consumer */ + sxi32 iErrCode; /* Last error code; xml_parse() resets it to SXML_ERROR_NONE before each parse */ + sxi32 iNest; /* Nesting level; raised by xml_parse() while parsing, which refuses to start when it is above zero */ + sxu32 nLine; /* Last processed line */ + sxu32 nMagic; /* Must equal XML_ENGINE_MAGIC; checked by IS_INVALID_XML_ENGINE() to detect misuse */ +}; + +static void PH7_XML_ERROR_NONE_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_NO_MEMORY_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_SYNTAX_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_NO_ELEMENTS_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_INVALID_TOKEN_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_UNCLOSED_TOKEN_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_PARTIAL_CHAR_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_TAG_MISMATCH_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_DUPLICATE_ATTRIBUTE_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_JUNK_AFTER_DOC_ELEMENT_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_PARAM_ENTITY_REF_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_UNDEFINED_ENTITY_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_RECURSIVE_ENTITY_REF_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_ASYNC_ENTITY_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_BAD_CHAR_REF_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_BINARY_ENTITY_REF_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_MISPLACED_XML_PI_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_UNKNOWN_ENCODING_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_INCORRECT_ENCODING_Const(ph7_value *pVal, void *pUserData); +static void 
PH7_XML_ERROR_UNCLOSED_CDATA_SECTION_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_ERROR_EXTERNAL_ENTITY_HANDLING_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_OPTION_CASE_FOLDING_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_OPTION_TARGET_ENCODING_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_OPTION_SKIP_TAGSTART_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_OPTION_SKIP_WHITE_Const(ph7_value *pVal, void *pUserData); +static void PH7_XML_SAX_IMP_Const(ph7_value *pVal, void *pUserData); +static int vm_builtin_xml_parser_create(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_parser_create_ns(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_parser_free(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_element_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_character_data_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_default_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_end_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_start_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_processing_instruction_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_unparsed_entity_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_notation_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_set_external_entity_ref_handler(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_get_current_line_number(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_get_current_byte_index(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int 
vm_builtin_xml_set_object(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_get_current_column_number(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_get_error_code(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_parse(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_parser_set_option(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_parser_get_option(ph7_context *pCtx, int nArg, ph7_value **apArg); +static int vm_builtin_xml_error_string(ph7_context *pCtx, int nArg, ph7_value **apArg); +PH7_PRIVATE sxi32 initializeModule(ph7_vm *pVm, ph7_real *ver, SyString *desc); + +static const ph7_builtin_constant xmlConstList[] = { + {"XML_ERROR_NONE", PH7_XML_ERROR_NONE_Const}, + {"XML_ERROR_NO_MEMORY", PH7_XML_ERROR_NO_MEMORY_Const}, + {"XML_ERROR_SYNTAX", PH7_XML_ERROR_SYNTAX_Const}, + {"XML_ERROR_NO_ELEMENTS", PH7_XML_ERROR_NO_ELEMENTS_Const}, + {"XML_ERROR_INVALID_TOKEN", PH7_XML_ERROR_INVALID_TOKEN_Const}, + {"XML_ERROR_UNCLOSED_TOKEN", PH7_XML_ERROR_UNCLOSED_TOKEN_Const}, + {"XML_ERROR_PARTIAL_CHAR", PH7_XML_ERROR_PARTIAL_CHAR_Const}, + {"XML_ERROR_TAG_MISMATCH", PH7_XML_ERROR_TAG_MISMATCH_Const}, + {"XML_ERROR_DUPLICATE_ATTRIBUTE", PH7_XML_ERROR_DUPLICATE_ATTRIBUTE_Const}, + {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", PH7_XML_ERROR_JUNK_AFTER_DOC_ELEMENT_Const}, + {"XML_ERROR_PARAM_ENTITY_REF", PH7_XML_ERROR_PARAM_ENTITY_REF_Const}, + {"XML_ERROR_UNDEFINED_ENTITY", PH7_XML_ERROR_UNDEFINED_ENTITY_Const}, + {"XML_ERROR_RECURSIVE_ENTITY_REF", PH7_XML_ERROR_RECURSIVE_ENTITY_REF_Const}, + {"XML_ERROR_ASYNC_ENTITY", PH7_XML_ERROR_ASYNC_ENTITY_Const}, + {"XML_ERROR_BAD_CHAR_REF", PH7_XML_ERROR_BAD_CHAR_REF_Const}, + {"XML_ERROR_BINARY_ENTITY_REF", PH7_XML_ERROR_BINARY_ENTITY_REF_Const}, + {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", PH7_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF_Const}, + {"XML_ERROR_MISPLACED_XML_PI", 
PH7_XML_ERROR_MISPLACED_XML_PI_Const}, + {"XML_ERROR_UNKNOWN_ENCODING", PH7_XML_ERROR_UNKNOWN_ENCODING_Const}, + {"XML_ERROR_INCORRECT_ENCODING", PH7_XML_ERROR_INCORRECT_ENCODING_Const}, + {"XML_ERROR_UNCLOSED_CDATA_SECTION", PH7_XML_ERROR_UNCLOSED_CDATA_SECTION_Const}, + {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", PH7_XML_ERROR_EXTERNAL_ENTITY_HANDLING_Const}, + {"XML_OPTION_CASE_FOLDING", PH7_XML_OPTION_CASE_FOLDING_Const}, + {"XML_OPTION_TARGET_ENCODING", PH7_XML_OPTION_TARGET_ENCODING_Const}, + {"XML_OPTION_SKIP_TAGSTART", PH7_XML_OPTION_SKIP_TAGSTART_Const}, + {"XML_OPTION_SKIP_WHITE", PH7_XML_OPTION_SKIP_WHITE_Const}, + {"XML_SAX_IMPL", PH7_XML_SAX_IMP_Const} +}; + +static const ph7_builtin_func xmlFuncList[] = { + {"xml_parser_create", vm_builtin_xml_parser_create }, + {"xml_parser_create_ns", vm_builtin_xml_parser_create_ns}, + {"xml_parser_free", vm_builtin_xml_parser_free }, + {"xml_set_element_handler", vm_builtin_xml_set_element_handler}, + {"xml_set_character_data_handler", vm_builtin_xml_set_character_data_handler}, + {"xml_set_default_handler", vm_builtin_xml_set_default_handler }, + {"xml_set_end_namespace_decl_handler", vm_builtin_xml_set_end_namespace_decl_handler}, + {"xml_set_start_namespace_decl_handler", vm_builtin_xml_set_start_namespace_decl_handler}, + {"xml_set_processing_instruction_handler", vm_builtin_xml_set_processing_instruction_handler}, + {"xml_set_unparsed_entity_decl_handler", vm_builtin_xml_set_unparsed_entity_decl_handler}, + {"xml_set_notation_decl_handler", vm_builtin_xml_set_notation_decl_handler}, + {"xml_set_external_entity_ref_handler", vm_builtin_xml_set_external_entity_ref_handler}, + {"xml_get_current_line_number", vm_builtin_xml_get_current_line_number}, + {"xml_get_current_byte_index", vm_builtin_xml_get_current_byte_index }, + {"xml_set_object", vm_builtin_xml_set_object}, + {"xml_get_current_column_number", vm_builtin_xml_get_current_column_number}, + {"xml_get_error_code", vm_builtin_xml_get_error_code }, + 
{"xml_parse", vm_builtin_xml_parse }, + {"xml_parser_set_option", vm_builtin_xml_parser_set_option}, + {"xml_parser_get_option", vm_builtin_xml_parser_get_option}, + {"xml_error_string", vm_builtin_xml_error_string } +}; + +#endif \ No newline at end of file diff --git a/lib.c b/lib.c index b562a18..5ef803e 100644 --- a/lib.c +++ b/lib.c @@ -3628,1064 +3628,7 @@ PH7_PRIVATE sxu32 SyBufferFormat(char *zBuf, sxu32 nLen, const char *zFormat, .. return n; } #ifndef PH7_DISABLE_BUILTIN_FUNC -/* -* Symisc XML Parser Engine (UTF-8) SAX(Event Driven) API -* @author Mrad Chems Eddine -* @started 08/03/2010 21:32 FreeBSD -* @finished 07/04/2010 23:24 Win32[VS8] -*/ -/* - * An XML raw text,CDATA,tag name is parsed out and stored - * in an instance of the following structure. - */ -typedef struct SyXMLRawStrNS SyXMLRawStrNS; -struct SyXMLRawStrNS { - /* Public field [Must match the SyXMLRawStr fields ] */ - const char *zString; /* Raw text [UTF-8 ENCODED EXCEPT CDATA] [NOT NULL TERMINATED] */ - sxu32 nByte; /* Text length */ - sxu32 nLine; /* Line number this text occurs */ - /* Private fields */ - SySet sNSset; /* Namespace entries */ -}; -/* - * Lexer token codes - * The following set of constants are the token value recognized - * by the lexer when processing XML input. 
- */ -#define SXML_TOK_INVALID 0xFFFF /* Invalid Token */ -#define SXML_TOK_COMMENT 0x01 /* Comment */ -#define SXML_TOK_PI 0x02 /* Processing instruction */ -#define SXML_TOK_DOCTYPE 0x04 /* Doctype directive */ -#define SXML_TOK_RAW 0x08 /* Raw text */ -#define SXML_TOK_START_TAG 0x10 /* Starting tag */ -#define SXML_TOK_CDATA 0x20 /* CDATA */ -#define SXML_TOK_END_TAG 0x40 /* Ending tag */ -#define SXML_TOK_START_END 0x80 /* Tag */ -#define SXML_TOK_SPACE 0x100 /* Spaces (including new lines) */ -#define IS_XML_DIRTY(c) \ - ( c == '<' || c == '$'|| c == '"' || c == '\''|| c == '&'|| c == '(' || c == ')' || c == '*' ||\ - c == '%' || c == '#' || c == '|' || c == '/'|| c == '~' || c == '{' || c == '}' ||\ - c == '[' || c == ']' || c == '\\'|| c == ';'||c == '^' || c == '`' ) -/* Tokenize an entire XML input */ -static sxi32 XML_Tokenize(SyStream *pStream, SyToken *pToken, void *pUserData, void *pUnused2) { - SyXMLParser *pParse = (SyXMLParser *)pUserData; - SyString *pStr; - sxi32 rc; - int c; - /* Jump leading white spaces */ - while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0])) { - /* Advance the stream cursor */ - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - pStream->zText++; - } - if(pStream->zText >= pStream->zEnd) { - SXUNUSED(pUnused2); - /* End of input reached */ - return SXERR_EOF; - } - /* Record token starting position and line */ - pToken->nLine = pStream->nLine; - pToken->pUserData = 0; - pStr = &pToken->sData; - SyStringInitFromBuf(pStr, pStream->zText, 0); - /* Extract the current token */ - c = pStream->zText[0]; - if(c == '<') { - pStream->zText++; - pStr->zString++; - if(pStream->zText >= pStream->zEnd) { - if(pParse->xError) { - rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - /* End of input reached */ - return SXERR_EOF; - } 
- c = pStream->zText[0]; - if(c == '?') { - /* Processing instruction */ - pStream->zText++; - pStr->zString++; - pToken->nType = SXML_TOK_PI; - while(XLEX_IN_LEN(pStream) >= sizeof("?>") - 1 && - SyMemcmp((const void *)pStream->zText, "?>", sizeof("?>") - 1) != 0) { - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - pStream->zText++; - } - /* Record token length */ - pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); - if(XLEX_IN_LEN(pStream) < sizeof("?>") - 1) { - if(pParse->xError) { - rc = pParse->xError("End of input found,but processing instruction was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_EOF; - } - pStream->zText += sizeof("?>") - 1; - } else if(c == '!') { - pStream->zText++; - if(XLEX_IN_LEN(pStream) >= sizeof("--") - 1 && pStream->zText[0] == '-' && pStream->zText[1] == '-') { - /* Comment */ - pStream->zText += sizeof("--") - 1; - while(XLEX_IN_LEN(pStream) >= sizeof("-->") - 1 && - SyMemcmp((const void *)pStream->zText, "-->", sizeof("-->") - 1) != 0) { - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - pStream->zText++; - } - pStream->zText += sizeof("-->") - 1; - /* Tell the lexer to ignore this token */ - return SXERR_CONTINUE; - } - if(XLEX_IN_LEN(pStream) >= sizeof("[CDATA[") - 1 && SyMemcmp((const void *)pStream->zText, "[CDATA[", sizeof("[CDATA[") - 1) == 0) { - /* CDATA */ - pStream->zText += sizeof("[CDATA[") - 1; - pStr->zString = (const char *)pStream->zText; - while(XLEX_IN_LEN(pStream) >= sizeof("]]>") - 1 && - SyMemcmp((const void *)pStream->zText, "]]>", sizeof("]]>") - 1) != 0) { - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - pStream->zText++; - } - /* Record token type and length */ - pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); - pToken->nType = SXML_TOK_CDATA; - 
if(XLEX_IN_LEN(pStream) < sizeof("]]>") - 1) { - if(pParse->xError) { - rc = pParse->xError("End of input found,but ]]> was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_EOF; - } - pStream->zText += sizeof("]]>") - 1; - return SXRET_OK; - } - if(XLEX_IN_LEN(pStream) >= sizeof("DOCTYPE") - 1 && SyMemcmp((const void *)pStream->zText, "DOCTYPE", sizeof("DOCTYPE") - 1) == 0) { - SyString sDelim = { ">", sizeof(char) }; /* Default delimiter */ - int c = 0; - /* DOCTYPE */ - pStream->zText += sizeof("DOCTYPE") - 1; - pStr->zString = (const char *)pStream->zText; - /* Check for element declaration */ - while(pStream->zText < pStream->zEnd && pStream->zText[0] != '\n') { - if(pStream->zText[0] >= 0xc0 || !SyisSpace(pStream->zText[0])) { - c = pStream->zText[0]; - if(c == '>') { - break; - } - } - pStream->zText++; - } - if(c == '[') { - /* Change the delimiter */ - SyStringInitFromBuf(&sDelim, "]>", sizeof("]>") - 1); - } - if(c != '>') { - while(XLEX_IN_LEN(pStream) >= sDelim.nByte && - SyMemcmp((const void *)pStream->zText, sDelim.zString, sDelim.nByte) != 0) { - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - pStream->zText++; - } - } - /* Record token type and length */ - pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); - pToken->nType = SXML_TOK_DOCTYPE; - if(XLEX_IN_LEN(pStream) < sDelim.nByte) { - if(pParse->xError) { - rc = pParse->xError("End of input found,but ]> or > was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_EOF; - } - pStream->zText += sDelim.nByte; - return SXRET_OK; - } - } else { - int c; - c = pStream->zText[0]; - rc = SXRET_OK; - pToken->nType = SXML_TOK_START_TAG; - if(c == '/') { - /* End tag */ - pToken->nType = SXML_TOK_END_TAG; - pStream->zText++; - pStr->zString++; - if(pStream->zText >= pStream->zEnd) 
{ - if(pParse->xError) { - rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_EOF; - } - c = pStream->zText[0]; - } - if(c == '>') { - /*<>*/ - if(pParse->xError) { - rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - /* Ignore the token */ - return SXERR_CONTINUE; - } - if(c < 0xc0 && (SyisSpace(c) || SyisDigit(c) || c == '.' || c == '-' || IS_XML_DIRTY(c))) { - if(pParse->xError) { - rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - rc = SXERR_INVALID; - } - pStream->zText++; - /* Delimit the tag */ - while(pStream->zText < pStream->zEnd && pStream->zText[0] != '>') { - c = pStream->zText[0]; - if(c >= 0xc0) { - /* UTF-8 stream */ - pStream->zText++; - SX_JMP_UTF8(pStream->zText, pStream->zEnd); - } else { - if(c == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '>') { - pStream->zText++; - if(pToken->nType != SXML_TOK_START_TAG) { - if(pParse->xError) { - rc = pParse->xError("Unexpected closing tag,expecting '>'", - SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - /* Ignore the token */ - rc = SXERR_INVALID; - } else { - pToken->nType = SXML_TOK_START_END; - } - break; - } - if(pStream->zText[0] == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - /* Advance the stream cursor */ - pStream->zText++; - } - } - if(rc != SXRET_OK) { - /* Tell the lexer to ignore this token */ - return SXERR_CONTINUE; - } - /* Record token length */ - pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); - if(pToken->nType == SXML_TOK_START_END && pStr->nByte > 0) { - pStr->nByte -= 
sizeof(char); - } - if(pStream->zText < pStream->zEnd) { - pStream->zText++; - } else { - if(pParse->xError) { - rc = pParse->xError("End of input found,but closing tag '>' was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } - } - } else { - /* Raw input */ - while(pStream->zText < pStream->zEnd) { - c = pStream->zText[0]; - if(c < 0xc0) { - if(c == '<') { - break; - } else if(c == '\n') { - /* Increment line counter */ - pStream->nLine++; - } - /* Advance the stream cursor */ - pStream->zText++; - } else { - /* UTF-8 stream */ - pStream->zText++; - SX_JMP_UTF8(pStream->zText, pStream->zEnd); - } - } - /* Record token type,length */ - pToken->nType = SXML_TOK_RAW; - pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString); - } - /* Return to the lexer */ - return SXRET_OK; -} -static int XMLCheckDuplicateAttr(SyXMLRawStr *aSet, sxu32 nEntry, SyXMLRawStr *pEntry) { - sxu32 n; - for(n = 0 ; n < nEntry ; n += 2) { - SyXMLRawStr *pAttr = &aSet[n]; - if(pAttr->nByte == pEntry->nByte && SyMemcmp(pAttr->zString, pEntry->zString, pEntry->nByte) == 0) { - /* Attribute found */ - return 1; - } - } - /* No duplicates */ - return 0; -} -static sxi32 XMLProcessNamesSpace(SyXMLParser *pParse, SyXMLRawStrNS *pTag, SyToken *pToken, SySet *pAttr) { - SyXMLRawStr *pPrefix, *pUri; /* Namespace prefix/URI */ - SyHashEntry *pEntry; - SyXMLRawStr *pDup; - sxi32 rc; - /* Extract the URI first */ - pUri = (SyXMLRawStr *)SySetPeek(pAttr); - /* Extract the prefix */ - pPrefix = (SyXMLRawStr *)SySetAt(pAttr, SySetUsed(pAttr) - 2); - /* Prefix name */ - if(pPrefix->nByte == sizeof("xmlns") - 1) { - /* Default namespace */ - pPrefix->nByte = 0; - pPrefix->zString = ""; /* Empty string */ - } else { - pPrefix->nByte -= sizeof("xmlns") - 1; - pPrefix->zString += sizeof("xmlns") - 1; - if(pPrefix->zString[0] != ':') { - return SXRET_OK; - } - pPrefix->nByte--; - pPrefix->zString++; - if(pPrefix->nByte < 1) { 
- if(pParse->xError) { - rc = pParse->xError("Invalid namespace name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - /* POP the last insertred two entries */ - (void)SySetPop(pAttr); - (void)SySetPop(pAttr); - return SXERR_SYNTAX; - } - } - /* Invoke the namespace callback if available */ - if(pParse->xNameSpace) { - rc = pParse->xNameSpace(pPrefix, pUri, pParse->pUserData); - if(rc == SXERR_ABORT) { - /* User callback request an operation abort */ - return SXERR_ABORT; - } - } - /* Duplicate structure */ - pDup = (SyXMLRawStr *)SyMemBackendAlloc(pParse->pAllocator, sizeof(SyXMLRawStr)); - if(pDup == 0) { - if(pParse->xError) { - pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData); - } - /* Abort processing immediately */ - return SXERR_ABORT; - } - *pDup = *pUri; /* Structure assignement */ - /* Save the namespace */ - if(pPrefix->nByte == 0) { - pPrefix->zString = "Default"; - pPrefix->nByte = sizeof("Default") - 1; - } - SyHashInsert(&pParse->hns, (const void *)pPrefix->zString, pPrefix->nByte, pDup); - /* Peek the last inserted entry */ - pEntry = SyHashLastEntry(&pParse->hns); - /* Store in the corresponding tag container*/ - SySetPut(&pTag->sNSset, (const void *)&pEntry); - /* POP the last insertred two entries */ - (void)SySetPop(pAttr); - (void)SySetPop(pAttr); - return SXRET_OK; -} -static sxi32 XMLProcessStartTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SySet *pAttrSet, SySet *pTagStack) { - SyString *pIn = &pToken->sData; - const char *zIn, *zCur, *zEnd; - SyXMLRawStr sEntry; - sxi32 rc; - int c; - /* Reset the working set */ - SySetReset(pAttrSet); - /* Delimit the raw tag */ - zIn = pIn->zString; - zEnd = &zIn[pIn->nByte]; - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - /* Isolate tag name */ - sEntry.nLine = pTag->nLine = pToken->nLine; - zCur = zIn; - while(zIn < zEnd) { - if((unsigned char)zIn[0] >= 
0xc0) { - /* UTF-8 stream */ - zIn++; - SX_JMP_UTF8(zIn, zEnd); - } else if(SyisSpace(zIn[0])) { - break; - } else { - if(IS_XML_DIRTY(zIn[0])) { - if(pParse->xError) { - rc = pParse->xError("Illegal character in XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } - zIn++; - } - } - if(zCur >= zIn) { - if(pParse->xError) { - rc = pParse->xError("Invalid XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - pTag->zString = zCur; - pTag->nByte = (sxu32)(zIn - zCur); - /* Process tag attribute */ - for(;;) { - int is_ns = 0; - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - if(zIn >= zEnd) { - break; - } - zCur = zIn; - while(zIn < zEnd && zIn[0] != '=') { - if((unsigned char)zIn[0] >= 0xc0) { - /* UTF-8 stream */ - zIn++; - SX_JMP_UTF8(zIn, zEnd); - } else if(SyisSpace(zIn[0])) { - break; - } else { - zIn++; - } - } - if(zCur >= zIn) { - if(pParse->xError) { - rc = pParse->xError("Missing attribute name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - /* Store attribute name */ - sEntry.zString = zCur; - sEntry.nByte = (sxu32)(zIn - zCur); - if((pParse->nFlags & SXML_ENABLE_NAMESPACE) && sEntry.nByte >= sizeof("xmlns") - 1 && - SyMemcmp(sEntry.zString, "xmlns", sizeof("xmlns") - 1) == 0) { - is_ns = 1; - } - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - if(zIn >= zEnd || zIn[0] != '=') { - if(pParse->xError) { - rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - while(sEntry.nByte > 0 && (unsigned char)zCur[sEntry.nByte - 1] < 0xc0 - && SyisSpace(zCur[sEntry.nByte - 1])) { - sEntry.nByte--; - } - /* Check for duplicates first 
*/ - if(XMLCheckDuplicateAttr((SyXMLRawStr *)SySetBasePtr(pAttrSet), SySetUsed(pAttrSet), &sEntry)) { - if(pParse->xError) { - rc = pParse->xError("Duplicate attribute", SXML_ERROR_DUPLICATE_ATTRIBUTE, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { - return SXERR_ABORT; - } - /* Extract attribute value */ - zIn++; /* Jump the trailing '=' */ - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - if(zIn >= zEnd) { - if(pParse->xError) { - rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - (void)SySetPop(pAttrSet); - return SXERR_SYNTAX; - } - if(zIn[0] != '\'' && zIn[0] != '"') { - if(pParse->xError) { - rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - (void)SySetPop(pAttrSet); - return SXERR_SYNTAX; - } - c = zIn[0]; - zIn++; - zCur = zIn; - while(zIn < zEnd && zIn[0] != c) { - zIn++; - } - if(zIn >= zEnd) { - if(pParse->xError) { - rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - (void)SySetPop(pAttrSet); - return SXERR_SYNTAX; - } - /* Store attribute value */ - sEntry.zString = zCur; - sEntry.nByte = (sxu32)(zIn - zCur); - if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) { - return SXERR_ABORT; - } - zIn++; - if(is_ns) { - /* Process namespace declaration */ - XMLProcessNamesSpace(pParse, pTag, pToken, pAttrSet); - } - } - /* Store in the tag stack */ - if(pToken->nType == SXML_TOK_START_TAG) { - rc = SySetPut(pTagStack, (const void *)pTag); - } - return SXRET_OK; -} -static void XMLExtactPI(SyToken *pToken, SyXMLRawStr *pTarget, SyXMLRawStr *pData, int *pXML) { - SyString 
*pIn = &pToken->sData; - const char *zIn, *zCur, *zEnd; - pTarget->nLine = pData->nLine = pToken->nLine; - /* Nullify the entries first */ - pTarget->zString = pData->zString = 0; - /* Ignore leading and traing white spaces */ - SyStringFullTrim(pIn); - /* Delimit the raw PI */ - zIn = pIn->zString; - zEnd = &zIn[pIn->nByte]; - if(pXML) { - *pXML = 0; - } - /* Extract the target */ - zCur = zIn; - while(zIn < zEnd) { - if((unsigned char)zIn[0] >= 0xc0) { - /* UTF-8 stream */ - zIn++; - SX_JMP_UTF8(zIn, zEnd); - } else if(SyisSpace(zIn[0])) { - break; - } else { - zIn++; - } - } - if(zIn > zCur) { - pTarget->zString = zCur; - pTarget->nByte = (sxu32)(zIn - zCur); - if(pXML && pTarget->nByte == sizeof("xml") - 1 && SyStrnicmp(pTarget->zString, "xml", sizeof("xml") - 1) == 0) { - *pXML = 1; - } - } - /* Extract the PI data */ - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - if(zIn < zEnd) { - pData->zString = zIn; - pData->nByte = (sxu32)(zEnd - zIn); - } -} -static sxi32 XMLExtractEndTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pOut) { - SyString *pIn = &pToken->sData; - const char *zEnd = &pIn->zString[pIn->nByte]; - const char *zIn = pIn->zString; - /* Ignore leading white spaces */ - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - zIn++; - } - pOut->nLine = pToken->nLine; - pOut->zString = zIn; - pOut->nByte = (sxu32)(zEnd - zIn); - /* Ignore trailing white spaces */ - while(pOut->nByte > 0 && (unsigned char)pOut->zString[pOut->nByte - 1] < 0xc0 - && SyisSpace(pOut->zString[pOut->nByte - 1])) { - pOut->nByte--; - } - if(pOut->nByte < 1) { - if(pParse->xError) { - sxi32 rc; - rc = pParse->xError("Invalid end tag name", SXML_ERROR_INVALID_TOKEN, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - return SXRET_OK; -} -static void TokenToXMLString(SyToken *pTok, SyXMLRawStrNS *pOut) { - /* Remove leading and trailing white spaces 
first */ - SyStringFullTrim(&pTok->sData); - pOut->zString = SyStringData(&pTok->sData); - pOut->nByte = SyStringLength(&pTok->sData); -} -static sxi32 XMLExtractNS(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SyXMLRawStr *pnsUri) { - SyXMLRawStr *pUri, sPrefix; - SyHashEntry *pEntry; - sxu32 nOfft; - sxi32 rc; - /* Extract a prefix if available */ - rc = SyByteFind(pTag->zString, pTag->nByte, ':', &nOfft); - if(rc != SXRET_OK) { - /* Check if there is a default namespace */ - pEntry = SyHashGet(&pParse->hns, "Default", sizeof("Default") - 1); - if(pEntry) { - /* Extract the ns URI */ - pUri = (SyXMLRawStr *)pEntry->pUserData; - /* Save the ns URI */ - pnsUri->zString = pUri->zString; - pnsUri->nByte = pUri->nByte; - } - return SXRET_OK; - } - if(nOfft < 1) { - if(pParse->xError) { - rc = pParse->xError("Empty prefix is not allowed according to XML namespace specification", - SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - sPrefix.zString = pTag->zString; - sPrefix.nByte = nOfft; - sPrefix.nLine = pTag->nLine; - pTag->zString += nOfft + 1; - pTag->nByte -= nOfft; - if(pTag->nByte < 1) { - if(pParse->xError) { - rc = pParse->xError("Missing tag name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - /* Check if the prefix is already registered */ - pEntry = SyHashGet(&pParse->hns, sPrefix.zString, sPrefix.nByte); - if(pEntry == 0) { - if(pParse->xError) { - rc = pParse->xError("Namespace prefix is not defined", SXML_ERROR_SYNTAX, - pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - return SXERR_SYNTAX; - } - /* Extract the ns URI */ - pUri = (SyXMLRawStr *)pEntry->pUserData; - /* Save the ns URI */ - pnsUri->zString = pUri->zString; - pnsUri->nByte = pUri->nByte; - /* All done */ - return SXRET_OK; -} -static sxi32 XMLnsUnlink(SyXMLParser *pParse, 
SyXMLRawStrNS *pLast, SyToken *pToken) { - SyHashEntry **apEntry, *pEntry; - void *pUserData; - sxu32 n; - /* Release namespace entries */ - apEntry = (SyHashEntry **)SySetBasePtr(&pLast->sNSset); - for(n = 0 ; n < SySetUsed(&pLast->sNSset) ; ++n) { - pEntry = apEntry[n]; - /* Invoke the end namespace declaration callback */ - if(pParse->xNameSpaceEnd && (pParse->nFlags & SXML_ENABLE_NAMESPACE) && pToken) { - SyXMLRawStr sPrefix; - sxi32 rc; - sPrefix.zString = (const char *)pEntry->pKey; - sPrefix.nByte = pEntry->nKeyLen; - sPrefix.nLine = pToken->nLine; - rc = pParse->xNameSpaceEnd(&sPrefix, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - pUserData = pEntry->pUserData; - /* Remove from the namespace hashtable */ - SyHashDeleteEntry2(pEntry); - SyMemBackendFree(pParse->pAllocator, pUserData); - } - SySetRelease(&pLast->sNSset); - return SXRET_OK; -} -/* Process XML tokens */ -static sxi32 ProcessXML(SyXMLParser *pParse, SySet *pTagStack, SySet *pWorker) { - SySet *pTokenSet = &pParse->sToken; - SyXMLRawStrNS sEntry; - SyXMLRawStr sNs; - SyToken *pToken; - int bGotTag; - sxi32 rc; - /* Initialize fields */ - bGotTag = 0; - /* Start processing */ - if(pParse->xStartDoc && (SXERR_ABORT == pParse->xStartDoc(pParse->pUserData))) { - /* User callback request an operation abort */ - return SXERR_ABORT; - } - /* Reset the loop cursor */ - SySetResetCursor(pTokenSet); - /* Extract the current token */ - while(SXRET_OK == (SySetGetNextEntry(&(*pTokenSet), (void **)&pToken))) { - SyZero(&sEntry, sizeof(SyXMLRawStrNS)); - SyZero(&sNs, sizeof(SyXMLRawStr)); - SySetInit(&sEntry.sNSset, pParse->pAllocator, sizeof(SyHashEntry *)); - sEntry.nLine = sNs.nLine = pToken->nLine; - switch(pToken->nType) { - case SXML_TOK_DOCTYPE: - if(SySetUsed(pTagStack) > 1 || bGotTag) { - if(pParse->xError) { - rc = pParse->xError("DOCTYPE must be declared first", SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - 
} - } - break; - } - /* Invoke the supplied callback if any */ - if(pParse->xDoctype) { - TokenToXMLString(pToken, &sEntry); - rc = pParse->xDoctype((SyXMLRawStr *)&sEntry, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - case SXML_TOK_CDATA: - if(SySetUsed(pTagStack) < 1) { - if(pParse->xError) { - rc = pParse->xError("CDATA without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } - /* Invoke the supplied callback if any */ - if(pParse->xRaw) { - TokenToXMLString(pToken, &sEntry); - rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - case SXML_TOK_PI: { - SyXMLRawStr sTarget, sData; - int isXML = 0; - /* Extract the target and data */ - XMLExtactPI(pToken, &sTarget, &sData, &isXML); - if(isXML && SySetCursor(pTokenSet) - 1 > 0) { - if(pParse->xError) { - rc = pParse->xError("Unexpected XML declaration. 
The XML declaration must be the first node in the document", - SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } else if(pParse->xPi) { - /* Invoke the supplied callback*/ - rc = pParse->xPi(&sTarget, &sData, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - } - case SXML_TOK_RAW: - if(SySetUsed(pTagStack) < 1) { - if(pParse->xError) { - rc = pParse->xError("Text (Raw data) without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - } - /* Invoke the supplied callback if any */ - if(pParse->xRaw) { - TokenToXMLString(pToken, &sEntry); - rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - case SXML_TOK_END_TAG: { - SyXMLRawStrNS *pLast = 0; /* cc warning */ - if(SySetUsed(pTagStack) < 1) { - if(pParse->xError) { - rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - } - rc = XMLExtractEndTag(pParse, pToken, &sEntry); - if(rc == SXRET_OK) { - /* Extract the last inserted entry */ - pLast = (SyXMLRawStrNS *)SySetPeek(pTagStack); - if(pLast == 0 || pLast->nByte != sEntry.nByte || - SyMemcmp(pLast->zString, sEntry.zString, sEntry.nByte) != 0) { - if(pParse->xError) { - rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } else { - /* Invoke the supllied callback if any */ - if(pParse->xEndTag) { - rc = SXRET_OK; - if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { - /* Extract namespace URI */ - rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - if(rc == SXRET_OK) { - rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); - if(rc 
== SXERR_ABORT) { - return SXERR_ABORT; - } - } - } - } - } else if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - if(pLast) { - rc = XMLnsUnlink(pParse, pLast, pToken); - (void)SySetPop(pTagStack); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - } - case SXML_TOK_START_TAG: - case SXML_TOK_START_END: - if(SySetUsed(pTagStack) < 1 && bGotTag) { - if(pParse->xError) { - rc = pParse->xError("XML document cannot contain multiple root level elements documents", - SXML_ERROR_SYNTAX, pToken, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - break; - } - bGotTag = 1; - /* Extract the tag and it's supplied attribute */ - rc = XMLProcessStartTag(pParse, pToken, &sEntry, pWorker, pTagStack); - if(rc == SXRET_OK) { - if(pParse->nFlags & SXML_ENABLE_NAMESPACE) { - /* Extract namespace URI */ - rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs); - } - } - if(rc == SXRET_OK) { - /* Invoke the supplied callback */ - if(pParse->xStartTag) { - rc = pParse->xStartTag((SyXMLRawStr *)&sEntry, &sNs, SySetUsed(pWorker), - (SyXMLRawStr *)SySetBasePtr(pWorker), pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - if(pToken->nType == SXML_TOK_START_END) { - if(pParse->xEndTag) { - rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - rc = XMLnsUnlink(pParse, &sEntry, pToken); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } - } - } else if(rc == SXERR_ABORT) { - /* Abort processing immediately */ - return SXERR_ABORT; - } - break; - default: - /* Can't happen */ - break; - } - } - if(SySetUsed(pTagStack) > 0 && pParse->xError) { - pParse->xError("Missing closing tag", SXML_ERROR_SYNTAX, - (SyToken *)SySetPeek(&pParse->sToken), pParse->pUserData); - } - if(pParse->xEndDoc) { - pParse->xEndDoc(pParse->pUserData); - } - return SXRET_OK; -} -PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags) { - /* 
Zero the structure first */ - SyZero(pParser, sizeof(SyXMLParser)); - /* Initilaize fields */ - SySetInit(&pParser->sToken, pAllocator, sizeof(SyToken)); - SyLexInit(&pParser->sLex, &pParser->sToken, XML_Tokenize, pParser); - SyHashInit(&pParser->hns, pAllocator, 0, 0); - pParser->pAllocator = pAllocator; - pParser->nFlags = iFlags; - return SXRET_OK; -} -PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser, - void *pUserData, - ProcXMLStartTagHandler xStartTag, - ProcXMLTextHandler xRaw, - ProcXMLSyntaxErrorHandler xErr, - ProcXMLStartDocument xStartDoc, - ProcXMLEndTagHandler xEndTag, - ProcXMLPIHandler xPi, - ProcXMLEndDocument xEndDoc, - ProcXMLDoctypeHandler xDoctype, - ProcXMLNameSpaceStart xNameSpace, - ProcXMLNameSpaceEnd xNameSpaceEnd - ) { - /* Install user callbacks */ - if(xErr) { - pParser->xError = xErr; - } - if(xStartDoc) { - pParser->xStartDoc = xStartDoc; - } - if(xStartTag) { - pParser->xStartTag = xStartTag; - } - if(xRaw) { - pParser->xRaw = xRaw; - } - if(xEndTag) { - pParser->xEndTag = xEndTag; - } - if(xPi) { - pParser->xPi = xPi; - } - if(xEndDoc) { - pParser->xEndDoc = xEndDoc; - } - if(xDoctype) { - pParser->xDoctype = xDoctype; - } - if(xNameSpace) { - pParser->xNameSpace = xNameSpace; - } - if(xNameSpaceEnd) { - pParser->xNameSpaceEnd = xNameSpaceEnd; - } - pParser->pUserData = pUserData; - return SXRET_OK; -} -/* Process an XML chunk */ -PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte) { - SySet sTagStack; - SySet sWorker; - sxi32 rc; - /* Initialize working sets */ - SySetInit(&sWorker, pParser->pAllocator, sizeof(SyXMLRawStr)); /* Tag container */ - SySetInit(&sTagStack, pParser->pAllocator, sizeof(SyXMLRawStrNS)); /* Tag stack */ - /* Tokenize the entire input */ - rc = SyLexTokenizeInput(&pParser->sLex, zInput, nByte, 0, 0, 0); - if(rc == SXERR_ABORT) { - /* Tokenize callback request an operation abort */ - return SXERR_ABORT; - } - if(SySetUsed(&pParser->sToken) < 1) { - /* Nothing 
to process [i.e: white spaces] */ - rc = SXRET_OK; - } else { - /* Process XML Tokens */ - rc = ProcessXML(&(*pParser), &sTagStack, &sWorker); - if(pParser->nFlags & SXML_ENABLE_NAMESPACE) { - if(SySetUsed(&sTagStack) > 0) { - SyXMLRawStrNS *pEntry; - SyHashEntry **apEntry; - sxu32 n; - SySetResetCursor(&sTagStack); - while(SySetGetNextEntry(&sTagStack, (void **)&pEntry) == SXRET_OK) { - /* Release namespace entries */ - apEntry = (SyHashEntry **)SySetBasePtr(&pEntry->sNSset); - for(n = 0 ; n < SySetUsed(&pEntry->sNSset) ; ++n) { - SyMemBackendFree(pParser->pAllocator, apEntry[n]->pUserData); - } - SySetRelease(&pEntry->sNSset); - } - } - } - } - /* Clean-up the mess left behind */ - SySetRelease(&sWorker); - SySetRelease(&sTagStack); - /* Processing result */ - return rc; -} -PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser) { - SyLexRelease(&pParser->sLex); - SySetRelease(&pParser->sToken); - SyHashRelease(&pParser->hns); - return SXRET_OK; -} + /* * Zip File Format: * diff --git a/ph7int.h b/ph7int.h index ccc8cec..fb0f23e 100644 --- a/ph7int.h +++ b/ph7int.h @@ -527,86 +527,6 @@ struct SyLex { (RAW)->nByte--;\ } #ifndef PH7_DISABLE_BUILTIN_FUNC -/* - * An XML raw text,CDATA,tag name and son is parsed out and stored - * in an instance of the following structure. - */ -typedef struct SyXMLRawStr SyXMLRawStr; -struct SyXMLRawStr { - const char *zString; /* Raw text [UTF-8 ENCODED EXCEPT CDATA] [NOT NULL TERMINATED] */ - sxu32 nByte; /* Text length */ - sxu32 nLine; /* Line number this text occurs */ -}; -/* - * Event callback signatures. 
- */ -typedef sxi32(*ProcXMLStartTagHandler)(SyXMLRawStr *, SyXMLRawStr *, sxu32, SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLTextHandler)(SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLEndTagHandler)(SyXMLRawStr *, SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLPIHandler)(SyXMLRawStr *, SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLDoctypeHandler)(SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLSyntaxErrorHandler)(const char *, int, SyToken *, void *); -typedef sxi32(*ProcXMLStartDocument)(void *); -typedef sxi32(*ProcXMLNameSpaceStart)(SyXMLRawStr *, SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLNameSpaceEnd)(SyXMLRawStr *, void *); -typedef sxi32(*ProcXMLEndDocument)(void *); -/* XML processing control flags */ -#define SXML_ENABLE_NAMESPACE 0x01 /* Parse XML with namespace support enbaled */ -#define SXML_ENABLE_QUERY 0x02 /* Not used */ -#define SXML_OPTION_CASE_FOLDING 0x04 /* Controls whether case-folding is enabled for this XML parser */ -#define SXML_OPTION_SKIP_TAGSTART 0x08 /* Specify how many characters should be skipped in the beginning of a tag name.*/ -#define SXML_OPTION_SKIP_WHITE 0x10 /* Whether to skip values consisting of whitespace characters. 
*/ -#define SXML_OPTION_TARGET_ENCODING 0x20 /* Default encoding: UTF-8 */ -/* XML error codes */ -enum xml_err_code { - SXML_ERROR_NONE = 1, - SXML_ERROR_NO_MEMORY, - SXML_ERROR_SYNTAX, - SXML_ERROR_NO_ELEMENTS, - SXML_ERROR_INVALID_TOKEN, - SXML_ERROR_UNCLOSED_TOKEN, - SXML_ERROR_PARTIAL_CHAR, - SXML_ERROR_TAG_MISMATCH, - SXML_ERROR_DUPLICATE_ATTRIBUTE, - SXML_ERROR_JUNK_AFTER_DOC_ELEMENT, - SXML_ERROR_PARAM_ENTITY_REF, - SXML_ERROR_UNDEFINED_ENTITY, - SXML_ERROR_RECURSIVE_ENTITY_REF, - SXML_ERROR_ASYNC_ENTITY, - SXML_ERROR_BAD_CHAR_REF, - SXML_ERROR_BINARY_ENTITY_REF, - SXML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, - SXML_ERROR_MISPLACED_XML_PI, - SXML_ERROR_UNKNOWN_ENCODING, - SXML_ERROR_INCORRECT_ENCODING, - SXML_ERROR_UNCLOSED_CDATA_SECTION, - SXML_ERROR_EXTERNAL_ENTITY_HANDLING -}; -/* Each active XML SAX parser is represented by an instance - * of the following structure. - */ -typedef struct SyXMLParser SyXMLParser; -struct SyXMLParser { - SyMemBackend *pAllocator; /* Memory backend */ - void *pUserData; /* User private data forwarded varbatim by the XML parser - * as the last argument to the users callbacks. 
- */ - SyHash hns; /* Namespace hashtable */ - SySet sToken; /* XML tokens */ - SyLex sLex; /* Lexical analyzer */ - sxi32 nFlags; /* Control flags */ - /* User callbacks */ - ProcXMLStartTagHandler xStartTag; /* Start element handler */ - ProcXMLEndTagHandler xEndTag; /* End element handler */ - ProcXMLTextHandler xRaw; /* Raw text/CDATA handler */ - ProcXMLDoctypeHandler xDoctype; /* DOCTYPE handler */ - ProcXMLPIHandler xPi; /* Processing instruction (PI) handler*/ - ProcXMLSyntaxErrorHandler xError; /* Error handler */ - ProcXMLStartDocument xStartDoc; /* StartDoc handler */ - ProcXMLEndDocument xEndDoc; /* EndDoc handler */ - ProcXMLNameSpaceStart xNameSpace; /* Namespace declaration handler */ - ProcXMLNameSpaceEnd xNameSpaceEnd; /* End namespace declaration handler */ -}; /* * -------------- * Archive extractor: @@ -1810,22 +1730,6 @@ PH7_PRIVATE void *PH7_ExportStdout(ph7_vm *pVm); PH7_PRIVATE void *PH7_ExportStderr(ph7_vm *pVm); /* lib.c function prototypes */ #ifndef PH7_DISABLE_BUILTIN_FUNC -PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags); -PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser, - void *pUserData, - ProcXMLStartTagHandler xStartTag, - ProcXMLTextHandler xRaw, - ProcXMLSyntaxErrorHandler xErr, - ProcXMLStartDocument xStartDoc, - ProcXMLEndTagHandler xEndTag, - ProcXMLPIHandler xPi, - ProcXMLEndDocument xEndDoc, - ProcXMLDoctypeHandler xDoctype, - ProcXMLNameSpaceStart xNameSpace, - ProcXMLNameSpaceEnd xNameSpaceEnd - ); -PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte); -PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser); PH7_PRIVATE sxi32 SyArchiveInit(SyArchive *pArch, SyMemBackend *pAllocator, ProcHash xHash, ProcRawStrCmp xCmp); PH7_PRIVATE sxi32 SyArchiveRelease(SyArchive *pArch); PH7_PRIVATE sxi32 SyArchiveResetLoopCursor(SyArchive *pArch); diff --git a/vm.c b/vm.c index 6d19368..c0b67d9 100644 --- a/vm.c +++ b/vm.c @@ -11379,1293 
+11379,6 @@ static int VmProcessLongOpt(ph7_value *pKey, ph7_value *pValue, void *pUserData) VmExtractOptArgValue(pOpt->pArray, pOpt->pWorker, zArg, pOpt->zArgEnd, need_value, pOpt->pCtx, zOpt); return PH7_OK; } -#ifndef PH7_DISABLE_BUILTIN_FUNC -/* - * XML processing Functions. - * Authors: - * Symisc Systems,devel@symisc.net. - * Copyright (C) Symisc Systems,http://ph7.symisc.net - * Status: - * Devel. - */ -enum ph7_xml_handler_id { - PH7_XML_START_TAG = 0, /* Start element handlers ID */ - PH7_XML_END_TAG, /* End element handler ID*/ - PH7_XML_CDATA, /* Character data handler ID*/ - PH7_XML_PI, /* Processing instruction (PI) handler ID*/ - PH7_XML_DEF, /* Default handler ID */ - PH7_XML_UNPED, /* Unparsed entity declaration handler */ - PH7_XML_ND, /* Notation declaration handler ID*/ - PH7_XML_EER, /* External entity reference handler */ - PH7_XML_NS_START, /* Start namespace declaration handler */ - PH7_XML_NS_END /* End namespace declaration handler */ -}; -#define XML_TOTAL_HANDLER (PH7_XML_NS_END + 1) -/* An instance of the following structure describe a working - * XML engine instance. - */ -typedef struct ph7_xml_engine ph7_xml_engine; -struct ph7_xml_engine { - ph7_vm *pVm; /* VM that own this instance */ - ph7_context *pCtx; /* Call context */ - SyXMLParser sParser; /* Underlying XML parser */ - ph7_value aCB[XML_TOTAL_HANDLER]; /* User-defined callbacks */ - ph7_value sParserValue; /* ph7_value holding this instance which is forwarded - * as the first argument to the user callbacks. - */ - int ns_sep; /* Namespace separator */ - SyBlob sErr; /* Error message consumer */ - sxi32 iErrCode; /* Last error code */ - sxi32 iNest; /* Nesting level */ - sxu32 nLine; /* Last processed line */ - sxu32 nMagic; /* Magic number so that we avoid misuse */ -}; -#define XML_ENGINE_MAGIC 0x851EFC52 -#define IS_INVALID_XML_ENGINE(XML) (XML == 0 || (XML)->nMagic != XML_ENGINE_MAGIC) -/* - * Allocate and initialize an XML engine. 
- */ -static ph7_xml_engine *VmCreateXMLEngine(ph7_context *pCtx, int process_ns, int ns_sep) { - ph7_xml_engine *pEngine; - ph7_vm *pVm = pCtx->pVm; - ph7_value *pValue; - sxu32 n; - /* Allocate a new instance */ - pEngine = (ph7_xml_engine *)SyMemBackendAlloc(&pVm->sAllocator, sizeof(ph7_xml_engine)); - if(pEngine == 0) { - /* Out of memory */ - return 0; - } - /* Zero the structure */ - SyZero(pEngine, sizeof(ph7_xml_engine)); - /* Initialize fields */ - pEngine->pVm = pVm; - pEngine->pCtx = 0; - pEngine->ns_sep = ns_sep; - SyXMLParserInit(&pEngine->sParser, &pVm->sAllocator, process_ns ? SXML_ENABLE_NAMESPACE : 0); - SyBlobInit(&pEngine->sErr, &pVm->sAllocator); - PH7_MemObjInit(pVm, &pEngine->sParserValue); - for(n = 0 ; n < SX_ARRAYSIZE(pEngine->aCB) ; ++n) { - pValue = &pEngine->aCB[n]; - /* NULLIFY the array entries,until someone register an event handler */ - PH7_MemObjInit(&(*pVm), pValue); - } - ph7_value_resource(&pEngine->sParserValue, pEngine); - pEngine->iErrCode = SXML_ERROR_NONE; - /* Finally set the magic number */ - pEngine->nMagic = XML_ENGINE_MAGIC; - return pEngine; -} -/* - * Release an XML engine. - */ -static void VmReleaseXMLEngine(ph7_xml_engine *pEngine) { - ph7_vm *pVm = pEngine->pVm; - ph7_value *pValue; - sxu32 n; - /* Release fields */ - SyBlobRelease(&pEngine->sErr); - SyXMLParserRelease(&pEngine->sParser); - PH7_MemObjRelease(&pEngine->sParserValue); - for(n = 0 ; n < SX_ARRAYSIZE(pEngine->aCB) ; ++n) { - pValue = &pEngine->aCB[n]; - PH7_MemObjRelease(pValue); - } - pEngine->nMagic = 0x2621; - /* Finally,release the whole instance */ - SyMemBackendFree(&pVm->sAllocator, pEngine); -} -/* - * resource xml_parser_create([ string $encoding ]) - * Create an UTF-8 XML parser. - * Parameter - * $encoding - * (Only UTF-8 encoding is used) - * Return - * Returns a resource handle for the new XML parser. 
- */ -static int vm_builtin_xml_parser_create(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - /* Allocate a new instance */ - pEngine = VmCreateXMLEngine(&(*pCtx), 0, ':'); - if(pEngine == 0) { - ph7_context_throw_error(pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); - /* Return null */ - ph7_result_null(pCtx); - SXUNUSED(nArg); /* cc warning */ - SXUNUSED(apArg); - return PH7_OK; - } - /* Return the engine as a resource */ - ph7_result_resource(pCtx, pEngine); - return PH7_OK; -} -/* - * resource xml_parser_create_ns([ string $encoding[,string $separator = ':']]) - * Create an UTF-8 XML parser with namespace support. - * Parameter - * $encoding - * (Only UTF-8 encoding is supported) - * $separtor - * Namespace separator (a single character) - * Return - * Returns a resource handle for the new XML parser. - */ -static int vm_builtin_xml_parser_create_ns(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - int ns_sep = ':'; - if(nArg > 1 && ph7_value_is_string(apArg[1])) { - const char *zSep = ph7_value_to_string(apArg[1], 0); - if(zSep[0] != 0) { - ns_sep = zSep[0]; - } - } - /* Allocate a new instance */ - pEngine = VmCreateXMLEngine(&(*pCtx), TRUE, ns_sep); - if(pEngine == 0) { - ph7_context_throw_error(pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); - /* Return null */ - ph7_result_null(pCtx); - return PH7_OK; - } - /* Return the engine as a resource */ - ph7_result_resource(pCtx, pEngine); - return PH7_OK; -} -/* - * bool xml_parser_free(resource $parser) - * Release an XML engine. - * Parameter - * $parser - * A reference to the XML parser to free. - * Return - * This function returns FALSE if parser does not refer - * to a valid parser, or else it frees the parser and returns TRUE. 
- */ -static int vm_builtin_xml_parser_free(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Safely release the engine */ - VmReleaseXMLEngine(pEngine); - /* Return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_element_handler(resource $parser,callback $start_element_handler,[callback $end_element_handler]) - * Sets the element handler functions for the XML parser. start_element_handler and end_element_handler - * are strings containing the names of functions. - * Parameters - * $parser - * A reference to the XML parser to set up start and end element handler functions. - * $start_element_handler - * The function named by start_element_handler must accept three parameters: - * start_element_handler(resource $parser,string $name,array $attribs) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $name - * The second parameter, name, contains the name of the element for which this handler - * is called.If case-folding is in effect for this parser, the element name will be in uppercase letters. - * $attribs - * The third parameter, attribs, contains an associative array with the element's attributes (if any). - * The keys of this array are the attribute names, the values are the attribute values. - * Attribute names are case-folded on the same criteria as element names.Attribute values are not case-folded. - * The original order of the attributes can be retrieved by walking through attribs the normal way, using each(). - * The first key in the array was the first attribute, and so on. 
- * Note: Instead of a function name, an array containing an object reference and a method name can also be supplied. - * $end_element_handler - * The function named by end_element_handler must accept two parameters: - * end_element_handler(resource $parser,string $name) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $name - * The second parameter, name, contains the name of the element for which this handler - * is called.If case-folding is in effect for this parser, the element name will be in uppercase - * letters. - * If a handler function is set to an empty string, or FALSE, the handler in question is disabled. - * Return - * TRUE on success or FALSE on failure. - */ -static int vm_builtin_xml_set_element_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the start_element_handler callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_START_TAG]); - if(nArg > 2) { - /* Save the end_element_handler callback for later invocation */ - PH7_MemObjStore(apArg[2]/* User callback*/, &pEngine->aCB[PH7_XML_END_TAG]); - } - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_character_data_handler(resource $parser,callback $handler) - * Sets the character data handler function for the XML parser parser. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. 
- * The function named by handler must accept two parameters: - * handler(resource $parser,string $data) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $data - * The second parameter, data, contains the character data as a string. - * Character data handler is called for every piece of a text in the XML document. - * It can be called multiple times inside each fragment (e.g. for non-ASCII strings). - * If a handler function is set to an empty string, or FALSE, the handler in question is disabled. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. - */ -static int vm_builtin_xml_set_character_data_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_CDATA]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_default_handler(resource $parser,callback $handler) - * Set up default handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept two parameters: - * handler(resource $parser,string $data) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. 
- * $data - * The second parameter, data, contains the character data.This may be the XML declaration - * document type declaration, entities or other data for which no other handler exists. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. - */ -static int vm_builtin_xml_set_default_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_DEF]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_end_namespace_decl_handler(resource $parser,callback $handler) - * Set up end namespace declaration handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept two parameters: - * handler(resource $parser,string $prefix) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $prefix - * The prefix is a string used to reference the namespace within an XML object. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. 
- */ -static int vm_builtin_xml_set_end_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_NS_END]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_start_namespace_decl_handler(resource $parser,callback $handler) - * Set up start namespace declaration handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept two parameters: - * handler(resource $parser,string $prefix,string $uri) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $prefix - * The prefix is a string used to reference the namespace within an XML object. - * $uri - * Uniform Resource Identifier (URI) of namespace. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. 
- */ -static int vm_builtin_xml_set_start_namespace_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_NS_START]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_processing_instruction_handler(resource $parser,callback $handler) - * Set up processing instruction (PI) handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept three parameters: - * handler(resource $parser,string $target,string $data) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $target - * The second parameter, target, contains the PI target. - * $data - The third parameter, data, contains the PI data. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. 
- */ -static int vm_builtin_xml_set_processing_instruction_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_PI]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_unparsed_entity_decl_handler(resource $parser,callback $handler) - * Set up unparsed entity declaration handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept six parameters: - * handler(resource $parser,string $entity_name,string $base,string $system_id,string $public_id,string $notation_name) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $entity_name - * The name of the entity that is about to be defined. - * $base - * This is the base for resolving the system identifier (systemId) of the external entity. - * Currently this parameter will always be set to an empty string. - * $system_id - * System identifier for the external entity. - * $public_id - * Public identifier for the external entity. - * $notation_name - * Name of the notation of this entity (see xml_set_notation_decl_handler()). - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. 
- */ -static int vm_builtin_xml_set_unparsed_entity_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_UNPED]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_notation_decl_handler(resource $parser,callback $handler) - * Set up notation declaration handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept five parameters: - * handler(resource $parser,string $entity_name,string $base,string $system_id,string $public_id) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $entity_name - * The name of the entity that is about to be defined. - * $base - * This is the base for resolving the system identifier (systemId) of the external entity. - * Currently this parameter will always be set to an empty string. - * $system_id - * System identifier for the external entity. - * $public_id - * Public identifier for the external entity. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. 
- */ -static int vm_builtin_xml_set_notation_decl_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_ND]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * bool xml_set_external_entity_ref_handler(resource $parser,callback $handler) - * Set up external entity reference handler. - * Parameters - * $parser - * A reference to the XML parser to set up character data handler function. - * $handler - * handler is a string containing the name of the callback. - * The function named by handler must accept five parameters: - * handler(resource $parser,string $open_entity_names,string $base,string $system_id,string $public_id) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $open_entity_names - * The second parameter, open_entity_names, is a space-separated list of the names - * of the entities that are open for the parse of this entity (including the name of the referenced entity). - * $base - * This is the base for resolving the system identifier (system_id) of the external entity. - * Currently this parameter will always be set to an empty string. - * $system_id - * The fourth parameter, system_id, is the system identifier as specified in the entity declaration. 
- * $public_id - * The fifth parameter, public_id, is the public identifier as specified in the entity declaration - * or an empty string if none was specified; the whitespace in the public identifier will have been - * normalized as required by the XML spec. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - * Return - * TRUE on success or FALSE on failure. - */ -static int vm_builtin_xml_set_external_entity_ref_handler(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(nArg > 1) { - /* Save the user callback for later invocation */ - PH7_MemObjStore(apArg[1]/* User callback*/, &pEngine->aCB[PH7_XML_EER]); - } - /* All done,return TRUE */ - ph7_result_bool(pCtx, 1); - return PH7_OK; -} -/* - * int xml_get_current_line_number(resource $parser) - * Gets the current line number for the given XML parser. - * Parameters - * $parser - * A reference to the XML parser. - * Return - * This function returns FALSE if parser does not refer - * to a valid parser, or else it returns which line the parser - * is currently at in its data buffer. 
- */ -static int vm_builtin_xml_get_current_line_number(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Return the line number */ - ph7_result_int(pCtx, (int)pEngine->nLine); - return PH7_OK; -} -/* - * int xml_get_current_byte_index(resource $parser) - * Gets the current byte index of the given XML parser. - * Parameters - * $parser - * A reference to the XML parser. - * Return - * This function returns FALSE if parser does not refer to a valid - * parser, or else it returns which byte index the parser is currently - * at in its data buffer (starting at 0). - */ -static int vm_builtin_xml_get_current_byte_index(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - SyStream *pStream; - SyToken *pToken; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the current processed token */ - pToken = (SyToken *)SySetPeekCurrentEntry(&pEngine->sParser.sToken); - if(pToken == 0) { - /* Stream not yet processed */ - ph7_result_int(pCtx, 0); - return 0; - } - /* Point to the input stream */ - pStream = &pEngine->sParser.sLex.sStream; - /* Return the byte index */ - ph7_result_int64(pCtx, (ph7_int64)(pToken->sData.zString - (const char *)pStream->zInput)); - return PH7_OK; -} -/* - * bool xml_set_object(resource $parser,object &$object) 
- * Use XML Parser within an object. - * NOTE - * This function is depreceated and is a no-op. - * Parameters - * $parser - * A reference to the XML parser. - * $object - * The object where to use the XML parser. - * Return - * Always FALSE. - */ -static int vm_builtin_xml_set_object(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 2 || !ph7_value_is_resource(apArg[0]) || !ph7_value_is_object(apArg[1])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Throw a notice and return */ - ph7_context_throw_error(pCtx, PH7_CTX_NOTICE, "This function is depreceated and is a no-op." - "In order to mimic this behaviour,you can supply instead of a function name an array " - "containing an object reference and a method name." - ); - /* Return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; -} -/* - * int xml_get_current_column_number(resource $parser) - * Gets the current column number of the given XML parser. - * Parameters - * $parser - * A reference to the XML parser. - * Return - * This function returns FALSE if parser does not refer to a valid parser, or else it returns - * which column on the current line (as given by xml_get_current_line_number()) the parser - * is currently at. 
- */ -static int vm_builtin_xml_get_current_column_number(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - SyStream *pStream; - SyToken *pToken; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the current processed token */ - pToken = (SyToken *)SySetPeekCurrentEntry(&pEngine->sParser.sToken); - if(pToken == 0) { - /* Stream not yet processed */ - ph7_result_int(pCtx, 0); - return 0; - } - /* Point to the input stream */ - pStream = &pEngine->sParser.sLex.sStream; - /* Return the byte index */ - ph7_result_int64(pCtx, (ph7_int64)(pToken->sData.zString - (const char *)pStream->zInput) / 80); - return PH7_OK; -} -/* - * int xml_get_error_code(resource $parser) - * Get XML parser error code. - * Parameters - * $parser - * A reference to the XML parser. - * Return - * This function returns FALSE if parser does not refer to a valid - * parser, or else it returns one of the error codes listed in the error - * codes section. 
- */ -static int vm_builtin_xml_get_error_code(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 1 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Return the error code if any */ - ph7_result_int(pCtx, pEngine->iErrCode); - return PH7_OK; -} -/* - * XML parser event callbacks - * Each time the unserlying XML parser extract a single token - * from the input,one of the following callbacks are invoked. - * IMP-XML-ENGINE-07-07-2012 22:02 FreeBSD [chm@symisc.net] - */ -/* - * Create a scalar ph7_value holding the value - * of an XML tag/attribute/CDATA and so on. - */ -static ph7_value *VmXMLValue(ph7_xml_engine *pEngine, SyXMLRawStr *pXML, SyXMLRawStr *pNsUri) { - ph7_value *pValue; - /* Allocate a new scalar variable */ - pValue = ph7_context_new_scalar(pEngine->pCtx); - if(pValue == 0) { - ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); - return 0; - } - if(pNsUri && pNsUri->nByte > 0) { - /* Append namespace URI and the separator */ - ph7_value_string_format(pValue, "%.*s%c", pNsUri->nByte, pNsUri->zString, pEngine->ns_sep); - } - /* Copy the tag value */ - ph7_value_string(pValue, pXML->zString, (int)pXML->nByte); - return pValue; -} -/* - * Create a 'ph7_value' of type array holding the values - * of an XML tag attributes. 
- */ -static ph7_value *VmXMLAttrValue(ph7_xml_engine *pEngine, SyXMLRawStr *aAttr, sxu32 nAttr) { - ph7_value *pArray; - /* Create an empty array */ - pArray = ph7_context_new_array(pEngine->pCtx); - if(pArray == 0) { - ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); - return 0; - } - if(nAttr > 0) { - ph7_value *pKey, *pValue; - sxu32 n; - /* Create worker variables */ - pKey = ph7_context_new_scalar(pEngine->pCtx); - pValue = ph7_context_new_scalar(pEngine->pCtx); - if(pKey == 0 || pValue == 0) { - ph7_context_throw_error(pEngine->pCtx, PH7_CTX_ERR, "PH7 is running out of memory"); - return 0; - } - /* Copy attributes */ - for(n = 0 ; n < nAttr ; n += 2) { - /* Reset string cursors */ - ph7_value_reset_string_cursor(pKey); - ph7_value_reset_string_cursor(pValue); - /* Copy attribute name and it's associated value */ - ph7_value_string(pKey, aAttr[n].zString, (int)aAttr[n].nByte); /* Attribute name */ - ph7_value_string(pValue, aAttr[n + 1].zString, (int)aAttr[n + 1].nByte); /* Attribute value */ - /* Insert in the array */ - ph7_array_add_elem(pArray, pKey, pValue); /* Will make it's own copy */ - } - /* Release the worker variables */ - ph7_context_release_value(pEngine->pCtx, pKey); - ph7_context_release_value(pEngine->pCtx, pValue); - } - /* Return the freshly created array */ - return pArray; -} -/* - * Start element handler. - * The user defined callback must accept three parameters: - * start_element_handler(resource $parser,string $name,array $attribs ) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $name - * The second parameter, name, contains the name of the element for which this handler - * is called.If case-folding is in effect for this parser, the element name will be in uppercase letters. - * $attribs - * The third parameter, attribs, contains an associative array with the element's attributes (if any). 
- * The keys of this array are the attribute names, the values are the attribute values. - * Attribute names are case-folded on the same criteria as element names.Attribute values are not case-folded. - * The original order of the attributes can be retrieved by walking through attribs the normal way, using each(). - * The first key in the array was the first attribute, and so on. - * Note: Instead of a function name, an array containing an object reference and a method name can also be supplied. - */ -static sxi32 VmXMLStartElementHandler(SyXMLRawStr *pStart, SyXMLRawStr *pNS, sxu32 nAttr, SyXMLRawStr *aAttr, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pTag, *pAttr; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_START_TAG]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Create a ph7_value holding the tag name */ - pTag = VmXMLValue(pEngine, pStart, pNS); - /* Create a ph7_value holding the tag attributes */ - pAttr = VmXMLAttrValue(pEngine, aAttr, nAttr); - if(pTag == 0 || pAttr == 0) { - SXUNUSED(pNS); /* cc warning */ - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pTag, pAttr, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pTag); - ph7_context_release_value(pEngine->pCtx, pAttr); - return SXRET_OK; -} -/* - * End element handler. - * The user defined callback must accept two parameters: - * end_element_handler(resource $parser,string $name) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $name - * The second parameter, name, contains the name of the element for which this handler is called. 
- * If case-folding is in effect for this parser, the element name will be in uppercase letters. - * Note: Instead of a function name, an array containing an object reference and a method name - * can also be supplied. - */ -static sxi32 VmXMLEndElementHandler(SyXMLRawStr *pEnd, SyXMLRawStr *pNS, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pTag; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_END_TAG]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Create a ph7_value holding the tag name */ - pTag = VmXMLValue(pEngine, pEnd, pNS); - if(pTag == 0) { - SXUNUSED(pNS); /* cc warning */ - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pTag, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pTag); - return SXRET_OK; -} -/* - * Character data handler. - * The user defined callback must accept two parameters: - * handler(resource $parser,string $data) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $data - * The second parameter, data, contains the character data as a string. - * Character data handler is called for every piece of a text in the XML document. - * It can be called multiple times inside each fragment (e.g. for non-ASCII strings). - * If a handler function is set to an empty string, or FALSE, the handler in question is disabled. - * Note: Instead of a function name, an array containing an object reference and a method name can also be supplied. 
- */ -static sxi32 VmXMLTextHandler(SyXMLRawStr *pText, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pData; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_CDATA]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Create a ph7_value holding the data */ - pData = VmXMLValue(pEngine, &(*pText), 0); - if(pData == 0) { - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pData, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pData); - return SXRET_OK; -} -/* - * Processing instruction (PI) handler. - * The user defined callback must accept two parameters: - * handler(resource $parser,string $target,string $data) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $target - * The second parameter, target, contains the PI target. - * $data - * The third parameter, data, contains the PI data. - * Note: Instead of a function name, an array containing an object reference - * and a method name can also be supplied. 
- */ -static sxi32 VmXMLPIHandler(SyXMLRawStr *pTargetStr, SyXMLRawStr *pDataStr, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pTarget, *pData; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_PI]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Get a ph7_value holding the data */ - pTarget = VmXMLValue(pEngine, &(*pTargetStr), 0); - pData = VmXMLValue(pEngine, &(*pDataStr), 0); - if(pTarget == 0 || pData == 0) { - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pTarget, pData, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pTarget); - ph7_context_release_value(pEngine->pCtx, pData); - return SXRET_OK; -} -/* - * Namespace declaration handler. - * The user defined callback must accept two parameters: - * handler(resource $parser,string $prefix,string $uri) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $prefix - * The prefix is a string used to reference the namespace within an XML object. - * $uri - * Uniform Resource Identifier (URI) of namespace. - * Note: Instead of a function name, an array containing an object reference - * and a method name can also be supplied. 
- */ -static sxi32 VmXMLNSStartHandler(SyXMLRawStr *pUriStr, SyXMLRawStr *pPrefixStr, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pUri, *pPrefix; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_NS_START]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Get a ph7_value holding the PREFIX/URI */ - pUri = VmXMLValue(pEngine, pUriStr, 0); - pPrefix = VmXMLValue(pEngine, pPrefixStr, 0); - if(pUri == 0 || pPrefix == 0) { - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pUri, pPrefix, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pUri); - ph7_context_release_value(pEngine->pCtx, pPrefix); - return SXRET_OK; -} -/* - * Namespace end declaration handler. - * The user defined callback must accept two parameters: - * handler(resource $parser,string $prefix) - * $parser - * The first parameter, parser, is a reference to the XML parser calling the handler. - * $prefix - * The prefix is a string used to reference the namespace within an XML object. - * Note: Instead of a function name, an array containing an object reference - * and a method name can also be supplied. 
- */ -static sxi32 VmXMLNSEndHandler(SyXMLRawStr *pPrefixStr, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - ph7_value *pCallback, *pPrefix; - /* Point to the target user defined callback */ - pCallback = &pEngine->aCB[PH7_XML_NS_END]; - /* Make sure the given callback is callable */ - if(!PH7_VmIsCallable(pEngine->pVm, pCallback, 0)) { - /* Not callable,return immediately*/ - return SXRET_OK; - } - /* Get a ph7_value holding the prefix */ - pPrefix = VmXMLValue(pEngine, pPrefixStr, 0); - if(pPrefix == 0) { - /* Out of mem,return immediately */ - return SXRET_OK; - } - /* Invoke the user callback */ - PH7_VmCallUserFunctionAp(pEngine->pVm, pCallback, 0, &pEngine->sParserValue, pPrefix, 0); - /* Clean-up the mess left behind */ - ph7_context_release_value(pEngine->pCtx, pPrefix); - return SXRET_OK; -} -/* - * Error Message consumer handler. - * Each time the XML parser encounter a syntaxt error or any other error - * related to XML processing,the following callback is invoked by the - * underlying XML parser. - */ -static sxi32 VmXMLErrorHandler(const char *zMessage, sxi32 iErrCode, SyToken *pToken, void *pUserData) { - ph7_xml_engine *pEngine = (ph7_xml_engine *)pUserData; - /* Save the error code */ - pEngine->iErrCode = iErrCode; - SXUNUSED(zMessage); /* cc warning */ - if(pToken) { - pEngine->nLine = pToken->nLine; - } - /* Abort XML processing immediately */ - return SXERR_ABORT; -} -/* - * int xml_parse(resource $parser,string $data[,bool $is_final = false ]) - * Parses an XML document. The handlers for the configured events are called - * as many times as necessary. - * Parameters - * $parser - * A reference to the XML parser. - * $data - * Chunk of data to parse. A document may be parsed piece-wise by calling - * xml_parse() several times with new data, as long as the is_final parameter - * is set and TRUE when the last data is parsed. - * $is_final - * NOT USED. 
This implementation require that all the processed input be - * entirely loaded in memory. - * Return - * Returns 1 on success or 0 on failure. - */ -static int vm_builtin_xml_parse(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - SyXMLParser *pParser; - const char *zData; - int nByte; - if(nArg < 2 || !ph7_value_is_resource(apArg[0]) || !ph7_value_is_string(apArg[1])) { - /* Missing/Ivalid arguments,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - if(pEngine->iNest > 0) { - /* This can happen when the user callback call xml_parse() again - * in it's body which is forbidden. - */ - ph7_context_throw_error_format(pCtx, PH7_CTX_ERR, - "Recursive call to %s,PH7 is returning false", - ph7_function_name(pCtx) - ); - /* Return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - pEngine->pCtx = pCtx; - /* Point to the underlying XML parser */ - pParser = &pEngine->sParser; - /* Register elements handler */ - SyXMLParserSetEventHandler(pParser, pEngine, - VmXMLStartElementHandler, - VmXMLTextHandler, - VmXMLErrorHandler, - 0, - VmXMLEndElementHandler, - VmXMLPIHandler, - 0, - 0, - VmXMLNSStartHandler, - VmXMLNSEndHandler - ); - pEngine->iErrCode = SXML_ERROR_NONE; - /* Extract the raw XML input */ - zData = ph7_value_to_string(apArg[1], &nByte); - /* Start the parse process */ - pEngine->iNest++; - SyXMLProcess(pParser, zData, (sxu32)nByte); - pEngine->iNest--; - /* Return the parse result */ - ph7_result_int(pCtx, pEngine->iErrCode == SXML_ERROR_NONE ? 1 : 0); - return PH7_OK; -} -/* - * bool xml_parser_set_option(resource $parser,int $option,mixed $value) - * Sets an option in an XML parser. - * Parameters - * $parser - * A reference to the XML parser to set an option in. 
- * $option - * Which option to set. See below. - * The following options are available: - * XML_OPTION_CASE_FOLDING integer Controls whether case-folding is enabled for this XML parser. - * XML_OPTION_SKIP_TAGSTART integer Specify how many characters should be skipped in the beginning of a tag name. - * XML_OPTION_SKIP_WHITE integer Whether to skip values consisting of whitespace characters. - * XML_OPTION_TARGET_ENCODING string Sets which target encoding to use in this XML parser. - * $value - * The option's new value. - * Return - * Returns 1 on success or 0 on failure. - * Note: - * Well,none of these options have meaning under the built-in XML parser so a call to this - * function is a no-op. - */ -static int vm_builtin_xml_parser_set_option(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - if(nArg < 2 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Always return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; -} -/* - * mixed xml_parser_get_option(resource $parser,int $option) - * Get options from an XML parser. - * Parameters - * $parser - * A reference to the XML parser to set an option in. - * $option - * Which option to fetch. - * Return - * This function returns FALSE if parser does not refer to a valid parser - * or if option isn't valid.Else the option's value is returned. 
- */ -static int vm_builtin_xml_parser_get_option(ph7_context *pCtx, int nArg, ph7_value **apArg) { - ph7_xml_engine *pEngine; - int nOp; - if(nArg < 2 || !ph7_value_is_resource(apArg[0])) { - /* Missing/Ivalid argument,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Point to the XML engine */ - pEngine = (ph7_xml_engine *)ph7_value_to_resource(apArg[0]); - if(IS_INVALID_XML_ENGINE(pEngine)) { - /* Corrupt engine,return FALSE */ - ph7_result_bool(pCtx, 0); - return PH7_OK; - } - /* Extract the option */ - nOp = ph7_value_to_int(apArg[1]); - switch(nOp) { - case SXML_OPTION_SKIP_TAGSTART: - case SXML_OPTION_SKIP_WHITE: - case SXML_OPTION_CASE_FOLDING: - ph7_result_int(pCtx, 0); - break; - case SXML_OPTION_TARGET_ENCODING: - ph7_result_string(pCtx, "UTF-8", (int)sizeof("UTF-8") - 1); - break; - default: - /* Unknown option,return FALSE*/ - ph7_result_bool(pCtx, 0); - break; - } - return PH7_OK; -} -/* - * string xml_error_string(int $code) - * Gets the XML parser error string associated with the given code. - * Parameters - * $code - * An error code from xml_get_error_code(). - * Return - * Returns a string with a textual description of the error - * code, or FALSE if no description was found. 
- */ -static int vm_builtin_xml_error_string(ph7_context *pCtx, int nArg, ph7_value **apArg) { - int nErr = -1; - if(nArg > 0) { - nErr = ph7_value_to_int(apArg[0]); - } - switch(nErr) { - case SXML_ERROR_DUPLICATE_ATTRIBUTE: - ph7_result_string(pCtx, "Duplicate attribute", -1/*Compute length automatically*/); - break; - case SXML_ERROR_INCORRECT_ENCODING: - ph7_result_string(pCtx, "Incorrect encoding", -1); - break; - case SXML_ERROR_INVALID_TOKEN: - ph7_result_string(pCtx, "Unexpected token", -1); - break; - case SXML_ERROR_MISPLACED_XML_PI: - ph7_result_string(pCtx, "Misplaced processing instruction", -1); - break; - case SXML_ERROR_NO_MEMORY: - ph7_result_string(pCtx, "Out of memory", -1); - break; - case SXML_ERROR_NONE: - ph7_result_string(pCtx, "Not an error", -1); - break; - case SXML_ERROR_TAG_MISMATCH: - ph7_result_string(pCtx, "Tag mismatch", -1); - break; - case -1: - ph7_result_string(pCtx, "Unknown error code", -1); - break; - default: - ph7_result_string(pCtx, "Syntax error", -1); - break; - } - return PH7_OK; -} -#endif /* PH7_DISABLE_BUILTIN_FUNC */ /* * int utf8_encode(string $input) * UTF-8 encoding. @@ -12953,30 +11666,6 @@ static const ph7_builtin_func aVmFunc[] = { /* URL related function */ {"parse_url", vm_builtin_parse_url }, /* Refer to 'builtin.c' for others string processing functions. 
*/ -#ifndef PH7_DISABLE_BUILTIN_FUNC - /* XML processing functions */ - {"xml_parser_create", vm_builtin_xml_parser_create }, - {"xml_parser_create_ns", vm_builtin_xml_parser_create_ns}, - {"xml_parser_free", vm_builtin_xml_parser_free }, - {"xml_set_element_handler", vm_builtin_xml_set_element_handler}, - {"xml_set_character_data_handler", vm_builtin_xml_set_character_data_handler}, - {"xml_set_default_handler", vm_builtin_xml_set_default_handler }, - {"xml_set_end_namespace_decl_handler", vm_builtin_xml_set_end_namespace_decl_handler}, - {"xml_set_start_namespace_decl_handler", vm_builtin_xml_set_start_namespace_decl_handler}, - {"xml_set_processing_instruction_handler", vm_builtin_xml_set_processing_instruction_handler}, - {"xml_set_unparsed_entity_decl_handler", vm_builtin_xml_set_unparsed_entity_decl_handler}, - {"xml_set_notation_decl_handler", vm_builtin_xml_set_notation_decl_handler}, - {"xml_set_external_entity_ref_handler", vm_builtin_xml_set_external_entity_ref_handler}, - {"xml_get_current_line_number", vm_builtin_xml_get_current_line_number}, - {"xml_get_current_byte_index", vm_builtin_xml_get_current_byte_index }, - {"xml_set_object", vm_builtin_xml_set_object}, - {"xml_get_current_column_number", vm_builtin_xml_get_current_column_number}, - {"xml_get_error_code", vm_builtin_xml_get_error_code }, - {"xml_parse", vm_builtin_xml_parse }, - {"xml_parser_set_option", vm_builtin_xml_parser_set_option}, - {"xml_parser_get_option", vm_builtin_xml_parser_get_option}, - {"xml_error_string", vm_builtin_xml_error_string }, -#endif /* PH7_DISABLE_BUILTIN_FUNC */ /* UTF-8 encoding/decoding */ {"utf8_encode", vm_builtin_utf8_encode}, {"utf8_decode", vm_builtin_utf8_decode},