#include "lib.h"

/*
 * Tokenize an entire XML input.
 *
 * Lexer callback: extracts the next token from pStream into pToken.
 *   pStream   - input stream; its cursor (zText) and line counter are advanced.
 *   pToken    - receives the token type, line number and a slice (sData) that
 *               points directly into the input buffer (nothing is copied).
 *   pUserData - the owning SyXMLParser; only its xError callback is used here.
 *   pUnused2  - ignored.
 *
 * Return value:
 *   SXRET_OK       - a token was extracted.
 *   SXERR_CONTINUE - the input was consumed but must not produce a token
 *                    (comments, malformed tags).
 *   SXERR_EOF      - end of input reached (possibly in the middle of a token).
 *   SXERR_ABORT    - the error callback requested an abort.
 *
 * NOTE(review): a "<!" sequence that is not a comment, CDATA section or
 * DOCTYPE falls through and returns SXRET_OK without assigning pToken->nType;
 * confirm how the caller is expected to treat such a token.
 */
static sxi32 XML_Tokenize(SyStream *pStream, SyToken *pToken, void *pUserData, void *pUnused2) {
	SyXMLParser *pParse = (SyXMLParser *)pUserData;
	SyString *pStr;
	sxi32 rc;
	int c;
	SXUNUSED(pUnused2);
	/* Jump leading white spaces */
	while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0])) {
		/* Advance the stream cursor */
		if(pStream->zText[0] == '\n') {
			/* Increment line counter */
			pStream->nLine++;
		}
		pStream->zText++;
	}
	if(pStream->zText >= pStream->zEnd) {
		/* End of input reached */
		return SXERR_EOF;
	}
	/* Record token starting position and line */
	pToken->nLine = pStream->nLine;
	pToken->pUserData = 0;
	pStr = &pToken->sData;
	SyStringInitFromBuf(pStr, pStream->zText, 0);
	/* Extract the current token */
	c = pStream->zText[0];
	if(c == '<') {
		pStream->zText++;
		pStr->zString++;
		if(pStream->zText >= pStream->zEnd) {
			if(pParse->xError) {
				rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			/* End of input reached */
			return SXERR_EOF;
		}
		c = pStream->zText[0];
		if(c == '?') {
			/* Processing instruction */
			pStream->zText++;
			pStr->zString++;
			pToken->nType = SXML_TOK_PI;
			while(XLEX_IN_LEN(pStream) >= sizeof("?>") - 1 &&
					SyMemcmp((const void *)pStream->zText, "?>", sizeof("?>") - 1) != 0) {
				if(pStream->zText[0] == '\n') {
					/* Increment line counter */
					pStream->nLine++;
				}
				pStream->zText++;
			}
			/* Record token length */
			pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
			if(XLEX_IN_LEN(pStream) < sizeof("?>") - 1) {
				if(pParse->xError) {
					rc = pParse->xError("End of input found,but processing instruction was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				return SXERR_EOF;
			}
			pStream->zText += sizeof("?>") - 1;
		} else if(c == '!') {
			pStream->zText++;
			if(XLEX_IN_LEN(pStream) >= sizeof("--") - 1 && pStream->zText[0] == '-' && pStream->zText[1] == '-') {
				/* Comment */
				pStream->zText += sizeof("--") - 1;
				while(XLEX_IN_LEN(pStream) >= sizeof("-->") - 1 &&
						SyMemcmp((const void *)pStream->zText, "-->", sizeof("-->") - 1) != 0) {
					if(pStream->zText[0] == '\n') {
						/* Increment line counter */
						pStream->nLine++;
					}
					pStream->zText++;
				}
				if(XLEX_IN_LEN(pStream) < sizeof("-->") - 1) {
					/*
					 * Unterminated comment: fewer than three bytes are left, so
					 * skipping the closing delimiter would push the cursor past
					 * the end of the input. Report it like the other unclosed
					 * tokens and stop.
					 */
					if(pParse->xError) {
						rc = pParse->xError("End of input found,but --> was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					return SXERR_EOF;
				}
				pStream->zText += sizeof("-->") - 1;
				/* Tell the lexer to ignore this token */
				return SXERR_CONTINUE;
			}
			if(XLEX_IN_LEN(pStream) >= sizeof("[CDATA[") - 1 && SyMemcmp((const void *)pStream->zText, "[CDATA[", sizeof("[CDATA[") - 1) == 0) {
				/* CDATA */
				pStream->zText += sizeof("[CDATA[") - 1;
				pStr->zString = (const char *)pStream->zText;
				while(XLEX_IN_LEN(pStream) >= sizeof("]]>") - 1 &&
						SyMemcmp((const void *)pStream->zText, "]]>", sizeof("]]>") - 1) != 0) {
					if(pStream->zText[0] == '\n') {
						/* Increment line counter */
						pStream->nLine++;
					}
					pStream->zText++;
				}
				/* Record token type and length */
				pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
				pToken->nType = SXML_TOK_CDATA;
				if(XLEX_IN_LEN(pStream) < sizeof("]]>") - 1) {
					if(pParse->xError) {
						rc = pParse->xError("End of input found,but ]]> was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					return SXERR_EOF;
				}
				pStream->zText += sizeof("]]>") - 1;
				return SXRET_OK;
			}
			if(XLEX_IN_LEN(pStream) >= sizeof("DOCTYPE") - 1 && SyMemcmp((const void *)pStream->zText, "DOCTYPE", sizeof("DOCTYPE") - 1) == 0) {
				SyString sDelim = { ">", sizeof(char) };  /* Default delimiter */
				int cLast = 0; /* Last non-blank byte seen on the first DOCTYPE line */
				/* DOCTYPE */
				pStream->zText += sizeof("DOCTYPE") - 1;
				pStr->zString = (const char *)pStream->zText;
				/* Check for element declaration */
				while(pStream->zText < pStream->zEnd && pStream->zText[0] != '\n') {
					if(pStream->zText[0] >= 0xc0 || !SyisSpace(pStream->zText[0])) {
						cLast = pStream->zText[0];
						if(cLast == '>') {
							break;
						}
					}
					pStream->zText++;
				}
				if(cLast == '[') {
					/* Change the delimiter */
					SyStringInitFromBuf(&sDelim, "]>", sizeof("]>") - 1);
				}
				if(cLast != '>') {
					while(XLEX_IN_LEN(pStream) >= sDelim.nByte &&
							SyMemcmp((const void *)pStream->zText, sDelim.zString, sDelim.nByte) != 0) {
						if(pStream->zText[0] == '\n') {
							/* Increment line counter */
							pStream->nLine++;
						}
						pStream->zText++;
					}
				}
				/* Record token type and length */
				pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
				pToken->nType = SXML_TOK_DOCTYPE;
				if(XLEX_IN_LEN(pStream) < sDelim.nByte) {
					if(pParse->xError) {
						rc = pParse->xError("End of input found,but ]> or > was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					return SXERR_EOF;
				}
				pStream->zText += sDelim.nByte;
				return SXRET_OK;
			}
		} else {
			/* Start tag, end tag or empty-element tag; c already holds the byte following '<' */
			rc = SXRET_OK;
			pToken->nType = SXML_TOK_START_TAG;
			if(c == '/') {
				/* End tag */
				pToken->nType = SXML_TOK_END_TAG;
				pStream->zText++;
				pStr->zString++;
				if(pStream->zText >= pStream->zEnd) {
					if(pParse->xError) {
						rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					return SXERR_EOF;
				}
				c = pStream->zText[0];
			}
			if(c == '>') {
				/*<>*/
				if(pParse->xError) {
					rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				/* Ignore the token */
				return SXERR_CONTINUE;
			}
			if(c < 0xc0 && (SyisSpace(c) || SyisDigit(c) || c == '.' || c == '-' || IS_XML_DIRTY(c))) {
				if(pParse->xError) {
					rc = pParse->xError("Illegal syntax,expecting valid start name character", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				/* Keep scanning up to the closing '>' so the whole tag is skipped */
				rc = SXERR_INVALID;
			}
			pStream->zText++;
			/* Delimit the tag */
			while(pStream->zText < pStream->zEnd && pStream->zText[0] != '>') {
				c = pStream->zText[0];
				if(c >= 0xc0) {
					/* UTF-8 stream */
					pStream->zText++;
					SX_JMP_UTF8(pStream->zText, pStream->zEnd);
				} else {
					if(c == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '>') {
						pStream->zText++;
						if(pToken->nType != SXML_TOK_START_TAG) {
							if(pParse->xError) {
								rc = pParse->xError("Unexpected closing tag,expecting '>'",
													SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
								if(rc == SXERR_ABORT) {
									return SXERR_ABORT;
								}
							}
							/* Ignore the token */
							rc = SXERR_INVALID;
						} else {
							pToken->nType = SXML_TOK_START_END;
						}
						break;
					}
					if(pStream->zText[0] == '\n') {
						/* Increment line counter */
						pStream->nLine++;
					}
					/* Advance the stream cursor */
					pStream->zText++;
				}
			}
			if(rc != SXRET_OK) {
				/* Tell the lexer to ignore this token */
				return SXERR_CONTINUE;
			}
			/* Record token length */
			pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
			if(pToken->nType == SXML_TOK_START_END && pStr->nByte > 0) {
				/* Drop the '/' of the "/>" terminator from the token data */
				pStr->nByte -= sizeof(char);
			}
			if(pStream->zText < pStream->zEnd) {
				/* Jump the closing '>' */
				pStream->zText++;
			} else {
				if(pParse->xError) {
					rc = pParse->xError("End of input found,but closing tag '>' was not found", SXML_ERROR_UNCLOSED_TOKEN, pToken, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
			}
		}
	} else {
		/* Raw input */
		while(pStream->zText < pStream->zEnd) {
			c = pStream->zText[0];
			if(c < 0xc0) {
				if(c == '<') {
					break;
				} else if(c == '\n') {
					/* Increment line counter */
					pStream->nLine++;
				}
				/* Advance the stream cursor */
				pStream->zText++;
			} else {
				/* UTF-8 stream */
				pStream->zText++;
				SX_JMP_UTF8(pStream->zText, pStream->zEnd);
			}
		}
		/* Record token type,length */
		pToken->nType = SXML_TOK_RAW;
		pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
	}
	/* Return to the lexer */
	return SXRET_OK;
}
/*
 * Tell whether an attribute name is already stored in a working set.
 *
 * The set is visited two records at a time (index 0, 2, 4, ...); the caller
 * in this file stores each attribute as a name record followed by a value
 * record, so only names are compared.
 *
 *   aSet   - base of the record array.
 *   nEntry - number of records in aSet.
 *   pEntry - candidate attribute name.
 *
 * Returns 1 when a record with the same length and bytes exists, 0 otherwise.
 */
static int XMLCheckDuplicateAttr(SyXMLRawStr *aSet, sxu32 nEntry, SyXMLRawStr *pEntry) {
	sxu32 iName = 0;
	while(iName < nEntry) {
		const SyXMLRawStr *pName = &aSet[iName];
		if(pName->nByte == pEntry->nByte) {
			/* Same length: compare the raw bytes */
			if(SyMemcmp(pName->zString, pEntry->zString, pEntry->nByte) == 0) {
				return 1;
			}
		}
		/* Jump over the value record that follows this name */
		iName += 2;
	}
	return 0;
}
/*
 * Process a candidate namespace declaration (an attribute whose name starts
 * with "xmlns").
 *
 * The two most recent records of pAttr are taken as the attribute name and
 * its value (the URI).
 *   - "xmlns"        : default namespace, registered under the key "Default".
 *   - "xmlns:prefix" : prefixed namespace, registered under "prefix".
 *   - anything else  : (e.g. "xmlnsfoo") an ordinary attribute; it is left
 *                      untouched in pAttr and SXRET_OK is returned.
 *
 * On a successful declaration the xNameSpace callback is invoked (if any), a
 * heap copy of the URI record is inserted in pParse->hns, the resulting hash
 * entry is recorded in pTag->sNSset and the two records are popped from pAttr.
 *
 * Returns SXRET_OK, SXERR_SYNTAX (empty prefix; the two records are popped)
 * or SXERR_ABORT (callback abort or out of memory).
 */
static sxi32 XMLProcessNamesSpace(SyXMLParser *pParse, SyXMLRawStrNS *pTag, SyToken *pToken, SySet *pAttr) {
	SyXMLRawStr *pPrefix, *pUri; /* Namespace prefix/URI */
	SyHashEntry *pEntry;
	SyXMLRawStr *pDup;
	sxi32 rc;
	/* Extract the URI first */
	pUri = (SyXMLRawStr *)SySetPeek(pAttr);
	/* Extract the prefix */
	pPrefix = (SyXMLRawStr *)SySetAt(pAttr, SySetUsed(pAttr) - 2);
	if(pPrefix->nByte < sizeof("xmlns") - 1) {
		/* Too short to be a namespace declaration: ordinary attribute */
		return SXRET_OK;
	}
	/* Prefix name */
	if(pPrefix->nByte == sizeof("xmlns") - 1) {
		/* Default namespace */
		pPrefix->nByte = 0;
		pPrefix->zString = ""; /* Empty string */
	} else {
		/*
		 * Look at the separator BEFORE touching the record: an attribute such
		 * as "xmlnsfoo" is not a declaration and must keep its full name.
		 */
		if(pPrefix->zString[sizeof("xmlns") - 1] != ':') {
			return SXRET_OK;
		}
		/* Strip "xmlns:" */
		pPrefix->nByte   -= sizeof("xmlns");
		pPrefix->zString += sizeof("xmlns");
		if(pPrefix->nByte < 1) {
			if(pParse->xError) {
				rc = pParse->xError("Invalid namespace name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			/* POP the last inserted two entries */
			(void)SySetPop(pAttr);
			(void)SySetPop(pAttr);
			return SXERR_SYNTAX;
		}
	}
	/* Invoke the namespace callback if available */
	if(pParse->xNameSpace) {
		rc = pParse->xNameSpace(pPrefix, pUri, pParse->pUserData);
		if(rc == SXERR_ABORT) {
			/* User callback request an operation abort */
			return SXERR_ABORT;
		}
	}
	/* Duplicate structure */
	pDup = (SyXMLRawStr *)SyMemBackendAlloc(pParse->pAllocator, sizeof(SyXMLRawStr));
	if(pDup == 0) {
		if(pParse->xError) {
			pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData);
		}
		/* Abort processing immediately */
		return SXERR_ABORT;
	}
	*pDup = *pUri; /* Structure assignment */
	/* Save the namespace */
	if(pPrefix->nByte == 0) {
		pPrefix->zString = "Default";
		pPrefix->nByte = sizeof("Default") - 1;
	}
	rc = SyHashInsert(&pParse->hns, (const void *)pPrefix->zString, pPrefix->nByte, pDup);
	if(rc != SXRET_OK) {
		/*
		 * Nothing was inserted: release the copy and stop. Peeking the "last
		 * entry" here would hand back an unrelated namespace entry.
		 */
		SyMemBackendFree(pParse->pAllocator, pDup);
		if(pParse->xError) {
			pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData);
		}
		return SXERR_ABORT;
	}
	/* Peek the last inserted entry */
	pEntry = SyHashLastEntry(&pParse->hns);
	/* Store in the corresponding tag container*/
	if(SXRET_OK != SySetPut(&pTag->sNSset, (const void *)&pEntry)) {
		/* The entry cannot be tracked by its tag: undo the registration */
		SyHashDeleteEntry2(pEntry);
		SyMemBackendFree(pParse->pAllocator, pDup);
		if(pParse->xError) {
			pParse->xError("Out of memory", SXML_ERROR_NO_MEMORY, pToken, pParse->pUserData);
		}
		return SXERR_ABORT;
	}
	/* POP the last inserted two entries */
	(void)SySetPop(pAttr);
	(void)SySetPop(pAttr);
	return SXRET_OK;
}
/*
 * Split the raw data of a start (or empty-element) tag token into the tag
 * name and its attributes.
 *
 *   pParse    - parser instance (error callback, flags).
 *   pToken    - the tag token; its sData slice is scanned but not modified.
 *   pTag      - receives the tag name slice and line; namespace declarations
 *               found among the attributes are recorded in pTag->sNSset.
 *   pAttrSet  - working set; reset first, then filled with name/value records
 *               (name at even indexes, value at odd indexes).
 *   pTagStack - for SXML_TOK_START_TAG tokens, *pTag is pushed on this stack.
 *
 * Returns SXRET_OK on success, SXERR_SYNTAX on malformed input, SXERR_ABORT
 * when a callback requests an abort or a container insertion fails.
 */
static sxi32 XMLProcessStartTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SySet  *pAttrSet, SySet *pTagStack) {
	SyString *pIn = &pToken->sData;
	const char *zIn, *zCur, *zEnd;
	SyXMLRawStr sEntry;
	sxi32 rc;
	int c;
	/* Reset the working set */
	SySetReset(pAttrSet);
	/* Delimit the raw tag */
	zIn = pIn->zString;
	zEnd = &zIn[pIn->nByte];
	while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
		zIn++;
	}
	/* Isolate tag name */
	sEntry.nLine = pTag->nLine = pToken->nLine;
	zCur = zIn;
	while(zIn < zEnd) {
		if((unsigned char)zIn[0] >= 0xc0) {
			/* UTF-8 stream */
			zIn++;
			SX_JMP_UTF8(zIn, zEnd);
		} else if(SyisSpace(zIn[0])) {
			break;
		} else {
			if(IS_XML_DIRTY(zIn[0])) {
				/* Reported, but the byte is still kept as part of the name */
				if(pParse->xError) {
					rc = pParse->xError("Illegal character in XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
			}
			zIn++;
		}
	}
	if(zCur >= zIn) {
		if(pParse->xError) {
			rc = pParse->xError("Invalid XML name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
			if(rc == SXERR_ABORT) {
				return SXERR_ABORT;
			}
		}
		return SXERR_SYNTAX;
	}
	pTag->zString = zCur;
	pTag->nByte = (sxu32)(zIn - zCur);
	/* Process tag attribute */
	for(;;) {
		int is_ns = 0;
		while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
			zIn++;
		}
		if(zIn >= zEnd) {
			break;
		}
		zCur = zIn;
		while(zIn < zEnd && zIn[0] != '=') {
			if((unsigned char)zIn[0] >= 0xc0) {
				/* UTF-8 stream */
				zIn++;
				SX_JMP_UTF8(zIn, zEnd);
			} else if(SyisSpace(zIn[0])) {
				break;
			} else {
				zIn++;
			}
		}
		if(zCur >= zIn) {
			if(pParse->xError) {
				rc = pParse->xError("Missing attribute name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			return SXERR_SYNTAX;
		}
		/* Store attribute name */
		sEntry.zString = zCur;
		sEntry.nByte = (sxu32)(zIn - zCur);
		if((pParse->nFlags & SXML_ENABLE_NAMESPACE) && sEntry.nByte >= sizeof("xmlns") - 1 &&
				SyMemcmp(sEntry.zString, "xmlns", sizeof("xmlns") - 1) == 0) {
			/* Candidate namespace declaration; examined once the value is known */
			is_ns = 1;
		}
		while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
			zIn++;
		}
		if(zIn >= zEnd || zIn[0] != '=') {
			if(pParse->xError) {
				rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			return SXERR_SYNTAX;
		}
		while(sEntry.nByte > 0 && (unsigned char)zCur[sEntry.nByte - 1] < 0xc0
				&& SyisSpace(zCur[sEntry.nByte - 1])) {
			sEntry.nByte--;
		}
		/* Check for duplicates first */
		if(XMLCheckDuplicateAttr((SyXMLRawStr *)SySetBasePtr(pAttrSet), SySetUsed(pAttrSet), &sEntry)) {
			if(pParse->xError) {
				rc = pParse->xError("Duplicate attribute", SXML_ERROR_DUPLICATE_ATTRIBUTE, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			return SXERR_SYNTAX;
		}
		if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) {
			return SXERR_ABORT;
		}
		/* Extract attribute value */
		zIn++; /* Jump the trailing '=' */
		while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
			zIn++;
		}
		if(zIn >= zEnd) {
			if(pParse->xError) {
				rc = pParse->xError("Missing attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			/* Drop the name that has no value */
			(void)SySetPop(pAttrSet);
			return SXERR_SYNTAX;
		}
		if(zIn[0] != '\'' && zIn[0] != '"') {
			if(pParse->xError) {
				rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			(void)SySetPop(pAttrSet);
			return SXERR_SYNTAX;
		}
		/* Remember the opening quote; the value runs up to the matching one */
		c = zIn[0];
		zIn++;
		zCur = zIn;
		while(zIn < zEnd && zIn[0] != c) {
			zIn++;
		}
		if(zIn >= zEnd) {
			if(pParse->xError) {
				rc = pParse->xError("Missing quotes on attribute value", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
				if(rc == SXERR_ABORT) {
					return SXERR_ABORT;
				}
			}
			(void)SySetPop(pAttrSet);
			return SXERR_SYNTAX;
		}
		/* Store attribute value */
		sEntry.zString = zCur;
		sEntry.nByte = (sxu32)(zIn - zCur);
		if(SXRET_OK != SySetPut(pAttrSet, (const void *)&sEntry)) {
			return SXERR_ABORT;
		}
		zIn++;
		if(is_ns) {
			/* Process namespace declaration */
			rc = XMLProcessNamesSpace(pParse, pTag, pToken, pAttrSet);
			if(rc == SXERR_ABORT) {
				/* Callback abort or out of memory: do not swallow it */
				return SXERR_ABORT;
			}
			/* Any other failure has already been reported; keep scanning attributes */
		}
	}
	/* Store in the tag stack */
	if(pToken->nType == SXML_TOK_START_TAG) {
		if(SXRET_OK != SySetPut(pTagStack, (const void *)pTag)) {
			/* Without the stack entry the matching end tag could never be paired */
			return SXERR_ABORT;
		}
	}
	return SXRET_OK;
}
/*
 * Split a processing-instruction token into its target and its data.
 *
 *   pToken  - the PI token; its sData slice is trimmed in place.
 *   pTarget - receives the target: the leading run of non-blank bytes.
 *   pData   - receives everything after the blanks that follow the target.
 *   pXML    - optional; set to 1 when the target is exactly "xml"
 *             (case-insensitive comparison), 0 otherwise.
 *
 * A missing target or missing data is reported as a null zString with a zero
 * nByte, so callers can always read both fields.
 */
static void XMLExtactPI(SyToken *pToken, SyXMLRawStr *pTarget, SyXMLRawStr *pData, int *pXML) {
	SyString *pIn = &pToken->sData;
	const char *zIn, *zCur, *zEnd;
	pTarget->nLine = pData->nLine = pToken->nLine;
	/* Nullify the entries first (length included: callers pass uninitialized records) */
	pTarget->zString = pData->zString = 0;
	pTarget->nByte = pData->nByte = 0;
	/* Ignore leading and trailing white spaces */
	SyStringFullTrim(pIn);
	/* Delimit the raw PI */
	zIn  = pIn->zString;
	zEnd = &zIn[pIn->nByte];
	if(pXML) {
		*pXML = 0;
	}
	/* Extract the target */
	zCur = zIn;
	while(zIn < zEnd) {
		if((unsigned char)zIn[0] >= 0xc0) {
			/* UTF-8 stream */
			zIn++;
			SX_JMP_UTF8(zIn, zEnd);
		} else if(SyisSpace(zIn[0])) {
			break;
		} else {
			zIn++;
		}
	}
	if(zIn > zCur) {
		pTarget->zString = zCur;
		pTarget->nByte = (sxu32)(zIn - zCur);
		if(pXML && pTarget->nByte == sizeof("xml") - 1 && SyStrnicmp(pTarget->zString, "xml", sizeof("xml") - 1) == 0) {
			*pXML = 1;
		}
	}
	/* Extract the PI data  */
	while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
		zIn++;
	}
	if(zIn < zEnd) {
		pData->zString = zIn;
		pData->nByte = (sxu32)(zEnd - zIn);
	}
}
/*
 * Extract the name carried by an end tag token.
 *
 * The token data is stripped of leading and trailing white space and the
 * resulting slice (together with the token line) is stored in pOut.
 *
 * Returns SXRET_OK when a non-empty name remains. Otherwise the error
 * callback (if any) is invoked and SXERR_SYNTAX is returned, or SXERR_ABORT
 * when the callback asks for it.
 */
static sxi32 XMLExtractEndTag(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pOut) {
	const SyString *pRaw = &pToken->sData;
	const char *zFirst = pRaw->zString;
	const char *zLast = &zFirst[pRaw->nByte]; /* One past the last byte */
	/* Shrink the window from the left ... */
	while(zFirst < zLast && (unsigned char)zFirst[0] < 0xc0 && SyisSpace(zFirst[0])) {
		zFirst++;
	}
	/* ... then from the right */
	while(zLast > zFirst && (unsigned char)zLast[-1] < 0xc0 && SyisSpace(zLast[-1])) {
		zLast--;
	}
	pOut->nLine = pToken->nLine;
	pOut->zString = zFirst;
	pOut->nByte = (sxu32)(zLast - zFirst);
	if(pOut->nByte > 0) {
		return SXRET_OK;
	}
	/* Nothing left: "</>" or a blank-only name */
	if(pParse->xError) {
		sxi32 rc = pParse->xError("Invalid end tag name", SXML_ERROR_INVALID_TOKEN, pToken, pParse->pUserData);
		if(rc == SXERR_ABORT) {
			return SXERR_ABORT;
		}
	}
	return SXERR_SYNTAX;
}
/*
 * Expose the data of a token as an XML raw string.
 * The token data is trimmed in place (leading and trailing white space);
 * pOut then refers to the same bytes, nothing is copied. Only zString and
 * nByte of pOut are written.
 */
static void TokenToXMLString(SyToken *pTok, SyXMLRawStrNS *pOut) {
	SyString *pText = &pTok->sData;
	SyStringFullTrim(pText);
	pOut->nByte = SyStringLength(pText);
	pOut->zString = SyStringData(pText);
}
/*
 * Resolve the namespace of a tag name.
 *
 *   pParse - parser instance holding the namespace table (hns).
 *   pToken - token the tag came from, passed to the error callback.
 *   pTag   - in: the full tag name ("local" or "prefix:local");
 *            out: for a prefixed name, narrowed to the local part only.
 *   pnsUri - receives the namespace URI slice when one applies; left
 *            untouched when the name has no prefix and no default namespace
 *            is registered.
 *
 * Returns SXRET_OK on success, SXERR_SYNTAX for an empty prefix, an empty
 * local name or an unregistered prefix, SXERR_ABORT when the error callback
 * requests it.
 */
static sxi32 XMLExtractNS(SyXMLParser *pParse, SyToken *pToken, SyXMLRawStrNS *pTag, SyXMLRawStr *pnsUri) {
	SyXMLRawStr *pUri, sPrefix;
	SyHashEntry *pEntry;
	sxu32 nOfft;
	sxi32 rc;
	/* Extract a prefix if available */
	rc = SyByteFind(pTag->zString, pTag->nByte, ':', &nOfft);
	if(rc != SXRET_OK) {
		/* Check if there is a default namespace */
		pEntry = SyHashGet(&pParse->hns, "Default", sizeof("Default") - 1);
		if(pEntry) {
			/* Extract the ns URI */
			pUri = (SyXMLRawStr *)pEntry->pUserData;
			/* Save the ns URI */
			pnsUri->zString = pUri->zString;
			pnsUri->nByte = pUri->nByte;
		}
		return SXRET_OK;
	}
	if(nOfft < 1) {
		if(pParse->xError) {
			rc = pParse->xError("Empty prefix is not allowed according to XML namespace specification",
								SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
			if(rc == SXERR_ABORT) {
				return SXERR_ABORT;
			}
		}
		return SXERR_SYNTAX;
	}
	sPrefix.zString = pTag->zString;
	sPrefix.nByte = nOfft;
	sPrefix.nLine = pTag->nLine;
	/*
	 * Narrow the tag to its local part: skip the prefix AND the ':' separator
	 * in both the pointer and the length. Keeping the separator in the length
	 * would make the name one byte too long and would let "prefix:" (empty
	 * local name) slip through the check below.
	 */
	pTag->zString += nOfft + 1;
	pTag->nByte -= nOfft + 1;
	if(pTag->nByte < 1) {
		if(pParse->xError) {
			rc = pParse->xError("Missing tag name", SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
			if(rc == SXERR_ABORT) {
				return SXERR_ABORT;
			}
		}
		return SXERR_SYNTAX;
	}
	/* Check if the prefix is already registered */
	pEntry = SyHashGet(&pParse->hns, sPrefix.zString, sPrefix.nByte);
	if(pEntry == 0) {
		if(pParse->xError) {
			rc = pParse->xError("Namespace prefix is not defined", SXML_ERROR_SYNTAX,
								pToken, pParse->pUserData);
			if(rc == SXERR_ABORT) {
				return SXERR_ABORT;
			}
		}
		return SXERR_SYNTAX;
	}
	/* Extract the ns URI */
	pUri = (SyXMLRawStr *)pEntry->pUserData;
	/* Save the ns URI */
	pnsUri->zString = pUri->zString;
	pnsUri->nByte = pUri->nByte;
	/* All done */
	return SXRET_OK;
}
/*
 * Unregister every namespace declared by a tag.
 *
 *   pParse - parser instance (namespace table, allocator, callbacks).
 *   pLast  - the tag whose sNSset lists the hash entries to drop.
 *   pToken - token that closes the tag; when null, the xNameSpaceEnd
 *            callback is never invoked.
 *
 * For each entry the xNameSpaceEnd callback is invoked (when installed,
 * namespaces are enabled and pToken is not null), the entry is removed from
 * the namespace table and its payload is freed. Finally sNSset is released.
 *
 * If a callback requests an abort, no further callback is invoked but the
 * remaining entries are still released, so nothing is left registered or
 * allocated on behalf of this tag. Returns SXERR_ABORT in that case,
 * SXRET_OK otherwise.
 */
static sxi32 XMLnsUnlink(SyXMLParser *pParse, SyXMLRawStrNS *pLast, SyToken *pToken) {
	SyHashEntry **apEntry, *pEntry;
	void *pUserData;
	sxi32 rcResult = SXRET_OK;
	sxu32 n;
	/* Release namespace entries */
	apEntry = (SyHashEntry **)SySetBasePtr(&pLast->sNSset);
	for(n = 0 ; n < SySetUsed(&pLast->sNSset) ; ++n) {
		pEntry = apEntry[n];
		/* Invoke the end namespace declaration callback (skipped once an abort was requested) */
		if(rcResult != SXERR_ABORT && pParse->xNameSpaceEnd && (pParse->nFlags & SXML_ENABLE_NAMESPACE) && pToken) {
			SyXMLRawStr sPrefix;
			sxi32 rc;
			sPrefix.zString = (const char *)pEntry->pKey;
			sPrefix.nByte = pEntry->nKeyLen;
			sPrefix.nLine = pToken->nLine;
			rc = pParse->xNameSpaceEnd(&sPrefix, pParse->pUserData);
			if(rc == SXERR_ABORT) {
				/* Remember the request; cleanup below must still run for every entry */
				rcResult = SXERR_ABORT;
			}
		}
		/* Grab the payload before the entry goes away */
		pUserData = pEntry->pUserData;
		/* Remove from the namespace hashtable */
		SyHashDeleteEntry2(pEntry);
		SyMemBackendFree(pParse->pAllocator, pUserData);
	}
	SySetRelease(&pLast->sNSset);
	return rcResult;
}
/*
 * Process XML tokens.
 *
 * Walks the token set built by the lexer (pParse->sToken) and drives the
 * user callbacks installed in pParse (xStartDoc, xDoctype, xRaw, xPi,
 * xStartTag, xEndTag, xError, xEndDoc).
 *
 *   pParse    - parser instance.
 *   pTagStack - stack of currently open tags (SyXMLRawStrNS records).
 *   pWorker   - working set handed to XMLProcessStartTag, which fills it with
 *               the attribute records passed to xStartTag.
 *
 * Returns SXERR_ABORT as soon as a checked callback or helper requests an
 * abort (xEndDoc is not invoked in that case), SXRET_OK otherwise. Syntax
 * problems are reported through xError and processing goes on with the next
 * token.
 */
static sxi32  ProcessXML(SyXMLParser *pParse, SySet *pTagStack, SySet *pWorker) {
	SySet *pTokenSet = &pParse->sToken;
	SyXMLRawStrNS sEntry;
	SyXMLRawStr sNs;
	SyToken *pToken;
	int bGotTag; /* Set once the first start tag has been seen */
	sxi32 rc;
	/* Initialize fields */
	bGotTag = 0;
	/* Start processing */
	if(pParse->xStartDoc && (SXERR_ABORT == pParse->xStartDoc(pParse->pUserData))) {
		/* User callback request an operation abort */
		return SXERR_ABORT;
	}
	/* Reset the loop cursor */
	SySetResetCursor(pTokenSet);
	/* Extract the current token */
	while(SXRET_OK == (SySetGetNextEntry(&(*pTokenSet), (void **)&pToken))) {
		/* Fresh, empty tag/namespace records for every token */
		SyZero(&sEntry, sizeof(SyXMLRawStrNS));
		SyZero(&sNs, sizeof(SyXMLRawStr));
		SySetInit(&sEntry.sNSset, pParse->pAllocator, sizeof(SyHashEntry *));
		sEntry.nLine = sNs.nLine = pToken->nLine;
		switch(pToken->nType) {
			case SXML_TOK_DOCTYPE:
				/* Rejected when more than one tag is open or any start tag was already seen */
				if(SySetUsed(pTagStack) > 1 || bGotTag) {
					if(pParse->xError) {
						rc = pParse->xError("DOCTYPE must be declared first", SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					break;
				}
				/* Invoke the supplied callback if any */
				if(pParse->xDoctype) {
					TokenToXMLString(pToken, &sEntry);
					rc = pParse->xDoctype((SyXMLRawStr *)&sEntry, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				break;
			case SXML_TOK_CDATA:
				if(SySetUsed(pTagStack) < 1) {
					if(pParse->xError) {
						rc = pParse->xError("CDATA without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					/* No break here: unlike SXML_TOK_RAW, the data is still forwarded to xRaw below */
				}
				/* Invoke the supplied callback if any */
				if(pParse->xRaw) {
					TokenToXMLString(pToken, &sEntry);
					rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				break;
			case SXML_TOK_PI: {
					SyXMLRawStr sTarget, sData;
					int isXML = 0;
					/* Extract the target and data */
					XMLExtactPI(pToken, &sTarget, &sData, &isXML);
					/* An "xml" target is only accepted on the very first token of the set */
					if(isXML && SySetCursor(pTokenSet) - 1 > 0) {
						if(pParse->xError) {
							rc = pParse->xError("Unexpected XML declaration. The XML declaration must be the first node in the document",
												SXML_ERROR_MISPLACED_XML_PI, pToken, pParse->pUserData);
							if(rc == SXERR_ABORT) {
								return SXERR_ABORT;
							}
						}
					} else if(pParse->xPi) {
						/* Invoke the supplied callback*/
						rc = pParse->xPi(&sTarget, &sData, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					break;
				}
			case SXML_TOK_RAW:
				/* Text outside of any open tag is reported and dropped */
				if(SySetUsed(pTagStack) < 1) {
					if(pParse->xError) {
						rc = pParse->xError("Text (Raw data) without matching tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					break;
				}
				/* Invoke the supplied callback if any */
				if(pParse->xRaw) {
					TokenToXMLString(pToken, &sEntry);
					rc = pParse->xRaw((SyXMLRawStr *)&sEntry, pParse->pUserData);
					if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
				}
				break;
			case SXML_TOK_END_TAG: {
					SyXMLRawStrNS *pLast = 0; /* cc warning */
					if(SySetUsed(pTagStack) < 1) {
						if(pParse->xError) {
							rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData);
							if(rc == SXERR_ABORT) {
								return SXERR_ABORT;
							}
						}
						break;
					}
					rc = XMLExtractEndTag(pParse, pToken, &sEntry);
					if(rc == SXRET_OK) {
						/* Extract the last inserted entry */
						pLast = (SyXMLRawStrNS *)SySetPeek(pTagStack);
						/* The end tag must match the innermost open tag byte for byte */
						if(pLast == 0 || pLast->nByte != sEntry.nByte ||
								SyMemcmp(pLast->zString, sEntry.zString, sEntry.nByte) != 0) {
							if(pParse->xError) {
								rc = pParse->xError("Unexpected closing tag", SXML_ERROR_TAG_MISMATCH, pToken, pParse->pUserData);
								if(rc == SXERR_ABORT) {
									return SXERR_ABORT;
								}
							}
						} else {
							/* Invoke the supplied callback if any */
							if(pParse->xEndTag) {
								rc = SXRET_OK;
								if(pParse->nFlags & SXML_ENABLE_NAMESPACE) {
									/* Extract namespace URI */
									rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs);
									if(rc == SXERR_ABORT) {
										return SXERR_ABORT;
									}
								}
								if(rc == SXRET_OK) {
									rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData);
									if(rc == SXERR_ABORT) {
										return SXERR_ABORT;
									}
								}
							}
						}
					} else if(rc == SXERR_ABORT) {
						return SXERR_ABORT;
					}
					/*
					 * pLast is set whenever the end tag name could be extracted,
					 * even on a mismatch: the innermost open tag is popped (and
					 * its namespaces unlinked) in both cases.
					 */
					if(pLast) {
						rc = XMLnsUnlink(pParse, pLast, pToken);
						(void)SySetPop(pTagStack);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					break;
				}
			case SXML_TOK_START_TAG:
			case SXML_TOK_START_END:
				/* A second top-level element: the stack is empty but a tag was already seen */
				if(SySetUsed(pTagStack) < 1 && bGotTag) {
					if(pParse->xError) {
						rc = pParse->xError("XML document cannot contain multiple root level elements documents",
											SXML_ERROR_SYNTAX, pToken, pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					break;
				}
				bGotTag = 1;
				/* Extract the tag and it's supplied attribute */
				rc = XMLProcessStartTag(pParse, pToken, &sEntry, pWorker, pTagStack);
				if(rc == SXRET_OK) {
					if(pParse->nFlags & SXML_ENABLE_NAMESPACE) {
						/* Extract namespace URI */
						rc = XMLExtractNS(pParse, pToken, &sEntry, &sNs);
					}
				}
				if(rc == SXRET_OK) {
					/* Invoke the supplied callback */
					if(pParse->xStartTag) {
						rc = pParse->xStartTag((SyXMLRawStr *)&sEntry, &sNs, SySetUsed(pWorker),
											   (SyXMLRawStr *)SySetBasePtr(pWorker), pParse->pUserData);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
					if(pToken->nType == SXML_TOK_START_END) {
						/* Empty-element tag: close it right away and drop its namespaces */
						if(pParse->xEndTag) {
							rc = pParse->xEndTag((SyXMLRawStr *)&sEntry, &sNs, pParse->pUserData);
							if(rc == SXERR_ABORT) {
								return SXERR_ABORT;
							}
						}
						rc = XMLnsUnlink(pParse, &sEntry, pToken);
						if(rc == SXERR_ABORT) {
							return SXERR_ABORT;
						}
					}
				} else if(rc == SXERR_ABORT) {
					/* Abort processing immediately */
					return SXERR_ABORT;
				}
				break;
			default:
				/* Can't happen */
				break;
		}
	}
	/* Tags still open at the end of the input: report once, against the last token */
	if(SySetUsed(pTagStack) > 0 && pParse->xError) {
		pParse->xError("Missing closing tag", SXML_ERROR_SYNTAX,
					   (SyToken *)SySetPeek(&pParse->sToken), pParse->pUserData);
	}
	if(pParse->xEndDoc) {
		pParse->xEndDoc(pParse->pUserData);
	}
	return SXRET_OK;
}
/*
 * Prepare an XML parser instance for use.
 * The structure is zeroed, then the allocator and flags are recorded and the
 * token container, the lexer (driven by XML_Tokenize, with the parser itself
 * as user data) and the namespace table are initialized.
 * Always returns SXRET_OK.
 */
PH7_PRIVATE sxi32 SyXMLParserInit(SyXMLParser *pParser, SyMemBackend *pAllocator, sxi32 iFlags) {
	/* Start from a clean slate: every callback pointer becomes null */
	SyZero(pParser, sizeof(SyXMLParser));
	/* Plain fields */
	pParser->nFlags = iFlags;
	pParser->pAllocator = pAllocator;
	/* Token container and the lexer feeding it */
	SySetInit(&pParser->sToken, pAllocator, sizeof(SyToken));
	SyLexInit(&pParser->sLex, &pParser->sToken, XML_Tokenize, pParser);
	/* Namespace table */
	SyHashInit(&pParser->hns, pAllocator, 0, 0);
	return SXRET_OK;
}
/*
 * Install the user callbacks of an XML parser.
 * A null handler leaves the corresponding slot untouched, so an earlier
 * registration is kept; pUserData is always overwritten.
 * Always returns SXRET_OK.
 */
PH7_PRIVATE sxi32 SyXMLParserSetEventHandler(SyXMLParser *pParser,
		void *pUserData,
		ProcXMLStartTagHandler xStartTag,
		ProcXMLTextHandler xRaw,
		ProcXMLSyntaxErrorHandler xErr,
		ProcXMLStartDocument xStartDoc,
		ProcXMLEndTagHandler xEndTag,
		ProcXMLPIHandler   xPi,
		ProcXMLEndDocument xEndDoc,
		ProcXMLDoctypeHandler xDoctype,
		ProcXMLNameSpaceStart xNameSpace,
		ProcXMLNameSpaceEnd   xNameSpaceEnd
											) {
	/* Opaque pointer forwarded to every callback */
	pParser->pUserData = pUserData;
	/* Document level events */
	if(xStartDoc) {
		pParser->xStartDoc = xStartDoc;
	}
	if(xEndDoc) {
		pParser->xEndDoc = xEndDoc;
	}
	if(xDoctype) {
		pParser->xDoctype = xDoctype;
	}
	if(xPi) {
		pParser->xPi = xPi;
	}
	/* Element level events */
	if(xStartTag) {
		pParser->xStartTag = xStartTag;
	}
	if(xEndTag) {
		pParser->xEndTag = xEndTag;
	}
	if(xRaw) {
		pParser->xRaw = xRaw;
	}
	/* Namespace events */
	if(xNameSpace) {
		pParser->xNameSpace = xNameSpace;
	}
	if(xNameSpaceEnd) {
		pParser->xNameSpaceEnd = xNameSpaceEnd;
	}
	/* Error reporting */
	if(xErr) {
		pParser->xError = xErr;
	}
	return SXRET_OK;
}
/*
 * Process an XML chunk.
 *
 * Tokenizes zInput (nByte bytes) with the parser's lexer, then walks the
 * resulting tokens with ProcessXML, which drives the installed callbacks.
 *
 * Returns SXERR_ABORT when the tokenizer or a callback requested an abort,
 * SXRET_OK when there was nothing to process, otherwise the result of
 * ProcessXML.
 */
PH7_PRIVATE sxi32 SyXMLProcess(SyXMLParser *pParser, const char *zInput, sxu32 nByte) {
	SySet sTagStack;
	SySet sWorker;
	sxi32 rc;
	/* Initialize working sets */
	SySetInit(&sWorker, pParser->pAllocator, sizeof(SyXMLRawStr)); /* Tag container */
	SySetInit(&sTagStack, pParser->pAllocator, sizeof(SyXMLRawStrNS)); /* Tag stack */
	/* Tokenize the entire input */
	rc = SyLexTokenizeInput(&pParser->sLex, zInput, nByte, 0, 0, 0);
	if(rc == SXERR_ABORT) {
		/* Tokenize callback request an operation abort */
		return SXERR_ABORT;
	}
	if(SySetUsed(&pParser->sToken) < 1) {
		/* Nothing to process [i.e: white spaces] */
		rc = SXRET_OK;
	} else {
		/* Process XML Tokens */
		rc = ProcessXML(&(*pParser), &sTagStack, &sWorker);
		if((pParser->nFlags & SXML_ENABLE_NAMESPACE) && SySetUsed(&sTagStack) > 0) {
			SyXMLRawStrNS *pEntry;
			/*
			 * Tags left open (malformed input or abort) still own namespace
			 * registrations. Unlink them the same way a closing tag would:
			 * freeing only the payload would leave entries in the namespace
			 * table pointing at released memory. A null token keeps the
			 * xNameSpaceEnd callback from being invoked.
			 */
			SySetResetCursor(&sTagStack);
			while(SySetGetNextEntry(&sTagStack, (void **)&pEntry) == SXRET_OK) {
				(void)XMLnsUnlink(pParser, pEntry, 0);
			}
		}
	}
	/* Clean-up the mess left behind */
	SySetRelease(&sWorker);
	SySetRelease(&sTagStack);
	/* Processing result */
	return rc;
}
/*
 * Release the resources owned by an XML parser: the lexer, the token
 * container and the namespace table, in that order.
 * Always returns SXRET_OK.
 */
PH7_PRIVATE sxi32 SyXMLParserRelease(SyXMLParser *pParser) {
	/* Lexer first, then the token container it was feeding */
	(void)SyLexRelease(&pParser->sLex);
	(void)SySetRelease(&pParser->sToken);
	/* Namespace table */
	(void)SyHashRelease(&pParser->hns);
	return SXRET_OK;
}