Treat whole file as P# source code. Fixes #24.

This commit is contained in:
2018-07-28 18:28:19 +02:00
parent 488fee5caf
commit ef2ea60a60
5 changed files with 29 additions and 339 deletions

View File

@@ -673,228 +673,3 @@ PH7_PRIVATE sxi32 PH7_TokenizePHP(const char *zInput, sxu32 nLen, sxu32 nLineSta
/* Tokenization result */
return rc;
}
/*
 * High level public tokenizer.
 * Split the input into raw tokens [i.e: HTML, XML, raw text...] and PHP chunks.
 * As described by the PHP language reference manual, everything outside of a pair
 * of opening and closing tags is ignored by the PHP parser, which allows PHP to be
 * embedded in all sorts of different documents:
 *   <p>This is going to be ignored.</p>
 *   <?php echo 'While this is going to be parsed.'; ?>
 *   <p>This will also be ignored.</p>
 * Tags recognized by this routine:
 *   Opening tag: '<?' optionally followed by 'php' (compared with SyStrnicmp()).
 *   Closing tag: '?>'.
 * Other tag styles mentioned by the PHP manual (<script language="php">, ASP style
 * tags) are NOT handled here.
 * For each opening tag found, two tokens may be appended to pOut:
 *   1. A PH7_TOKEN_RAW token holding the text that precedes the opening tag.
 *      It is always stored, even when empty. The trailing text that follows the
 *      last closing tag is stored the same way.
 *   2. A PH7_TOKEN_PHP token holding the code between the tags, without the leading
 *      white spaces and right trimmed. It is stored only if the chunk is not empty.
 * While delimiting a PHP chunk:
 *   - Inline and block comments are skipped so that a '?>' sequence inside a comment
 *     does not terminate the chunk. A closing tag that immediately follows a comment
 *     is honored.
 *   - A heredoc/nowdoc introducer ('<<<' followed by an optional quote and an
 *     identifier) suspends the closing tag lookup until a line starting with the same
 *     identifier is seen.
 *   - An unterminated chunk extends up to the end of the input.
 * Each token records the line number (starting from 1) on which it begins.
 * All look-ahead reads are bounded by the end of the input, so the buffer does not
 * need to be nul terminated.
 * Parameters:
 *   zInput  Input buffer.
 *   nLen    Input length in bytes.
 *   pOut    Container receiving the SyToken instances.
 * Return SXRET_OK on success. Any other return value is the error code reported
 * by SySetPut().
 */
PH7_PRIVATE sxi32 PH7_TokenizeRawText(const char *zInput, sxu32 nLen, SySet *pOut) {
	const char *zEnd = &zInput[nLen];
	const char *zIn = zInput;
	const char *zCur, *zCurEnd;
	SyString sCtag = { 0, 0 }; /* Closing tag */
	SyToken sToken;
	SyString sDoc;             /* Identifier of the heredoc/nowdoc being scanned */
	sxu32 nLine;
	sxi32 iNest;               /* > 0 while inside a heredoc/nowdoc body */
	sxi32 rc;
	/* Tokenize the input into PHP tokens and raw tokens */
	nLine = 1;
	zCur = zCurEnd = 0; /* Prevent compiler warning */
	sToken.pUserData = 0;
	iNest = 0;
	sDoc.nByte = 0;
	sDoc.zString = ""; /* cc warning */
	for(;;) {
		if(zIn >= zEnd) {
			/* End of input reached */
			break;
		}
		/* Collect the raw text up to the next opening tag */
		sToken.nLine = nLine;
		zCur = zIn;
		zCurEnd = 0;
		while(zIn < zEnd) {
			if(zIn[0] == '<') {
				const char *zTmp = zIn; /* End of raw input marker */
				zIn++;
				if(zIn < zEnd && zIn[0] == '?') {
					zIn++;
					if((sxu32)(zEnd - zIn) >= sizeof("php") - 1 && SyStrnicmp(zIn, "php", sizeof("php") - 1) == 0) {
						/* opening tag: <?php */
						zIn += sizeof("php") - 1;
					}
					/* Look for the closing tag '?>' */
					SyStringInitFromBuf(&sCtag, "?>", sizeof("?>") - 1);
					zCurEnd = zTmp;
					break;
				}
				/* Lone '<': part of the raw text, the next byte is examined on the next turn */
			} else {
				if(zIn[0] == '\n') {
					nLine++;
				}
				zIn++;
			}
		} /* While(zIn < zEnd) */
		if(zCurEnd == 0) {
			/* No opening tag found, the raw text extends up to the end of the input */
			zCurEnd = zIn;
		}
		/* Save the raw token */
		SyStringInitFromBuf(&sToken.sData, zCur, zCurEnd - zCur);
		sToken.nType = PH7_TOKEN_RAW;
		rc = SySetPut(&(*pOut), (const void *)&sToken);
		if(rc != SXRET_OK) {
			return rc;
		}
		if(zIn >= zEnd) {
			break;
		}
		/* Ignore leading white space */
		while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
			if(zIn[0] == '\n') {
				nLine++;
			}
			zIn++;
		}
		/* Delimit the PHP chunk */
		sToken.nLine = nLine;
		zCur = zIn;
		while((sxu32)(zEnd - zIn) >= sCtag.nByte) {
			const char *zPtr;
			if(iNest < 1 && SyMemcmp(zIn, sCtag.zString, sCtag.nByte) == 0) {
				/* Closing tag found outside of any heredoc/nowdoc body */
				break;
			}
			if((sxu32)(zEnd - zIn) >= sizeof("/*") - 1 && zIn[0] == '/' && (zIn[1] == '*' || zIn[1] == '/')) {
				zIn += sizeof("/*") - 1;
				if(zIn[-1] == '/') {
					/* Inline comment: stop on the newline so that it gets counted below */
					while(zIn < zEnd && zIn[0] != '\n') {
						zIn++;
					}
				} else {
					/* Block comment */
					while((sxu32)(zEnd - zIn) >= sizeof("*/") - 1) {
						if(zIn[0] == '*' && zIn[1] == '/') {
							zIn += 2;
							break;
						}
						if(zIn[0] == '\n') {
							nLine++;
						}
						zIn++;
					}
				}
				/*
				 * Restart the loop: this re-validates the number of remaining bytes
				 * and makes sure a closing tag or another comment that immediately
				 * follows this one is not stepped over.
				 */
				continue;
			}
			/* At this point at least sCtag.nByte bytes are available, zIn[0] is readable */
			if(zIn[0] == '\n') {
				nLine++;
				if(iNest > 0) {
					/* Inside a heredoc/nowdoc body: check if this line starts with the closing identifier */
					zIn++;
					while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') {
						zIn++;
					}
					zPtr = zIn;
					while(zIn < zEnd) {
						if((unsigned char)zIn[0] >= 0xc0) {
							/* UTF-8 stream */
							zIn++;
							SX_JMP_UTF8(zIn, zEnd);
						} else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') {
							break;
						} else {
							zIn++;
						}
					}
					if((sxu32)(zIn - zPtr) == sDoc.nByte && SyMemcmp(sDoc.zString, zPtr, sDoc.nByte) == 0) {
						/* End of the heredoc/nowdoc body */
						iNest = 0;
					}
					continue;
				}
			} else if((sxu32)(zEnd - zIn) >= sizeof("<<<") && zIn[0] == '<' && zIn[1] == '<' && zIn[2] == '<' && iNest < 1) {
				/*
				 * Heredoc/nowdoc introducer. sizeof("<<<") counts the terminating nul,
				 * so at least one byte must follow the operator.
				 */
				zIn += sizeof("<<<") - 1;
				while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') {
					zIn++;
				}
				if(zIn < zEnd && (zIn[0] == '"' || zIn[0] == '\'')) {
					/* Quoted identifier */
					zIn++;
				}
				zPtr = zIn;
				while(zIn < zEnd) {
					if((unsigned char)zIn[0] >= 0xc0) {
						/* UTF-8 stream */
						zIn++;
						SX_JMP_UTF8(zIn, zEnd);
					} else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') {
						break;
					} else {
						zIn++;
					}
				}
				/* Remember the identifier that terminates the body */
				SyStringInitFromBuf(&sDoc, zPtr, zIn - zPtr);
				SyStringFullTrim(&sDoc);
				if(sDoc.nByte > 0) {
					iNest++;
				}
				continue;
			}
			zIn++;
			if(zIn >= zEnd) {
				break;
			}
		}
		if((sxu32)(zEnd - zIn) < sCtag.nByte) {
			/* No room left for a closing tag, the chunk extends up to the end of the input */
			zIn = zEnd;
		}
		if(zCur < zIn) {
			/* Save the PHP chunk for later processing */
			sToken.nType = PH7_TOKEN_PHP;
			SyStringInitFromBuf(&sToken.sData, zCur, zIn - zCur);
			SyStringRightTrim(&sToken.sData); /* Trim trailing white spaces */
			rc = SySetPut(&(*pOut), (const void *)&sToken);
			if(rc != SXRET_OK) {
				return rc;
			}
		}
		if(zIn < zEnd) {
			/* Jump the trailing closing tag */
			zIn += sCtag.nByte;
		}
	} /* For(;;) */
	return SXRET_OK;
}