Treat whole file as P# source code. Fixes #24.
This commit is contained in:
parent
488fee5caf
commit
ef2ea60a60
|
@ -1690,63 +1690,17 @@ static sxi32 PH7_CompileBreak(ph7_gen_state *pGen) {
|
|||
* failure.
|
||||
*/
|
||||
static sxi32 GenStateNextChunk(ph7_gen_state *pGen) {
|
||||
ph7_value *pRawObj; /* Raw chunk [i.e: HTML,XML...] */
|
||||
sxu32 nRawObj;
|
||||
sxu32 nObjIdx;
|
||||
/* Consume raw chunks verbatim without any processing until we get
|
||||
* a PHP block.
|
||||
*/
|
||||
Consume:
|
||||
nRawObj = nObjIdx = 0;
|
||||
while(pGen->pRawIn < pGen->pRawEnd && pGen->pRawIn->nType != PH7_TOKEN_PHP) {
|
||||
pRawObj = PH7_ReserveConstObj(pGen->pVm, &nObjIdx);
|
||||
if(pRawObj == 0) {
|
||||
PH7_GenCompileError(pGen, E_ERROR, 1, "Fatal, PH7 engine is running out of memory");
|
||||
return SXERR_ABORT;
|
||||
}
|
||||
/* Mark as constant and emit the load constant instruction */
|
||||
PH7_MemObjInitFromString(pGen->pVm, pRawObj, &pGen->pRawIn->sData);
|
||||
PH7_VmEmitInstr(pGen->pVm, PH7_OP_LOADC, 0, nObjIdx, 0, 0);
|
||||
++nRawObj;
|
||||
pGen->pRawIn++; /* Next chunk */
|
||||
}
|
||||
if(nRawObj > 0) {
|
||||
/* Emit the consume instruction */
|
||||
PH7_VmEmitInstr(pGen->pVm, PH7_OP_CONSUME, nRawObj, 0, 0, 0);
|
||||
}
|
||||
if(pGen->pRawIn < pGen->pRawEnd) {
|
||||
SySet *pTokenSet = pGen->pTokenSet;
|
||||
/* Reset the token set */
|
||||
SySetReset(pTokenSet);
|
||||
/* Tokenize input */
|
||||
PH7_TokenizePHP(SyStringData(&pGen->pRawIn->sData), SyStringLength(&pGen->pRawIn->sData),
|
||||
pGen->pRawIn->nLine, pTokenSet);
|
||||
/* Point to the fresh token stream */
|
||||
pGen->pIn = (SyToken *)SySetBasePtr(pTokenSet);
|
||||
pGen->pEnd = &pGen->pIn[SySetUsed(pTokenSet)];
|
||||
/* Advance the stream cursor */
|
||||
pGen->pRawIn++;
|
||||
/* TICKET 1433-011 */
|
||||
if(pGen->pIn < pGen->pEnd && (pGen->pIn->nType & PH7_TK_EQUAL)) {
|
||||
static const sxu32 nKeyID = PH7_TKWRD_ECHO;
|
||||
sxi32 rc;
|
||||
/* Refer to TICKET 1433-009 */
|
||||
pGen->pIn->nType = PH7_TK_KEYWORD;
|
||||
pGen->pIn->pUserData = SX_INT_TO_PTR(nKeyID);
|
||||
SyStringInitFromBuf(&pGen->pIn->sData, "echo", sizeof("echo") - 1);
|
||||
rc = PH7_CompileExpr(pGen, 0, 0);
|
||||
if(rc == SXERR_ABORT) {
|
||||
return SXERR_ABORT;
|
||||
} else if(rc != SXERR_EMPTY) {
|
||||
PH7_VmEmitInstr(pGen->pVm, PH7_OP_POP, 1, 0, 0, 0);
|
||||
}
|
||||
goto Consume;
|
||||
}
|
||||
} else {
|
||||
/* No more chunks to process */
|
||||
pGen->pIn = pGen->pEnd;
|
||||
return SXERR_EOF;
|
||||
}
|
||||
SySet *pTokenSet = pGen->pTokenSet;
|
||||
/* Reset the token set */
|
||||
SySetReset(pTokenSet);
|
||||
/* Tokenize input */
|
||||
PH7_TokenizePHP(SyStringData(&pGen->pRawIn->sData), SyStringLength(&pGen->pRawIn->sData),
|
||||
pGen->pRawIn->nLine, pTokenSet);
|
||||
/* Point to the fresh token stream */
|
||||
pGen->pIn = (SyToken *)SySetBasePtr(pTokenSet);
|
||||
pGen->pEnd = &pGen->pIn[SySetUsed(pTokenSet)];
|
||||
/* Advance the stream cursor */
|
||||
pGen->pRawIn++;
|
||||
return SXRET_OK;
|
||||
}
|
||||
/*
|
||||
|
@ -5846,22 +5800,14 @@ PH7_PRIVATE sxi32 PH7_CompileScript(
|
|||
SySetInit(&aPhpToken, &pVm->sAllocator, sizeof(SyToken));
|
||||
SySetAlloc(&aPhpToken, 0xc0);
|
||||
is_expr = 0;
|
||||
if(iFlags & PH7_PHP_ONLY) {
|
||||
SyToken sTmp;
|
||||
/* PHP only: -*/
|
||||
sTmp.nLine = 1;
|
||||
sTmp.nType = PH7_TOKEN_PHP;
|
||||
sTmp.pUserData = 0;
|
||||
SyStringDupPtr(&sTmp.sData, pScript);
|
||||
SySetPut(&aRawToken, (const void *)&sTmp);
|
||||
if(iFlags & PH7_PHP_EXPR) {
|
||||
/* A simple PHP expression */
|
||||
is_expr = 1;
|
||||
}
|
||||
} else {
|
||||
/* Tokenize raw text */
|
||||
SySetAlloc(&aRawToken, 32);
|
||||
PH7_TokenizeRawText(pScript->zString, pScript->nByte, &aRawToken);
|
||||
SyToken sTmp;
|
||||
sTmp.nLine = 1;
|
||||
sTmp.pUserData = 0;
|
||||
SyStringDupPtr(&sTmp.sData, pScript);
|
||||
SySetPut(&aRawToken, (const void *)&sTmp);
|
||||
if(iFlags & PH7_PHP_EXPR) {
|
||||
/* A simple PHP expression */
|
||||
is_expr = 1;
|
||||
}
|
||||
pCodeGen = &pVm->sCodeGen;
|
||||
/* Process high-level tokens */
|
||||
|
@ -5876,35 +5822,13 @@ PH7_PRIVATE sxi32 PH7_CompileScript(
|
|||
nObjIdx = 0;
|
||||
/* Start the compilation process */
|
||||
for(;;) {
|
||||
/* Compile PHP block of code */
|
||||
if(pCodeGen->pRawIn >= pCodeGen->pRawEnd) {
|
||||
break; /* No more tokens to process */
|
||||
}
|
||||
if(pCodeGen->pRawIn->nType & PH7_TOKEN_PHP) {
|
||||
/* Compile the PHP chunk */
|
||||
rc = PH7_CompilePHP(pCodeGen, &aPhpToken, FALSE);
|
||||
if(rc == SXERR_ABORT) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* Raw chunk: [i.e: HTML, XML, etc.] */
|
||||
nRawObj = 0;
|
||||
while((pCodeGen->pRawIn < pCodeGen->pRawEnd) && (pCodeGen->pRawIn->nType != PH7_TOKEN_PHP)) {
|
||||
/* Consume the raw chunk without any processing */
|
||||
pRawObj = PH7_ReserveConstObj(&(*pVm), &nObjIdx);
|
||||
if(pRawObj == 0) {
|
||||
rc = SXERR_MEM;
|
||||
break;
|
||||
}
|
||||
/* Mark as constant and emit the load constant instruction */
|
||||
PH7_MemObjInitFromString(pVm, pRawObj, &pCodeGen->pRawIn->sData);
|
||||
PH7_VmEmitInstr(&(*pVm), PH7_OP_LOADC, 0, nObjIdx, 0, 0);
|
||||
++nRawObj;
|
||||
pCodeGen->pRawIn++; /* Next chunk */
|
||||
}
|
||||
if(nRawObj > 0) {
|
||||
/* Emit the consume instruction */
|
||||
PH7_VmEmitInstr(&(*pVm), PH7_OP_CONSUME, nRawObj, 0, 0, 0);
|
||||
rc = PH7_CompilePHP(pCodeGen, &aPhpToken, FALSE);
|
||||
if(rc == SXERR_ABORT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
cleanup:
|
||||
|
|
225
engine/lexer.c
225
engine/lexer.c
|
@ -673,228 +673,3 @@ PH7_PRIVATE sxi32 PH7_TokenizePHP(const char *zInput, sxu32 nLen, sxu32 nLineSta
|
|||
/* Tokenization result */
|
||||
return rc;
|
||||
}
|
||||
/*
|
||||
* High level public tokenizer.
|
||||
* Tokenize the input into PHP tokens and raw tokens [e.g. HTML, XML, raw text...].
|
||||
* According to the PHP language reference manual:
|
||||
* When PHP parses a file, it looks for opening and closing tags, which tell PHP
|
||||
* to start and stop interpreting the code between them. Parsing in this manner allows
|
||||
* PHP to be embedded in all sorts of different documents, as everything outside of a pair
|
||||
* of opening and closing tags is ignored by the PHP parser. Most of the time you will see
|
||||
* PHP embedded in HTML documents, as in this example.
|
||||
* <?php echo 'While this is going to be parsed.'; ?>
|
||||
* <p>This will also be ignored.</p>
|
||||
* You can also use more advanced structures:
|
||||
* Example #1 Advanced escaping
|
||||
* <?php
|
||||
* if ($expression) {
|
||||
* ?>
|
||||
* <strong>This is true.</strong>
|
||||
* <?php
|
||||
* } else {
|
||||
* ?>
|
||||
* <strong>This is false.</strong>
|
||||
* <?php
|
||||
* }
|
||||
* ?>
|
||||
* This works as expected, because when PHP hits the ?> closing tags, it simply starts outputting
|
||||
* whatever it finds (except for an immediately following newline - see instruction separation) until it hits
|
||||
* another opening tag. The example given here is contrived, of course, but for outputting large blocks of text,
|
||||
* dropping out of PHP parsing mode is generally more efficient than sending all of the text through echo() or print().
|
||||
* There are four different pairs of opening and closing tags which can be used in PHP. Three of those, <?php ?>
|
||||
* <script language="php"> </script> and <? ?> are always available. The other two are short tags and ASP style
|
||||
* tags, and can be turned on and off from the php.ini configuration file. As such, while some people find short tags
|
||||
* and ASP style tags convenient, they are less portable, and generally not recommended.
|
||||
* Note:
|
||||
* Also note that if you are embedding PHP within XML or XHTML you will need to use the <?php ?> tags to remain
|
||||
* compliant with standards.
|
||||
* Example #2 PHP Opening and Closing Tags
|
||||
* 1. <?php echo 'if you want to serve XHTML or XML documents, do it like this'; ?>
|
||||
* 2. <script language="php">
|
||||
* echo 'some editors (like FrontPage) don\'t
|
||||
* like processing instructions';
|
||||
* </script>
|
||||
*
|
||||
* 3. <? echo 'this is the simplest, an SGML processing instruction'; ?>
|
||||
* <?= expression ?> This is a shortcut for "<? echo expression ?>"
|
||||
*/
|
||||
PH7_PRIVATE sxi32 PH7_TokenizeRawText(const char *zInput, sxu32 nLen, SySet *pOut) {
|
||||
const char *zEnd = &zInput[nLen];
|
||||
const char *zIn = zInput;
|
||||
const char *zCur, *zCurEnd;
|
||||
SyString sCtag = { 0, 0 }; /* Closing tag */
|
||||
SyToken sToken;
|
||||
SyString sDoc;
|
||||
sxu32 nLine;
|
||||
sxi32 iNest;
|
||||
sxi32 rc;
|
||||
/* Tokenize the input into PHP tokens and raw tokens */
|
||||
nLine = 1;
|
||||
zCur = zCurEnd = 0; /* Prevent compiler warning */
|
||||
sToken.pUserData = 0;
|
||||
iNest = 0;
|
||||
sDoc.nByte = 0;
|
||||
sDoc.zString = ""; /* cc warning */
|
||||
for(;;) {
|
||||
if(zIn >= zEnd) {
|
||||
/* End of input reached */
|
||||
break;
|
||||
}
|
||||
sToken.nLine = nLine;
|
||||
zCur = zIn;
|
||||
zCurEnd = 0;
|
||||
while(zIn < zEnd) {
|
||||
if(zIn[0] == '<') {
|
||||
const char *zTmp = zIn; /* End of raw input marker */
|
||||
zIn++;
|
||||
if(zIn < zEnd) {
|
||||
if(zIn[0] == '?') {
|
||||
zIn++;
|
||||
if((sxu32)(zEnd - zIn) >= sizeof("php") - 1 && SyStrnicmp(zIn, "php", sizeof("php") - 1) == 0) {
|
||||
/* opening tag: <?php */
|
||||
zIn += sizeof("php") - 1;
|
||||
}
|
||||
/* Look for the closing tag '?>' */
|
||||
SyStringInitFromBuf(&sCtag, "?>", sizeof("?>") - 1);
|
||||
zCurEnd = zTmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(zIn[0] == '\n') {
|
||||
nLine++;
|
||||
}
|
||||
zIn++;
|
||||
}
|
||||
} /* While(zIn < zEnd) */
|
||||
if(zCurEnd == 0) {
|
||||
zCurEnd = zIn;
|
||||
}
|
||||
/* Save the raw token */
|
||||
SyStringInitFromBuf(&sToken.sData, zCur, zCurEnd - zCur);
|
||||
sToken.nType = PH7_TOKEN_RAW;
|
||||
rc = SySetPut(&(*pOut), (const void *)&sToken);
|
||||
if(rc != SXRET_OK) {
|
||||
return rc;
|
||||
}
|
||||
if(zIn >= zEnd) {
|
||||
break;
|
||||
}
|
||||
/* Ignore leading white space */
|
||||
while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) {
|
||||
if(zIn[0] == '\n') {
|
||||
nLine++;
|
||||
}
|
||||
zIn++;
|
||||
}
|
||||
/* Delimit the PHP chunk */
|
||||
sToken.nLine = nLine;
|
||||
zCur = zIn;
|
||||
while((sxu32)(zEnd - zIn) >= sCtag.nByte) {
|
||||
const char *zPtr;
|
||||
if(SyMemcmp(zIn, sCtag.zString, sCtag.nByte) == 0 && iNest < 1) {
|
||||
break;
|
||||
}
|
||||
for(;;) {
|
||||
if(zIn[0] != '/' || (zIn[1] != '*' && zIn[1] != '/') /* && sCtag.nByte >= 2 */) {
|
||||
break;
|
||||
}
|
||||
zIn += 2;
|
||||
if(zIn[-1] == '/') {
|
||||
/* Inline comment */
|
||||
while(zIn < zEnd && zIn[0] != '\n') {
|
||||
zIn++;
|
||||
}
|
||||
if(zIn >= zEnd) {
|
||||
zIn--;
|
||||
}
|
||||
} else {
|
||||
/* Block comment */
|
||||
while((sxu32)(zEnd - zIn) >= sizeof("*/") - 1) {
|
||||
if(zIn[0] == '*' && zIn[1] == '/') {
|
||||
zIn += 2;
|
||||
break;
|
||||
}
|
||||
if(zIn[0] == '\n') {
|
||||
nLine++;
|
||||
}
|
||||
zIn++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(zIn[0] == '\n') {
|
||||
nLine++;
|
||||
if(iNest > 0) {
|
||||
zIn++;
|
||||
while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') {
|
||||
zIn++;
|
||||
}
|
||||
zPtr = zIn;
|
||||
while(zIn < zEnd) {
|
||||
if((unsigned char)zIn[0] >= 0xc0) {
|
||||
/* UTF-8 stream */
|
||||
zIn++;
|
||||
SX_JMP_UTF8(zIn, zEnd);
|
||||
} else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') {
|
||||
break;
|
||||
} else {
|
||||
zIn++;
|
||||
}
|
||||
}
|
||||
if((sxu32)(zIn - zPtr) == sDoc.nByte && SyMemcmp(sDoc.zString, zPtr, sDoc.nByte) == 0) {
|
||||
iNest = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else if((sxu32)(zEnd - zIn) >= sizeof("<<<") && zIn[0] == '<' && zIn[1] == '<' && zIn[2] == '<' && iNest < 1) {
|
||||
zIn += sizeof("<<<") - 1;
|
||||
while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') {
|
||||
zIn++;
|
||||
}
|
||||
if(zIn[0] == '"' || zIn[0] == '\'') {
|
||||
zIn++;
|
||||
}
|
||||
zPtr = zIn;
|
||||
while(zIn < zEnd) {
|
||||
if((unsigned char)zIn[0] >= 0xc0) {
|
||||
/* UTF-8 stream */
|
||||
zIn++;
|
||||
SX_JMP_UTF8(zIn, zEnd);
|
||||
} else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') {
|
||||
break;
|
||||
} else {
|
||||
zIn++;
|
||||
}
|
||||
}
|
||||
SyStringInitFromBuf(&sDoc, zPtr, zIn - zPtr);
|
||||
SyStringFullTrim(&sDoc);
|
||||
if(sDoc.nByte > 0) {
|
||||
iNest++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zIn++;
|
||||
if(zIn >= zEnd) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if((sxu32)(zEnd - zIn) < sCtag.nByte) {
|
||||
zIn = zEnd;
|
||||
}
|
||||
if(zCur < zIn) {
|
||||
/* Save the PHP chunk for later processing */
|
||||
sToken.nType = PH7_TOKEN_PHP;
|
||||
SyStringInitFromBuf(&sToken.sData, zCur, zIn - zCur);
|
||||
SyStringRightTrim(&sToken.sData); /* Trim trailing white spaces */
|
||||
rc = SySetPut(&(*pOut), (const void *)&sToken);
|
||||
if(rc != SXRET_OK) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
if(zIn < zEnd) {
|
||||
/* Jump the trailing closing tag */
|
||||
zIn += sCtag.nByte;
|
||||
}
|
||||
} /* For(;;) */
|
||||
return SXRET_OK;
|
||||
}
|
||||
|
|
|
@ -1312,7 +1312,7 @@ PH7_PRIVATE sxi32 PH7_VmInit(
|
|||
pVm->nMagic = PH7_VM_INIT;
|
||||
SyStringInitFromBuf(&sBuiltin, PH7_BUILTIN_LIB, sizeof(PH7_BUILTIN_LIB) - 1);
|
||||
/* Compile the built-in library */
|
||||
VmEvalChunk(&(*pVm), 0, &sBuiltin, PH7_PHP_ONLY, FALSE);
|
||||
VmEvalChunk(&(*pVm), 0, &sBuiltin, PH7_PHP_CODE, FALSE);
|
||||
/* Reset the code generator */
|
||||
PH7_ResetCodeGenerator(&(*pVm), pEngine->xConf.xErr, pEngine->xConf.pErrData);
|
||||
return SXRET_OK;
|
||||
|
@ -9108,7 +9108,7 @@ static int vm_builtin_assert(ph7_context *pCtx, int nArg, ph7_value **apArg) {
|
|||
SyString sChunk;
|
||||
SyStringInitFromBuf(&sChunk, SyBlobData(&pAssert->sBlob), SyBlobLength(&pAssert->sBlob));
|
||||
if(sChunk.nByte > 0) {
|
||||
VmEvalChunk(pVm, pCtx, &sChunk, PH7_PHP_ONLY | PH7_PHP_EXPR, FALSE);
|
||||
VmEvalChunk(pVm, pCtx, &sChunk, PH7_PHP_CODE | PH7_PHP_EXPR, FALSE);
|
||||
/* Extract evaluation result */
|
||||
iResult = ph7_value_to_bool(pCtx->pRet);
|
||||
} else {
|
||||
|
@ -10615,7 +10615,7 @@ static int vm_builtin_eval(ph7_context *pCtx, int nArg, ph7_value **apArg) {
|
|||
return SXRET_OK;
|
||||
}
|
||||
/* Eval the chunk */
|
||||
VmEvalChunk(pCtx->pVm, &(*pCtx), &sChunk, PH7_PHP_ONLY, FALSE);
|
||||
VmEvalChunk(pCtx->pVm, &(*pCtx), &sChunk, PH7_PHP_CODE, FALSE);
|
||||
return SXRET_OK;
|
||||
}
|
||||
/*
|
||||
|
@ -10738,7 +10738,7 @@ static sxi32 VmExecIncludedFile(
|
|||
SyString sScript;
|
||||
/* Compile and execute the script */
|
||||
SyStringInitFromBuf(&sScript, SyBlobData(&sContents), SyBlobLength(&sContents));
|
||||
VmEvalChunk(pCtx->pVm, &(*pCtx), &sScript, 0, TRUE);
|
||||
VmEvalChunk(pCtx->pVm, &(*pCtx), &sScript, PH7_PHP_CODE, TRUE);
|
||||
}
|
||||
}
|
||||
/* Pop from the set of included file */
|
||||
|
|
|
@ -447,12 +447,8 @@ typedef sxi64 ph7_int64;
|
|||
* processing the input.
|
||||
* Refer to the official documentation for additional information.
|
||||
*/
|
||||
#define PH7_PHP_ONLY 0x01 /* If this flag is set then the code to compile is assumed
|
||||
* to be plain PHP only. That is, there is no need to delimit
|
||||
* the PHP code using the standard tags such as <?php ?> or <? ?>.
|
||||
* Everything will pass through the PH7 compiler.
|
||||
*/
|
||||
#define PH7_PHP_EXPR 0x02 /* This flag is reserved for future use. */
|
||||
#define PH7_PHP_CODE 0x01 /* PHP Block of Code */
|
||||
#define PH7_PHP_EXPR 0x02 /* PHP Simple Expression */
|
||||
/*
|
||||
* Call Context Error Message Severity Level.
|
||||
*
|
||||
|
|
|
@ -1440,11 +1440,6 @@ enum ph7_expr_id {
|
|||
EXPR_OP_SHR_ASSIGN, /* Combined operator: >>= */
|
||||
EXPR_OP_COMMA /* Comma expression */
|
||||
};
|
||||
/*
|
||||
* Very high level tokens.
|
||||
*/
|
||||
#define PH7_TOKEN_RAW 0x001 /* Raw text [i.e: HTML,XML...] */
|
||||
#define PH7_TOKEN_PHP 0x002 /* PHP chunk */
|
||||
/*
|
||||
* Lexer token codes
|
||||
* The following set of constants are the tokens recognized
|
||||
|
|
Loading…
Reference in New Issue
Block a user