diff --git a/engine/compiler.c b/engine/compiler.c index b9b3e54..64a8b36 100644 --- a/engine/compiler.c +++ b/engine/compiler.c @@ -1690,63 +1690,17 @@ static sxi32 PH7_CompileBreak(ph7_gen_state *pGen) { * failure. */ static sxi32 GenStateNextChunk(ph7_gen_state *pGen) { - ph7_value *pRawObj; /* Raw chunk [i.e: HTML,XML...] */ - sxu32 nRawObj; - sxu32 nObjIdx; - /* Consume raw chunks verbatim without any processing until we get - * a PHP block. - */ -Consume: - nRawObj = nObjIdx = 0; - while(pGen->pRawIn < pGen->pRawEnd && pGen->pRawIn->nType != PH7_TOKEN_PHP) { - pRawObj = PH7_ReserveConstObj(pGen->pVm, &nObjIdx); - if(pRawObj == 0) { - PH7_GenCompileError(pGen, E_ERROR, 1, "Fatal, PH7 engine is running out of memory"); - return SXERR_ABORT; - } - /* Mark as constant and emit the load constant instruction */ - PH7_MemObjInitFromString(pGen->pVm, pRawObj, &pGen->pRawIn->sData); - PH7_VmEmitInstr(pGen->pVm, PH7_OP_LOADC, 0, nObjIdx, 0, 0); - ++nRawObj; - pGen->pRawIn++; /* Next chunk */ - } - if(nRawObj > 0) { - /* Emit the consume instruction */ - PH7_VmEmitInstr(pGen->pVm, PH7_OP_CONSUME, nRawObj, 0, 0, 0); - } - if(pGen->pRawIn < pGen->pRawEnd) { - SySet *pTokenSet = pGen->pTokenSet; - /* Reset the token set */ - SySetReset(pTokenSet); - /* Tokenize input */ - PH7_TokenizePHP(SyStringData(&pGen->pRawIn->sData), SyStringLength(&pGen->pRawIn->sData), - pGen->pRawIn->nLine, pTokenSet); - /* Point to the fresh token stream */ - pGen->pIn = (SyToken *)SySetBasePtr(pTokenSet); - pGen->pEnd = &pGen->pIn[SySetUsed(pTokenSet)]; - /* Advance the stream cursor */ - pGen->pRawIn++; - /* TICKET 1433-011 */ - if(pGen->pIn < pGen->pEnd && (pGen->pIn->nType & PH7_TK_EQUAL)) { - static const sxu32 nKeyID = PH7_TKWRD_ECHO; - sxi32 rc; - /* Refer to TICKET 1433-009 */ - pGen->pIn->nType = PH7_TK_KEYWORD; - pGen->pIn->pUserData = SX_INT_TO_PTR(nKeyID); - SyStringInitFromBuf(&pGen->pIn->sData, "echo", sizeof("echo") - 1); - rc = PH7_CompileExpr(pGen, 0, 0); - if(rc == SXERR_ABORT) { - return SXERR_ABORT; - } else if(rc != SXERR_EMPTY) { - PH7_VmEmitInstr(pGen->pVm, PH7_OP_POP, 1, 0, 0, 0); - } - goto Consume; - } - } else { - /* No more chunks to process */ - pGen->pIn = pGen->pEnd; - return SXERR_EOF; - } + SySet *pTokenSet = pGen->pTokenSet; + /* Reset the token set */ + SySetReset(pTokenSet); + /* Tokenize input */ + PH7_TokenizePHP(SyStringData(&pGen->pRawIn->sData), SyStringLength(&pGen->pRawIn->sData), + pGen->pRawIn->nLine, pTokenSet); + /* Point to the fresh token stream */ + pGen->pIn = (SyToken *)SySetBasePtr(pTokenSet); + pGen->pEnd = &pGen->pIn[SySetUsed(pTokenSet)]; + /* Advance the stream cursor */ + pGen->pRawIn++; return SXRET_OK; } /* @@ -5846,22 +5800,14 @@ PH7_PRIVATE sxi32 PH7_CompileScript( SySetInit(&aPhpToken, &pVm->sAllocator, sizeof(SyToken)); SySetAlloc(&aPhpToken, 0xc0); is_expr = 0; - if(iFlags & PH7_PHP_ONLY) { - SyToken sTmp; - /* PHP only: -*/ - sTmp.nLine = 1; - sTmp.nType = PH7_TOKEN_PHP; - sTmp.pUserData = 0; - SyStringDupPtr(&sTmp.sData, pScript); - SySetPut(&aRawToken, (const void *)&sTmp); - if(iFlags & PH7_PHP_EXPR) { - /* A simple PHP expression */ - is_expr = 1; - } - } else { - /* Tokenize raw text */ - SySetAlloc(&aRawToken, 32); - PH7_TokenizeRawText(pScript->zString, pScript->nByte, &aRawToken); + SyToken sTmp; + sTmp.nLine = 1; + sTmp.pUserData = 0; + SyStringDupPtr(&sTmp.sData, pScript); + SySetPut(&aRawToken, (const void *)&sTmp); + if(iFlags & PH7_PHP_EXPR) { + /* A simple PHP expression */ + is_expr = 1; } pCodeGen = &pVm->sCodeGen; /* Process high-level tokens */ @@ -5876,35 +5822,13 @@ PH7_PRIVATE sxi32 PH7_CompileScript( nObjIdx = 0; /* Start the compilation process */ for(;;) { + /* Compile PHP block of code */ if(pCodeGen->pRawIn >= pCodeGen->pRawEnd) { break; /* No more tokens to process */ } - if(pCodeGen->pRawIn->nType & PH7_TOKEN_PHP) { - /* Compile the PHP chunk */ - rc = PH7_CompilePHP(pCodeGen, &aPhpToken, FALSE); - if(rc == SXERR_ABORT) { - break; - } - continue; - } - /* Raw chunk: [i.e: HTML, XML, etc.] */ - nRawObj = 0; - while((pCodeGen->pRawIn < pCodeGen->pRawEnd) && (pCodeGen->pRawIn->nType != PH7_TOKEN_PHP)) { - /* Consume the raw chunk without any processing */ - pRawObj = PH7_ReserveConstObj(&(*pVm), &nObjIdx); - if(pRawObj == 0) { - rc = SXERR_MEM; - break; - } - /* Mark as constant and emit the load constant instruction */ - PH7_MemObjInitFromString(pVm, pRawObj, &pCodeGen->pRawIn->sData); - PH7_VmEmitInstr(&(*pVm), PH7_OP_LOADC, 0, nObjIdx, 0, 0); - ++nRawObj; - pCodeGen->pRawIn++; /* Next chunk */ - } - if(nRawObj > 0) { - /* Emit the consume instruction */ - PH7_VmEmitInstr(&(*pVm), PH7_OP_CONSUME, nRawObj, 0, 0, 0); + rc = PH7_CompilePHP(pCodeGen, &aPhpToken, FALSE); + if(rc == SXERR_ABORT) { + break; } } cleanup: diff --git a/engine/lexer.c b/engine/lexer.c index 3aed40a..db95267 100644 --- a/engine/lexer.c +++ b/engine/lexer.c @@ -673,228 +673,3 @@ PH7_PRIVATE sxi32 PH7_TokenizePHP(const char *zInput, sxu32 nLen, sxu32 nLineSta /* Tokenization result */ return rc; } -/* - * High level public tokenizer. - * Tokenize the input into PHP tokens and raw tokens [i.e: HTML,XML,Raw text...]. - * According to the PHP language reference manual - * When PHP parses a file, it looks for opening and closing tags, which tell PHP - * to start and stop interpreting the code between them. Parsing in this manner allows - * PHP to be embedded in all sorts of different documents, as everything outside of a pair - * of opening and closing tags is ignored by the PHP parser. Most of the time you will see - * PHP embedded in HTML documents, as in this example. - * - *
This will also be ignored.
- * You can also use more advanced structures: - * Example #1 Advanced escaping - * - * This is true. - * - * This is false. - * - * This works as expected, because when PHP hits the ?> closing tags, it simply starts outputting - * whatever it finds (except for an immediately following newline - see instruction separation ) until it hits - * another opening tag. The example given here is contrived, of course, but for outputting large blocks of text - * dropping out of PHP parsing mode is generally more efficient than sending all of the text through echo() or print(). - * There are four different pairs of opening and closing tags which can be used in PHP. Three of those, - * and ?> are always available. The other two are short tags and ASP style - * tags, and can be turned on and off from the php.ini configuration file. As such, while some people find short tags - * and ASP style tags convenient, they are less portable, and generally not recommended. - * Note: - * Also note that if you are embedding PHP within XML or XHTML you will need to use the tags to remain - * compliant with standards. - * Example #2 PHP Opening and Closing Tags - * 1. - * 2. - * - * 3. echo 'this is the simplest, an SGML processing instruction'; ?> - * = expression ?> This is a shortcut for " echo expression ?>" - */ -PH7_PRIVATE sxi32 PH7_TokenizeRawText(const char *zInput, sxu32 nLen, SySet *pOut) { - const char *zEnd = &zInput[nLen]; - const char *zIn = zInput; - const char *zCur, *zCurEnd; - SyString sCtag = { 0, 0 }; /* Closing tag */ - SyToken sToken; - SyString sDoc; - sxu32 nLine; - sxi32 iNest; - sxi32 rc; - /* Tokenize the input into PHP tokens and raw tokens */ - nLine = 1; - zCur = zCurEnd = 0; /* Prevent compiler warning */ - sToken.pUserData = 0; - iNest = 0; - sDoc.nByte = 0; - sDoc.zString = ""; /* cc warning */ - for(;;) { - if(zIn >= zEnd) { - /* End of input reached */ - break; - } - sToken.nLine = nLine; - zCur = zIn; - zCurEnd = 0; - while(zIn < zEnd) { - if(zIn[0] == '<') { - const char *zTmp = zIn; /* End of raw input marker */ - zIn++; - if(zIn < zEnd) { - if(zIn[0] == '?') { - zIn++; - if((sxu32)(zEnd - zIn) >= sizeof("php") - 1 && SyStrnicmp(zIn, "php", sizeof("php") - 1) == 0) { - /* opening tag: ' */ - SyStringInitFromBuf(&sCtag, "?>", sizeof("?>") - 1); - zCurEnd = zTmp; - break; - } - } - } else { - if(zIn[0] == '\n') { - nLine++; - } - zIn++; - } - } /* While(zIn < zEnd) */ - if(zCurEnd == 0) { - zCurEnd = zIn; - } - /* Save the raw token */ - SyStringInitFromBuf(&sToken.sData, zCur, zCurEnd - zCur); - sToken.nType = PH7_TOKEN_RAW; - rc = SySetPut(&(*pOut), (const void *)&sToken); - if(rc != SXRET_OK) { - return rc; - } - if(zIn >= zEnd) { - break; - } - /* Ignore leading white space */ - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0])) { - if(zIn[0] == '\n') { - nLine++; - } - zIn++; - } - /* Delimit the PHP chunk */ - sToken.nLine = nLine; - zCur = zIn; - while((sxu32)(zEnd - zIn) >= sCtag.nByte) { - const char *zPtr; - if(SyMemcmp(zIn, sCtag.zString, sCtag.nByte) == 0 && iNest < 1) { - break; - } - for(;;) { - if(zIn[0] != '/' || (zIn[1] != '*' && zIn[1] != '/') /* && sCtag.nByte >= 2 */) { - break; - } - zIn += 2; - if(zIn[-1] == '/') { - /* Inline comment */ - while(zIn < zEnd && zIn[0] != '\n') { - zIn++; - } - if(zIn >= zEnd) { - zIn--; - } - } else { - /* Block comment */ - while((sxu32)(zEnd - zIn) >= sizeof("*/") - 1) { - if(zIn[0] == '*' && zIn[1] == '/') { - zIn += 2; - break; - } - if(zIn[0] == '\n') { - nLine++; - } - zIn++; - } - } - } - if(zIn[0] == '\n') { - nLine++; - if(iNest > 0) { - zIn++; - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') { - zIn++; - } - zPtr = zIn; - while(zIn < zEnd) { - if((unsigned char)zIn[0] >= 0xc0) { - /* UTF-8 stream */ - zIn++; - SX_JMP_UTF8(zIn, zEnd); - } else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') { - break; - } else { - zIn++; - } - } - if((sxu32)(zIn - zPtr) == sDoc.nByte && SyMemcmp(sDoc.zString, zPtr, sDoc.nByte) == 0) { - iNest = 0; - } - continue; - } - } else if((sxu32)(zEnd - zIn) >= sizeof("<<<") && zIn[0] == '<' && zIn[1] == '<' && zIn[2] == '<' && iNest < 1) { - zIn += sizeof("<<<") - 1; - while(zIn < zEnd && (unsigned char)zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n') { - zIn++; - } - if(zIn[0] == '"' || zIn[0] == '\'') { - zIn++; - } - zPtr = zIn; - while(zIn < zEnd) { - if((unsigned char)zIn[0] >= 0xc0) { - /* UTF-8 stream */ - zIn++; - SX_JMP_UTF8(zIn, zEnd); - } else if(!SyisAlphaNum(zIn[0]) && zIn[0] != '_') { - break; - } else { - zIn++; - } - } - SyStringInitFromBuf(&sDoc, zPtr, zIn - zPtr); - SyStringFullTrim(&sDoc); - if(sDoc.nByte > 0) { - iNest++; - } - continue; - } - zIn++; - if(zIn >= zEnd) { - break; - } - } - if((sxu32)(zEnd - zIn) < sCtag.nByte) { - zIn = zEnd; - } - if(zCur < zIn) { - /* Save the PHP chunk for later processing */ - sToken.nType = PH7_TOKEN_PHP; - SyStringInitFromBuf(&sToken.sData, zCur, zIn - zCur); - SyStringRightTrim(&sToken.sData); /* Trim trailing white spaces */ - rc = SySetPut(&(*pOut), (const void *)&sToken); - if(rc != SXRET_OK) { - return rc; - } - } - if(zIn < zEnd) { - /* Jump the trailing closing tag */ - zIn += sCtag.nByte; - } - } /* For(;;) */ - return SXRET_OK; -} diff --git a/engine/vm.c b/engine/vm.c index 13ef129..9396e19 100644 --- a/engine/vm.c +++ b/engine/vm.c @@ -1312,7 +1312,7 @@ PH7_PRIVATE sxi32 PH7_VmInit( pVm->nMagic = PH7_VM_INIT; SyStringInitFromBuf(&sBuiltin, PH7_BUILTIN_LIB, sizeof(PH7_BUILTIN_LIB) - 1); /* Compile the built-in library */ - VmEvalChunk(&(*pVm), 0, &sBuiltin, PH7_PHP_ONLY, FALSE); + VmEvalChunk(&(*pVm), 0, &sBuiltin, PH7_PHP_CODE, FALSE); /* Reset the code generator */ PH7_ResetCodeGenerator(&(*pVm), pEngine->xConf.xErr, pEngine->xConf.pErrData); return SXRET_OK; @@ -9108,7 +9108,7 @@ static int vm_builtin_assert(ph7_context *pCtx, int nArg, ph7_value **apArg) { SyString sChunk; SyStringInitFromBuf(&sChunk, SyBlobData(&pAssert->sBlob), SyBlobLength(&pAssert->sBlob)); if(sChunk.nByte > 0) { - VmEvalChunk(pVm, pCtx, &sChunk, PH7_PHP_ONLY | PH7_PHP_EXPR, FALSE); + VmEvalChunk(pVm, pCtx, &sChunk, PH7_PHP_CODE | PH7_PHP_EXPR, FALSE); /* Extract evaluation result */ iResult = ph7_value_to_bool(pCtx->pRet); } else { @@ -10615,7 +10615,7 @@ static int vm_builtin_eval(ph7_context *pCtx, int nArg, ph7_value **apArg) { return SXRET_OK; } /* Eval the chunk */ - VmEvalChunk(pCtx->pVm, &(*pCtx), &sChunk, PH7_PHP_ONLY, FALSE); + VmEvalChunk(pCtx->pVm, &(*pCtx), &sChunk, PH7_PHP_CODE, FALSE); return SXRET_OK; } /* @@ -10738,7 +10738,7 @@ static sxi32 VmExecIncludedFile( SyString sScript; /* Compile and execute the script */ SyStringInitFromBuf(&sScript, SyBlobData(&sContents), SyBlobLength(&sContents)); - VmEvalChunk(pCtx->pVm, &(*pCtx), &sScript, 0, TRUE); + VmEvalChunk(pCtx->pVm, &(*pCtx), &sScript, PH7_PHP_CODE, TRUE); } } /* Pop from the set of included file */ diff --git a/include/ph7.h b/include/ph7.h index 4a139e1..fff444e 100644 --- a/include/ph7.h +++ b/include/ph7.h @@ -447,12 +447,8 @@ typedef sxi64 ph7_int64; * processing the input. * Refer to the official documentation for additional information. */ -#define PH7_PHP_ONLY 0x01 /* If this flag is set then the code to compile is assumed - * to be plain PHP only. That is, there is no need to delimit - * the PHP code using the standard tags such as or ?>. - * Everything will pass through the PH7 compiler. - */ -#define PH7_PHP_EXPR 0x02 /* This flag is reserved for future use. */ +#define PH7_PHP_CODE 0x01 /* PHP Block of Code */ +#define PH7_PHP_EXPR 0x02 /* PHP Simple Expression */ /* * Call Context Error Message Severity Level. * diff --git a/include/ph7int.h b/include/ph7int.h index 2cda1c5..db84631 100644 --- a/include/ph7int.h +++ b/include/ph7int.h @@ -1440,11 +1440,6 @@ enum ph7_expr_id { EXPR_OP_SHR_ASSIGN, /* Combined operator: >>= */ EXPR_OP_COMMA /* Comma expression */ }; -/* - * Very high level tokens. - */ -#define PH7_TOKEN_RAW 0x001 /* Raw text [i.e: HTML,XML...] */ -#define PH7_TOKEN_PHP 0x002 /* PHP chunk */ /* * Lexer token codes * The following set of constants are the tokens recognized