P# allows the use of multiline string blocks, so the heredoc/nowdoc syntax is no longer needed

This commit is contained in:
Rafal Kupiec 2018-07-12 16:01:47 +02:00
parent c7feffc43f
commit dce1b38e79
Signed by: belliash
GPG Key ID: 4E829243E0CFE6B4
4 changed files with 5 additions and 242 deletions

View File

@ -446,48 +446,9 @@ PH7_PRIVATE sxi32 PH7_CompileSimpleString(ph7_gen_state *pGen,sxi32 iCompileFlag
return SXRET_OK;
}
/*
 * Compile a nowdoc string.
 * According to the PHP language reference manual:
 *
 *  Nowdocs are to single-quoted strings what heredocs are to double-quoted strings.
 *  A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc.
 *  The construct is ideal for embedding PHP code or other large blocks of text without the
 *  need for escaping. It shares some features in common with the SGML <![CDATA[ ]]>
 *  construct, in that it declares a block of text which is not for parsing.
 *  A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier
 *  which follows is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc
 *  identifiers also apply to nowdoc identifiers, especially those regarding the appearance
 *  of the closing identifier.
 *
 * The literal is taken from the current input token (pGen->pIn->sData) and a
 * PH7_OP_LOADC instruction is emitted for it. An empty literal emits LOADC
 * with constant index 0 and reserves nothing.
 * Returns SXRET_OK on success, or SXERR_ABORT (after reporting a compile error)
 * when no constant object could be reserved. iCompileFlag is not used.
 */
static sxi32 PH7_CompileNowDoc(ph7_gen_state *pGen,sxi32 iCompileFlag)
{
	SyString *pLiteral = &pGen->pIn->sData; /* Raw nowdoc text */
	ph7_value *pConst;
	sxu32 nConstIdx = 0; /* Stays 0 when the literal is empty */

	if( pLiteral->nByte > 0 ){
		/* Non-empty text: it needs a constant slot of its own */
		pConst = PH7_ReserveConstObj(pGen->pVm,&nConstIdx);
		if( !pConst ){
			PH7_GenCompileError(pGen,E_ERROR,pGen->pIn->nLine,"PH7 engine is running out of memory");
			SXUNUSED(iCompileFlag); /* cc warning */
			return SXERR_ABORT;
		}
		/* Nowdoc text is stored verbatim: no escape or variable processing */
		PH7_MemObjInitFromString(pGen->pVm,pConst,pLiteral);
	}
	/* Load either the freshly reserved constant or index 0 for an empty literal */
	PH7_VmEmitInstr(pGen->pVm,PH7_OP_LOADC,0,nConstIdx,0,0);
	return SXRET_OK;
}
/*
* Process variable expression [i.e: "$var","${var}"] embedded in a double quoted/heredoc string.
* Process variable expression [i.e: "$var","${var}"] embedded in a double quoted string.
* According to the PHP language reference manual
* When a string is specified in double quotes or with heredoc,variables are parsed within it.
* When a string is specified in double quotes,variables are parsed within it.
* There are two types of syntax: a simple one and a complex one. The simple syntax is the most
* common and convenient. It provides a way to embed a variable, an array value, or an object
* property in a string with a minimum of effort.
@ -538,7 +499,7 @@ static sxi32 GenStateProcessStringExpression(
return rc;
}
/*
* Reserve a new constant for a double quoted/heredoc string.
* Reserve a new constant for a double quoted string.
*/
static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount)
{
@ -557,26 +518,8 @@ static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount)
return pConstObj;
}
/*
* Compile a double quoted/heredoc string.
* Compile a double quoted string.
* According to the PHP language reference manual
* Heredoc
* A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier
* is provided, then a newline. The string itself follows, and then the same identifier again
* to close the quotation.
* The closing identifier must begin in the first column of the line. Also, the identifier must
* follow the same naming rules as any other label in PHP: it must contain only alphanumeric
* characters and underscores, and must start with a non-digit character or underscore.
* Warning
* It is very important to note that the line with the closing identifier must contain
* no other characters, except possibly a semicolon (;). That means especially that the identifier
* may not be indented, and there may not be any spaces or tabs before or after the semicolon.
* It's also important to realize that the first character before the closing identifier must
* be a newline as defined by the local operating system. This is \n on UNIX systems, including Mac OS X.
* The closing delimiter (possibly followed by a semicolon) must also be followed by a newline.
* If this rule is broken and the closing identifier is not "clean", it will not be considered a closing
* identifier, and PHP will continue looking for one. If a proper closing identifier is not found before
* the end of the current file, a parse error will result at the last line.
* Heredocs can not be used for initializing class properties.
* Double quoted
* If the string is enclosed in double-quotes ("), PHP will interpret more escape sequences for special characters:
* Escaped characters Sequence Meaning
@ -879,18 +822,6 @@ PH7_PRIVATE sxi32 PH7_CompileString(ph7_gen_state *pGen,sxi32 iCompileFlag)
/* Compilation result */
return rc;
}
/*
 * Compile a Heredoc string.
 * See the block-comment above for more information.
 *
 * The work is delegated to GenStateCompileString() and its status code is
 * handed back unchanged, so a failure while compiling the heredoc body is
 * reported to the caller instead of being masked as SXRET_OK.
 * iCompileFlag is not used.
 */
static sxi32 PH7_CompileHereDoc(ph7_gen_state *pGen,sxi32 iCompileFlag)
{
	sxi32 rc;
	rc = GenStateCompileString(&(*pGen));
	SXUNUSED(iCompileFlag); /* cc warning */
	/* Compilation result */
	return rc;
}
/*
* Compile an array entry whether it is a key or a value.
* Notes on array entries.
@ -5818,12 +5749,6 @@ PH7_PRIVATE ProcNodeConstruct PH7_GetNodeHandler(sxu32 nNodeType)
}else if( nNodeType & PH7_TK_SSTR ){
/* Single quoted string */
return PH7_CompileSimpleString;
}else if( nNodeType & PH7_TK_HEREDOC ){
/* Heredoc */
return PH7_CompileHereDoc;
}else if( nNodeType & PH7_TK_NOWDOC ){
/* Nowdoc */
return PH7_CompileNowDoc;
}else if( nNodeType & PH7_TK_BSTR ){
/* Backtick quoted string */
return PH7_CompileBacktic;

160
lex.c
View File

@ -18,7 +18,6 @@
*/
/* Forward declarations of helpers defined further down in this file */
static sxu32 KeywordCode(const char *z, int n);
/* Delimits a heredoc/nowdoc body once the "<<<" operator has been consumed */
static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken);
/*
* Tokenize a raw PHP input.
* Get a single low-level token from the input file. Update the stream pointer so that
@ -499,15 +498,6 @@ static sxi32 TokenizePHP(SyStream *pStream,SyToken *pToken,void *pUserData,void
if( pStream->zText[0] == '=' ){
/* Current operator: <<= */
pStream->zText++;
}else if( pStream->zText[0] == '<' ){
/* Current Token: <<< */
pStream->zText++;
/* This may be the beginning of a Heredoc/Nowdoc string,try to delimit it */
rc = LexExtractHeredoc(&(*pStream),&(*pToken));
if( rc == SXRET_OK ){
/* Here/Now doc successfully extracted */
return SXRET_OK;
}
}
}
}else if( pStream->zText[0] == '>' ){
@ -652,156 +642,6 @@ static sxu32 KeywordCode(const char *z, int n){
return PH7_TK_ID;
}
}
/*
* Extract a heredoc/nowdoc text from a raw PHP input.
* According to the PHP language reference manual:
* A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier
* is provided, then a newline. The string itself follows, and then the same identifier again
* to close the quotation.
* The closing identifier must begin in the first column of the line. Also, the identifier must
* follow the same naming rules as any other label in PHP: it must contain only alphanumeric
* characters and underscores, and must start with a non-digit character or underscore.
* Heredoc text behaves just like a double-quoted string, without the double quotes.
* This means that quotes in a heredoc do not need to be escaped, but the escape codes listed
* above can still be used. Variables are expanded, but the same care must be taken when expressing
* complex variables inside a heredoc as with strings.
* Nowdocs are to single-quoted strings what heredocs are to double-quoted strings.
* A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc.
* The construct is ideal for embedding PHP code or other large blocks of text without the need
* for escaping. It shares some features in common with the SGML <![CDATA[ ]]> construct, in that
* it declares a block of text which is not for parsing.
* A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier which follows
* is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc identifiers also apply to nowdoc
* identifiers, especially those regarding the appearance of the closing identifier.
* Symisc Extension:
* The closing delimiter can now start with a digit or underscore or it can be an UTF-8 stream.
* Example:
* <<<123
* HEREDOC Here
* 123
* or
* <<<___
* HEREDOC Here
* ___
*/
/*
 * Contract:
 *  pStream  Input stream whose zText cursor sits just past the "<<<" operator.
 *  pToken   Receives the token type (PH7_TK_HEREDOC or PH7_TK_NOWDOC) and the
 *           right-trimmed body text on success.
 * Returns SXRET_OK when a body was delimited. In that case pStream->zText is
 * moved to the character following the closing delimiter (the ';' is kept when
 * present) or to the end of the input, and pStream->nLine is incremented for
 * every newline consumed.
 * Returns SXERR_CONTINUE when what follows "<<<" is not a valid opening line;
 * neither pStream nor pToken is modified in that case.
 * If no closing delimiter is found, the remainder of the input becomes the body.
 * NOTE: the search only tests lines that follow a newline inside the body, so
 * the very first body line is never compared against the delimiter.
 */
static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken)
{
	const unsigned char *zIn = pStream->zText;
	const unsigned char *zEnd = pStream->zEnd;
	const unsigned char *zPtr;
	sxu8 bNowDoc = FALSE;
	SyString sDelim;
	SyString sStr;
	/* Jump leading white spaces */
	while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){
		zIn++;
	}
	if( zIn >= zEnd ){
		/* A simple symbol,return immediately */
		return SXERR_CONTINUE;
	}
	if( zIn[0] == '\'' || zIn[0] == '"' ){
		/* Make sure we are dealing with a nowdoc */
		bNowDoc = zIn[0] == '\'' ? TRUE : FALSE;
		zIn++;
	}
	/*
	 * Skipping the quote may have moved the cursor onto zEnd, so the bound is
	 * checked again before the first delimiter byte is inspected.
	 */
	if( zIn >= zEnd || (zIn[0] < 0xc0 && !SyisAlphaNum(zIn[0]) && zIn[0] != '_') ){
		/* Invalid delimiter,return immediately */
		return SXERR_CONTINUE;
	}
	/* Isolate the identifier */
	sDelim.zString = (const char *)zIn;
	for(;;){
		zPtr = zIn;
		/* Skip alphanumeric stream */
		while( zPtr < zEnd && zPtr[0] < 0xc0 && (SyisAlphaNum(zPtr[0]) || zPtr[0] == '_') ){
			zPtr++;
		}
		if( zPtr < zEnd && zPtr[0] >= 0xc0 ){
			zPtr++;
			/* UTF-8 stream */
			while( zPtr < zEnd && ((zPtr[0] & 0xc0) == 0x80) ){
				zPtr++;
			}
		}
		if( zPtr == zIn ){
			/* Not an UTF-8 or alphanumeric stream */
			break;
		}
		/* Synchronize pointers */
		zIn = zPtr;
	}
	/* Get the identifier length */
	sDelim.nByte = (sxu32)((const char *)zIn-sDelim.zString);
	/* The identifier may run up to zEnd: only look at the next byte if there is one */
	if( zIn < zEnd && (zIn[0] == '"' || (bNowDoc && zIn[0] == '\'')) ){
		/* Jump the trailing quote */
		zIn++;
	}
	/* Jump trailing white spaces */
	while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){
		zIn++;
	}
	if( sDelim.nByte <= 0 || zIn >= zEnd || zIn[0] != '\n' ){
		/* Invalid syntax */
		return SXERR_CONTINUE;
	}
	/* The opening line is valid: from here on the stream state is updated */
	pStream->nLine++; /* Increment line counter */
	zIn++;
	/* Isolate the delimited string */
	sStr.zString = (const char *)zIn;
	/* Go and find the closing delimiter */
	for(;;){
		/* Synchronize with the next line */
		while( zIn < zEnd && zIn[0] != '\n' ){
			zIn++;
		}
		if( zIn >= zEnd ){
			/* End of the input reached, break immediately */
			pStream->zText = pStream->zEnd;
			break;
		}
		pStream->nLine++; /* Increment line counter */
		zIn++;
		/* zIn now sits on the first column of a line: test it against the delimiter */
		if( (sxu32)(zEnd - zIn) >= sDelim.nByte && SyMemcmp((const void *)sDelim.zString,(const void *)zIn,sDelim.nByte) == 0 ){
			zPtr = &zIn[sDelim.nByte];
			while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){
				zPtr++;
			}
			if( zPtr >= zEnd ){
				/* End of input */
				pStream->zText = zPtr;
				break;
			}
			if( zPtr[0] == ';' ){
				const unsigned char *zCur = zPtr;
				zPtr++;
				while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){
					zPtr++;
				}
				if( zPtr >= zEnd || zPtr[0] == '\n' ){
					/* Closing delimiter found,break immediately */
					pStream->zText = zCur; /* Keep the semi-colon */
					break;
				}
			}else if( zPtr[0] == '\n' ){
				/* Closing delimiter found,break immediately */
				pStream->zText = zPtr; /* Synchronize with the stream cursor */
				break;
			}
			/* The line only starts with the delimiter: keep searching from here */
			zIn = zPtr;
		}
	} /* For(;;) */
	/* The body ends where the closing delimiter line starts (or at the end of the input) */
	sStr.nByte = (sxu32)((const char *)zIn-sStr.zString);
	/* Record token type and length */
	pToken->nType = bNowDoc ? PH7_TK_NOWDOC : PH7_TK_HEREDOC;
	SyStringDupPtr(&pToken->sData,&sStr);
	/* Remove trailing white spaces */
	SyStringRightTrim(&pToken->sData);
	/* All done */
	return SXRET_OK;
}
/*
* Tokenize a raw PHP input.
* This is the public tokenizer called by most code generator routines.

View File

@ -651,7 +651,7 @@ Synchronize:
* On error, PH7 takes care of generating the appropriate error message.
* An expression node can be a variable [i.e: $var],an operator [i.e: ++]
* an anonymous function [i.e: function(){ return "Hello"; }], a double/single
* quoted string, a heredoc/nowdoc,a literal [i.e: PHP_EOL],a namespace path
* quoted string, a literal [i.e: PHP_EOL],a namespace path
* [i.e: namespaces\path\to..],a array/list [i.e: array(4,5,6)] and so on.
*/
static sxi32 ExprExtractNode(ph7_gen_state *pGen,ph7_expr_node **ppNode)

View File

@ -1590,8 +1590,6 @@ enum ph7_expr_id {
/* Token type flags: each value is a distinct bit, so a type can be tested with '&' */
#define PH7_TK_CSB 0x0001000 /* Closing square bracket ']' */
#define PH7_TK_DSTR 0x0002000 /* Double quoted string "$str" */
#define PH7_TK_SSTR 0x0004000 /* Single quoted string 'str' */
#define PH7_TK_HEREDOC 0x0008000 /* Heredoc <<<ID: body handled like a double quoted string */
#define PH7_TK_NOWDOC 0x0010000 /* Nowdoc <<<'ID': body taken verbatim, no parsing */
#define PH7_TK_COMMA 0x0020000 /* Comma ',' */
#define PH7_TK_SEMI 0x0040000 /* Semi-colon ";" */
#define PH7_TK_BSTR 0x0080000 /* Backtick quoted string [i.e: Shell command `date`] */