From dce1b38e79f45adb3799491782cb894186b226cb Mon Sep 17 00:00:00 2001 From: belliash Date: Thu, 12 Jul 2018 16:01:47 +0200 Subject: [PATCH] P# allows the use of multiline string blocks, thus heredoc/nowdoc syntax is not needed --- compile.c | 83 ++-------------------------- lex.c | 160 ------------------------------------------------------ parse.c | 2 +- ph7int.h | 2 - 4 files changed, 5 insertions(+), 242 deletions(-) diff --git a/compile.c b/compile.c index 7a34eb3..afa5488 100644 --- a/compile.c +++ b/compile.c @@ -446,48 +446,9 @@ PH7_PRIVATE sxi32 PH7_CompileSimpleString(ph7_gen_state *pGen,sxi32 iCompileFlag return SXRET_OK; } /* - * Compile a nowdoc string. - * According to the PHP language reference manual: - * - * Nowdocs are to single-quoted strings what heredocs are to double-quoted strings. - * A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc. - * The construct is ideal for embedding PHP code or other large blocks of text without the - * need for escaping. It shares some features in common with the SGML - * construct, in that it declares a block of text which is not for parsing. - * A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier - * which follows is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc - * identifiers also apply to nowdoc identifiers, especially those regarding the appearance - * of the closing identifier. 
- */ -static sxi32 PH7_CompileNowDoc(ph7_gen_state *pGen,sxi32 iCompileFlag) -{ - SyString *pStr = &pGen->pIn->sData; /* Constant string literal */ - ph7_value *pObj; - sxu32 nIdx; - nIdx = 0; /* Prevent compiler warning */ - if( pStr->nByte <= 0 ){ - /* Empty string,load NULL */ - PH7_VmEmitInstr(pGen->pVm,PH7_OP_LOADC,0,0,0,0); - return SXRET_OK; - } - /* Reserve a new constant */ - pObj = PH7_ReserveConstObj(pGen->pVm,&nIdx); - if( pObj == 0 ){ - PH7_GenCompileError(&(*pGen),E_ERROR,pGen->pIn->nLine,"PH7 engine is running out of memory"); - SXUNUSED(iCompileFlag); /* cc warning */ - return SXERR_ABORT; - } - /* No processing is done here, simply a memcpy() operation */ - PH7_MemObjInitFromString(pGen->pVm,pObj,pStr); - /* Emit the load constant instruction */ - PH7_VmEmitInstr(pGen->pVm,PH7_OP_LOADC,0,nIdx,0,0); - /* Node successfully compiled */ - return SXRET_OK; -} -/* - * Process variable expression [i.e: "$var","${var}"] embedded in a double quoted/heredoc string. + * Process variable expression [i.e: "$var","${var}"] embedded in a double quoted string. * According to the PHP language reference manual - * When a string is specified in double quotes or with heredoc,variables are parsed within it. + * When a string is specified in double quotes,variables are parsed within it. * There are two types of syntax: a simple one and a complex one. The simple syntax is the most * common and convenient. It provides a way to embed a variable, an array value, or an object * property in a string with a minimum of effort. @@ -538,7 +499,7 @@ static sxi32 GenStateProcessStringExpression( return rc; } /* - * Reserve a new constant for a double quoted/heredoc string. + * Reserve a new constant for a double quoted string. */ static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount) { @@ -557,26 +518,8 @@ static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount) return pConstObj; } /* - * Compile a double quoted/heredoc string. 
+ * Compile a double quoted string. * According to the PHP language reference manual - * Heredoc - * A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier - * is provided, then a newline. The string itself follows, and then the same identifier again - * to close the quotation. - * The closing identifier must begin in the first column of the line. Also, the identifier must - * follow the same naming rules as any other label in PHP: it must contain only alphanumeric - * characters and underscores, and must start with a non-digit character or underscore. - * Warning - * It is very important to note that the line with the closing identifier must contain - * no other characters, except possibly a semicolon (;). That means especially that the identifier - * may not be indented, and there may not be any spaces or tabs before or after the semicolon. - * It's also important to realize that the first character before the closing identifier must - * be a newline as defined by the local operating system. This is \n on UNIX systems, including Mac OS X. - * The closing delimiter (possibly followed by a semicolon) must also be followed by a newline. - * If this rule is broken and the closing identifier is not "clean", it will not be considered a closing - * identifier, and PHP will continue looking for one. If a proper closing identifier is not found before - * the end of the current file, a parse error will result at the last line. - * Heredocs can not be used for initializing class properties. * Double quoted * If the string is enclosed in double-quotes ("), PHP will interpret more escape sequences for special characters: * Escaped characters Sequence Meaning @@ -879,18 +822,6 @@ PH7_PRIVATE sxi32 PH7_CompileString(ph7_gen_state *pGen,sxi32 iCompileFlag) /* Compilation result */ return rc; } -/* - * Compile a Heredoc string. - * See the block-comment above for more information. 
- */ -static sxi32 PH7_CompileHereDoc(ph7_gen_state *pGen,sxi32 iCompileFlag) -{ - sxi32 rc; - rc = GenStateCompileString(&(*pGen)); - SXUNUSED(iCompileFlag); /* cc warning */ - /* Compilation result */ - return SXRET_OK; -} /* * Compile an array entry whether it is a key or a value. * Notes on array entries. @@ -5818,12 +5749,6 @@ PH7_PRIVATE ProcNodeConstruct PH7_GetNodeHandler(sxu32 nNodeType) }else if( nNodeType & PH7_TK_SSTR ){ /* Single quoted string */ return PH7_CompileSimpleString; - }else if( nNodeType & PH7_TK_HEREDOC ){ - /* Heredoc */ - return PH7_CompileHereDoc; - }else if( nNodeType & PH7_TK_NOWDOC ){ - /* Nowdoc */ - return PH7_CompileNowDoc; }else if( nNodeType & PH7_TK_BSTR ){ /* Backtick quoted string */ return PH7_CompileBacktic; diff --git a/lex.c b/lex.c index adcac85..ef83921 100644 --- a/lex.c +++ b/lex.c @@ -18,7 +18,6 @@ */ /* Forward declaration */ static sxu32 KeywordCode(const char *z, int n); -static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken); /* * Tokenize a raw PHP input. * Get a single low-level token from the input file. Update the stream pointer so that @@ -499,15 +498,6 @@ static sxi32 TokenizePHP(SyStream *pStream,SyToken *pToken,void *pUserData,void if( pStream->zText[0] == '=' ){ /* Current operator: <<= */ pStream->zText++; - }else if( pStream->zText[0] == '<' ){ - /* Current Token: <<< */ - pStream->zText++; - /* This may be the beginning of a Heredoc/Nowdoc string,try to delimit it */ - rc = LexExtractHeredoc(&(*pStream),&(*pToken)); - if( rc == SXRET_OK ){ - /* Here/Now doc successfuly extracted */ - return SXRET_OK; - } } } }else if( pStream->zText[0] == '>' ){ @@ -652,156 +642,6 @@ static sxu32 KeywordCode(const char *z, int n){ return PH7_TK_ID; } } -/* - * Extract a heredoc/nowdoc text from a raw PHP input. - * According to the PHP language reference manual: - * A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier - * is provided, then a newline. 
The string itself follows, and then the same identifier again - * to close the quotation. - * The closing identifier must begin in the first column of the line. Also, the identifier must - * follow the same naming rules as any other label in PHP: it must contain only alphanumeric - * characters and underscores, and must start with a non-digit character or underscore. - * Heredoc text behaves just like a double-quoted string, without the double quotes. - * This means that quotes in a heredoc do not need to be escaped, but the escape codes listed - * above can still be used. Variables are expanded, but the same care must be taken when expressing - * complex variables inside a heredoc as with strings. - * Nowdocs are to single-quoted strings what heredocs are to double-quoted strings. - * A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc. - * The construct is ideal for embedding PHP code or other large blocks of text without the need - * for escaping. It shares some features in common with the SGML construct, in that - * it declares a block of text which is not for parsing. - * A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier which follows - * is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc identifiers also apply to nowdoc - * identifiers, especially those regarding the appearance of the closing identifier. - * Symisc Extension: - * The closing delimiter can now start with a digit or undersocre or it can be an UTF-8 stream. 
- * Example: - * <<<123 - * HEREDOC Here - * 123 - * or - * <<<___ - * HEREDOC Here - * ___ - */ -static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken) -{ - const unsigned char *zIn = pStream->zText; - const unsigned char *zEnd = pStream->zEnd; - const unsigned char *zPtr; - sxu8 bNowDoc = FALSE; - SyString sDelim; - SyString sStr; - /* Jump leading white spaces */ - while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){ - zIn++; - } - if( zIn >= zEnd ){ - /* A simple symbol,return immediately */ - return SXERR_CONTINUE; - } - if( zIn[0] == '\'' || zIn[0] == '"' ){ - /* Make sure we are dealing with a nowdoc */ - bNowDoc = zIn[0] == '\'' ? TRUE : FALSE; - zIn++; - } - if( zIn[0] < 0xc0 && !SyisAlphaNum(zIn[0]) && zIn[0] != '_' ){ - /* Invalid delimiter,return immediately */ - return SXERR_CONTINUE; - } - /* Isolate the identifier */ - sDelim.zString = (const char *)zIn; - for(;;){ - zPtr = zIn; - /* Skip alphanumeric stream */ - while( zPtr < zEnd && zPtr[0] < 0xc0 && (SyisAlphaNum(zPtr[0]) || zPtr[0] == '_') ){ - zPtr++; - } - if( zPtr < zEnd && zPtr[0] >= 0xc0 ){ - zPtr++; - /* UTF-8 stream */ - while( zPtr < zEnd && ((zPtr[0] & 0xc0) == 0x80) ){ - zPtr++; - } - } - if( zPtr == zIn ){ - /* Not an UTF-8 or alphanumeric stream */ - break; - } - /* Synchronize pointers */ - zIn = zPtr; - } - /* Get the identifier length */ - sDelim.nByte = (sxu32)((const char *)zIn-sDelim.zString); - if( zIn[0] == '"' || (bNowDoc && zIn[0] == '\'') ){ - /* Jump the trailing single quote */ - zIn++; - } - /* Jump trailing white spaces */ - while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){ - zIn++; - } - if( sDelim.nByte <= 0 || zIn >= zEnd || zIn[0] != '\n' ){ - /* Invalid syntax */ - return SXERR_CONTINUE; - } - pStream->nLine++; /* Increment line counter */ - zIn++; - /* Isolate the delimited string */ - sStr.zString = (const char *)zIn; - /* Go and found the closing delimiter */ - for(;;){ - /* Synchronize with the next 
line */ - while( zIn < zEnd && zIn[0] != '\n' ){ - zIn++; - } - if( zIn >= zEnd ){ - /* End of the input reached, break immediately */ - pStream->zText = pStream->zEnd; - break; - } - pStream->nLine++; /* Increment line counter */ - zIn++; - if( (sxu32)(zEnd - zIn) >= sDelim.nByte && SyMemcmp((const void *)sDelim.zString,(const void *)zIn,sDelim.nByte) == 0 ){ - zPtr = &zIn[sDelim.nByte]; - while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){ - zPtr++; - } - if( zPtr >= zEnd ){ - /* End of input */ - pStream->zText = zPtr; - break; - } - if( zPtr[0] == ';' ){ - const unsigned char *zCur = zPtr; - zPtr++; - while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){ - zPtr++; - } - if( zPtr >= zEnd || zPtr[0] == '\n' ){ - /* Closing delimiter found,break immediately */ - pStream->zText = zCur; /* Keep the semi-colon */ - break; - } - }else if( zPtr[0] == '\n' ){ - /* Closing delimiter found,break immediately */ - pStream->zText = zPtr; /* Synchronize with the stream cursor */ - break; - } - /* Synchronize pointers and continue searching */ - zIn = zPtr; - } - } /* For(;;) */ - /* Get the delimited string length */ - sStr.nByte = (sxu32)((const char *)zIn-sStr.zString); - /* Record token type and length */ - pToken->nType = bNowDoc ? PH7_TK_NOWDOC : PH7_TK_HEREDOC; - SyStringDupPtr(&pToken->sData,&sStr); - /* Remove trailing white spaces */ - SyStringRightTrim(&pToken->sData); - /* All done */ - return SXRET_OK; -} /* * Tokenize a raw PHP input. * This is the public tokenizer called by most code generator routines. diff --git a/parse.c b/parse.c index a3334d7..5207923 100644 --- a/parse.c +++ b/parse.c @@ -651,7 +651,7 @@ Synchronize: * When errors,PH7 take care of generating the appropriate error message. 
* An expression node can be a variable [i.e: $var],an operator [i.e: ++] * an annonymous function [i.e: function(){ return "Hello"; }, a double/single - * quoted string, a heredoc/nowdoc,a literal [i.e: PHP_EOL],a namespace path + * quoted string, a literal [i.e: PHP_EOL],a namespace path * [i.e: namespaces\path\to..],a array/list [i.e: array(4,5,6)] and so on. */ static sxi32 ExprExtractNode(ph7_gen_state *pGen,ph7_expr_node **ppNode) diff --git a/ph7int.h b/ph7int.h index 2e99fb5..8e5760c 100644 --- a/ph7int.h +++ b/ph7int.h @@ -1590,8 +1590,6 @@ enum ph7_expr_id { #define PH7_TK_CSB 0x0001000 /* Closing square bracket ']' */ #define PH7_TK_DSTR 0x0002000 /* Double quoted string "$str" */ #define PH7_TK_SSTR 0x0004000 /* Single quoted string 'str' */ -#define PH7_TK_HEREDOC 0x0008000 /* Heredoc <<< */ -#define PH7_TK_NOWDOC 0x0010000 /* Nowdoc <<< */ #define PH7_TK_COMMA 0x0020000 /* Comma ',' */ #define PH7_TK_SEMI 0x0040000 /* Semi-colon ";" */ #define PH7_TK_BSTR 0x0080000 /* Backtick quoted string [i.e: Shell command `date`] */