P# allows multiline string blocks, so the heredoc/nowdoc syntax is not needed

2018-07-12 16:01:47 +02:00 · 2018-07-12 16:01:47 +02:00 · dce1b38e79
commit dce1b38e79
--- a/compile.c
+++ b/compile.c
@@ -446,48 +446,9 @@ PH7_PRIVATE sxi32 PH7_CompileSimpleString(ph7_gen_state *pGen,sxi32 iCompileFlag
 	return SXRET_OK;
 }
 /*
- * Compile a nowdoc string.
- * According to the PHP language reference manual:
- *
- *  Nowdocs are to single-quoted strings what heredocs are to double-quoted strings.
- *  A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc.
- *  The construct is ideal for embedding PHP code or other large blocks of text without the
- *  need for escaping. It shares some features in common with the SGML <![CDATA[ ]]> 
- *  construct, in that it declares a block of text which is not for parsing.
- *  A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier
- *  which follows is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc 
- *  identifiers also apply to nowdoc identifiers, especially those regarding the appearance
- *  of the closing identifier. 
- */
-static sxi32 PH7_CompileNowDoc(ph7_gen_state *pGen,sxi32 iCompileFlag)
-{
-	SyString *pStr = &pGen->pIn->sData; /* Constant string literal */
-	ph7_value *pObj;
-	sxu32 nIdx;
-	nIdx = 0; /* Prevent compiler warning */
-	if( pStr->nByte <= 0 ){
-		/* Empty string,load NULL */
-		PH7_VmEmitInstr(pGen->pVm,PH7_OP_LOADC,0,0,0,0);
-		return SXRET_OK;
-	}
-	/* Reserve a new constant */
-	pObj = PH7_ReserveConstObj(pGen->pVm,&nIdx);
-	if( pObj == 0 ){
-		PH7_GenCompileError(&(*pGen),E_ERROR,pGen->pIn->nLine,"PH7 engine is running out of memory");
-		SXUNUSED(iCompileFlag); /* cc warning */
-		return SXERR_ABORT;
-	}
-	/* No processing is done here, simply a memcpy() operation */
-	PH7_MemObjInitFromString(pGen->pVm,pObj,pStr);
-	/* Emit the load constant instruction */
-	PH7_VmEmitInstr(pGen->pVm,PH7_OP_LOADC,0,nIdx,0,0);
-	/* Node successfully compiled */
-	return SXRET_OK;
-}
-/*
- * Process variable expression [i.e: "$var","${var}"] embedded in a double quoted/heredoc string.
+ * Process variable expression [i.e: "$var","${var}"] embedded in a double quoted string.
 * According to the PHP language reference manual
- *   When a string is specified in double quotes or with heredoc,variables are parsed within it.
+ *   When a string is specified in double quotes,variables are parsed within it.
 *  There are two types of syntax: a simple one and a complex one. The simple syntax is the most
 *  common and convenient. It provides a way to embed a variable, an array value, or an object
 *  property in a string with a minimum of effort.
@@ -538,7 +499,7 @@ static sxi32 GenStateProcessStringExpression(
 	return rc;
 }
 /*
- * Reserve a new constant for a double quoted/heredoc string.
+ * Reserve a new constant for a double quoted string.
 */
 static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount)
 {
@@ -557,26 +518,8 @@ static ph7_value * GenStateNewStrObj(ph7_gen_state *pGen,sxi32 *pCount)
 	return pConstObj;
 }
 /*
- * Compile a double quoted/heredoc string.
+ * Compile a double quoted string.
 * According to the PHP language reference manual
- * Heredoc
- *  A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier
- *  is provided, then a newline. The string itself follows, and then the same identifier again
- *  to close the quotation.
- *  The closing identifier must begin in the first column of the line. Also, the identifier must
- *  follow the same naming rules as any other label in PHP: it must contain only alphanumeric
- *  characters and underscores, and must start with a non-digit character or underscore.
- *  Warning
- *  It is very important to note that the line with the closing identifier must contain
- *  no other characters, except possibly a semicolon (;). That means especially that the identifier
- *  may not be indented, and there may not be any spaces or tabs before or after the semicolon.
- *  It's also important to realize that the first character before the closing identifier must
- *  be a newline as defined by the local operating system. This is \n on UNIX systems, including Mac OS X.
- *  The closing delimiter (possibly followed by a semicolon) must also be followed by a newline.
- *  If this rule is broken and the closing identifier is not "clean", it will not be considered a closing
- *  identifier, and PHP will continue looking for one. If a proper closing identifier is not found before
- *  the end of the current file, a parse error will result at the last line.
- *  Heredocs can not be used for initializing class properties. 
 * Double quoted
 *  If the string is enclosed in double-quotes ("), PHP will interpret more escape sequences for special characters:
 *  Escaped characters Sequence 	Meaning
@@ -879,18 +822,6 @@ PH7_PRIVATE sxi32 PH7_CompileString(ph7_gen_state *pGen,sxi32 iCompileFlag)
 	/* Compilation result */
 	return rc;
 }
-/*
- * Compile a Heredoc string.
- *  See the block-comment above for more information.
- */
-static sxi32 PH7_CompileHereDoc(ph7_gen_state *pGen,sxi32 iCompileFlag)
-{
-	sxi32 rc;
-	rc = GenStateCompileString(&(*pGen));
-	SXUNUSED(iCompileFlag); /* cc warning */
-	/* Compilation result */
-	return SXRET_OK;
-}
 /*
 * Compile an array entry whether it is a key or a value.
 *  Notes on array entries.
@@ -5818,12 +5749,6 @@ PH7_PRIVATE ProcNodeConstruct PH7_GetNodeHandler(sxu32 nNodeType)
 	}else if( nNodeType & PH7_TK_SSTR ){
 		/* Single quoted string */
 		return PH7_CompileSimpleString;
-	}else if( nNodeType & PH7_TK_HEREDOC ){
-		/* Heredoc */
-		return PH7_CompileHereDoc;
-	}else if( nNodeType & PH7_TK_NOWDOC ){
-		/* Nowdoc */
-		return PH7_CompileNowDoc;
 	}else if( nNodeType & PH7_TK_BSTR ){
 		/* Backtick quoted string */
 		return PH7_CompileBacktic;
--- a/lex.c
+++ b/lex.c
@@ -18,7 +18,6 @@
 */
 /* Forward declaration */
 static sxu32 KeywordCode(const char *z, int n);
-static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken);
 /*
 * Tokenize a raw PHP input.
 * Get a single low-level token from the input file. Update the stream pointer so that
@@ -499,15 +498,6 @@ static sxi32 TokenizePHP(SyStream *pStream,SyToken *pToken,void *pUserData,void
 						if( pStream->zText[0] == '=' ){
 							/* Current operator: <<= */
 							pStream->zText++;
-						}else if( pStream->zText[0] == '<' ){
-							/* Current Token: <<<  */
-							pStream->zText++;
-							/* This may be the beginning of a Heredoc/Nowdoc string,try to delimit it */
-							rc = LexExtractHeredoc(&(*pStream),&(*pToken));
-							if( rc == SXRET_OK ){
-								/* Here/Now doc successfuly extracted */
-								return SXRET_OK;
-							}
 						}
 					}
 				}else if( pStream->zText[0] == '>' ){
@@ -652,156 +642,6 @@ static sxu32 KeywordCode(const char *z, int n){
 		return PH7_TK_ID;
 	}
 }
-/*
- * Extract a heredoc/nowdoc text from a raw PHP input.
- * According to the PHP language reference manual:
- *  A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier
- *  is provided, then a newline. The string itself follows, and then the same identifier again
- *  to close the quotation.
- *  The closing identifier must begin in the first column of the line. Also, the identifier must 
- *  follow the same naming rules as any other label in PHP: it must contain only alphanumeric 
- *  characters and underscores, and must start with a non-digit character or underscore. 
- *  Heredoc text behaves just like a double-quoted string, without the double quotes.
- *  This means that quotes in a heredoc do not need to be escaped, but the escape codes listed
- *  above can still be used. Variables are expanded, but the same care must be taken when expressing
- *  complex variables inside a heredoc as with strings. 
- *  Nowdocs are to single-quoted strings what heredocs are to double-quoted strings.
- *  A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc.
- *  The construct is ideal for embedding PHP code or other large blocks of text without the need
- *  for escaping. It shares some features in common with the SGML <![CDATA[ ]]> construct, in that
- *  it declares a block of text which is not for parsing.
- *  A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier which follows
- *  is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc identifiers also apply to nowdoc
- *  identifiers, especially those regarding the appearance of the closing identifier. 
- * Symisc Extension:
- * The closing delimiter can now start with a digit or undersocre or it can be an UTF-8 stream.
- * Example:
- *  <<<123
- *    HEREDOC Here
- * 123
- *  or
- *  <<<___
- *   HEREDOC Here
- *  ___
- */
-static sxi32 LexExtractHeredoc(SyStream *pStream,SyToken *pToken)
-{
-	const unsigned char *zIn  = pStream->zText;
-	const unsigned char *zEnd = pStream->zEnd;
-	const unsigned char *zPtr;
-	sxu8 bNowDoc = FALSE;
-	SyString sDelim;
-	SyString sStr;
-	/* Jump leading white spaces */
-	while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){
-		zIn++;
-	}
-	if( zIn >= zEnd ){
-		/* A simple symbol,return immediately */
-		return SXERR_CONTINUE;
-	}
-	if( zIn[0] == '\'' || zIn[0] == '"' ){
-		/* Make sure we are dealing with a nowdoc */
-		bNowDoc =  zIn[0] == '\'' ? TRUE : FALSE;
-		zIn++;
-	}
-	if( zIn[0] < 0xc0 && !SyisAlphaNum(zIn[0]) && zIn[0] != '_' ){
-		/* Invalid delimiter,return immediately */
-		return SXERR_CONTINUE;
-	}
-	/* Isolate the identifier */
-	sDelim.zString = (const char *)zIn;
-	for(;;){
-		zPtr = zIn;
-		/* Skip alphanumeric stream */
-		while( zPtr < zEnd && zPtr[0] < 0xc0 && (SyisAlphaNum(zPtr[0]) || zPtr[0] == '_') ){
-			zPtr++;
-		}
-		if( zPtr < zEnd && zPtr[0] >= 0xc0 ){
-			zPtr++;
-			/* UTF-8 stream */
-			while( zPtr < zEnd && ((zPtr[0] & 0xc0) == 0x80) ){
-				zPtr++;
-			}
-		}
-		if( zPtr == zIn ){
-			/* Not an UTF-8 or alphanumeric stream */
-			break;
-		}
-		/* Synchronize pointers */
-		zIn = zPtr;
-	}
-	/* Get the identifier length */
-	sDelim.nByte = (sxu32)((const char *)zIn-sDelim.zString);
-	if( zIn[0] == '"' || (bNowDoc && zIn[0] == '\'') ){
-		/* Jump the trailing single quote */
-		zIn++;
-	}
-	/* Jump trailing white spaces */
-	while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){
-		zIn++;
-	}
-	if( sDelim.nByte <= 0 || zIn >= zEnd || zIn[0] != '\n' ){
-		/* Invalid syntax */
-		return SXERR_CONTINUE;
-	}
-	pStream->nLine++; /* Increment line counter */
-	zIn++;
-	/* Isolate the delimited string */
-	sStr.zString = (const char *)zIn;
-	/* Go and found the closing delimiter */
-	for(;;){
-		/* Synchronize with the next line */
-		while( zIn < zEnd && zIn[0] != '\n' ){
-			zIn++;
-		}
-		if( zIn >= zEnd ){
-			/* End of the input reached, break immediately */
-			pStream->zText = pStream->zEnd;
-			break;
-		}
-		pStream->nLine++; /* Increment line counter */
-		zIn++;
-		if( (sxu32)(zEnd - zIn) >= sDelim.nByte && SyMemcmp((const void *)sDelim.zString,(const void *)zIn,sDelim.nByte) == 0 ){
-			zPtr = &zIn[sDelim.nByte];
-			while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){
-				zPtr++;
-			}
-			if( zPtr >= zEnd ){
-				/* End of input */
-				pStream->zText = zPtr;
-				break;
-			}
-			if( zPtr[0] == ';' ){
-				const unsigned char *zCur = zPtr;
-				zPtr++;
-				while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){
-					zPtr++;
-				}
-				if( zPtr >= zEnd || zPtr[0] == '\n' ){
-					/* Closing delimiter found,break immediately */
-					pStream->zText = zCur; /* Keep the semi-colon */
-					break;
-				}
-			}else if( zPtr[0] == '\n' ){
-				/* Closing delimiter found,break immediately */
-				pStream->zText = zPtr; /* Synchronize with the stream cursor */
-				break;
-			}
-			/* Synchronize pointers and continue searching */
-			zIn = zPtr;
-		}
-	} /* For(;;) */
-	/* Get the delimited string length */
-	sStr.nByte = (sxu32)((const char *)zIn-sStr.zString);
-	/* Record token type and length */
-	pToken->nType = bNowDoc ? PH7_TK_NOWDOC : PH7_TK_HEREDOC;
-	SyStringDupPtr(&pToken->sData,&sStr);
-	/* Remove trailing white spaces */
-	SyStringRightTrim(&pToken->sData);
-	/* All done */
-	return SXRET_OK;
-}
 /*
 * Tokenize a raw PHP input.
 * This is the public tokenizer called by most code generator routines. 
--- a/parse.c
+++ b/parse.c
@@ -651,7 +651,7 @@ Synchronize:
 * When errors,PH7 take care of generating the appropriate error message.
 * An expression node can be a variable [i.e: $var],an operator [i.e: ++] 
 * an annonymous function [i.e: function(){ return "Hello"; }, a double/single
- * quoted string, a heredoc/nowdoc,a literal [i.e: PHP_EOL],a namespace path
+ * quoted string, a literal [i.e: PHP_EOL],a namespace path
 * [i.e: namespaces\path\to..],a array/list [i.e: array(4,5,6)] and so on.
 */
 static sxi32 ExprExtractNode(ph7_gen_state *pGen,ph7_expr_node **ppNode)
--- a/ph7int.h
+++ b/ph7int.h
@@ -1590,8 +1590,6 @@ enum ph7_expr_id {
 #define PH7_TK_CSB       0x0001000 /* Closing square bracket ']' */
 #define PH7_TK_DSTR      0x0002000 /* Double quoted string "$str" */
 #define PH7_TK_SSTR      0x0004000 /* Single quoted string 'str' */
-#define PH7_TK_HEREDOC   0x0008000 /* Heredoc <<< */
-#define PH7_TK_NOWDOC    0x0010000 /* Nowdoc <<< */
 #define PH7_TK_COMMA     0x0020000 /* Comma ',' */
 #define PH7_TK_SEMI      0x0040000 /* Semi-colon ";" */
 #define PH7_TK_BSTR      0x0080000 /* Backtick quoted string [i.e: Shell command `date`] */