627 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			627 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/**
 * @PROJECT     PH7 Engine for the AerScript Interpreter
 * @COPYRIGHT   See COPYING in the top level directory
 * @FILE        engine/lexer.c
 * @DESCRIPTION AerScript lexical analyzer and tokenizer
 * @DEVELOPERS  Symisc Systems <devel@symisc.net>
 *              Rafal Kupiec <belliash@codingworkshop.eu.org>
 */
 | |
| #include "ph7int.h"
 | |
| /* Forward declaration */
 | |
| static sxu32 KeywordCode(const char *z, sxu32 n);
 | |
| /*
 | |
|  * Tokenize a raw PHP input.
 | |
|  * Get a single low-level token from the input file. Update the stream pointer so that
 | |
|  * it points to the first character beyond the extracted token.
 | |
|  */
 | |
| static sxi32 TokenizeAerScript(SyStream *pStream, SyToken *pToken, void *pUserData, void *pCtxData) {
 | |
| 	SyString *pStr;
 | |
| 	/* Ignore leading white spaces */
 | |
| 	while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0])) {
 | |
| 		/* Advance the stream cursor */
 | |
| 		if(pStream->zText[0] == '\n') {
 | |
| 			/* Update line counter */
 | |
| 			pStream->nLine++;
 | |
| 		}
 | |
| 		pStream->zText++;
 | |
| 	}
 | |
| 	if(pStream->zText >= pStream->zEnd) {
 | |
| 		/* End of input reached */
 | |
| 		return SXERR_EOF;
 | |
| 	}
 | |
| 	/* Record token starting position and line */
 | |
| 	pToken->nLine = pStream->nLine;
 | |
| 	pToken->pUserData = 0;
 | |
| 	pStr = &pToken->sData;
 | |
| 	SyStringInitFromBuf(pStr, pStream->zText, 0);
 | |
| 	if(pStream->zText[0] >= 0xc0 || SyisAlpha(pStream->zText[0]) || pStream->zText[0] == '_') {
 | |
| 		/* The following code fragment is taken verbatim from the xPP source tree.
 | |
| 		 * xPP is a modern embeddable macro processor with advanced features useful for
 | |
| 		 * application seeking for a production quality,ready to use macro processor.
 | |
| 		 * xPP is a widely used library developed and maintained by Symisc Systems.
 | |
| 		 * You can reach the xPP home page by following this link:
 | |
| 		 * https://xpp.symisc.net/
 | |
| 		 */
 | |
| 		const unsigned char *zIn;
 | |
| 		sxu32 nKeyword;
 | |
| 		/* Isolate UTF-8 or alphanumeric stream */
 | |
| 		if(pStream->zText[0] < 0xc0) {
 | |
| 			pStream->zText++;
 | |
| 		}
 | |
| 		for(;;) {
 | |
| 			zIn = pStream->zText;
 | |
| 			if(zIn[0] >= 0xc0) {
 | |
| 				zIn++;
 | |
| 				/* UTF-8 stream */
 | |
| 				while(zIn < pStream->zEnd && ((zIn[0] & 0xc0) == 0x80)) {
 | |
| 					zIn++;
 | |
| 				}
 | |
| 			}
 | |
| 			/* Skip alphanumeric stream */
 | |
| 			while(zIn < pStream->zEnd && zIn[0] < 0xc0 && (SyisAlphaNum(zIn[0]) || zIn[0] == '_')) {
 | |
| 				zIn++;
 | |
| 			}
 | |
| 			if(zIn == pStream->zText) {
 | |
| 				/* Not an UTF-8 or alphanumeric stream */
 | |
| 				break;
 | |
| 			}
 | |
| 			/* Synchronize pointers */
 | |
| 			pStream->zText = zIn;
 | |
| 		}
 | |
| 		/* Record token length */
 | |
| 		pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
 | |
| 		nKeyword = KeywordCode(pStr->zString, (int)pStr->nByte);
 | |
| 		if(nKeyword != PH7_TK_ID) {
 | |
| 			if(nKeyword &
 | |
| 					(PH7_KEYWORD_NEW | PH7_KEYWORD_CLONE | PH7_KEYWORD_IS)) {
 | |
| 				/* Alpha stream operators [i.e: new,clone,instanceof],save the operator instance for later processing */
 | |
| 				pToken->pUserData = (void *)PH7_ExprExtractOperator(pStr, 0);
 | |
| 				/* Mark as an operator */
 | |
| 				pToken->nType = PH7_TK_ID | PH7_TK_OP;
 | |
| 			} else {
 | |
| 				/* We are dealing with a keyword [i.e: while,foreach,class...],save the keyword ID */
 | |
| 				pToken->nType = PH7_TK_KEYWORD;
 | |
| 				pToken->pUserData = SX_INT_TO_PTR(nKeyword);
 | |
| 			}
 | |
| 		} else {
 | |
| 			/* A simple identifier */
 | |
| 			pToken->nType = PH7_TK_ID;
 | |
| 		}
 | |
| 	} else {
 | |
| 		sxi32 c;
 | |
| 		/* Non-alpha stream */
 | |
| 		if(pStream->zText[0] == '#' ||
 | |
| 				(pStream->zText[0] == '/' &&  &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '/')) {
 | |
| 			pStream->zText++;
 | |
| 			/* Inline comments */
 | |
| 			while(pStream->zText < pStream->zEnd && pStream->zText[0] != '\n') {
 | |
| 				pStream->zText++;
 | |
| 			}
 | |
| 			/* Tell the upper-layer to ignore this token */
 | |
| 			return SXERR_CONTINUE;
 | |
| 		} else if(pStream->zText[0] == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '*') {
 | |
| 			pStream->zText += 2;
 | |
| 			/* Block comment */
 | |
| 			while(pStream->zText < pStream->zEnd) {
 | |
| 				if(pStream->zText[0] == '*') {
 | |
| 					if(&pStream->zText[1] >= pStream->zEnd || pStream->zText[1] == '/') {
 | |
| 						break;
 | |
| 					}
 | |
| 				}
 | |
| 				if(pStream->zText[0] == '\n') {
 | |
| 					pStream->nLine++;
 | |
| 				}
 | |
| 				pStream->zText++;
 | |
| 			}
 | |
| 			pStream->zText += 2;
 | |
| 			/* Tell the upper-layer to ignore this token */
 | |
| 			return SXERR_CONTINUE;
 | |
| 		} else if(SyisDigit(pStream->zText[0])) {
 | |
| 			pStream->zText++;
 | |
| 			/* Decimal digit stream */
 | |
| 			while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0])) {
 | |
| 				pStream->zText++;
 | |
| 			}
 | |
| 			/* Mark the token as integer until we encounter a real number */
 | |
| 			pToken->nType = PH7_TK_INTEGER;
 | |
| 			if(pStream->zText < pStream->zEnd) {
 | |
| 				c = pStream->zText[0];
 | |
| 				if(c == '.') {
 | |
| 					/* Real number */
 | |
| 					pStream->zText++;
 | |
| 					while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0])) {
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 					if(pStream->zText < pStream->zEnd) {
 | |
| 						c = pStream->zText[0];
 | |
| 						if(c == 'e' || c == 'E') {
 | |
| 							pStream->zText++;
 | |
| 							if(pStream->zText < pStream->zEnd) {
 | |
| 								c = pStream->zText[0];
 | |
| 								if((c == '+' || c == '-') && &pStream->zText[1] < pStream->zEnd  &&
 | |
| 										pStream->zText[1] < 0xc0 && SyisDigit(pStream->zText[1])) {
 | |
| 									pStream->zText++;
 | |
| 								}
 | |
| 								while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0])) {
 | |
| 									pStream->zText++;
 | |
| 								}
 | |
| 							}
 | |
| 						}
 | |
| 					}
 | |
| 					pToken->nType = PH7_TK_REAL;
 | |
| 				} else if(c == 'e' || c == 'E') {
 | |
| 					SXUNUSED(pUserData); /* Prevent compiler warning */
 | |
| 					SXUNUSED(pCtxData);
 | |
| 					pStream->zText++;
 | |
| 					if(pStream->zText < pStream->zEnd) {
 | |
| 						c = pStream->zText[0];
 | |
| 						if((c == '+' || c == '-') && &pStream->zText[1] < pStream->zEnd  &&
 | |
| 								pStream->zText[1] < 0xc0 && SyisDigit(pStream->zText[1])) {
 | |
| 							pStream->zText++;
 | |
| 						}
 | |
| 						while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0])) {
 | |
| 							pStream->zText++;
 | |
| 						}
 | |
| 					}
 | |
| 					pToken->nType = PH7_TK_REAL;
 | |
| 				} else if(c == 'x' || c == 'X') {
 | |
| 					/* Hex digit stream */
 | |
| 					pStream->zText++;
 | |
| 					while(pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisHex(pStream->zText[0])) {
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				} else if(c  == 'b' || c == 'B') {
 | |
| 					/* Binary digit stream */
 | |
| 					pStream->zText++;
 | |
| 					while(pStream->zText < pStream->zEnd && (pStream->zText[0] == '0' || pStream->zText[0] == '1')) {
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 			/* Record token length */
 | |
| 			pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
 | |
| 			return SXRET_OK;
 | |
| 		}
 | |
| 		c = pStream->zText[0];
 | |
| 		pStream->zText++; /* Advance the stream cursor */
 | |
| 		/* Assume we are dealing with an operator*/
 | |
| 		pToken->nType = PH7_TK_OP;
 | |
| 		switch(c) {
 | |
| 			case '$':
 | |
| 				pToken->nType = PH7_TK_DOLLAR;
 | |
| 				break;
 | |
| 			case '{':
 | |
| 				pToken->nType = PH7_TK_OCB;
 | |
| 				break;
 | |
| 			case '}':
 | |
| 				pToken->nType = PH7_TK_CCB;
 | |
| 				break;
 | |
| 			case '(':
 | |
| 				pToken->nType = PH7_TK_LPAREN;
 | |
| 				break;
 | |
| 			case '[':
 | |
| 				pToken->nType |= PH7_TK_OSB;
 | |
| 				break; /* Bitwise operation here,since the square bracket token '['
 | |
| 														 * is a potential operator [i.e: subscripting] */
 | |
| 			case ']':
 | |
| 				pToken->nType = PH7_TK_CSB;
 | |
| 				break;
 | |
| 			case ')': {
 | |
| 					SySet *pTokSet = pStream->pSet;
 | |
| 					/* Assemble type cast operators [i.e: (int),(float),(bool)...] */
 | |
| 					if(pTokSet->nUsed >= 2) {
 | |
| 						SyToken *pTmp;
 | |
| 						/* Peek the last recognized token */
 | |
| 						pTmp = (SyToken *)SySetPeek(pTokSet);
 | |
| 						if(pTmp->nType & PH7_TK_KEYWORD) {
 | |
| 							sxi32 nID = SX_PTR_TO_INT(pTmp->pUserData);
 | |
| 							if((sxu32)nID & (PH7_KEYWORD_INT | PH7_KEYWORD_FLOAT | PH7_KEYWORD_STRING | PH7_KEYWORD_OBJECT | PH7_KEYWORD_BOOL | PH7_KEYWORD_CHAR | PH7_KEYWORD_CALLBACK | PH7_KEYWORD_RESOURCE | PH7_KEYWORD_VOID)) {
 | |
| 								pTmp = (SyToken *)SySetAt(pTokSet, pTokSet->nUsed - 2);
 | |
| 								if(pTmp->nType & PH7_TK_LPAREN) {
 | |
| 									/* Merge the three tokens '(' 'TYPE' ')' into a single one */
 | |
| 									const char *zTypeCast = "(int)";
 | |
| 									if(nID & PH7_KEYWORD_FLOAT) {
 | |
| 										zTypeCast = "(float)";
 | |
| 									} else if(nID & PH7_KEYWORD_BOOL) {
 | |
| 										zTypeCast = "(bool)";
 | |
| 									} else if(nID & PH7_KEYWORD_CHAR) {
 | |
| 										zTypeCast = "(char)";
 | |
| 									} else if(nID & PH7_KEYWORD_STRING) {
 | |
| 										zTypeCast = "(string)";
 | |
| 									} else if(nID & PH7_KEYWORD_OBJECT) {
 | |
| 										zTypeCast = "(object)";
 | |
| 									} else if(nID & PH7_KEYWORD_CALLBACK) {
 | |
| 										zTypeCast = "(callback)";
 | |
| 									} else if(nID & PH7_KEYWORD_RESOURCE) {
 | |
| 										zTypeCast = "(resource)";
 | |
| 									} else if(nID & PH7_KEYWORD_VOID) {
 | |
| 										zTypeCast = "(void)";
 | |
| 									}
 | |
| 									/* Reflect the change */
 | |
| 									pToken->nType = PH7_TK_OP;
 | |
| 									SyStringInitFromBuf(&pToken->sData, zTypeCast, SyStrlen(zTypeCast));
 | |
| 									/* Save the instance associated with the type cast operator */
 | |
| 									pToken->pUserData = (void *)PH7_ExprExtractOperator(&pToken->sData, 0);
 | |
| 									/* Remove the two previous tokens */
 | |
| 									pTokSet->nUsed -= 2;
 | |
| 									return SXRET_OK;
 | |
| 								}
 | |
| 							}
 | |
| 						}
 | |
| 					}
 | |
| 					pToken->nType = PH7_TK_RPAREN;
 | |
| 					break;
 | |
| 				}
 | |
| 			case '\'': {
 | |
| 					/* Single quoted string */
 | |
| 					pStr->zString++;
 | |
| 					while(pStream->zText < pStream->zEnd) {
 | |
| 						if(pStream->zText[0] == '\'') {
 | |
| 							if(pStream->zText[-1] != '\\') {
 | |
| 								break;
 | |
| 							} else {
 | |
| 								const unsigned char *zPtr = &pStream->zText[-2];
 | |
| 								sxi32 i = 1;
 | |
| 								while(zPtr > pStream->zInput && zPtr[0] == '\\') {
 | |
| 									zPtr--;
 | |
| 									i++;
 | |
| 								}
 | |
| 								if((i & 1) == 0) {
 | |
| 									break;
 | |
| 								}
 | |
| 							}
 | |
| 						}
 | |
| 						if(pStream->zText[0] == '\n') {
 | |
| 							pStream->nLine++;
 | |
| 						}
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 					/* Record token length and type */
 | |
| 					pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
 | |
| 					pToken->nType = PH7_TK_SSTR;
 | |
| 					/* Jump the trailing single quote */
 | |
| 					pStream->zText++;
 | |
| 					return SXRET_OK;
 | |
| 				}
 | |
| 			case '"': {
 | |
| 					sxi32 iNest;
 | |
| 					/* Double quoted string */
 | |
| 					pStr->zString++;
 | |
| 					while(pStream->zText < pStream->zEnd) {
 | |
| 						if(pStream->zText[0] == '{' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '$') {
 | |
| 							iNest = 1;
 | |
| 							pStream->zText++;
 | |
| 							/* TICKET 1433-40: Handle braces'{}' in double quoted string where everything is allowed */
 | |
| 							while(pStream->zText < pStream->zEnd) {
 | |
| 								if(pStream->zText[0] == '{') {
 | |
| 									iNest++;
 | |
| 								} else if(pStream->zText[0] == '}') {
 | |
| 									iNest--;
 | |
| 									if(iNest <= 0) {
 | |
| 										pStream->zText++;
 | |
| 										break;
 | |
| 									}
 | |
| 								} else if(pStream->zText[0] == '\n') {
 | |
| 									pStream->nLine++;
 | |
| 								}
 | |
| 								pStream->zText++;
 | |
| 							}
 | |
| 							if(pStream->zText >= pStream->zEnd) {
 | |
| 								break;
 | |
| 							}
 | |
| 						}
 | |
| 						if(pStream->zText[0] == '"') {
 | |
| 							if(pStream->zText[-1] != '\\') {
 | |
| 								break;
 | |
| 							} else {
 | |
| 								const unsigned char *zPtr = &pStream->zText[-2];
 | |
| 								sxi32 i = 1;
 | |
| 								while(zPtr > pStream->zInput && zPtr[0] == '\\') {
 | |
| 									zPtr--;
 | |
| 									i++;
 | |
| 								}
 | |
| 								if((i & 1) == 0) {
 | |
| 									break;
 | |
| 								}
 | |
| 							}
 | |
| 						}
 | |
| 						if(pStream->zText[0] == '\n') {
 | |
| 							pStream->nLine++;
 | |
| 						}
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 					/* Record token length and type */
 | |
| 					pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
 | |
| 					pToken->nType = PH7_TK_DSTR;
 | |
| 					/* Jump the trailing quote */
 | |
| 					pStream->zText++;
 | |
| 					return SXRET_OK;
 | |
| 				}
 | |
| 			case '\\':
 | |
| 				pToken->nType = PH7_TK_NSSEP;
 | |
| 				break;
 | |
| 			case '?':
 | |
| 				if(pStream->zText < pStream->zEnd && pStream->zText[0] == '?') {
 | |
| 					/* Current operator '??' */
 | |
| 					pStream->zText++;
 | |
| 				}
 | |
| 				break;
 | |
| 			case ':':
 | |
| 				if(pStream->zText < pStream->zEnd && pStream->zText[0] == ':') {
 | |
| 					/* Current operator: '::' */
 | |
| 					pStream->zText++;
 | |
| 				} else {
 | |
| 					pToken->nType = PH7_TK_COLON; /* Single colon */
 | |
| 				}
 | |
| 				break;
 | |
| 			case ',':
 | |
| 				pToken->nType |= PH7_TK_COMMA;
 | |
| 				break; /* Comma is also an operator */
 | |
| 			case ';':
 | |
| 				pToken->nType = PH7_TK_SEMI;
 | |
| 				break;
 | |
| 			/* Handle combined operators [i.e: +=,==,!= ...] */
 | |
| 			case '=':
 | |
| 				pToken->nType |= PH7_TK_EQUAL;
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '=') {
 | |
| 						pToken->nType &= ~PH7_TK_EQUAL;
 | |
| 						/* Current operator: == */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '>') {
 | |
| 						/* Array operator: => */
 | |
| 						pToken->nType = PH7_TK_ARRAY_OP;
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '!':
 | |
| 				if(pStream->zText < pStream->zEnd && pStream->zText[0] == '=') {
 | |
| 					/* Current operator: != */
 | |
| 					pStream->zText++;
 | |
| 				}
 | |
| 				break;
 | |
| 			case '&':
 | |
| 				pToken->nType |= PH7_TK_AMPER;
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '&') {
 | |
| 						pToken->nType &= ~PH7_TK_AMPER;
 | |
| 						/* Current operator: && */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						pToken->nType &= ~PH7_TK_AMPER;
 | |
| 						/* Current operator: &= */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '|':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '|') {
 | |
| 						/* Current operator: || */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: |= */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '+':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '+') {
 | |
| 						/* Current operator: ++ */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: += */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '-':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '-') {
 | |
| 						/* Current operator: -- */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: -= */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '>') {
 | |
| 						/* Current operator: -> */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '*':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '*') {
 | |
| 						/* Current operator: '**' */
 | |
| 						pStream->zText++;
 | |
| 						if(pStream->zText < pStream->zEnd) {
 | |
| 							if(pStream->zText[0] == '=') {
 | |
| 								/* Current operator: **= */
 | |
| 								pStream->zText++;
 | |
| 							}
 | |
| 						}
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: *= */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '/':
 | |
| 				if(pStream->zText < pStream->zEnd && pStream->zText[0] == '=') {
 | |
| 					/* Current operator: /= */
 | |
| 					pStream->zText++;
 | |
| 				}
 | |
| 				break;
 | |
| 			case '%':
 | |
| 				if(pStream->zText < pStream->zEnd && pStream->zText[0] == '=') {
 | |
| 					/* Current operator: %= */
 | |
| 					pStream->zText++;
 | |
| 				}
 | |
| 				break;
 | |
| 			case '^':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: ^= */
 | |
| 						pStream->zText++;
 | |
| 					} else if(pStream->zText[0] == '^') {
 | |
| 						/* Current operator: ^^ */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '<':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '<') {
 | |
| 						/* Current operator: << */
 | |
| 						pStream->zText++;
 | |
| 						if(pStream->zText < pStream->zEnd) {
 | |
| 							if(pStream->zText[0] == '=') {
 | |
| 								/* Current operator: <<= */
 | |
| 								pStream->zText++;
 | |
| 							}
 | |
| 						}
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: <= */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			case '>':
 | |
| 				if(pStream->zText < pStream->zEnd) {
 | |
| 					if(pStream->zText[0] == '>') {
 | |
| 						/* Current operator: >> */
 | |
| 						pStream->zText++;
 | |
| 						if(pStream->zText < pStream->zEnd && pStream->zText[0] == '=') {
 | |
| 							/* Current operator: >>= */
 | |
| 							pStream->zText++;
 | |
| 						}
 | |
| 					} else if(pStream->zText[0] == '=') {
 | |
| 						/* Current operator: >= */
 | |
| 						pStream->zText++;
 | |
| 					}
 | |
| 				}
 | |
| 				break;
 | |
| 			default:
 | |
| 				break;
 | |
| 		}
 | |
| 		if(pStr->nByte <= 0) {
 | |
| 			/* Record token length */
 | |
| 			pStr->nByte = (sxu32)((const char *)pStream->zText - pStr->zString);
 | |
| 		}
 | |
| 		if(pToken->nType & PH7_TK_OP) {
 | |
| 			const ph7_expr_op *pOp;
 | |
| 			/* Check if the extracted token is an operator */
 | |
| 			pOp = PH7_ExprExtractOperator(pStr, (SyToken *)SySetPeek(pStream->pSet));
 | |
| 			if(pOp == 0) {
 | |
| 				/* Not an operator */
 | |
| 				pToken->nType &= ~PH7_TK_OP;
 | |
| 				if(pToken->nType <= 0) {
 | |
| 					pToken->nType = PH7_TK_OTHER;
 | |
| 				}
 | |
| 			} else {
 | |
| 				/* Save the instance associated with this operator for later processing */
 | |
| 				pToken->pUserData = (void *)pOp;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	/* Tell the upper-layer to save the extracted token for later processing */
 | |
| 	return SXRET_OK;
 | |
| }
 | |
| 
 | |
| static sxu32 KeywordCode(const char *z, sxu32 n) {
 | |
| 	typedef struct {
 | |
| 		char *token;
 | |
| 		int value;
 | |
| 	} ph7_token;
 | |
| 	static ph7_token pTokenLookup[] = {
 | |
| 		/* Object-Oriented */
 | |
| 		{"catch", PH7_KEYWORD_CATCH},
 | |
| 		{"class", PH7_KEYWORD_CLASS},
 | |
| 		{"clone", PH7_KEYWORD_CLONE},
 | |
| 		{"extends", PH7_KEYWORD_EXTENDS},
 | |
| 		{"final", PH7_KEYWORD_FINAL},
 | |
| 		{"finally", PH7_KEYWORD_FINALLY},
 | |
| 		{"implements", PH7_KEYWORD_IMPLEMENTS},
 | |
| 		{"interface", PH7_KEYWORD_INTERFACE},
 | |
| 		{"namespace", PH7_KEYWORD_NAMESPACE},
 | |
| 		{"new", PH7_KEYWORD_NEW},
 | |
| 		{"throw", PH7_KEYWORD_THROW},
 | |
| 		{"try", PH7_KEYWORD_TRY},
 | |
| 		{"using", PH7_KEYWORD_USING},
 | |
| 		{"virtual", PH7_KEYWORD_VIRTUAL},
 | |
| 		/* Access modifiers */
 | |
| 		{"const", PH7_KEYWORD_CONST},
 | |
| 		{"private", PH7_KEYWORD_PRIVATE},
 | |
| 		{"protected", PH7_KEYWORD_PROTECTED},
 | |
| 		{"public", PH7_KEYWORD_PUBLIC},
 | |
| 		{"static", PH7_KEYWORD_STATIC},
 | |
| 		/* Data types */
 | |
| 		{"auto", PH7_KEYWORD_AUTO},
 | |
| 		{"bool", PH7_KEYWORD_BOOL},
 | |
| 		{"callback", PH7_KEYWORD_CALLBACK},
 | |
| 		{"char", PH7_KEYWORD_CHAR},
 | |
| 		{"float", PH7_KEYWORD_FLOAT},
 | |
| 		{"int", PH7_KEYWORD_INT},
 | |
| 		{"mixed", PH7_KEYWORD_MIXED},
 | |
| 		{"object", PH7_KEYWORD_OBJECT},
 | |
| 		{"resource", PH7_KEYWORD_RESOURCE},
 | |
| 		{"string", PH7_KEYWORD_STRING},
 | |
| 		{"void", PH7_KEYWORD_VOID},
 | |
| 		/* Loops & Controls */
 | |
| 		{"break", PH7_KEYWORD_BREAK},
 | |
| 		{"case", PH7_KEYWORD_CASE},
 | |
| 		{"continue", PH7_KEYWORD_CONTINUE},
 | |
| 		{"default", PH7_KEYWORD_DEFAULT},
 | |
| 		{"do", PH7_KEYWORD_DO},
 | |
| 		{"for", PH7_KEYWORD_FOR},
 | |
| 		{"foreach", PH7_KEYWORD_FOREACH},
 | |
| 		{"goto", PH7_KEYWORD_GOTO},
 | |
| 		{"switch", PH7_KEYWORD_SWITCH},
 | |
| 		{"else", PH7_KEYWORD_ELSE},
 | |
| 		{"if", PH7_KEYWORD_IF},
 | |
| 		{"in", PH7_KEYWORD_IN},
 | |
| 		{"while", PH7_KEYWORD_WHILE},
 | |
| 		/* Reserved keywords */
 | |
| 		{"define", PH7_KEYWORD_DEFINE},
 | |
| 		{"eval", PH7_KEYWORD_EVAL},
 | |
| 		{"exit", PH7_KEYWORD_EXIT},
 | |
| 		{"import", PH7_KEYWORD_IMPORT},
 | |
| 		{"include", PH7_KEYWORD_INCLUDE},
 | |
| 		{"is", PH7_KEYWORD_IS},
 | |
| 		{"require", PH7_KEYWORD_REQUIRE},
 | |
| 		{"return", PH7_KEYWORD_RETURN},
 | |
| 	};
 | |
| 	if(n < 2) {
 | |
| 		return PH7_TK_ID;
 | |
| 	} else {
 | |
| 		for(ph7_token *pToken = pTokenLookup; pToken != pTokenLookup + sizeof(pTokenLookup) / sizeof(pTokenLookup[0]); pToken++) {
 | |
| 			if(n == SyStrlen(pToken->token) && SyStrncmp(pToken->token, z, n) == 0) {
 | |
| 				return pToken->value;
 | |
| 			}
 | |
| 		}
 | |
| 		return PH7_TK_ID;
 | |
| 	}
 | |
| }
 | |
| /*
 | |
|  * Tokenize a raw PHP input.
 | |
|  * This is the public tokenizer called by most code generator routines.
 | |
|  */
 | |
| PH7_PRIVATE sxi32 PH7_TokenizeAerScript(const char *zInput, sxu32 nLen, sxu32 nLineStart, SySet *pOut) {
 | |
| 	SyLex sLexer;
 | |
| 	sxi32 rc;
 | |
| 	/* Initialize the lexer */
 | |
| 	rc = SyLexInit(&sLexer, &(*pOut), TokenizeAerScript, 0);
 | |
| 	if(rc != SXRET_OK) {
 | |
| 		return rc;
 | |
| 	}
 | |
| 	sLexer.sStream.nLine = nLineStart;
 | |
| 	/* Tokenize input */
 | |
| 	rc = SyLexTokenizeInput(&sLexer, zInput, nLen, 0, 0, 0);
 | |
| 	/* Release the lexer */
 | |
| 	SyLexRelease(&sLexer);
 | |
| 	/* Tokenization result */
 | |
| 	return rc;
 | |
| }
 |