From ab23338fe9f1b3ed21455867f1c032d7b146ceb8 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 2 Mar 2015 16:51:04 +0100 Subject: Initial Commit --- Mustache/Tokenizer.php | 286 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 Mustache/Tokenizer.php (limited to 'Mustache/Tokenizer.php') diff --git a/Mustache/Tokenizer.php b/Mustache/Tokenizer.php new file mode 100644 index 0000000..fd866e3 --- /dev/null +++ b/Mustache/Tokenizer.php @@ -0,0 +1,286 @@ +'; + const T_PARTIAL_2 = '<'; + const T_DELIM_CHANGE = '='; + const T_ESCAPED = '_v'; + const T_UNESCAPED = '{'; + const T_UNESCAPED_2 = '&'; + const T_TEXT = '_t'; + + // Valid token types + private static $tagTypes = array( + self::T_SECTION => true, + self::T_INVERTED => true, + self::T_END_SECTION => true, + self::T_COMMENT => true, + self::T_PARTIAL => true, + self::T_PARTIAL_2 => true, + self::T_DELIM_CHANGE => true, + self::T_ESCAPED => true, + self::T_UNESCAPED => true, + self::T_UNESCAPED_2 => true, + ); + + // Interpolated tags + private static $interpolatedTags = array( + self::T_ESCAPED => true, + self::T_UNESCAPED => true, + self::T_UNESCAPED_2 => true, + ); + + // Token properties + const TYPE = 'type'; + const NAME = 'name'; + const OTAG = 'otag'; + const CTAG = 'ctag'; + const INDEX = 'index'; + const END = 'end'; + const INDENT = 'indent'; + const NODES = 'nodes'; + const VALUE = 'value'; + + private $state; + private $tagType; + private $tag; + private $buffer; + private $tokens; + private $seenTag; + private $lineStart; + private $otag; + private $ctag; + + /** + * Scan and tokenize template source. + * + * @param string $text Mustache template source to tokenize + * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null) + * + * @return array Set of Mustache tokens + */ + public function scan($text, $delimiters = null) + { + $this->reset(); + + if ($delimiters = trim($delimiters)) { + list($otag, $ctag) = explode(' ', $delimiters); + $this->otag = $otag; + $this->ctag = $ctag; + } + + $len = strlen($text); + for ($i = 0; $i < $len; $i++) { + switch ($this->state) { + case self::IN_TEXT: + if ($this->tagChange($this->otag, $text, $i)) { + $i--; + $this->flushBuffer(); + $this->state = self::IN_TAG_TYPE; + } else { + if ($text[$i] == "\n") { + $this->filterLine(); + } else { + $this->buffer .= $text[$i]; + } + } + break; + + case self::IN_TAG_TYPE: + + $i += strlen($this->otag) - 1; + if (isset(self::$tagTypes[$text[$i + 1]])) { + $tag = $text[$i + 1]; + $this->tagType = $tag; + } else { + $tag = null; + $this->tagType = self::T_ESCAPED; + } + + if ($this->tagType === self::T_DELIM_CHANGE) { + $i = $this->changeDelimiters($text, $i); + $this->state = self::IN_TEXT; + } else { + if ($tag !== null) { + $i++; + } + $this->state = self::IN_TAG; + } + $this->seenTag = $i; + break; + + default: + if ($this->tagChange($this->ctag, $text, $i)) { + $this->tokens[] = array( + self::TYPE => $this->tagType, + self::NAME => trim($this->buffer), + self::OTAG => $this->otag, + self::CTAG => $this->ctag, + self::INDEX => ($this->tagType == self::T_END_SECTION) ? $this->seenTag - strlen($this->otag) : $i + strlen($this->ctag) + ); + + $this->buffer = ''; + $i += strlen($this->ctag) - 1; + $this->state = self::IN_TEXT; + if ($this->tagType == self::T_UNESCAPED) { + if ($this->ctag == '}}') { + $i++; + } else { + // Clean up `{{{ tripleStache }}}` style tokens. + $lastName = $this->tokens[count($this->tokens) - 1][self::NAME]; + if (substr($lastName, -1) === '}') { + $this->tokens[count($this->tokens) - 1][self::NAME] = trim(substr($lastName, 0, -1)); + } + } + } + } else { + $this->buffer .= $text[$i]; + } + break; + } + } + + $this->filterLine(true); + + return $this->tokens; + } + + /** + * Helper function to reset tokenizer internal state. + */ + private function reset() + { + $this->state = self::IN_TEXT; + $this->tagType = null; + $this->tag = null; + $this->buffer = ''; + $this->tokens = array(); + $this->seenTag = false; + $this->lineStart = 0; + $this->otag = '{{'; + $this->ctag = '}}'; + } + + /** + * Flush the current buffer to a token. + */ + private function flushBuffer() + { + if (!empty($this->buffer)) { + $this->tokens[] = array(self::TYPE => self::T_TEXT, self::VALUE => $this->buffer); + $this->buffer = ''; + } + } + + /** + * Test whether the current line is entirely made up of whitespace. + * + * @return boolean True if the current line is all whitespace + */ + private function lineIsWhitespace() + { + $tokensCount = count($this->tokens); + for ($j = $this->lineStart; $j < $tokensCount; $j++) { + $token = $this->tokens[$j]; + if (isset(self::$tagTypes[$token[self::TYPE]])) { + if (isset(self::$interpolatedTags[$token[self::TYPE]])) { + return false; + } + } elseif ($token[self::TYPE] == self::T_TEXT) { + if (preg_match('/\S/', $token[self::VALUE])) { + return false; + } + } + } + + return true; + } + + /** + * Filter out whitespace-only lines and store indent levels for partials. + * + * @param bool $noNewLine Suppress the newline? (default: false) + */ + private function filterLine($noNewLine = false) + { + $this->flushBuffer(); + if ($this->seenTag && $this->lineIsWhitespace()) { + $tokensCount = count($this->tokens); + for ($j = $this->lineStart; $j < $tokensCount; $j++) { + if ($this->tokens[$j][self::TYPE] == self::T_TEXT) { + if (isset($this->tokens[$j+1]) && $this->tokens[$j+1][self::TYPE] == self::T_PARTIAL) { + $this->tokens[$j+1][self::INDENT] = $this->tokens[$j][self::VALUE]; + } + + $this->tokens[$j] = null; + } + } + } elseif (!$noNewLine) { + $this->tokens[] = array(self::TYPE => self::T_TEXT, self::VALUE => "\n"); + } + + $this->seenTag = false; + $this->lineStart = count($this->tokens); + } + + /** + * Change the current Mustache delimiters. Set new `otag` and `ctag` values. + * + * @param string $text Mustache template source + * @param int $index Current tokenizer index + * + * @return int New index value + */ + private function changeDelimiters($text, $index) + { + $startIndex = strpos($text, '=', $index) + 1; + $close = '='.$this->ctag; + $closeIndex = strpos($text, $close, $index); + + list($otag, $ctag) = explode(' ', trim(substr($text, $startIndex, $closeIndex - $startIndex))); + $this->otag = $otag; + $this->ctag = $ctag; + + return $closeIndex + strlen($close) - 1; + } + + /** + * Test whether it's time to change tags. + * + * @param string $tag Current tag name + * @param string $text Mustache template source + * @param int $index Current tokenizer index + * + * @return boolean True if this is a closing section tag + */ + private function tagChange($tag, $text, $index) + { + return substr($text, $index, strlen($tag)) === $tag; + } +} -- cgit v1.2.3-55-g7522