1 files changed, 408 insertions, 286 deletions
diff --git a/Mustache/Tokenizer.php b/Mustache/Tokenizer.php
index fd866e3..d96f129 100644
--- a/Mustache/Tokenizer.php
+++ b/Mustache/Tokenizer.php
@@ -1,286 +1,408 @@
-<?php
-
-/*
- * This file is part of Mustache.php.
- *
- * (c) 2012 Justin Hileman
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
-
-/**
- * Mustache Tokenizer class.
- *
- * This class is responsible for turning raw template source into a set of Mustache tokens.
- */
-class Mustache_Tokenizer
-{
-
-    // Finite state machine states
-    const IN_TEXT     = 0;
-    const IN_TAG_TYPE = 1;
-    const IN_TAG      = 2;
-
-    // Token types
-    const T_SECTION      = '#';
-    const T_INVERTED     = '^';
-    const T_END_SECTION  = '/';
-    const T_COMMENT      = '!';
-    const T_PARTIAL      = '>';
-    const T_PARTIAL_2    = '<';
-    const T_DELIM_CHANGE = '=';
-    const T_ESCAPED      = '_v';
-    const T_UNESCAPED    = '{';
-    const T_UNESCAPED_2  = '&';
-    const T_TEXT         = '_t';
-
-    // Valid token types
-    private static $tagTypes = array(
-        self::T_SECTION      => true,
-        self::T_INVERTED     => true,
-        self::T_END_SECTION  => true,
-        self::T_COMMENT      => true,
-        self::T_PARTIAL      => true,
-        self::T_PARTIAL_2    => true,
-        self::T_DELIM_CHANGE => true,
-        self::T_ESCAPED      => true,
-        self::T_UNESCAPED    => true,
-        self::T_UNESCAPED_2  => true,
-    );
-
-    // Interpolated tags
-    private static $interpolatedTags = array(
-        self::T_ESCAPED      => true,
-        self::T_UNESCAPED    => true,
-        self::T_UNESCAPED_2  => true,
-    );
-
-    // Token properties
-    const TYPE   = 'type';
-    const NAME   = 'name';
-    const OTAG   = 'otag';
-    const CTAG   = 'ctag';
-    const INDEX  = 'index';
-    const END    = 'end';
-    const INDENT = 'indent';
-    const NODES  = 'nodes';
-    const VALUE  = 'value';
-
-    private $state;
-    private $tagType;
-    private $tag;
-    private $buffer;
-    private $tokens;
-    private $seenTag;
-    private $lineStart;
-    private $otag;
-    private $ctag;
-
-    /**
-     * Scan and tokenize template source.
-     *
-     * @param string $text       Mustache template source to tokenize
-     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
-     *
-     * @return array Set of Mustache tokens
-     */
-    public function scan($text, $delimiters = null)
-    {
-        $this->reset();
-
-        if ($delimiters = trim($delimiters)) {
-            list($otag, $ctag) = explode(' ', $delimiters);
-            $this->otag = $otag;
-            $this->ctag = $ctag;
-        }
-
-        $len = strlen($text);
-        for ($i = 0; $i < $len; $i++) {
-            switch ($this->state) {
-                case self::IN_TEXT:
-                    if ($this->tagChange($this->otag, $text, $i)) {
-                        $i--;
-                        $this->flushBuffer();
-                        $this->state = self::IN_TAG_TYPE;
-                    } else {
-                        if ($text[$i] == "\n") {
-                            $this->filterLine();
-                        } else {
-                            $this->buffer .= $text[$i];
-                        }
-                    }
-                    break;
-
-                case self::IN_TAG_TYPE:
-
-                    $i += strlen($this->otag) - 1;
-                    if (isset(self::$tagTypes[$text[$i + 1]])) {
-                        $tag = $text[$i + 1];
-                        $this->tagType = $tag;
-                    } else {
-                        $tag = null;
-                        $this->tagType = self::T_ESCAPED;
-                    }
-
-                    if ($this->tagType === self::T_DELIM_CHANGE) {
-                        $i = $this->changeDelimiters($text, $i);
-                        $this->state = self::IN_TEXT;
-                    } else {
-                        if ($tag !== null) {
-                            $i++;
-                        }
-                        $this->state = self::IN_TAG;
-                    }
-                    $this->seenTag = $i;
-                    break;
-
-                default:
-                    if ($this->tagChange($this->ctag, $text, $i)) {
-                        $this->tokens[] = array(
-                            self::TYPE  => $this->tagType,
-                            self::NAME  => trim($this->buffer),
-                            self::OTAG  => $this->otag,
-                            self::CTAG  => $this->ctag,
-                            self::INDEX => ($this->tagType == self::T_END_SECTION) ? $this->seenTag - strlen($this->otag) : $i + strlen($this->ctag)
-                        );
-
-                        $this->buffer = '';
-                        $i += strlen($this->ctag) - 1;
-                        $this->state = self::IN_TEXT;
-                        if ($this->tagType == self::T_UNESCAPED) {
-                            if ($this->ctag == '}}') {
-                                $i++;
-                            } else {
-                                // Clean up `{{{ tripleStache }}}` style tokens.
-                                $lastName = $this->tokens[count($this->tokens) - 1][self::NAME];
-                                if (substr($lastName, -1) === '}') {
-                                    $this->tokens[count($this->tokens) - 1][self::NAME] = trim(substr($lastName, 0, -1));
-                                }
-                            }
-                        }
-                    } else {
-                        $this->buffer .= $text[$i];
-                    }
-                    break;
-            }
-        }
-
-        $this->filterLine(true);
-
-        return $this->tokens;
-    }
-
-    /**
-     * Helper function to reset tokenizer internal state.
-     */
-    private function reset()
-    {
-        $this->state     = self::IN_TEXT;
-        $this->tagType   = null;
-        $this->tag       = null;
-        $this->buffer    = '';
-        $this->tokens    = array();
-        $this->seenTag   = false;
-        $this->lineStart = 0;
-        $this->otag      = '{{';
-        $this->ctag      = '}}';
-    }
-
-    /**
-     * Flush the current buffer to a token.
-     */
-    private function flushBuffer()
-    {
-        if (!empty($this->buffer)) {
-            $this->tokens[] = array(self::TYPE  => self::T_TEXT, self::VALUE => $this->buffer);
-            $this->buffer   = '';
-        }
-    }
-
-    /**
-     * Test whether the current line is entirely made up of whitespace.
-     *
-     * @return boolean True if the current line is all whitespace
-     */
-    private function lineIsWhitespace()
-    {
-        $tokensCount = count($this->tokens);
-        for ($j = $this->lineStart; $j < $tokensCount; $j++) {
-            $token = $this->tokens[$j];
-            if (isset(self::$tagTypes[$token[self::TYPE]])) {
-                if (isset(self::$interpolatedTags[$token[self::TYPE]])) {
-                    return false;
-                }
-            } elseif ($token[self::TYPE] == self::T_TEXT) {
-                if (preg_match('/\S/', $token[self::VALUE])) {
-                    return false;
-                }
-            }
-        }
-
-        return true;
-    }
-
-    /**
-     * Filter out whitespace-only lines and store indent levels for partials.
-     *
-     * @param bool $noNewLine Suppress the newline? (default: false)
-     */
-    private function filterLine($noNewLine = false)
-    {
-        $this->flushBuffer();
-        if ($this->seenTag && $this->lineIsWhitespace()) {
-            $tokensCount = count($this->tokens);
-            for ($j = $this->lineStart; $j < $tokensCount; $j++) {
-                if ($this->tokens[$j][self::TYPE] == self::T_TEXT) {
-                    if (isset($this->tokens[$j+1]) && $this->tokens[$j+1][self::TYPE] == self::T_PARTIAL) {
-                        $this->tokens[$j+1][self::INDENT] = $this->tokens[$j][self::VALUE];
-                    }
-
-                    $this->tokens[$j] = null;
-                }
-            }
-        } elseif (!$noNewLine) {
-            $this->tokens[] = array(self::TYPE => self::T_TEXT, self::VALUE => "\n");
-        }
-
-        $this->seenTag   = false;
-        $this->lineStart = count($this->tokens);
-    }
-
-    /**
-     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
-     *
-     * @param string $text  Mustache template source
-     * @param int    $index Current tokenizer index
-     *
-     * @return int New index value
-     */
-    private function changeDelimiters($text, $index)
-    {
-        $startIndex = strpos($text, '=', $index) + 1;
-        $close      = '='.$this->ctag;
-        $closeIndex = strpos($text, $close, $index);
-
-        list($otag, $ctag) = explode(' ', trim(substr($text, $startIndex, $closeIndex - $startIndex)));
-        $this->otag = $otag;
-        $this->ctag = $ctag;
-
-        return $closeIndex + strlen($close) - 1;
-    }
-
-    /**
-     * Test whether it's time to change tags.
-     *
-     * @param string $tag   Current tag name
-     * @param string $text  Mustache template source
-     * @param int    $index Current tokenizer index
-     *
-     * @return boolean True if this is a closing section tag
-     */
-    private function tagChange($tag, $text, $index)
-    {
-        return substr($text, $index, strlen($tag)) === $tag;
-    }
-}
+<?php
+
+/*
+ * This file is part of Mustache.php.
+ *
+ * (c) 2010-2017 Justin Hileman
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+/**
+ * Mustache Tokenizer class.
+ *
+ * This class is responsible for turning raw template source into a set of Mustache tokens.
+ */
+class Mustache_Tokenizer
+{
+    // Finite state machine states
+    const IN_TEXT     = 0;
+    const IN_TAG_TYPE = 1;
+    const IN_TAG      = 2;
+
+    // Token types
+    const T_SECTION      = '#';
+    const T_INVERTED     = '^';
+    const T_END_SECTION  = '/';
+    const T_COMMENT      = '!';
+    const T_PARTIAL      = '>';
+    const T_PARENT       = '<';
+    const T_DELIM_CHANGE = '=';
+    const T_ESCAPED      = '_v';
+    const T_UNESCAPED    = '{';
+    const T_UNESCAPED_2  = '&';
+    const T_TEXT         = '_t';
+    const T_PRAGMA       = '%';
+    const T_BLOCK_VAR    = '$';
+    const T_BLOCK_ARG    = '$arg';
+
+    // Valid token types
+    private static $tagTypes = array(
+        self::T_SECTION      => true,
+        self::T_INVERTED     => true,
+        self::T_END_SECTION  => true,
+        self::T_COMMENT      => true,
+        self::T_PARTIAL      => true,
+        self::T_PARENT       => true,
+        self::T_DELIM_CHANGE => true,
+        self::T_ESCAPED      => true,
+        self::T_UNESCAPED    => true,
+        self::T_UNESCAPED_2  => true,
+        self::T_PRAGMA       => true,
+        self::T_BLOCK_VAR    => true,
+    );
+
+    private static $tagNames = array(
+        self::T_SECTION      => 'section',
+        self::T_INVERTED     => 'inverted section',
+        self::T_END_SECTION  => 'section end',
+        self::T_COMMENT      => 'comment',
+        self::T_PARTIAL      => 'partial',
+        self::T_PARENT       => 'parent',
+        self::T_DELIM_CHANGE => 'set delimiter',
+        self::T_ESCAPED      => 'variable',
+        self::T_UNESCAPED    => 'unescaped variable',
+        self::T_UNESCAPED_2  => 'unescaped variable',
+        self::T_PRAGMA       => 'pragma',
+        self::T_BLOCK_VAR    => 'block variable',
+        self::T_BLOCK_ARG    => 'block variable',
+    );
+
+    // Token properties
+    const TYPE    = 'type';
+    const NAME    = 'name';
+    const DYNAMIC = 'dynamic';
+    const OTAG    = 'otag';
+    const CTAG    = 'ctag';
+    const LINE    = 'line';
+    const INDEX   = 'index';
+    const END     = 'end';
+    const INDENT  = 'indent';
+    const NODES   = 'nodes';
+    const VALUE   = 'value';
+    const FILTERS = 'filters';
+
+    private $state;
+    private $tagType;
+    private $buffer;
+    private $tokens;
+    private $seenTag;
+    private $line;
+
+    private $otag;
+    private $otagChar;
+    private $otagLen;
+
+    private $ctag;
+    private $ctagChar;
+    private $ctagLen;
+
+    /**
+     * Scan and tokenize template source.
+     *
+     * @throws Mustache_Exception_SyntaxException when mismatched tag delimiters or unclosed tags are encountered
+     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
+     *
+     * @param string $text       Mustache template source to tokenize
+     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: empty string)
+     *
+     * @return array Set of Mustache tokens
+     */
+    public function scan($text, $delimiters = '')
+    {
+        // Setting mbstring.func_overload makes things *really* slow.
+        // Let's do everyone a favor and scan this string as ASCII instead.
+        //
+        // The INI directive was removed in PHP 8.0 so we don't need to check there (and can drop it
+        // when we remove support for older versions of PHP).
+        //
+        // @codeCoverageIgnoreStart
+        $encoding = null;
+        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
+            if (function_exists('mb_internal_encoding') && ini_get('mbstring.func_overload') & 2) {
+                $encoding = mb_internal_encoding();
+                mb_internal_encoding('ASCII');
+            }
+        }
+        // @codeCoverageIgnoreEnd
+
+        try {
+            $this->reset();
+
+            if (is_string($delimiters) && $delimiters = trim($delimiters)) {
+                $this->setDelimiters($delimiters);
+            }
+
+            $len = strlen($text);
+            for ($i = 0; $i < $len; $i++) {
+                switch ($this->state) {
+                    case self::IN_TEXT:
+                        $char = $text[$i];
+                        // Test whether it's time to change tags.
+                        if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
+                            $i--;
+                            $this->flushBuffer();
+                            $this->state = self::IN_TAG_TYPE;
+                        } else {
+                            $this->buffer .= $char;
+                            if ($char === "\n") {
+                                $this->flushBuffer();
+                                $this->line++;
+                            }
+                        }
+                        break;
+
+                    case self::IN_TAG_TYPE:
+                        $i += $this->otagLen - 1;
+                        // The template may end right after the opening delimiter; don't read past it.
+                        $char = ($i + 1 < $len) ? $text[$i + 1] : '';
+                        $tag = isset(self::$tagTypes[$char]) ? $char : null;
+                        $this->tagType = ($tag === null) ? self::T_ESCAPED : $tag;
+                        if ($this->tagType === self::T_DELIM_CHANGE) {
+                            $i = $this->changeDelimiters($text, $i);
+                            $this->state = self::IN_TEXT;
+                        } elseif ($this->tagType === self::T_PRAGMA) {
+                            $i = $this->addPragma($text, $i);
+                            $this->state = self::IN_TEXT;
+                        } else {
+                            if ($tag !== null) {
+                                $i++;
+                            }
+                            $this->state = self::IN_TAG;
+                        }
+                        $this->seenTag = $i;
+                        break;
+
+                    default:
+                        $char = $text[$i];
+                        // Test whether it's time to change tags.
+                        if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
+                            $token = array(
+                                self::TYPE  => $this->tagType,
+                                self::NAME  => trim($this->buffer),
+                                self::OTAG  => $this->otag,
+                                self::CTAG  => $this->ctag,
+                                self::LINE  => $this->line,
+                                self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
+                            );
+
+                            if ($this->tagType === self::T_UNESCAPED) {
+                                // Clean up `{{{ tripleStache }}}` style tokens.
+                                if ($this->ctag === '}}') {
+                                    $closed = ($i + 2 < $len) && $text[$i + 2] === '}';
+                                    if ($closed) {
+                                        $i++;
+                                    }
+                                } else {
+                                    $closed = substr($token[self::NAME], -1) === '}';
+                                    if ($closed) {
+                                        $token[self::NAME] = trim(substr($token[self::NAME], 0, -1));
+                                    }
+                                }
+
+                                if (!$closed) {
+                                    $msg = sprintf(
+                                        'Mismatched tag delimiters: %s on line %d',
+                                        $token[self::NAME],
+                                        $token[self::LINE]
+                                    );
+
+                                    throw new Mustache_Exception_SyntaxException($msg, $token);
+                                }
+                            }
+
+                            $this->buffer = '';
+                            $i += $this->ctagLen - 1;
+                            $this->state = self::IN_TEXT;
+                            $this->tokens[] = $token;
+                        } else {
+                            $this->buffer .= $char;
+                        }
+                        break;
+                }
+            }
+
+            if ($this->state !== self::IN_TEXT) {
+                $this->throwUnclosedTagException();
+            }
+
+            $this->flushBuffer();
+        } catch (Exception $e) {
+            // Restore the user's encoding before the error propagates...
+            // @codeCoverageIgnoreStart
+            if ($encoding) {
+                mb_internal_encoding($encoding);
+            }
+            // @codeCoverageIgnoreEnd
+            throw $e;
+        }
+
+        // Restore the user's encoding...
+        // @codeCoverageIgnoreStart
+        if ($encoding) {
+            mb_internal_encoding($encoding);
+        }
+        // @codeCoverageIgnoreEnd
+
+        return $this->tokens;
+    }
+
+    /**
+     * Put the tokenizer back into its initial state, with the default `{{` / `}}` delimiters.
+     */
+    private function reset()
+    {
+        $this->otag     = '{{';
+        $this->otagChar = '{';
+        $this->otagLen  = 2;
+        $this->ctag     = '}}';
+        $this->ctagChar = '}';
+        $this->ctagLen  = 2;
+
+        // Scanner bookkeeping: start in text mode, on line 0, with nothing buffered or emitted.
+        $this->line    = 0;
+        $this->seenTag = false;
+        $this->tagType = null;
+        $this->state   = self::IN_TEXT;
+        $this->buffer  = '';
+        $this->tokens  = array();
+    }
+
+    /**
+     * Emit any buffered text as a T_TEXT token and clear the buffer.
+     */
+    private function flushBuffer()
+    {
+        // Nothing buffered, nothing to emit.
+        if (strlen($this->buffer) === 0) {
+            return;
+        }
+
+        $text = $this->buffer;
+        $this->buffer = '';
+        $this->tokens[] = array(self::TYPE => self::T_TEXT, self::LINE => $this->line, self::VALUE => $text);
+    }
+
+    /**
+     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
+     *
+     * @throws Mustache_Exception_SyntaxException when the tag is unclosed or the delimiter string is invalid
+     *
+     * @param string $text  Mustache template source
+     * @param int    $index Current tokenizer index
+     *
+     * @return int New index value
+     */
+    private function changeDelimiters($text, $index)
+    {
+        $startIndex = strpos($text, '=', $index) + 1;
+        $close      = '=' . $this->ctag;
+        $closeIndex = strpos($text, $close, $index);
+
+        if ($closeIndex === false) {
+            $this->throwUnclosedTagException();
+        }
+
+        // An empty tag like `{{=}}` puts the close marker *before* $startIndex. Clamp the length
+        // so substr() can't take a negative length and slurp up the text after the tag instead.
+        $length = max(0, $closeIndex - $startIndex);
+        $token  = array(self::TYPE => self::T_DELIM_CHANGE, self::LINE => $this->line);
+
+        try {
+            $this->setDelimiters(trim(substr($text, $startIndex, $length)));
+        } catch (Mustache_Exception_InvalidArgumentException $e) {
+            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
+        }
+
+        $this->tokens[] = $token;
+
+        return $closeIndex + strlen($close) - 1;
+    }
+
+    /**
+     * Parse an "otag ctag" pair and make it the active delimiter set.
+     *
+     * @throws Mustache_Exception_InvalidArgumentException when the string is not two whitespace-separated tags
+     *
+     * @param string $delimiters
+     */
+    private function setDelimiters($delimiters)
+    {
+        $valid = preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $parts);
+        if (!$valid) {
+            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
+        }
+
+        // Alongside each tag, keep its first character and its length.
+        $this->otag     = $parts[1];
+        $this->otagChar = $parts[1][0];
+        $this->otagLen  = strlen($parts[1]);
+
+        $this->ctag     = $parts[2];
+        $this->ctagChar = $parts[2][0];
+        $this->ctagLen  = strlen($parts[2]);
+    }
+
+    /**
+     * Consume a pragma tag and record it as a pragma token.
+     *
+     * The token is prepended to the token list rather than appended (pragmas
+     * are hoisted to the front of the template) and is always stamped with line 0.
+     *
+     * @param string $text  Mustache template source
+     * @param int    $index Current tokenizer index
+     *
+     * @return int New index value
+     */
+    private function addPragma($text, $index)
+    {
+        $closeIndex = strpos($text, $this->ctag, $index);
+        if ($closeIndex === false) {
+            $this->throwUnclosedTagException();
+        }
+
+        // scan() found the `%` marker at $index + 1, so the pragma name starts one past it.
+        $nameStart = $index + 2;
+        $token = array(
+            self::TYPE => self::T_PRAGMA,
+            self::NAME => trim(substr($text, $nameStart, $closeIndex - $nameStart)),
+            self::LINE => 0,
+        );
+        array_unshift($this->tokens, $token);
+
+        return $closeIndex + $this->ctagLen - 1;
+    }
+
+
+    /**
+     * Throw a Mustache_Exception_SyntaxException for an unclosed tag, named after the buffered text (if any).
+     */
+    private function throwUnclosedTagException()
+    {
+        $name = trim($this->buffer);
+        $msg  = ($name === '')
+            ? sprintf('Unclosed tag on line %d', $this->line)
+            : sprintf('Unclosed tag: %s on line %d', $name, $this->line);
+        throw new Mustache_Exception_SyntaxException($msg, array(
+            self::TYPE  => $this->tagType,
+            self::NAME  => $name,
+            self::OTAG  => $this->otag,
+            self::CTAG  => $this->ctag,
+            self::LINE  => $this->line,
+            self::INDEX => $this->seenTag - $this->otagLen,
+        ));
+    }
+
+    /**
+     * Get the human readable name for a tag type.
+     *
+     * @param string $tagType One of the tokenizer T_* constants
+     *
+     * @return string Human readable tag name, or 'unknown' for an unrecognized type
+     */
+    public static function getTagName($tagType)
+    {
+        return isset(self::$tagNames[$tagType]) ? self::$tagNames[$tagType] : 'unknown';
+    }
+}