Initial commit

author: Simon Rettberg 2013-10-15 19:24:01 +0200
committer: Simon Rettberg 2013-10-15 19:24:01 +0200
commit: 43e406068af8f2ae3d77301926bb5d31f392c961 (patch)
tree: a71f2fda66e789a6a1d2a9437bc1e37027e4ee93 /Mustache/Tokenizer.php
download: slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.tar.gz
slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.tar.xz
slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.zip
1 files changed, 286 insertions, 0 deletions
diff --git a/Mustache/Tokenizer.php b/Mustache/Tokenizer.php
new file mode 100644
index 00000000..fd866e30
--- /dev/null
+++ b/Mustache/Tokenizer.php
@@ -0,0 +1,286 @@
+<?php
+
+/*
+ * This file is part of Mustache.php.
+ *
+ * (c) 2012 Justin Hileman
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+/**
+ * Mustache Tokenizer class.
+ *
+ * This class is responsible for turning raw template source into a set of Mustache tokens.
+ *
+ * The tokenizer is a small state machine that walks the template one byte at
+ * a time. Text outside of tags is collected into T_TEXT tokens (one per line
+ * fragment, plus a separate "\n" token per line break); everything between an
+ * opening and a closing delimiter becomes a tag token. Lines that contain
+ * only whitespace and non-interpolated tags have their text tokens replaced
+ * by null entries, so the returned token list may contain null values.
+ */
+class Mustache_Tokenizer
+{
+
+    // Finite state machine states
+    const IN_TEXT     = 0; // outside of any tag, collecting literal text
+    const IN_TAG_TYPE = 1; // positioned on an opening delimiter, tag type not yet read
+    const IN_TAG      = 2; // inside a tag, collecting its name until the closing delimiter
+
+    // Token types
+    const T_SECTION      = '#';
+    const T_INVERTED     = '^';
+    const T_END_SECTION  = '/';
+    const T_COMMENT      = '!';
+    const T_PARTIAL      = '>';
+    const T_PARTIAL_2    = '<';
+    const T_DELIM_CHANGE = '=';
+    const T_ESCAPED      = '_v';
+    const T_UNESCAPED    = '{';
+    const T_UNESCAPED_2  = '&';
+    const T_TEXT         = '_t';
+
+    // Valid token types (T_TEXT is deliberately absent: it is not a tag)
+    private static $tagTypes = array(
+        self::T_SECTION      => true,
+        self::T_INVERTED     => true,
+        self::T_END_SECTION  => true,
+        self::T_COMMENT      => true,
+        self::T_PARTIAL      => true,
+        self::T_PARTIAL_2    => true,
+        self::T_DELIM_CHANGE => true,
+        self::T_ESCAPED      => true,
+        self::T_UNESCAPED    => true,
+        self::T_UNESCAPED_2  => true,
+    );
+
+    // Interpolated tags (their presence means a line is never "whitespace only")
+    private static $interpolatedTags = array(
+        self::T_ESCAPED      => true,
+        self::T_UNESCAPED    => true,
+        self::T_UNESCAPED_2  => true,
+    );
+
+    // Token properties
+    const TYPE   = 'type';
+    const NAME   = 'name';
+    const OTAG   = 'otag';
+    const CTAG   = 'ctag';
+    const INDEX  = 'index';
+    const END    = 'end';
+    const INDENT = 'indent';
+    const NODES  = 'nodes';
+    const VALUE  = 'value';
+
+    private $state;     // current state machine state (one of the IN_* constants)
+    private $tagType;   // type of the tag currently being read (one of the T_* constants)
+    private $tag;       // only ever reset to null; not read anywhere in this class
+    private $buffer;    // text or tag name collected since the last flush
+    private $tokens;    // tokens produced so far
+    private $seenTag;   // false, or the scan offset recorded for the last tag on the current line
+    private $lineStart; // index into $tokens of the first token of the current line
+    private $otag;      // current opening delimiter
+    private $ctag;      // current closing delimiter
+
+    /**
+     * Scan and tokenize template source.
+     *
+     * @param string $text       Mustache template source to tokenize
+     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
+     *
+     * @throws InvalidArgumentException when the initial delimiters, or a delimiter
+     *                                  change tag inside the template, are malformed
+     *
+     * @return array Set of Mustache tokens
+     */
+    public function scan($text, $delimiters = null)
+    {
+        $this->reset();
+
+        // Skip trim() entirely for the default null (passing null to trim()
+        // is deprecated as of PHP 8.1); an empty or blank string also means
+        // "keep the default delimiters".
+        if ($delimiters !== null) {
+            $delimiters = trim($delimiters);
+            if ($delimiters !== '') {
+                $this->setDelimiters($delimiters);
+            }
+        }
+
+        $len = strlen($text);
+        for ($i = 0; $i < $len; $i++) {
+            switch ($this->state) {
+                case self::IN_TEXT:
+                    if ($this->tagChange($this->otag, $text, $i)) {
+                        // Step back so that the loop increment re-visits this
+                        // same offset in the IN_TAG_TYPE state.
+                        $i--;
+                        $this->flushBuffer();
+                        $this->state = self::IN_TAG_TYPE;
+                    } else {
+                        if ($text[$i] === "\n") {
+                            $this->filterLine();
+                        } else {
+                            $this->buffer .= $text[$i];
+                        }
+                    }
+                    break;
+
+                case self::IN_TAG_TYPE:
+
+                    // Move to the last character of the opening delimiter.
+                    $i += strlen($this->otag) - 1;
+
+                    // The template may end right after the opening delimiter,
+                    // so the character holding the tag type may not exist.
+                    $next = isset($text[$i + 1]) ? $text[$i + 1] : null;
+                    if ($next !== null && isset(self::$tagTypes[$next])) {
+                        $tag = $next;
+                        $this->tagType = $tag;
+                    } else {
+                        // No type character: a plain (escaped) variable tag.
+                        $tag = null;
+                        $this->tagType = self::T_ESCAPED;
+                    }
+
+                    if ($this->tagType === self::T_DELIM_CHANGE) {
+                        // The whole delimiter change tag is consumed here and
+                        // produces no token of its own.
+                        $i = $this->changeDelimiters($text, $i);
+                        $this->state = self::IN_TEXT;
+                    } else {
+                        if ($tag !== null) {
+                            // Skip over the tag type character.
+                            $i++;
+                        }
+                        $this->state = self::IN_TAG;
+                    }
+                    $this->seenTag = $i;
+                    break;
+
+                default:
+                    // IN_TAG: collect the tag name until the closing delimiter.
+                    if ($this->tagChange($this->ctag, $text, $i)) {
+                        $this->tokens[] = array(
+                            self::TYPE  => $this->tagType,
+                            self::NAME  => trim($this->buffer),
+                            self::OTAG  => $this->otag,
+                            self::CTAG  => $this->ctag,
+                            // End-section tags record the offset at which the tag
+                            // starts; every other tag records the offset just
+                            // past its closing delimiter.
+                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - strlen($this->otag) : $i + strlen($this->ctag)
+                        );
+
+                        $this->buffer = '';
+                        $i += strlen($this->ctag) - 1;
+                        $this->state = self::IN_TEXT;
+                        if ($this->tagType === self::T_UNESCAPED) {
+                            if ($this->ctag === '}}') {
+                                // Consume the third brace of `}}}`, but only if it
+                                // is really there, so that `{{{a}}b` does not
+                                // silently swallow the `b`.
+                                if (isset($text[$i + 1]) && $text[$i + 1] === '}') {
+                                    $i++;
+                                }
+                            } else {
+                                // Clean up `{{{ tripleStache }}}` style tokens.
+                                $last     = count($this->tokens) - 1;
+                                $lastName = $this->tokens[$last][self::NAME];
+                                if (substr($lastName, -1) === '}') {
+                                    $this->tokens[$last][self::NAME] = trim(substr($lastName, 0, -1));
+                                }
+                            }
+                        }
+                    } else {
+                        $this->buffer .= $text[$i];
+                    }
+                    break;
+            }
+        }
+
+        // Handle the final line, which has no trailing newline to emit.
+        $this->filterLine(true);
+
+        return $this->tokens;
+    }
+
+    /**
+     * Helper function to reset tokenizer internal state.
+     *
+     * Called at the start of every scan, so one tokenizer instance can be
+     * reused for several templates; delimiters go back to `{{` and `}}`.
+     */
+    private function reset()
+    {
+        $this->state     = self::IN_TEXT;
+        $this->tagType   = null;
+        $this->tag       = null;
+        $this->buffer    = '';
+        $this->tokens    = array();
+        $this->seenTag   = false;
+        $this->lineStart = 0;
+        $this->otag      = '{{';
+        $this->ctag      = '}}';
+    }
+
+    /**
+     * Flush the current buffer to a token.
+     *
+     * Appends a T_TEXT token holding the buffered text and clears the buffer.
+     * Nothing is appended when the buffer is the empty string.
+     */
+    private function flushBuffer()
+    {
+        // Compare against '' rather than using empty(): empty('0') is true,
+        // which would leave a literal "0" in the buffer, where it would leak
+        // into the next tag name or be lost at the end of the template.
+        if ($this->buffer !== '') {
+            $this->tokens[] = array(self::TYPE  => self::T_TEXT, self::VALUE => $this->buffer);
+            $this->buffer   = '';
+        }
+    }
+
+    /**
+     * Test whether the current line is entirely made up of whitespace.
+     *
+     * Only the tokens from the start of the current line onwards are checked.
+     * Interpolated tags and text containing a non-whitespace character make
+     * the line non-blank; all other tags are ignored.
+     *
+     * @return boolean True if the current line is all whitespace
+     */
+    private function lineIsWhitespace()
+    {
+        $tokensCount = count($this->tokens);
+        for ($j = $this->lineStart; $j < $tokensCount; $j++) {
+            $token = $this->tokens[$j];
+            if (isset(self::$tagTypes[$token[self::TYPE]])) {
+                if (isset(self::$interpolatedTags[$token[self::TYPE]])) {
+                    return false;
+                }
+            } elseif ($token[self::TYPE] === self::T_TEXT) {
+                if (preg_match('/\S/', $token[self::VALUE])) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Filter out whitespace-only lines and store indent levels for partials.
+     *
+     * If a tag was seen on the current line and the line is otherwise blank,
+     * every text token of the line is replaced by null (after copying its
+     * value to the INDENT property of a T_PARTIAL token that directly follows
+     * it) and no newline token is emitted. Otherwise a "\n" text token is
+     * appended unless $noNewLine is set. Either way, a new line starts after
+     * the call.
+     *
+     * @param bool $noNewLine Suppress the newline? (default: false)
+     */
+    private function filterLine($noNewLine = false)
+    {
+        $this->flushBuffer();
+        if ($this->seenTag && $this->lineIsWhitespace()) {
+            $tokensCount = count($this->tokens);
+            for ($j = $this->lineStart; $j < $tokensCount; $j++) {
+                if ($this->tokens[$j][self::TYPE] === self::T_TEXT) {
+                    if (isset($this->tokens[$j+1]) && $this->tokens[$j+1][self::TYPE] === self::T_PARTIAL) {
+                        $this->tokens[$j+1][self::INDENT] = $this->tokens[$j][self::VALUE];
+                    }
+
+                    $this->tokens[$j] = null;
+                }
+            }
+        } elseif (!$noNewLine) {
+            $this->tokens[] = array(self::TYPE => self::T_TEXT, self::VALUE => "\n");
+        }
+
+        $this->seenTag   = false;
+        $this->lineStart = count($this->tokens);
+    }
+
+    /**
+     * Set the current delimiters from a "<otag> <ctag>" specification.
+     *
+     * The two delimiters may be separated by any amount of whitespace.
+     *
+     * @param string $delimiters Opening and closing delimiter, whitespace separated
+     *
+     * @throws InvalidArgumentException when the specification does not consist
+     *                                  of exactly two delimiters
+     */
+    private function setDelimiters($delimiters)
+    {
+        $parts = preg_split('/\s+/', trim($delimiters), -1, PREG_SPLIT_NO_EMPTY);
+        if (!is_array($parts) || count($parts) !== 2) {
+            throw new InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
+        }
+
+        $this->otag = $parts[0];
+        $this->ctag = $parts[1];
+    }
+
+    /**
+     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
+     *
+     * Expects $index to be on the last character of the opening delimiter of
+     * a delimiter change tag such as `{{=<% %>=}}`.
+     *
+     * @param string $text  Mustache template source
+     * @param int    $index Current tokenizer index
+     *
+     * @throws InvalidArgumentException when the tag is not closed by `=` followed
+     *                                  by the current closing delimiter, or when
+     *                                  it does not contain exactly two delimiters
+     *
+     * @return int New index value
+     */
+    private function changeDelimiters($text, $index)
+    {
+        $startIndex = strpos($text, '=', $index) + 1;
+        // Must be computed before the delimiters change: the tag is closed by
+        // the *current* closing delimiter.
+        $close      = '='.$this->ctag;
+        // Search after the leading `=` so that it can never be mistaken for
+        // the start of the closing sequence.
+        $closeIndex = strpos($text, $close, $startIndex);
+
+        if ($closeIndex === false) {
+            throw new InvalidArgumentException(sprintf('Unclosed delimiter change tag, expected "%s"', $close));
+        }
+
+        $this->setDelimiters(substr($text, $startIndex, $closeIndex - $startIndex));
+
+        // Index of the last character of the closing sequence; the scan loop
+        // increment then moves on to the first character after the tag.
+        return $closeIndex + strlen($close) - 1;
+    }
+
+    /**
+     * Test whether it's time to change tags.
+     *
+     * @param string $tag   Delimiter to look for (current opening or closing delimiter)
+     * @param string $text  Mustache template source
+     * @param int    $index Current tokenizer index
+     *
+     * @return boolean True if $text contains $tag at exactly offset $index
+     */
+    private function tagChange($tag, $text, $index)
+    {
+        return substr($text, $index, strlen($tag)) === $tag;
+    }
+}
author	Simon Rettberg	2013-10-15 19:24:01 +0200
committer	Simon Rettberg	2013-10-15 19:24:01 +0200
commit	43e406068af8f2ae3d77301926bb5d31f392c961 (patch)
tree	a71f2fda66e789a6a1d2a9437bc1e37027e4ee93 /Mustache/Tokenizer.php
download	slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.tar.gz slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.tar.xz slx-admin-43e406068af8f2ae3d77301926bb5d31f392c961.zip