644 lines
21 KiB
PHP
644 lines
21 KiB
PHP
<?php
|
|
|
|
/* Copyright (c)
|
|
* - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
|
|
* (original author)
|
|
* - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
|
|
* - 2014 Daniel Lynge, highlight.php (contributor)
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
namespace Highlight;
|
|
|
|
class Highlighter
|
|
{
|
|
/** Closing tag appended for every span opened by the highlighter. */
const SPAN_END_TAG = "</span>";

/** @var array Rendering options; filled in by the constructor. */
private $options;

/** @var string Source text collected since the last flush into $result. */
private $modeBuffer = "";

/** @var string HTML produced so far by the current highlight() run. */
private $result = "";

/** @var object|null Mode currently on top of the mode stack. */
private $top = null;

/** @var Language|null Language definition used by the current highlight() run. */
private $language = null;

/** @var int Not read or written by any method of this class. */
private $keywordCount = 0;

/** @var int Relevance score accumulated by the current highlight() run. */
private $relevance = 0;

/** @var bool When true, illegal lexemes do not abort highlighting. */
private $ignoreIllegals = false;

/**
 * @var array Top mode returned by the last nested highlight of each
 *            explicit sub-language, keyed by sub-language name. Reset at
 *            the start of every highlight() call.
 *
 * Declared explicitly because highlight() and processSubLanguage() assign
 * and read it; without the declaration it would be created as a dynamic
 * property.
 */
private $continuations = array();

/** @var array Registered Language objects, keyed by language id. */
private static $classMap = array();

/** @var string[]|null Ids of all registered languages. */
private static $languages = null;

/** @var array|null Map of alias => language id. */
private static $aliases = null;

/** @var string[] Languages probed by highlightAuto() when no subset is given. */
private $autodetectSet = array(
    "xml", "json", "javascript", "css", "php", "http",
);
|
|
|
|
/**
 * Install the default rendering options and make sure the bundled language
 * definitions are registered.
 */
public function __construct()
{
    $defaults = array();
    $defaults['classPrefix'] = 'hljs-';
    $defaults['tabReplace'] = null;
    $defaults['useBR'] = false;
    $defaults['languages'] = null;

    $this->options = $defaults;

    self::registerLanguages();
}
|
|
|
|
/**
 * Register the language definitions found in the "languages" directory
 * next to this file and refresh the list of known language ids.
 *
 * A fixed set of languages is registered first so they take precedence in
 * the class map; every other readable "*.json" file follows in the order
 * the directory returns it.
 *
 * This method is invoked by the constructor and by setAutodetectLanguages(),
 * so the directory is only scanned until one scan has completed. Later
 * calls just rebuild the id list, which still picks up anything added
 * through registerLanguage() in the meantime.
 */
private static function registerLanguages()
{
    // Function-level static: remembers across calls whether the bundled
    // definitions have already been loaded.
    static $bundledLoaded = false;

    if (!$bundledLoaded) {
        $languagePath = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR;

        // Languages that take precedence in the classMap array.
        foreach (array("xml", "django", "javascript", "matlab", "cpp") as $languageId) {
            $filePath = $languagePath . $languageId . ".json";
            if (is_readable($filePath)) {
                self::registerLanguage($languageId, $filePath);
            }
        }

        $d = @dir($languagePath);
        if ($d) {
            while (($entry = $d->read()) !== false) {
                if (substr($entry, -5) === ".json") {
                    $languageId = substr($entry, 0, -5);
                    $filePath = $languagePath . $entry;
                    if (is_readable($filePath)) {
                        self::registerLanguage($languageId, $filePath);
                    }
                }
            }
            $d->close();

            // Only mark as done when the directory could actually be read,
            // so a failed attempt is retried on the next call.
            $bundledLoaded = true;
        }
    }

    self::$languages = array_keys(self::$classMap);
}
|
|
|
|
/**
 * Add a language definition to the static registry shared by all
 * Highlighter instances, together with any aliases the definition declares.
 * An already registered id is left untouched unless $overwrite is set.
 *
 * @param string $languageId The unique name of a language
 * @param string $filePath   The file path to the language definition
 * @param bool   $overwrite  Replace the definition if the id is already registered
 *
 * @return Language The definition stored under $languageId
 */
public static function registerLanguage($languageId, $filePath, $overwrite = false)
{
    $alreadyKnown = isset(self::$classMap[$languageId]);

    if ($alreadyKnown && !$overwrite) {
        return self::$classMap[$languageId];
    }

    $definition = new Language($languageId, $filePath);
    self::$classMap[$languageId] = $definition;

    if (isset($definition->mode->aliases)) {
        foreach ($definition->mode->aliases as $alias) {
            self::$aliases[$alias] = $languageId;
        }
    }

    return $definition;
}
|
|
|
|
/**
 * Tell whether $re matches $lexeme starting at offset 0.
 *
 * @param string|null $re     Complete PCRE pattern; an empty value never matches
 * @param string      $lexeme Text to test
 *
 * @throws \Exception if preg_match() reports a failure for $re
 *
 * @return bool
 */
private function testRe($re, $lexeme)
{
    if ($re) {
        $outcome = preg_match($re, $lexeme, $found, PREG_OFFSET_CAPTURE);
        if (false === $outcome) {
            throw new \Exception("Invalid regexp: " . var_export($re, true));
        }

        // A match only counts when it begins at the first byte.
        return count($found) && ($found[0][1] == 0);
    }

    return false;
}
|
|
|
|
/**
 * Turn a literal string into a complete multi-line PCRE pattern that
 * matches exactly that string.
 *
 * The "/" delimiter is passed to preg_quote() so that a slash inside
 * $value is escaped too; otherwise it would end the pattern early.
 *
 * @param string $value Literal text to match
 *
 * @return string Pattern of the form "/.../m"
 */
private function escapeRe($value)
{
    return sprintf('/%s/m', preg_quote($value, '/'));
}
|
|
|
|
/**
 * Find the first mode contained in $mode whose begin pattern matches
 * $lexeme at offset 0.
 *
 * For a mode flagged endSameAsBegin the end pattern is set to the literal
 * text its begin pattern just matched.
 *
 * @param string $lexeme Lexeme to test
 * @param object $mode   Mode whose "contains" list is searched
 *
 * @return object|null The matching contained mode, or null when none matches
 */
private function subMode($lexeme, $mode)
{
    for ($index = 0; $index < count($mode->contains); ++$index) {
        $candidate = $mode->contains[$index];

        if (!$this->testRe($candidate->beginRe, $lexeme)) {
            continue;
        }

        if ($candidate->endSameAsBegin) {
            $found = array();
            preg_match($candidate->beginRe, $lexeme, $found);

            $candidate->endRe = $this->escapeRe($found[0]);
        }

        return $candidate;
    }
}
|
|
|
|
/**
 * Determine which mode, if any, is ended by $lexeme.
 *
 * Starting at $mode, the end pattern is tested; while a mode is flagged
 * endsWithParent the search continues with its parent. Once an end pattern
 * matches, modes flagged endsParent hand the result on to their parent.
 *
 * @param object $mode   Mode to start from
 * @param string $lexeme Lexeme to test
 *
 * @return object|null The mode that ends, or null when $lexeme ends nothing
 */
private function endOfMode($mode, $lexeme)
{
    for ($candidate = $mode; ; $candidate = $candidate->parent) {
        if ($this->testRe($candidate->endRe, $lexeme)) {
            while ($candidate->endsParent && $candidate->parent) {
                $candidate = $candidate->parent;
            }

            return $candidate;
        }

        if (!$candidate->endsWithParent) {
            return null;
        }
    }
}
|
|
|
|
/**
 * Tell whether $lexeme is illegal inside $mode. Always false while illegal
 * lexemes are being ignored.
 *
 * @param string $lexeme Lexeme to test
 * @param object $mode   Mode providing the illegal pattern
 *
 * @return bool
 */
private function isIllegal($lexeme, $mode)
{
    if ($this->ignoreIllegals) {
        return false;
    }

    return $this->testRe($mode->illegalRe, $lexeme);
}
|
|
|
|
/**
 * Look up the matched text in the keyword table of $mode. The text is
 * lower-cased first when the current language is case-insensitive.
 *
 * @param object $mode  Mode providing the keyword table
 * @param array  $match Match entry whose element 0 is the matched text
 *
 * @return mixed The keyword table entry, or null when the text is no keyword
 */
private function keywordMatch($mode, $match)
{
    $word = $match[0];
    if ($this->language->caseInsensitive) {
        $word = mb_strtolower($word, "UTF-8");
    }

    if (isset($mode->keywords[$word])) {
        return $mode->keywords[$word];
    }

    return null;
}
|
|
|
|
/**
 * Wrap $insideSpan in a span carrying the given class name.
 *
 * @param string $classname  Class name; when empty, $insideSpan is returned unwrapped
 * @param string $insideSpan Already escaped content of the span
 * @param bool   $leaveOpen  Omit the closing tag
 * @param bool   $noPrefix   Do not prepend the configured class prefix
 *
 * @return string
 */
private function buildSpan($classname, $insideSpan, $leaveOpen = false, $noPrefix = false)
{
    if (!$classname) {
        return $insideSpan;
    }

    $prefix = $noPrefix ? "" : $this->options['classPrefix'];
    $html = "<span class=\"" . $prefix . $classname . "\">" . $insideSpan;

    if (!$leaveOpen) {
        $html .= self::SPAN_END_TAG;
    }

    return $html;
}
|
|
|
|
/**
 * Escape text for inclusion in the HTML output. Quotes are left as they are
 * (ENT_NOQUOTES).
 *
 * @param string $value Raw text
 *
 * @return string
 */
private function escape($value)
{
    $escaped = htmlspecialchars($value, ENT_NOQUOTES);

    return $escaped;
}
|
|
|
|
/**
 * Render the buffered text of the current mode, wrapping every lexeme that
 * is a keyword of that mode in a span and adding the keyword's relevance to
 * the running score. Everything else is only escaped.
 *
 * @return string HTML for the buffered text
 */
private function processKeywords()
{
    $mode = $this->top;
    $buffer = $this->modeBuffer;

    if (empty($mode->keywords)) {
        return $this->escape($buffer);
    }

    $html = "";
    $cursor = 0;

    /* TODO: when using the crystal language file on django and twigs code
     * the values of $this->top->lexemesRe can become "" (empty). Check
     * if this behaviour is consistent with highlight.js.
     */
    if ($mode->lexemesRe) {
        while (preg_match($mode->lexemesRe, $buffer, $found, PREG_OFFSET_CAPTURE, $cursor)) {
            list($word, $offset) = $found[0];

            // Plain text between the previous lexeme and this one.
            $html .= $this->escape(substr($buffer, $cursor, $offset - $cursor));

            $hit = $this->keywordMatch($mode, $found[0]);
            if ($hit) {
                $this->relevance += $hit[1];
                $html .= $this->buildSpan($hit[0], $this->escape($word));
            } else {
                $html .= $this->escape($word);
            }

            $cursor = $offset + strlen($word);
        }
    }

    return $html . $this->escape(substr($buffer, $cursor));
}
|
|
|
|
/**
 * Render the buffered text with a nested Highlighter, either for the one
 * sub-language named by the current mode or by auto-detection.
 *
 * The nested instance receives this instance's autodetect set and class
 * prefix, so embedded markup uses the same class names as the host markup.
 * If the named sub-language is not registered, or the nested run throws,
 * the buffer is returned escaped but unhighlighted.
 *
 * @return string HTML for the buffered text
 */
private function processSubLanguage()
{
    try {
        $hl = new Highlighter();
        $hl->setAutodetectLanguages($this->autodetectSet);
        // A fresh instance starts with the default prefix; pass on the
        // configured one.
        $hl->setClassPrefix($this->options['classPrefix']);

        $explicit = is_string($this->top->subLanguage);
        if ($explicit && !in_array($this->top->subLanguage, self::$languages)) {
            return $this->escape($this->modeBuffer);
        }

        if ($explicit) {
            $res = $hl->highlight(
                $this->top->subLanguage,
                $this->modeBuffer,
                true,
                isset($this->continuations[$this->top->subLanguage]) ? $this->continuations[$this->top->subLanguage] : null
            );
        } else {
            $res = $hl->highlightAuto(
                $this->modeBuffer,
                count($this->top->subLanguage) ? $this->top->subLanguage : null
            );
        }

        // Counting embedded language score towards the host language may
        // be disabled with zeroing the containing mode relevance. Usecase
        // in point is Markdown that allows XML everywhere and makes every
        // XML snippet to have a much larger Markdown score.
        if ($this->top->relevance > 0) {
            $this->relevance += $res->relevance;
        }
        if ($explicit) {
            // Remember where the nested run stopped so that the next chunk
            // of the same sub-language can continue from there.
            $this->continuations[$this->top->subLanguage] = $res->top;
        }

        // The wrapper span carries the bare language name, without prefix.
        return $this->buildSpan($res->language, $res->value, false, true);
    } catch (\Exception $e) {
        error_log("TODO, is this a relevant catch?");
        error_log($e);

        return $this->escape($this->modeBuffer);
    }
}
|
|
|
|
/**
 * Flush the buffered text into the result, rendered as a sub-language when
 * the current mode declares one and as keyword-highlighted text otherwise,
 * then empty the buffer.
 */
private function processBuffer()
{
    $isSubLanguage = is_object($this->top) && $this->top->subLanguage;

    $chunk = $isSubLanguage
        ? $this->processSubLanguage()
        : $this->processKeywords();

    $this->result .= $chunk;
    $this->modeBuffer = '';
}
|
|
|
|
/**
 * Enter $mode: open its span when it has a class name and push a copy of
 * the mode, linked to the current top through "parent", onto the stack.
 *
 * @param object $mode Mode to enter
 */
private function startNewMode($mode)
{
    if ($mode->className) {
        $this->result .= $this->buildSpan($mode->className, "", true);
    }

    // Work on a clone so that setting "parent" leaves the definition alone.
    $frame = clone $mode;
    $frame->parent = $this->top;

    $this->top = $frame;
}
|
|
|
|
/**
 * Process one lexeme found by highlight() together with the plain text that
 * precedes it.
 *
 * The lexeme either starts a mode contained in the current one, ends the
 * current mode (or one of its ancestors), is illegal in the current mode,
 * or is treated as plain text.
 *
 * @param string      $buffer Text between the previous lexeme and this one
 * @param string|null $lexeme The matched lexeme; null flushes the buffer only
 *
 * @throws \Exception if the lexeme is illegal and illegals are not ignored
 *
 * @return int Number of bytes the caller must skip after the lexeme's
 *             offset; 0 when the lexeme has to be scanned again
 */
private function processLexeme($buffer, $lexeme = null)
{
    $this->modeBuffer .= $buffer;

    if ($lexeme === null) {
        $this->processBuffer();

        return 0;
    }

    $newMode = $this->subMode($lexeme, $this->top);
    if ($newMode) {
        if ($newMode->skip) {
            $this->modeBuffer .= $lexeme;
        } else {
            if ($newMode->excludeBegin) {
                $this->modeBuffer .= $lexeme;
            }
            $this->processBuffer();
            if (!$newMode->returnBegin && !$newMode->excludeBegin) {
                $this->modeBuffer = $lexeme;
            }
        }
        $this->startNewMode($newMode);

        return $newMode->returnBegin ? 0 : strlen($lexeme);
    }

    $endMode = $this->endOfMode($this->top, $lexeme);
    if ($endMode) {
        $origin = $this->top;
        if ($origin->skip) {
            $this->modeBuffer .= $lexeme;
        } else {
            if (!($origin->returnEnd || $origin->excludeEnd)) {
                $this->modeBuffer .= $lexeme;
            }
            $this->processBuffer();
            if ($origin->excludeEnd) {
                $this->modeBuffer = $lexeme;
            }
        }

        // Unwind the stack up to and including the mode that ended. The
        // loop stops on the very stack frame that is the ended mode's
        // parent, hence the identity check: a loose comparison would
        // compare the mode objects property by property instead.
        do {
            if ($this->top->className) {
                $this->result .= self::SPAN_END_TAG;
            }
            if (!$this->top->skip && !$this->top->subLanguage) {
                $this->relevance += $this->top->relevance;
            }
            $this->top = $this->top->parent;
        } while ($this->top !== $endMode->parent);

        if ($endMode->starts) {
            if ($endMode->endSameAsBegin) {
                $endMode->starts->endRe = $endMode->endRe;
            }
            $this->startNewMode($endMode->starts);
        }

        return $origin->returnEnd ? 0 : strlen($lexeme);
    }

    if ($this->isIllegal($lexeme, $this->top)) {
        $className = $this->top->className ? $this->top->className : "unnamed";
        // highlight() recognises this exception by the word "Illegal".
        $err = "Illegal lexeme \"{$lexeme}\" for mode \"{$className}\"";

        throw new \Exception($err);
    }

    // Parser should not reach this point as all types of lexemes should
    // be caught earlier, but if it does due to some bug make sure it
    // advances at least one character forward to prevent infinite looping.
    $this->modeBuffer .= $lexeme;
    $length = strlen($lexeme);

    return $length ? $length : 1;
}
|
|
|
|
/**
 * Substitute every tab character in $code with the configured replacement.
 * Without a configured replacement $code is returned unchanged.
 *
 * @param string $code
 *
 * @return string
 */
private function replaceTabs($code)
{
    $replacement = $this->options['tabReplace'];

    if ($replacement === null) {
        return $code;
    }

    return str_replace("\t", $replacement, $code);
}
|
|
|
|
/**
 * Define which languages are probed during autodetection. Every language
 * in the set is tried on the code to highlight, so keeping the set small
 * speeds up highlighting considerably.
 *
 * @param array $set An array of language names to use for autodetection.
 *                   Duplicates are dropped. The default is a typical set
 *                   of web development languages.
 */
public function setAutodetectLanguages(array $set)
{
    $unique = array_unique($set);
    $this->autodetectSet = $unique;

    self::registerLanguages();
}
|
|
|
|
/**
 * Return the string that tabs are replaced with.
 *
 * @return string|null The replacement string, or null when tabs are kept
 */
public function getTabReplace()
{
    $replacement = $this->options['tabReplace'];

    return $replacement;
}
|
|
|
|
/**
 * Choose the string that tabs are replaced with. The initial value is
 * null, meaning tabs are left untouched.
 *
 * @param string $tabReplace The tab replacement string
 */
public function setTabReplace($tabReplace)
{
    $key = 'tabReplace';
    $this->options[$key] = $tabReplace;
}
|
|
|
|
/**
 * Return the prefix put in front of generated class names.
 *
 * @return string The class prefix string
 */
public function getClassPrefix()
{
    $prefix = $this->options['classPrefix'];

    return $prefix;
}
|
|
|
|
/**
 * Choose the prefix put in front of generated class names.
 *
 * @param string $classPrefix The class prefix string
 */
public function setClassPrefix($classPrefix)
{
    $key = 'classPrefix';
    $this->options[$key] = $classPrefix;
}
|
|
|
|
/**
 * Resolve a language id or alias to its registered definition.
 *
 * @param string $name Language id or alias
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 *
 * @return Language
 */
private function getLanguage($name)
{
    if (isset(self::$classMap[$name])) {
        return self::$classMap[$name];
    }

    // Not an id; see whether it is an alias of a registered language.
    $target = isset(self::$aliases[$name]) ? self::$aliases[$name] : null;
    if ($target !== null && isset(self::$classMap[$target])) {
        return self::$classMap[$target];
    }

    throw new \DomainException("Unknown language: $name");
}
|
|
|
|
/**
 * Tell whether the given language may take part in autodetection, i.e.
 * whether its definition does not set disableAutodetect.
 *
 * @param string $name Language name
 *
 * @throws \DomainException if the language is unknown
 *
 * @return bool
 */
private function autoDetection($name)
{
    $definition = $this->getLanguage($name);

    return !$definition->disableAutodetect;
}
|
|
|
|
/**
 * Highlight $code as the given language.
 *
 * The returned object carries:
 * - relevance (int)
 * - value (an HTML string with highlighting markup)
 * - language (name of the language used)
 * - top (the mode that was current when the input ended)
 *
 * When an illegal lexeme aborts the run, relevance is 0, value is the
 * escaped but unhighlighted code, language is empty and top is not set.
 *
 * @param string      $language       Language name or alias
 * @param string      $code           Code to highlight
 * @param bool        $ignoreIllegals Keep going when an illegal lexeme is met
 * @param object|null $continuation   Mode to resume from instead of the
 *                                    language's root mode
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 * @throws \Exception       if an invalid regex was given in a language file
 *
 * @return \stdClass
 */
public function highlight($language, $code, $ignoreIllegals = true, $continuation = null)
{
    $this->language = $this->getLanguage($language);
    $this->language->compile();
    $this->top = $continuation ?: $this->language->mode;
    $this->continuations = array();
    $this->result = "";

    // When resuming, re-open the spans of every mode still on the stack,
    // outermost first.
    $frame = $this->top;
    while ($frame != $this->language->mode) {
        if ($frame->className) {
            $this->result = $this->buildSpan($frame->className, '', true) . $this->result;
        }
        $frame = $frame->parent;
    }

    $this->modeBuffer = "";
    $this->relevance = 0;
    $this->ignoreIllegals = $ignoreIllegals;

    $outcome = new \stdClass();
    $outcome->relevance = 0;
    $outcome->value = "";
    $outcome->language = "";

    try {
        $found = null;
        $consumed = 0;
        $offset = 0;

        while ($this->top && $this->top->terminators) {
            $status = @preg_match($this->top->terminators, $code, $found, PREG_OFFSET_CAPTURE, $offset);
            if ($status === false) {
                throw new \Exception("Invalid " . $this->language->name . " regExp " . var_export($this->top->terminators, true));
            }
            if ($status === 0) {
                break;
            }

            $lexemeStart = $found[0][1];
            $consumed = $this->processLexeme(substr($code, $offset, $lexemeStart - $offset), $found[0][0]);
            $offset = $lexemeStart + $consumed;
        }

        // Flush whatever follows the last lexeme.
        $this->processLexeme(substr($code, $offset));

        // Close the spans of all modes that are still open.
        $frame = $this->top;
        while (isset($frame->parent)) {
            if ($frame->className) {
                $this->result .= self::SPAN_END_TAG;
            }
            $frame = $frame->parent;
        }

        $outcome->relevance = $this->relevance;
        $outcome->value = $this->replaceTabs($this->result);
        $outcome->language = $this->language->name;
        $outcome->top = $this->top;

        return $outcome;
    } catch (\Exception $e) {
        // Illegal lexemes are reported through the exception message.
        if (strpos($e->getMessage(), "Illegal") === false) {
            throw $e;
        }

        $outcome->value = $this->escape($code);

        return $outcome;
    }
}
|
|
|
|
/**
 * Highlight $code with every candidate language and return the result with
 * the highest relevance. When a runner-up exists it is attached to the
 * result as "secondBest".
 *
 * Unknown languages and languages that disable autodetection are skipped.
 * If no candidate scores above 0 the result holds the escaped code with an
 * empty language.
 *
 * @param string        $code
 * @param string[]|null $languageSubset Languages to try; when empty, the
 *                                      set configured through
 *                                      setAutodetectLanguages() is used
 *
 * @throws \Exception if an invalid regex was given in a language file
 *
 * @return \stdClass
 */
public function highlightAuto($code, $languageSubset = null)
{
    $best = new \stdClass();
    $best->relevance = 0;
    $best->value = $this->escape($code);
    $best->language = "";
    $runnerUp = clone $best;

    $candidates = $languageSubset ?: $this->autodetectSet;

    foreach ($candidates as $name) {
        // don't fail if we run into a non-existent language
        try {
            // skip any languages that don't support auto detection
            if (!$this->autoDetection($name)) {
                continue;
            }

            $attempt = $this->highlight($name, $code, false);
        } catch (\DomainException $e) {
            continue;
        }

        if ($attempt->relevance > $runnerUp->relevance) {
            $runnerUp = $attempt;
        }
        if ($attempt->relevance > $best->relevance) {
            // The previous winner becomes the runner-up.
            $runnerUp = $best;
            $best = $attempt;
        }
    }

    if ($runnerUp->language) {
        $best->secondBest = $runnerUp;
    }

    return $best;
}
|
|
|
|
/**
 * Return a list of all supported languages. Using this list in
 * setAutodetectLanguages will turn on autodetection for all supported
 * languages.
 *
 * @param bool $include_aliases specify whether language aliases
 *                              should be included as well
 *
 * @return string[] An array of language names
 */
public function listLanguages($include_aliases = false)
{
    if ($include_aliases === true) {
        // The alias map stays null until a registered language declares an
        // alias, and array_keys() does not accept null.
        $aliasNames = self::$aliases === null ? array() : array_keys(self::$aliases);

        return array_merge(self::$languages, $aliasNames);
    }

    return self::$languages;
}
|
|
|
|
/**
 * List every name the given language is known by.
 *
 * @param string $language name or alias of language to look-up
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 *
 * @return string[] The language's own name followed by its aliases, if any
 */
public function getAliasesForLanguage($language)
{
    $definition = $this->getLanguage($language);

    $aliases = $definition->aliases;
    $names = array($definition->name);

    return $aliases === null ? $names : array_merge($names, $aliases);
}
|
|
}
|