347 lines
11 KiB
PHP
347 lines
11 KiB
PHP
<?php
|
|
|
|
/* Copyright (c)
|
|
* - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
|
|
* (original author)
|
|
* - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
|
|
* - 2014 Daniel Lynge, highlight.php (contributor)
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
namespace Highlight;
|
|
|
|
/**
 * A single language definition: loads the JSON grammar from disk and compiles
 * its mode tree into the PCRE patterns and keyword tables stored on each mode.
 */
class Language
{
    /**
     * @var bool value of the definition's `disableAutodetect` key (false when absent)
     */
    public $disableAutodetect = false;

    /**
     * @var bool value of the definition's `case_insensitive` key (false when absent);
     *           adds the `i` flag to every pattern and lowercases keywords
     */
    public $caseInsensitive = false;

    /**
     * @var mixed value of the definition's `aliases` key, or null when absent
     */
    public $aliases = null;

    /**
     * @var string|null language name as passed to the constructor
     */
    public $name = null;

    /**
     * Declared explicitly: assigning an undeclared property is deprecated as
     * of PHP 8.2.
     *
     * @var \stdClass|null decoded definition; compile() replaces it with the
     *                     reference-resolved, compiled mode tree
     */
    public $mode = null;

    /**
     * Normalises a mode in place.
     *
     * A null/unset argument is replaced by an empty object. For objects, the
     * regex-source fields are rewritten so that every `/` is written as `\/`
     * (patterns are later wrapped in `/` delimiters by langRe()), and every
     * mode field that is not set receives its default value. Non-object
     * values are left untouched.
     *
     * @param \stdClass|null $e the mode to normalise (modified by reference)
     */
    public function complete(&$e)
    {
        if (!isset($e)) {
            $e = new \stdClass();
        }

        if (!is_object($e)) {
            return;
        }

        // Fields holding regex source that must have their delimiter escaped.
        $patch = array("begin", "end", "lexemes", "illegal");

        $def = array(
            "begin" => "",
            "beginRe" => "",
            "beginKeywords" => "",
            "excludeBegin" => "",
            "returnBegin" => "",
            "end" => "",
            "endRe" => "",
            "endSameAsBegin" => "",
            "endsParent" => "",
            "endsWithParent" => "",
            "excludeEnd" => "",
            "returnEnd" => "",
            "starts" => "",
            "terminators" => "",
            "terminatorEnd" => "",
            "lexemes" => "",
            "lexemesRe" => "",
            "illegal" => "",
            "illegalRe" => "",
            "className" => "",
            "contains" => array(),
            "keywords" => null,
            "subLanguage" => null,
            "subLanguageMode" => "",
            "compiled" => false,
            "relevance" => 1,
            "skip" => false,
        );

        foreach ($patch as $k) {
            if (isset($e->$k)) {
                // Un-escape first so already escaped slashes are not escaped twice.
                $e->$k = str_replace("\\/", "/", $e->$k);
                $e->$k = str_replace("/", "\\/", $e->$k);
            }
        }

        foreach ($def as $k => $v) {
            if (!isset($e->$k)) {
                $e->$k = $v;
            }
        }
    }

    /**
     * Reads and decodes the JSON definition and copies its top-level flags
     * onto this object. Read or decode failures are not detected here: the
     * mode is then simply left as null.
     *
     * @param string $lang     language name
     * @param string $filePath path of the JSON definition file
     */
    public function __construct($lang, $filePath)
    {
        $json = file_get_contents($filePath);
        $this->mode = json_decode($json);

        $this->name = $lang;
        $this->aliases = isset($this->mode->aliases) ? $this->mode->aliases : null;

        $this->caseInsensitive = isset($this->mode->case_insensitive) ? $this->mode->case_insensitive : false;
        $this->disableAutodetect = isset($this->mode->disableAutodetect) ? $this->mode->disableAutodetect : false;
    }

    /**
     * Wraps regex source in delimiters and flags.
     *
     * @param string $value  regex source with `/` already escaped
     * @param bool   $global accepted for call-site compatibility; not used here
     *
     * @return string the delimited pattern
     */
    private function langRe($value, $global = false)
    {
        // PCRE allows us to change the definition of "new line." The
        // `(*ANYCRLF)` matches `\r`, `\n`, and `\r\n` for `$`
        //
        // https://www.pcre.org/original/doc/html/pcrepattern.html

        return "/(*ANYCRLF){$value}/um" . ($this->caseInsensitive ? "i" : "");
    }

    /**
     * Splits space-separated keyword strings into lists, lowercasing them
     * first when the language is case insensitive.
     *
     * @param string|\stdClass $kw a keyword string, or an object mapping class
     *                             names to keyword strings (updated in place)
     *
     * @return array|\stdClass `array("keyword" => list)` for string input,
     *                         otherwise the same object with string values split
     */
    private function processKeyWords($kw)
    {
        if (is_string($kw)) {
            if ($this->caseInsensitive) {
                $kw = mb_strtolower($kw, "UTF-8");
            }

            return array("keyword" => explode(" ", $kw));
        }

        foreach ($kw as $cls => $vl) {
            if (is_array($vl)) {
                continue;
            }
            if ($this->caseInsensitive) {
                $vl = mb_strtolower($vl, "UTF-8");
            }
            $kw->$cls = explode(" ", $vl);
        }

        return $kw;
    }

    /**
     * Shallow-merges all arguments, left to right, into a new object; later
     * arguments overwrite earlier ones. Arguments may be objects or arrays.
     *
     * @return \stdClass
     */
    private function inherit()
    {
        $result = new \stdClass();

        foreach (func_get_args() as $source) {
            foreach ($source as $key => $value) {
                $result->{$key} = $value;
            }
        }

        return $result;
    }

    /**
     * Expands a mode into the list of modes that replaces it inside a parent's
     * `contains`: one copy per variant, a private copy for `endsWithParent`
     * modes, or the mode itself.
     *
     * @param \stdClass $mode
     *
     * @return array
     */
    private function expandMode($mode)
    {
        if (isset($mode->variants) && !isset($mode->cachedVariants)) {
            // Collect into a local first. If the cache were attached to $mode
            // before copying, inherit() would copy the half-built cache into
            // every variant, and a later expandMode() on such a variant would
            // return that partial list instead of the variant itself.
            $expanded = array();

            foreach ($mode->variants as $variant) {
                $expanded[] = $this->inherit($mode, array('variants' => null), $variant);
            }

            $mode->cachedVariants = $expanded;
        }

        if (isset($mode->cachedVariants)) {
            return $mode->cachedVariants;
        }

        if (isset($mode->endsWithParent) && $mode->endsWithParent) {
            return array($this->inherit($mode));
        }

        return array($mode);
    }

    /**
     * joinRe logically computes regexps.join(separator), but fixes the
     * backreferences so they continue to match.
     *
     * @param array  $regexps
     * @param string $separator
     *
     * @return string
     */
    private function joinRe($regexps, $separator)
    {
        // backreferenceRe matches an open parenthesis or backreference. To avoid
        // an incorrect parse, it additionally matches the following:
        // - [...] elements, where the meaning of parentheses and escapes change
        // - other escape sequences, so we do not misparse escape sequences as
        //   interesting elements
        // - non-matching or lookahead parentheses, which do not capture. These
        //   follow the '(' with a '?'.
        $backreferenceRe = '#\[(?:[^\\\\\]]|\\\.)*\]|\(\??|\\\([1-9][0-9]*)|\\\.#';
        $numCaptures = 0;
        $ret = '';

        foreach (array_values($regexps) as $i => $re) {
            // Capture groups contributed by the preceding expressions; every
            // backreference in this expression is shifted by this amount.
            $offset = $numCaptures;

            if ($i > 0) {
                $ret .= $separator;
            }

            while (strlen($re) > 0) {
                $matches = array();
                $matchFound = preg_match($backreferenceRe, $re, $matches, PREG_OFFSET_CAPTURE);

                // 0 means nothing of interest is left; false means PCRE failed.
                // In both cases copy the remainder verbatim. Without the false
                // case $re would never shrink and this loop would not end.
                if (!$matchFound) {
                    $ret .= $re;
                    break;
                }

                // PHP aliases to match the JS naming conventions
                $match = $matches[0];
                $index = $match[1];

                $ret .= substr($re, 0, $index);
                $re = substr($re, $index + strlen($match[0]));

                if (substr($match[0], 0, 1) === '\\' && isset($matches[1])) {
                    // Adjust the backreference.
                    $ret .= "\\" . strval(intval($matches[1][0]) + $offset);
                } else {
                    $ret .= $match[0];
                    // Only a bare "(" captures; "(?" variants do not.
                    if ($match[0] === "(") {
                        ++$numCaptures;
                    }
                }
            }
        }

        return $ret;
    }

    /**
     * Recursively compiles a mode in place: fills defaults, builds the keyword
     * table and the begin/end/illegal/lexemes patterns, expands and compiles
     * contained modes, and builds the combined `terminators` pattern.
     * Modes that already carry a `compiled` value are skipped.
     *
     * @param \stdClass      $mode
     * @param \stdClass|null $parent the enclosing mode, or null for the root
     */
    private function compileMode($mode, $parent = null)
    {
        if (isset($mode->compiled)) {
            return;
        }
        $this->complete($mode);
        $mode->compiled = true;

        $mode->keywords = $mode->keywords ?: $mode->beginKeywords;

        /* Note: JsonRef method creates different references as those in the
         * original source files. Two modes may refer to the same keywords
         * set, so only testing if the mode has keywords is not enough: the
         * mode's keywords might be compiled already, so it is necessary
         * to do an 'is_array' check.
         */
        if ($mode->keywords && !is_array($mode->keywords)) {
            $compiledKeywords = array();

            $mode->lexemesRe = $this->langRe($mode->lexemes ? $mode->lexemes : "\w+", true);

            foreach ($this->processKeyWords($mode->keywords) as $clsNm => $dat) {
                if (!is_array($dat)) {
                    $dat = array($dat);
                }
                foreach ($dat as $kw) {
                    // A keyword may carry its relevance as "word|N"; default is 1.
                    $pair = explode("|", $kw);
                    $compiledKeywords[$pair[0]] = array($clsNm, isset($pair[1]) ? intval($pair[1]) : 1);
                }
            }
            $mode->keywords = $compiledKeywords;
        }

        if ($parent) {
            if ($mode->beginKeywords) {
                $mode->begin = "\\b(" . implode("|", explode(" ", $mode->beginKeywords)) . ")\\b";
            }
            if (!$mode->begin) {
                // Matches at every position.
                $mode->begin = "\B|\b";
            }
            $mode->beginRe = $this->langRe($mode->begin);
            if ($mode->endSameAsBegin) {
                $mode->end = $mode->begin;
            }
            if (!$mode->end && !$mode->endsWithParent) {
                $mode->end = "\B|\b";
            }
            if ($mode->end) {
                $mode->endRe = $this->langRe($mode->end);
            }
            $mode->terminatorEnd = $mode->end;
            if ($mode->endsWithParent && $parent->terminatorEnd) {
                $mode->terminatorEnd .= ($mode->end ? "|" : "") . $parent->terminatorEnd;
            }
        }

        if ($mode->illegal) {
            $mode->illegalRe = $this->langRe($mode->illegal);
        }

        $expandedContains = array();
        foreach ($mode->contains as $c) {
            $expandedContains = array_merge($expandedContains, $this->expandMode(
                $c === 'self' ? $mode : $c
            ));
        }
        $mode->contains = $expandedContains;

        foreach ($mode->contains as $child) {
            $this->compileMode($child, $mode);
        }

        if ($mode->starts) {
            $this->compileMode($mode->starts, $parent);
        }

        $terminators = array();
        foreach ($mode->contains as $child) {
            $terminators[] = $child->beginKeywords
                ? "\.?(?:" . $child->begin . ")\.?"
                : $child->begin;
        }
        if ($mode->terminatorEnd) {
            $terminators[] = $mode->terminatorEnd;
        }
        if ($mode->illegal) {
            $terminators[] = $mode->illegal;
        }
        $mode->terminators = count($terminators) ? $this->langRe($this->joinRe($terminators, "|"), true) : null;
    }

    /**
     * Resolves the definition through JsonRef and compiles the mode tree.
     * Does nothing when the root mode already carries a `compiled` value.
     */
    public function compile()
    {
        if (!isset($this->mode->compiled)) {
            $jr = new JsonRef();
            $this->mode = $jr->decode($this->mode);
            $this->compileMode($this->mode);
        }
    }
}
|