diff --git a/src/Exception/ExceptionHelper.php b/src/Exception/ExceptionHelper.php
new file mode 100644
index 00000000..bdef52af
--- /dev/null
+++ b/src/Exception/ExceptionHelper.php
@@ -0,0 +1,163 @@
+ $result['line'],
+            'code' => $code,
+            'column' => $column,
+            'highlight' => self::getStringHighligher($column),
+        ];
+    }
+
+    /**
+     * Returns the source line on which the error occurred. If that line has
+     * no visible characters, the preceding lines are prepended to it until
+     * a line with visible content is reached.
+     *
+     * @param array|string[] $textLines List of code lines
+     * @return string
+     */
+    private static function getAffectedCodeAsString(array $textLines)
+    {
+        $result = '';
+        $i = 0;
+
+        while (\count($textLines) && ++$i) {
+            $textLine = \array_pop($textLines);
+            $result = $textLine . ($i > 1 ? "\n" . $result : '');
+            // Compare strictly: a line such as "0" is visible, although it is a falsy string.
+            if (\trim($textLine) !== '') {
+                break;
+            }
+        }
+
+        return $result;
+    }
+
+    /**
+     * Draws the pointer to the error place: an arrow indented by the given offset.
+     *
+     * @param int $charsOffset Error offset in characters; values below 1 add no indent
+     * @return string
+     */
+    private static function getStringHighligher($charsOffset)
+    {
+        $prefix = '';
+
+        if ($charsOffset > 0) {
+            $prefix = \str_repeat(' ', $charsOffset);
+        }
+
+        return $prefix . '↑';
+    }
+
+    /**
+     * Converts an offset in bytes within a line into an offset in UTF-8 characters.
+     *
+     * @param string $line The code line in which the offset is measured
+     * @param int $bytesOffset Length of the offset in bytes
+     * @return int
+     */
+    private static function getMbColumnPosition($line, $bytesOffset)
+    {
+        $slice = \substr($line, 0, $bytesOffset);
+
+        return \mb_strlen($slice, 'UTF-8');
+    }
+
+    /**
+     * Returns information about the error location: line, column and affected text lines.
+     *
+     * @param string $text The source code in which we search for a line and a column
+     * @param int $bytesOffset Offset in bytes relative to the beginning of the source code
+     * @return array The 'line' (1-based), the 'column' (offset in bytes within that line) and the 'trace' (the lines up to and including that one)
+     */
+    private static function getErrorInfo($text, $bytesOffset)
+    {
+        $result = [
+            'line' => 1,
+            'column' => 0,
+            'trace' => [],
+        ];
+        // Offset in bytes at which the next line starts (the "+ 1" below counts the "\n").
+        $current = 0;
+
+        foreach (\explode("\n", $text) as $line => $code) {
+            $previous = $current;
+            $current += \strlen($code) + 1;
+            $result['trace'][] = $code;
+
+            if ($current > $bytesOffset) {
+                return [
+                    'line' => $line + 1,
+                    'column' => $bytesOffset - $previous,
+                    'trace' => $result['trace']
+                ];
+            }
+        }
+        // The offset lies beyond the text: default line and column, with every line in the trace.
+        return $result;
+    }
+}
diff --git a/src/Exception/UnrecognizedToken.php b/src/Exception/UnrecognizedToken.php
index 82143416..33dd8526 100644
--- a/src/Exception/UnrecognizedToken.php
+++ b/src/Exception/UnrecognizedToken.php
@@ -48,6 +48,8 @@
  */
 class UnrecognizedToken extends Exception
 {
+    use ExceptionHelper;
+
     /**
      * Column.
      *
@@ -55,8 +57,6 @@ class UnrecognizedToken extends Exception
      */
     protected $column = 0;
 
-
-
     /**
      * Override line and add column support.
      *
@@ -76,6 +76,32 @@ public function __construct($message, $code, $arg, $line, $column)
         return;
     }
 
+    /**
+     * Creates the exception for an error located by its byte offset in the
+     * source text; the line, the column, the affected code and a pointer to
+     * the error place are appended to the message.
+     *
+     * @param string $message Formatted message.
+     * @param string $text Source code
+     * @param int $offsetInBytes Error offset in bytes
+     * @param int $code Code (the ID).
+     * @return static
+     */
+    public static function fromOffset($message, $text, $offsetInBytes, $code = 0)
+    {
+        $info = self::getErrorPositionByOffset($text, $offsetInBytes);
+
+        // The message is used as a format string for the line and column
+        // arguments below, so every "%" that comes from the caller or from
+        // the source code must be escaped to be rendered literally.
+        $message = \str_replace('%', '%%', $message) .
+            ' at line %s and column %s' . \PHP_EOL .
+            \str_replace('%', '%%', $info['code']) . \PHP_EOL .
+            $info['highlight'];
+
+        return new static($message, $code, [$info['line'], $info['column']], $info['line'], $info['column']);
+    }
+
     /**
      * Get column.
* diff --git a/src/Llk/Lexer.php b/src/Llk/Lexer.php index c0f6c0bd..b4138813 100644 --- a/src/Llk/Lexer.php +++ b/src/Llk/Lexer.php @@ -85,8 +85,6 @@ final class Lexer */ protected $_pcreOptions = null; - - /** * Constructor. * @@ -155,19 +153,9 @@ public function lexMe($text, array $tokens): \Generator $nextToken = $this->nextToken($offset); if (null === $nextToken) { - throw new Compiler\Exception\UnrecognizedToken( - 'Unrecognized token "%s" at line 1 and column %d:' . - "\n" . '%s' . "\n" . - str_repeat(' ', mb_strlen(substr($text, 0, $offset))) . '↑', - 0, - [ - mb_substr(substr($text, $offset), 0, 1), - $offset + 1, - $text - ], - 1, - $offset - ); + $error = \sprintf('Unrecognized token "%s"', \mb_substr(\substr($text, $offset), 0, 1)); + + throw Compiler\Exception\UnrecognizedToken::fromOffset($error, $text, $offset); } if (true === $nextToken['keep']) { diff --git a/src/Llk/Parser.php b/src/Llk/Parser.php index c8d46282..17e7032b 100644 --- a/src/Llk/Parser.php +++ b/src/Llk/Parser.php @@ -188,46 +188,18 @@ final public function parse(string $text, string $rule = null, bool $tree = true } if (false === $this->backtrack()) { - $token = $this->_errorToken; + $token = $this->_errorToken; if (null === $this->_errorToken) { $token = $this->_tokenSequence->current(); } - $offset = $token['offset']; - $line = 1; - $column = 1; - - if (!empty($text)) { - if (0 === $offset) { - $leftnl = 0; - } else { - $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; - } - - $rightnl = strpos($text, "\n", $offset); - $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; - $column = $offset - $leftnl + (0 === $leftnl); - - if (false !== $rightnl) { - $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); - } - } + $error = \vsprintf('Unexpected token "%s" (%s)', [ + $token['value'], + $token['token'], + ]); - throw new Compiler\Exception\UnexpectedToken( - 'Unexpected token "%s" (%s) at line %d and column %d:' . - "\n" . '%s' . "\n" . 
str_repeat(' ', $column - 1) . '↑', - 0, - [ - $token['value'], - $token['token'], - $line, - $column, - $text - ], - $line, - $column - ); + throw Compiler\Exception\UnexpectedToken::fromOffset($error, $text, $token['offset']); } } while (true);