compiler/ast/lexer

  Source   Edit

This lexer is handwritten for efficiency. I used an elegant buffering scheme which I have not seen anywhere else: We guarantee that a whole line is in the buffer. Thus only when scanning the \n or \r character do we have to check whether we need to read in the next chunk. (\n or \r already need special handling for incrementing the line counter; choosing both \n and \r allows the lexer to properly read Unix, DOS or Macintosh text files, even when it is not the native format.)

Types

Lexer = object of TBaseLexer
  fileIdx*: FileIndex
  indentAhead*: int          ## if > 0 an indentation has already been read
                             ## this is needed because scanning comments
                             ## needs so much look-ahead
  currLineIndent*: int
  strongSpaces*, allowTabs*: bool
  cache*: IdentCache
  when defined(nimsuggest):
      previousToken: TLineInfo

  config*: ConfigRef
  diags: seq[LexerDiag]
  Source   Edit
LexerDiag = object
  msg*: string
  location*: TLineInfo       ## diagnostic location
  instLoc*: InstantiationInfo ## instantiation in lexer's source
  case kind*: LexerDiagKind
  of lexDiagNameXShouldBeY:
      got*: string

  else:
      nil

  
Diagnostic data from the Lexer, mostly errors   Source   Edit
LexerDiagKind = enum
  lexDiagMalformedNumUnderscores, lexDiagMalformedIdentUnderscores,
  lexDiagMalformedTrailingUnderscre, lexDiagInvalidToken,
  lexDiagInvalidTokenSpaceBetweenNumAndIdent, lexDiagNoTabs,
  lexDiagInvalidIntegerLiteralOctalPrefix, lexDiagInvalidIntegerSuffix,
  lexDiagNumberNotInRange, lexDiagExpectedHex, lexDiagInvalidIntegerLiteral,
  lexDiagInvalidNumericLiteral, lexDiagInvalidCharLiteral,
  lexDiagInvalidCharLiteralConstant, lexDiagInvalidCharLiteralPlatformNewline,
  lexDiagInvalidCharLiteralUnicodeCodepoint, lexDiagMissingClosingApostrophe,
  lexDiagInvalidUnicodeCodepointEmpty,
  lexDiagInvalidUnicodeCodepointGreaterThan0x10FFFF,
  lexDiagUnclosedTripleString, lexDiagUnclosedSingleString,
  lexDiagUnclosedComment, lexDiagDeprecatedOctalPrefix = "OctalEscape",
  lexDiagLineTooLong = "LineTooLong", lexDiagNameXShouldBeY = "Name"
  Source   Edit
Token = object
  tokType*: TokType          ## the type of the token
  indent*: int               ## the indentation; != -1 if the token has been
                             ## preceded with indentation
  ident*: PIdent             ## the parsed identifier
  iNumber*: BiggestInt       ## the parsed integer literal
  fNumber*: BiggestFloat     ## the parsed floating point literal
  base*: NumericalBase       ## the numerical base; only valid for int
                             ## or float literals
  strongSpaceA*: int8        ## leading spaces of an operator
  strongSpaceB*: int8        ## trailing spaces of an operator
  literal*: string           ## the parsed (string) literal; and
                             ## documentation comments are here too
  line*, col*: int
  error*: LexerDiag          ## error diagnostic if `tokType` is `tkError`
  
a Nim token   Source   Edit
TokType = enum
  tkInvalid = "tkInvalid", tkError = "tkError", tkEof = "[EOF]",
  tkSymbol = "tkSymbol", tkAddr = "addr", tkAnd = "and", tkAs = "as",
  tkAsm = "asm", tkBind = "bind", tkBlock = "block", tkBreak = "break",
  tkCase = "case", tkCast = "cast", tkConcept = "concept", tkConst = "const",
  tkContinue = "continue", tkConverter = "converter", tkDefer = "defer",
  tkDiscard = "discard", tkDistinct = "distinct", tkDiv = "div", tkDo = "do",
  tkElif = "elif", tkElse = "else", tkEnd = "end", tkEnum = "enum",
  tkExcept = "except", tkExport = "export", tkFinally = "finally",
  tkFor = "for", tkFrom = "from", tkFunc = "func", tkIf = "if",
  tkImport = "import", tkIn = "in", tkInclude = "include",
  tkInterface = "interface", tkIs = "is", tkIsnot = "isnot",
  tkIterator = "iterator", tkLet = "let", tkMacro = "macro",
  tkMethod = "method", tkMixin = "mixin", tkMod = "mod", tkNil = "nil",
  tkNot = "not", tkNotin = "notin", tkObject = "object", tkOf = "of",
  tkOr = "or", tkOut = "out", tkProc = "proc", tkPtr = "ptr", tkRaise = "raise",
  tkRef = "ref", tkReturn = "return", tkShl = "shl", tkShr = "shr",
  tkStatic = "static", tkTemplate = "template", tkTry = "try",
  tkTuple = "tuple", tkType = "type", tkUsing = "using", tkVar = "var",
  tkWhen = "when", tkWhile = "while", tkXor = "xor", tkYield = "yield",
  tkIntLit = "tkIntLit", tkInt8Lit = "tkInt8Lit", tkInt16Lit = "tkInt16Lit",
  tkInt32Lit = "tkInt32Lit", tkInt64Lit = "tkInt64Lit", tkUIntLit = "tkUIntLit",
  tkUInt8Lit = "tkUInt8Lit", tkUInt16Lit = "tkUInt16Lit",
  tkUInt32Lit = "tkUInt32Lit", tkUInt64Lit = "tkUInt64Lit",
  tkFloatLit = "tkFloatLit", tkFloat32Lit = "tkFloat32Lit",
  tkFloat64Lit = "tkFloat64Lit", tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit",
  tkTripleStrLit = "tkTripleStrLit", tkGStrLit = "tkGStrLit",
  tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
  tkCustomLit = "tkCustomLit", tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
  tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}", tkBracketDotLe = "[.",
  tkBracketDotRi = ".]", tkCurlyDotLe = "{.", tkCurlyDotRi = ".}",
  tkParDotLe = "(.", tkParDotRi = ".)", tkComma = ",", tkSemiColon = ";",
  tkColon = ":", tkColonColon = "::", tkEquals = "=", tkDot = ".",
  tkDotDot = "..", tkBracketLeColon = "[:", tkOpr, tkComment, tkAccent = "`",
  tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, tkHideableStart,
  tkHideableEnd
  Source   Edit
TokTypes = set[TokType]
  Source   Edit

Consts

LexDiagsError = {lexDiagMalformedNumUnderscores..lexDiagUnclosedComment}
  Source   Edit
LexDiagsHint = {lexDiagLineTooLong..lexDiagNameXShouldBeY}
  Source   Edit
LexDiagsWarning = {lexDiagDeprecatedOctalPrefix}
  Source   Edit
MaxLineLength = 80
  Source   Edit
numChars: set[char] = {'0'..'9', 'a'..'z', 'A'..'Z'}
  Source   Edit
OpChars: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
                      '|', '=', '%', '&', '$', '@', '~', ':'}
  Source   Edit
SymChars: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
  Source   Edit
SymStartChars: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}
  Source   Edit
tkKeywords = {tkAddr..tkYield}
  Source   Edit
tokKeywordHigh = tkYield
  Source   Edit
tokKeywordLow = tkAddr
  Source   Edit

Procs

proc `$`(tok: Token): string {....raises: [], tags: [].}
  Source   Edit
proc closeLexer(lex: var Lexer) {....raises: [], tags: [].}
  Source   Edit
func diagOffset(L: Lexer): int {.inline, ...raises: [], tags: [].}
The return value represents a point in time at which all existing diagnostics are considered to be in the past; it is used in conjunction with errorsHintsAndWarnings   Source   Edit
func diagToHumanStr(d: LexerDiag): string {....raises: [ValueError], tags: [].}
creates a human-readable string message for a diagnostic; does not include any extra information such as line info, severity, and so on.   Source   Edit
proc getLineInfo(L: Lexer): TLineInfo {....raises: [], tags: [].}
  Source   Edit
proc getLineInfo(L: Lexer; tok: Token): TLineInfo {.inline, ...raises: [], tags: [].}
  Source   Edit
proc getPrecedence(ident: PIdent): int {....raises: [], tags: [].}
assumes ident is a binary operator already   Source   Edit
proc getPrecedence(tok: Token): int {....raises: [], tags: [].}
Calculates the precedence of the given token.   Source   Edit
proc initToken(L: var Token) {....raises: [], tags: [].}
  Source   Edit
func isKeyword(i: PIdent): bool {....raises: [], tags: [].}
is this identifier a keyword?   Source   Edit
func isKeyword(kind: TokType): bool {....raises: [], tags: [].}
  Source   Edit
proc isNimIdentifier(s: string): bool {....raises: [], tags: [].}
  Source   Edit
proc openLexer(lex: var Lexer; fileIdx: FileIndex; inputstream: PLLStream;
               cache: IdentCache; config: ConfigRef) {.
    ...raises: [IOError, Exception], tags: [ReadIOEffect, RootEffect].}
  Source   Edit
proc openLexer(lex: var Lexer; filename: AbsoluteFile; inputstream: PLLStream;
               cache: IdentCache; config: ConfigRef) {.
    ...raises: [IOError, Exception, KeyError],
    tags: [ReadIOEffect, RootEffect, ReadDirEffect].}
  Source   Edit
proc prettyTok(tok: Token): string {....raises: [], tags: [].}
  Source   Edit
proc printTok(conf: ConfigRef; tok: Token) {....raises: [Exception],
    tags: [RootEffect].}
  Source   Edit
proc rawGetTok(L: var Lexer; tok: var Token) {....raises: [IOError, Exception],
    tags: [ReadIOEffect, RootEffect].}
  Source   Edit

Iterators

iterator errorsHintsAndWarnings(L: Lexer; diagOffset = 0): LexerDiag {.
    ...raises: [], tags: [].}
iterate over all diagnostics from the beginning, or from the point in time specified via diagOffset   Source   Edit