This lexer is handwritten for efficiency. I used an elegant buffering scheme which I have not seen anywhere else: We guarantee that a whole line is in the buffer. Thus, only when scanning the \n or \r character do we have to check whether we need to read in the next chunk. (\n or \r already need special handling for incrementing the line counter; choosing both \n and \r allows the lexer to properly read Unix, DOS or Macintosh text files, even when it is not the native format.)
Types
Lexer = object of TBaseLexer fileIdx*: FileIndex indentAhead*: int ## if > 0 an indentation has already been read ## this is needed because scanning comments ## needs so much look-ahead currLineIndent*: int strongSpaces*, allowTabs*: bool cache*: IdentCache when defined(nimsuggest): previousToken: TLineInfo config*: ConfigRef diags: seq[LexerDiag]
- Source Edit
LexerDiag = object msg*: string location*: TLineInfo ## diagnostic location instLoc*: InstantiationInfo ## instantiation in lexer's source case kind*: LexerDiagKind of lexDiagNameXShouldBeY: got*: string else: nil
- Diagnostic data from the Lexer, mostly errors Source Edit
LexerDiagKind = enum lexDiagMalformedNumUnderscores, lexDiagMalformedIdentUnderscores, lexDiagMalformedTrailingUnderscre, lexDiagInvalidToken, lexDiagInvalidTokenSpaceBetweenNumAndIdent, lexDiagNoTabs, lexDiagInvalidIntegerLiteralOctalPrefix, lexDiagInvalidIntegerSuffix, lexDiagNumberNotInRange, lexDiagExpectedHex, lexDiagInvalidIntegerLiteral, lexDiagInvalidNumericLiteral, lexDiagInvalidCharLiteral, lexDiagInvalidCharLiteralConstant, lexDiagInvalidCharLiteralPlatformNewline, lexDiagInvalidCharLiteralUnicodeCodepoint, lexDiagMissingClosingApostrophe, lexDiagInvalidUnicodeCodepointEmpty, lexDiagInvalidUnicodeCodepointGreaterThan0x10FFFF, lexDiagUnclosedTripleString, lexDiagUnclosedSingleString, lexDiagUnclosedComment, lexDiagDeprecatedOctalPrefix = "OctalEscape", lexDiagLineTooLong = "LineTooLong", lexDiagNameXShouldBeY = "Name"
- Source Edit
Token = object tokType*: TokType ## the type of the token indent*: int ## the indentation; != -1 if the token has been ## preceded with indentation ident*: PIdent ## the parsed identifier iNumber*: BiggestInt ## the parsed integer literal fNumber*: BiggestFloat ## the parsed floating point literal base*: NumericalBase ## the numerical base; only valid for int ## or float literals strongSpaceA*: int8 ## leading spaces of an operator strongSpaceB*: int8 ## trailing spaces of an operator literal*: string ## the parsed (string) literal; and ## documentation comments are here too line*, col*: int error*: LexerDiag ## error diagnostic if `tokType` is `tkError`
- a Nim token Source Edit
TokType = enum tkInvalid = "tkInvalid", tkError = "tkError", tkEof = "[EOF]", tkSymbol = "tkSymbol", tkAddr = "addr", tkAnd = "and", tkAs = "as", tkAsm = "asm", tkBind = "bind", tkBlock = "block", tkBreak = "break", tkCase = "case", tkCast = "cast", tkConcept = "concept", tkConst = "const", tkContinue = "continue", tkConverter = "converter", tkDefer = "defer", tkDiscard = "discard", tkDistinct = "distinct", tkDiv = "div", tkDo = "do", tkElif = "elif", tkElse = "else", tkEnd = "end", tkEnum = "enum", tkExcept = "except", tkExport = "export", tkFinally = "finally", tkFor = "for", tkFrom = "from", tkFunc = "func", tkIf = "if", tkImport = "import", tkIn = "in", tkInclude = "include", tkInterface = "interface", tkIs = "is", tkIsnot = "isnot", tkIterator = "iterator", tkLet = "let", tkMacro = "macro", tkMethod = "method", tkMixin = "mixin", tkMod = "mod", tkNil = "nil", tkNot = "not", tkNotin = "notin", tkObject = "object", tkOf = "of", tkOr = "or", tkOut = "out", tkProc = "proc", tkPtr = "ptr", tkRaise = "raise", tkRef = "ref", tkReturn = "return", tkShl = "shl", tkShr = "shr", tkStatic = "static", tkTemplate = "template", tkTry = "try", tkTuple = "tuple", tkType = "type", tkUsing = "using", tkVar = "var", tkWhen = "when", tkWhile = "while", tkXor = "xor", tkYield = "yield", tkIntLit = "tkIntLit", tkInt8Lit = "tkInt8Lit", tkInt16Lit = "tkInt16Lit", tkInt32Lit = "tkInt32Lit", tkInt64Lit = "tkInt64Lit", tkUIntLit = "tkUIntLit", tkUInt8Lit = "tkUInt8Lit", tkUInt16Lit = "tkUInt16Lit", tkUInt32Lit = "tkUInt32Lit", tkUInt64Lit = "tkUInt64Lit", tkFloatLit = "tkFloatLit", tkFloat32Lit = "tkFloat32Lit", tkFloat64Lit = "tkFloat64Lit", tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit", tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit", tkCustomLit = "tkCustomLit", tkParLe = "(", tkParRi = ")", tkBracketLe = "[", tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}", tkBracketDotLe = "[.", tkBracketDotRi = ".]", 
tkCurlyDotLe = "{.", tkCurlyDotRi = ".}", tkParDotLe = "(.", tkParDotRi = ".)", tkComma = ",", tkSemiColon = ";", tkColon = ":", tkColonColon = "::", tkEquals = "=", tkDot = ".", tkDotDot = "..", tkBracketLeColon = "[:", tkOpr, tkComment, tkAccent = "`", tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, tkHideableStart, tkHideableEnd
- Source Edit
Consts
LexDiagsError = {lexDiagMalformedNumUnderscores..lexDiagUnclosedComment}
- Source Edit
LexDiagsHint = {lexDiagLineTooLong..lexDiagNameXShouldBeY}
- Source Edit
LexDiagsWarning = {lexDiagDeprecatedOctalPrefix}
- Source Edit
MaxLineLength = 80
- Source Edit
OpChars: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.', '|', '=', '%', '&', '$', '@', '~', ':'}
- Source Edit
SymStartChars: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}
- Source Edit
tkKeywords = {tkAddr..tkYield}
- Source Edit
tokKeywordHigh = tkYield
- Source Edit
tokKeywordLow = tkAddr
- Source Edit
Procs
proc closeLexer(lex: var Lexer) {....raises: [], tags: [].}
- Source Edit
func diagOffset(L: Lexer): int {.inline, ...raises: [], tags: [].}
- The return value represents a point in time at which all existing diagnostics are considered to be in the past; it is used in conjunction with errorsHintsAndWarnings. Source Edit
func diagToHumanStr(d: LexerDiag): string {....raises: [ValueError], tags: [].}
- creates a human readable string message for a diagnostic, does not include any extra information such as line info, severity, and so on. Source Edit
proc getLineInfo(L: Lexer): TLineInfo {....raises: [], tags: [].}
- Source Edit
proc getPrecedence(ident: PIdent): int {....raises: [], tags: [].}
- Assumes ident is already a binary operator. Source Edit
proc getPrecedence(tok: Token): int {....raises: [], tags: [].}
- Calculates the precedence of the given token. Source Edit
func isKeyword(i: PIdent): bool {....raises: [], tags: [].}
- Is this identifier a keyword? Source Edit
proc isNimIdentifier(s: string): bool {....raises: [], tags: [].}
- Source Edit
proc openLexer(lex: var Lexer; fileIdx: FileIndex; inputstream: PLLStream; cache: IdentCache; config: ConfigRef) {. ...raises: [IOError, Exception], tags: [ReadIOEffect, RootEffect].}
- Source Edit
proc openLexer(lex: var Lexer; filename: AbsoluteFile; inputstream: PLLStream; cache: IdentCache; config: ConfigRef) {. ...raises: [IOError, Exception, KeyError], tags: [ReadIOEffect, RootEffect, ReadDirEffect].}
- Source Edit