Files
android-dev-ext/langserver/java/tokenizer.js
Dave Holoway 83eda790be version 1.2 (#93)
* initial working language server
* first hacky version of source parsing and type checking
* first iteration of method body parser
* add support for prefix/postfix inc expressions
* add basic support for parsing new expressions
* different attempt to parse using collapsable text ranges
* fix parsing of binary operators following a bracket expression
* updated validation to use new JavaTypes module instead of MTIs
* add support for array-literal expressions
* fix || and && not being tokenized as operators allow float literals starting with dot
* add new method body parser to use direct linear parsing
* add super as an object literal
* fix interface constructors check constructor type modifiers
* fix assignment operator types
* Fix resolving of enclosed type identifiers
* add default constructor for class types with no explicit constructors
* add missing constructor validator
* add constructor parameters to list of resolvable types
* update SourceMethod to pass name in super constructor
* add Any* classes to reduce cascading errors
* update method call parameter checking use isTypeAssignable instead of getParameterCompatibleTypeSignatures
* tidy up isTypeAssignable allow class equivalents for primitives
* add more info when methods/ctrs cannot be matched
* allow interfaces to be cast to class instances
* use isTypeAssignable for checking branch test expressions
* allow AnyValue to be a constant value
* split shift operators from bitwise operators
* add support for literal numbers to be assignable to multiple primitive types
* clear diagnostics when document is closed
* update check for cast expression
* casting only applies to qualified term not a whole expression
* allow all primitive-number-type casts
* add support for synchronized statement
* update primitive type compatibility
* allow null to be cast to any non-primitive
* use better regex for string literals
* allow character literals to be assigned to number types
* add support for array qualifiers after a variable name
* make sure any long specifier is stripped from a bigint value
* improve invalid array expression message add AnyType array element to prevent cascading errors
* make default a modifier keyword for interface default method support
* initial support for wildcard type arguments
* fix parse issue with nested generic types
* allow generic types to be assigned to inherited types with compatible type arguments
* allow unicode characters, $ and _ in identifiers
* map primitive types to their boxed versions for class member
* support assert statement
* allow unicode char literals
* make type parser and body parser use same tokenizer
* reuse parsed tokens instead of tokenizing each method body
* re-add throws as a keyword
* treat default and synchronized as modifiers
* add SourceInitialiser support
* refactor to prepare for merging with type parsing
* add support for array qualifiers in type identifiers
* pass scoped type instead of method to typeIdent
* update ResolvableType to use same type resolving as method body parsing
* add support for post-name array qualifiers in fields and parameters
* post-name array qualifiers in method decls
* add type variables to SourceMethod
* initial attempt to support type variable arguments in methods
* specialise methods with type variables
* don't require default interface methods to be implemented
* make variable arity parameters an array type
* tidy array constructors and fix some warnings
* update isCallCompatible to handle variable arity calls
* improve assert statement support
* parse labels and break/continue targets
* refactor new term qualifiers
* add support for generic inferred-type arguments
* improve modifier checks for interface types
* improve reporting of unresolved type errors
* fix type checking of field and method declarations
* add missing strictfp modifier
* refactor in preparation for parsing local types
* replace Locals with scopeable MethodDeclarations to allow labels and types to be stored
* initial changes to support local type declarations
* update to use new set of SourceX classes
* refactor to allow expressions to have a type scope
* replace regex parsing with linear parsing
* generate source types before parsing
* fix support for resolving type variables in method declarations
* fix checking of array literal compatibility
* report errors from unit parsing
* remove local modifier validation during parse add parameter modifier checking to validation
* allow trailing comma for array literals
* start separating validation from parsing
* add support for parsing enum values
* allow uppercase 0X in hex literals
* include enclosing types in identifier search
* add support for parsing parameterless lambdas
* ignore unresolved types in extends/implements
* implement specialisation of SourceType
* allow super as a member qualifier
* allow empty enums
* don't report missing constructors if superclass has none
* update typemap declarations to use CEIType instead of JavaType
* fix resolving of class type variables
* fix bad imports when resolving annotations
* allow null scope in findIdentifier
* add support for static member imports
* import types from same package
* remove this qualifier from isCastExpression
* add hex exponent support
* parse try-with-resources
* fix resolving imported enclosed types
* extract expression types into separate files
* extract statement types into separate files
* fix type warnings
* extract literals into separate files
* remove Value class, add NewExpression and separate out Any classes
* rename source types module
* remove some parse checks that should be in verify
* support token extraction in expressions
* implement resolveExpression
* add type cast checking
* check for valid type in class member expressions
* allow assigns for assignable type arguments
* improve reporting of unresolved identifiers
* add new array validation
* validate array literals
* validate array indexes
* improve validation of binary operators
* rename ResolvedType  to ResolvedValue
* improve checking of number literals
* support package name as a resolved value
* implement method body and statement validation
* improve method call resolving
* add support for this() and super() constructor calls
* remove return type for source constructors
* add checks for unary operators
* ensure tokens are assigned for qualified expressions
* check castability using type assignments
* add implicit enum methods values() and valueOf()
* add basic type checking of lambda expressions
* fix return type check
* fix assert statement checks
* improve support for ternary operators in assignments and method invocations
* perform more detailed search of implemented methods
* initial test of context-dependant code completion
* support package, type and static field import completion
* support for member expressions
* use exact type signatures for locating types for completion items
* add support for field and method docs
* add support for docs in source types
* support member completion for array types improve comment formatting
* ensure Object is always last in the list of inherited types
* add owning method to statements create common keyword statement class
* improve code completion list add method parameters order list items by scope
* add source types to list hide this and super for non-methods
* fix bad member resolution at end of block fix missing method and type docs
* add support for editing multiple files
* allow multiple source files to be used in parsing
* load and parse files at startup
* add support for displaying method signatures
* add single trace function with timestamps
* implement shceduleReparse to reduce parsing load while typing
* remove parsed type list logging
* wait for reparsing before returning method signatures
* resolve new object constructors
* improve extraction of parameter docs
* update @types/vscode
* cache decoded android library in globalStoragePath
* load single android library cache from local folder
* android-29 library cache
* allow configurable app root setting
* set configurable trace logging and update section names
* description updates
* handle null token passed to ParseProblem
* refactoring
* Rename language client extension to Android
* ignore unnamed type declarations
* handle java file change notifications
* make sure we only try and parse java files
* add option to allow language server to be shutdown
* simplify handling of this and class member qualifiers
* relocate java-mti package into project
* get main node install to install langserver dependencies
* remove debugging pause
* rename body-parser3 to body-parser
* clean up import resolving code
* remove unused field from ResolvedImport
* remove validation modules that used old parser types
* remove old parser files
* remove redundant types and functions used by old parser
* move addproblem into TokenList
* remove unused ResolvedType class
* validate more statements
* add support for parsing and validating anonymous types
* hide some method modifiers which aren't useful to show
* code comments and minor improvements
* fix some type warnings
* improve support for completion of enum values
* add type name to parameter completion labels
* ignore synthetic members in completion list
* use a specialised map for handling case-insensitive file uris
* add basic build script
* reference java-mti package from GitHub
* revert @types/vscode
* update initial file loading to use URIs passed from the client changes to the appSourceRoot now require an extension restart
* add support for loading filtered androidx libraries for code completion
* update version of java-mti
* add mixpanel package
* add basic analytics
* fix dependency versions
* fix dependency versions
* set empty cache file markers
* add language server debug config
* add file to build script
* add unqualified type members when inside a method
* apply statics filter to enum values
* add basic debugger analytics
* include current time in startup event
* add terminate reason to debugger
* update changelog and readme
2020-07-03 01:54:32 +01:00

270 lines
9.1 KiB
JavaScript

/**
* @typedef {import('java-mti').Method} Method
* @typedef {import('java-mti').Constructor} Constructor
*/
const { TextBlock, BlockRange } = require('./parsetypes/textblock');
/**
 * Produce the simplified, same-length stand-in for a token's text, used to
 * make declaration parsing easier.
 *
 * - Whitespace/comments ('wsc'): every character except CR and LF becomes a space.
 * - String and character literals: the content between the first and last
 *   character is replaced with '#' (literals of two characters or fewer are
 *   returned unchanged).
 * - Primitive types, modifiers and identifiers are abbreviated to a single
 *   letter ('P', 'M' and 'W' respectively) followed by space padding.
 * - Invalid text is replaced entirely with spaces.
 * - Any other kind is returned as-is.
 *
 * The result always has the same length as the original text, so any parse
 * errors are reported at the correct location.
 * @param {string} text full source text
 * @param {number} start index of the first character of the token
 * @param {number} length number of characters in the token
 * @param {string} kind token kind
 */
function tokenKindToSimplified(text, start, length, kind) {
    const original = text.slice(start, start + length);
    const size = original.length;

    // a single marker letter, padded with spaces up to the original width
    const abbreviate = (letter) => letter + ' '.repeat(size - 1);
    // keep the delimiters and blank out whatever lies between them
    const mask = (quote) => (size <= 2 ? original : quote + '#'.repeat(size - 2) + quote);

    if (kind === 'wsc') {
        // line breaks are preserved so line numbers stay intact
        return original.replace(/[^\r\n]/g, ' ');
    }
    if (kind === 'string-literal') {
        return mask('"');
    }
    if (kind === 'char-literal') {
        return mask("'");
    }
    if (kind === 'primitive-type') {
        return abbreviate('P');
    }
    if (kind === 'modifier') {
        return abbreviate('M');
    }
    if (kind === 'ident') {
        return abbreviate('W');
    }
    if (kind === 'invalid') {
        return ' '.repeat(size);
    }
    return original;
}
/**
 * A lexical token: a TextBlock covering a range of the source text, tagged
 * with the kind of token it represents.
 */
class Token extends TextBlock {
    /**
     * @param {string} text full source text
     * @param {number} start index of the first character of the token
     * @param {number} length number of characters in the token
     * @param {string} kind token kind
     */
    constructor(text, start, length, kind) {
        // the block range covers the original text; the simplified form is
        // the same-length stand-in produced by tokenKindToSimplified
        const range = new BlockRange(text, start, length);
        const simplified = tokenKindToSimplified(text, start, length, kind);
        super(range, simplified);

        /** the kind assigned to this token by the tokenizer */
        this.kind = kind;

        /** @type {{key:string}} */
        this.loc = null;

        /**
         * Stores information about the resolved methods/constructors this token is an argument for.
         * This is used to provide method signature info to vscode
         * @type {{methods:(Method|Constructor)[], methodIdx:number, argIdx:number}}
         */
        this.methodCallInfo = null;
    }

    /**
     * Alias for the `source` property (inherited - presumably the original
     * text of the token; see TextBlock).
     */
    get value() {
        return this.source;
    }
}
/**
 * Sub-patterns making up raw_token_re in tokenize():
 *
 * \s+                                 whitespace
 * \/\/.*                              single-line comment (slc)
 * \/\*[\d\D]*?\*\/                    multi-line comment (mlc)
 * \/\*[\d\D]*                         unterminated multi-line comment
 * "[^\r\n\\"]*(?:\\.[^\r\n\\"]*)*"    string literal - correctly terminated but may contain invalid escapes
 * ".*                                 unterminated string literal
 * '\\u[\da-fA-F]{0,4}'?               unicode character literal - possibly unterminated
 * '\\?.?'?                            character literal - possibly unterminated and/or with invalid escape
 * \.?\d                               number literal (start) - further processing extracts the value
 * [\p{L}\p{N}$_]+                     word - keyword or identifier
 * \(                                  open bracket
 * [;,?:(){}\[\]@]                     single-character symbols
 * \.(?:\.\.)?                         . ...
 *
 * the operators: [!=/%*^]=?|<<?=?|>>?>?=?|&[&=]?|\|[|=]?|(\+\+|--)|->|[+-]=?|~
 *   [!=/%*^]=?    ! = / % * ^ != == /= %= *= ^=
 *   <<?=?         < << <= <<=
 *   >>?>?=?       > >> >>> >= >>= >>>=
 *   &[&=]?        & && &=
 *   \|[|=]?       | || |=
 *   (\+\+|--)     ++ -- (matched wherever they appear; the pattern has no look-behind for a preceding word or ])
 *   ->            lambda arrow
 *   [+-]=?        + - += -=
 *   ~             bitwise complement
 */
/**
 * Split Java source text into a flat, ordered list of tokens.
 *
 * Every character of the selected range ends up inside exactly one token:
 * text that is not recognised by the raw token pattern is emitted as an
 * 'invalid' token covering the gap between two matches.
 *
 * Token positions are relative to `source` (not to the selected range).
 *
 * @param {string} source the full source text
 * @param {number} [offset] index in `source` at which to start tokenizing
 * @param {number} [length] number of characters to tokenize
 * @returns {Token[]}
 */
function tokenize(source, offset = 0, length = source.length) {
    const text = source.slice(offset, offset + length);

    // One capture group per entry in raw_token_types (in the same order).
    // The trailing `|$` alternative produces an empty match at end of input,
    // which lets the loop below flush any trailing invalid text.
    const raw_token_re = /(\s+|\/\/.*|\/\*[\d\D]*?\*\/|\/\*[\d\D]*)|("[^\r\n\\"]*(?:\\.[^\r\n\\"]*)*"|".*)|('\\u[\da-fA-F]{0,4}'?|'\\?.?'?)|(\.?\d)|([\p{L}\p{N}$_]+)|(\()|([;,?:(){}\[\]@]|\.(?:\.\.)?)|([!=/%*^]=?|<<?=?|>>?>?=?|&[&=]?|\|[|=]?|(\+\+|--)|->|[+-]=?|~)|$/gu;
    const raw_token_types = [
        'wsc',
        'string-literal',
        'char-literal',
        'number-literal',
        'word',
        'open-bracket',
        'symbol',
        'operator',
    ];

    /**
     * Note that some keywords have context-dependent meanings:
     *   default - modifier or statement-keyword
     *   synchronized - modifier or statement-keyword
     * They are treated as modifiers and updated with their new token-type when method bodies are parsed
     *
     * ```
     * true|false                                           boolean
     * this|super|null                                      object
     * int|long|short|byte|float|double|char|boolean|void   primitive type
     * new
     * instanceof
     * public|private|protected|static|final|abstract|native|volatile|transient|strictfp|default|synchronized   modifier
     * if|else|while|for|do|try|catch|finally|switch|case|return|break|continue|throw|assert   statement keyword
     * class|enum|interface                                 type keyword
     * extends|implements|throws                            eit keyword
     * package|import                                       package keyword
     * .+                                                   anything else - identifier
     * ```
     */
    const word_re = /^(?:(true|false)|(this|super|null)|(int|long|short|byte|float|double|char|boolean|void)|(new)|(instanceof)|(public|private|protected|static|final|abstract|native|volatile|transient|strictfp|default|synchronized)|(if|else|while|for|do|try|catch|finally|switch|case|return|break|continue|throw|assert)|(class|enum|interface)|(extends|implements|throws)|(package|import)|(.+))$/;
    // The order of this list must mirror the capture groups of word_re:
    // (extends|implements|throws) is captured before (package|import), so
    // 'eit-kw' has to come before 'package-kw'.
    const word_token_types = [
        'boolean-literal',
        'object-literal',
        'primitive-type',
        'new-operator',
        'instanceof-operator',
        'modifier',
        'statement-kw',
        'type-kw',
        'eit-kw',
        'package-kw',
        'ident',
    ];

    /**
     * ```
     * (?:\d+(?:\.?\d*)?|\.\d+)[eE][+-]?\d*[fFdD]?          decimal exponent: 1e0, 1.5e+10, 0.123E-20d
     * (?:\d+\.\d*|\.\d+)[fFdD]?                            decimal number: 0.1, 12.34f, 7.D, .3
     * 0[xX][\da-fA-F]*\.[\da-fA-F]*[pP][+-]?\d*[fFdD]?     hex exponent: 0x123.abcP-100
     * 0[xX][\da-fA-F]*[lL]?                                hex integer: 0x1, 0xaBc, 0x, 0X7L
     * \d+[fFdDlL]?                                         integer: 0, 123, 234f, 345L
     * ```
     * todo - underscore separators
     */
    const number_re = /((?:\d+(?:\.?\d*)?|\.\d+)[eE][+-]?\d*[fFdD]?)|((?:\d+\.\d*|\.\d+)[fFdD]?)|(0[xX][\da-fA-F]*\.[\da-fA-F]*[pP][+-]?\d*[fFdD]?)|(0[xX][\da-fA-F]*[lL]?)|(\d+[fFdDlL]?)/g;
    const number_token_types = [
        'dec-exp-number-literal',
        'dec-number-literal',
        'hex-exp-number-literal',
        'hex-number-literal',
        'int-number-literal',
    ];

    // zero-based index of the first capture group that matched something
    // (element 0 of a match array is the whole match, so it is skipped)
    const firstGroupIndex = (match) => match.findIndex((group, i) => i && group) - 1;

    const tokens = [];
    let lastindex = 0;
    let m;
    while ((m = raw_token_re.exec(text))) {
        // any text appearing between two matches is invalid
        if (m.index > lastindex) {
            tokens.push(new Token(source, offset + lastindex, m.index - lastindex, 'invalid'));
        }
        lastindex = m.index + m[0].length;
        if (m.index >= text.length) {
            // end of input (the empty `$` match)
            break;
        }
        let tokentype = raw_token_types[firstGroupIndex(m)];
        switch (tokentype) {
            case 'number-literal': {
                // the raw pattern only detects the start of a number - re-match
                // from the same position to extract the exact number part
                number_re.lastIndex = m.index;
                m = number_re.exec(text);
                tokentype = number_token_types[firstGroupIndex(m)];
                // update the raw_token_re position based on the length of the extracted number
                raw_token_re.lastIndex = lastindex = number_re.lastIndex;
                break;
            }
            case 'word': {
                // we need to work out what kind of keyword, literal or ident this is
                const word_m = m[0].match(word_re);
                tokentype = word_token_types[firstGroupIndex(word_m)];
                break;
            }
            case 'operator':
                // find the operator-type
                tokentype = getOperatorType(m[0]);
                break;
        }
        tokens.push(new Token(source, offset + m.index, m[0].length, tokentype));
    }
    return tokens;
}
/**
 * Operator classification. Each capture group of operator_re corresponds to
 * the entry at the same position in operator_token_types:
 * ```
 * =|[/%*&|^+-]=|>>>?=|<<=     assignment
 * \+\+|--                     inc
 * [!=]=                       equality
 * [<>]=?                      comparison
 * [&|^]                       bitwise
 * <<|>>>?                     shift
 * &&|[|][|]                   logical
 * [*%/]                       muldiv
 * ->                          lambda
 * [+-]                        plumin
 * [~!]                        unary
 * ```
 */
const operator_re = /^(?:(=|[/%*&|^+-]=|>>>?=|<<=)|(\+\+|--)|([!=]=)|([<>]=?)|([&|^])|(<<|>>>?)|(&&|[|][|])|([*%/])|(->)|([+-])|([~!]))$/;
/**
 * @typedef {
    'assignment-operator'|
    'inc-operator'|
    'equality-operator'|
    'comparison-operator'|
    'bitwise-operator'|
    'shift-operator'|
    'logical-operator'|
    'muldiv-operator'|
    'lambda-operator'|
    'plumin-operator'|
    'unary-operator'} OperatorKind
 */
/** @type {OperatorKind[]} */
const operator_token_types = [
    'assignment-operator',
    'inc-operator',
    'equality-operator',
    'comparison-operator',
    'bitwise-operator',
    'shift-operator',
    'logical-operator',
    'muldiv-operator',
    'lambda-operator',
    'plumin-operator',
    'unary-operator',
];
/**
 * Classify an operator by its text.
 *
 * @param {string} value the complete operator text, e.g. `+=` or `>>>`
 * @returns {OperatorKind|undefined} the operator kind, or `undefined` when
 *   `value` is not a recognised operator
 */
function getOperatorType(value) {
    const op_match = operator_re.exec(value);
    if (!op_match) {
        // not an operator - report "no kind" instead of failing on a null match
        return undefined;
    }
    // element 0 is the whole match, so skip it when locating the matched group
    const idx = op_match.findIndex((match, i) => i && match) - 1;
    // @ts-ignore
    return operator_token_types[idx];
}
// Public API of this module: the operator classifier, the tokenizer entry
// point and the Token class it produces.
exports.getOperatorType = getOperatorType;
exports.tokenize = tokenize;
exports.Token = Token;