From 9498888ee694174be8dcb1ba7ca10a192e455b1b Mon Sep 17 00:00:00 2001 From: Dave Holoway Date: Mon, 29 Jun 2020 15:54:12 +0100 Subject: [PATCH] remove old parser files --- langserver/document.js | 2 +- langserver/java/parser.js | 498 ------------------ langserver/java/parser9.js | 1024 ------------------------------------ 3 files changed, 1 insertion(+), 1523 deletions(-) delete mode 100644 langserver/java/parser.js delete mode 100644 langserver/java/parser9.js diff --git a/langserver/document.js b/langserver/document.js index e0946ae..431bede 100644 --- a/langserver/document.js +++ b/langserver/document.js @@ -3,7 +3,7 @@ const path = require('path'); const os = require('os'); const { CEIType } = require('java-mti'); const { Settings } = require('./settings'); -const { ParseProblem } = require('./java/parser'); +const ParseProblem = require('./java/parsetypes/parse-problem'); const { parse } = require('./java/body-parser'); const { SourceUnit } = require('./java/source-types'); const { parseMethodBodies } = require('./java/validater'); diff --git a/langserver/java/parser.js b/langserver/java/parser.js deleted file mode 100644 index 3eae70d..0000000 --- a/langserver/java/parser.js +++ /dev/null @@ -1,498 +0,0 @@ -const Annotation = require('./parsetypes/annotation'); -const Declaration = require('./parsetypes/declaration'); -const FMCDeclaration = require('./parsetypes/fmc'); -const ImportDeclaration = require('./parsetypes/import'); -const PackageDeclaration = require('./parsetypes/package'); -const ParameterDeclaration = require('./parsetypes/parameter'); -const ParseProblem = require('./parsetypes/parse-problem'); -const ParseResult = require('./parsetypes/parse-result'); -const ParseSyntaxError = require('./parsetypes/parse-error'); -const ProblemSeverity = require('./parsetypes/problem-severity'); -const Token = require('./parsetypes/token'); -const TypeDeclaration = require('./parsetypes/type'); -const TypeIdent = require('./parsetypes/typeident'); -const TypeParameters = require('./parsetypes/type-parameters'); -/** - * @typedef {import('./parsetypes/modifier')} Modifier - */ - - - /** - * @param {Token[]} tokens - * @param {number} idx - */ -function findToken(tokens, idx) { - return tokens.find(t => t.simplified_text_idx === idx); -} - -/** - * @param {string} simplified - * @param {number} lastIndex - */ -function parseToBracketEnd(simplified, lastIndex) { - // parse until close bracket - let re = /[()]/g, balance = 1; - const start = re.lastIndex = lastIndex; - for (let m; m = re.exec(simplified);) { - if (m[0] === '(') balance++; - else if (--balance === 0) { - re.lastIndex++; - break; - } - } - return { - start, - end: re.lastIndex, - } -} - -/** - * @param {string} simplified - * @param {Token[]} tokens - * @param {{start: number, end: number}} simplified_range - * @param {*[]} invalids - */ -function parseParameters(simplified, tokens, simplified_range, invalids) { - const decls = [ - /[ X]+/g, - /@ *W( *\. *W)*( *\()?/g, - /M/g, - /W(?: *\. *W)*(?: *<.*?>)?(?: *\[ *\])*(?: +|( *\.\.\. *))W(?: *\[ *\])*( *,)?/g, // parameter decl - /(\)|$)/g, // end of params - ]; - const parameters = []; - /** @type {Modifier[]} */ - const modifiers = []; - let lastIndex = simplified_range.start; - for(;;) { - /** @type {{idx:number, d: RegExp, m:RegExpMatchArray}} */ - let best_match = null, next_best = null; - decls.find((d,idx) => { - d.lastIndex = lastIndex; - const m = d.exec(simplified); - if (!m) return; - if (m.index === lastIndex) { - best_match = {idx, d, m}; - return true; - } - if (idx === 0) { - return; - } - if (!next_best || m.index < next_best.m.index) { - next_best = {idx, d, m}; - } - }); - if (!best_match) { - const errorToken = findToken(tokens, lastIndex); - const error = new ParseSyntaxError(null, modifiers.splice(0), errorToken); - invalids.push(error); - best_match = next_best; - if (!next_best) { - break; - } - } - - lastIndex = best_match.d.lastIndex; - - if (best_match.idx === 1) { - // annotation - const at = findToken(tokens, best_match.m.index); - const name = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W')); - const annotation = new Annotation(at, name); - modifiers.push(annotation); - if (best_match.m[0].endsWith('(')) { - lastIndex = parseToBracketEnd(simplified, lastIndex).end; - } - } - else if (best_match.idx === 2) { - // modifier - const modifier = findToken(tokens, best_match.m.index); - modifiers.push(modifier); - } - else if (best_match.idx === 3) { - // parameter - const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W')); - const varargs = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].indexOf('...')) : null; - const comma = best_match.m[2] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf(',')) : null; - const typetokens = []; - const first_type_token = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W')); - for (let t = first_type_token, i = tokens.indexOf(t); t !== name; t = tokens[++i]) { - if (t.simplified_text !== ' ') - typetokens.push(t); - } - const param = new ParameterDeclaration(modifiers.splice(0), new TypeIdent(typetokens), varargs, name, comma); - parameters.push(param); - } else if (best_match.idx === 4) { - // end of parameters - break; - } - } - - return parameters; -} - -/** - * @param {Token[]} typelist_tokens - */ -function parseTypeIdentList(typelist_tokens) { - // split the typelist into typetoken chunks, separated by commas - let typeargs_balance = 0, array_balance = 0; - /** @type {Token[][]} */ - let types = [[]]; - typelist_tokens.forEach(t => { - switch(t.text) { - case ' ': - if (types[0].length === 0) { - return; - } - break; - case ',': - if (typeargs_balance <= 0 && array_balance <= 0) { - while (types[0][types[0].length - 1].text === ' ') { - types[0].pop(); - } - typeargs_balance = array_balance = 0; - types.unshift([]); - return; - } - break; - case '<': - typeargs_balance++; - break; - case '>': - typeargs_balance--; - break; - case ']': - array_balance++; - break; - case '[': - array_balance--; - break; - } - types[0].push(t); - }); - - // remove any blank entries (start comma or sequential commas) - return types.filter(t => t.length).reverse().map(tokens => new TypeIdent(tokens)); -} - -/** - * @param {string} source - */ -function parse(source) { - const re = /(\/\*[\d\D]*?\*\/)|(\/\*)|(\*\/)|((?:\/\/.*)|(?:\s+))|(".*?")|('.'?)|\b(package|import|class|enum|interface|extends|implements|throws)\b|\b(public|private|protected|static|final|abstract|native|volatile|transient|synchronized|strictfp)\b|(\.{3}|[@{}()<>,;?*\[\].])|\b(super|new)\b|\b([A-Za-z_]\w*)|(\d[\w.]*)/g; - - let source_idx = 0, simplified_text_idx = 0; - /** @type {Token[]} */ - let tokens = []; - function mapSimplified( - _, - mlc, - unterminated_mlc, - mlc_end, - slc_ws, - string, - char, - decl_keyword, - modifier, - symbol, - kw, - word - /* number, */ - ) { - if (mlc) return 'X';//mlc.replace(/[^\n]+/g, '') || ' '; - if (unterminated_mlc) return ' '; - if (mlc_end) return ' '; - if (slc_ws) return ' '; //slc_ws.replace(/[^\n]+/g, '').replace(/ +/,' ') || ' '; - if (string) return 'S'; - if (char) return 'C'; - if (decl_keyword) return decl_keyword; - if (modifier) return 'M'; - if (symbol) return symbol; - if (kw) return kw; - if (word) return 'W'; - return 'N'; - - } - const simplified = source.replace(re, (...args) => { - let text = args[0]; - let next_idx = source.indexOf(text, source_idx); - - simplified_text_idx += (next_idx - source_idx); - source_idx = next_idx; - - const simplified_text = mapSimplified.apply(null, args); - tokens.push(new Token(source_idx, text, simplified_text, simplified_text_idx)); - - source_idx += text.length; - simplified_text_idx += simplified_text.length; - - return simplified_text; - }); - - // console.log(simplified); - - const decls = [ - / +/g, - /package +W(?: *\. *W)*( *;)?/g, - /import +(M +)?W(?: *\. *W)*( *\.\*)?( *;)?/g, - /@ *W( *\. *W)*( *\()?/g, - /M/g, - /(class|enum|interface|@ *interface) +W(.+?(?= *[a-z{]))/g, // type declaration - /(implements|extends|throws) +W(.+?(?= *[a-z{]))/g, // decl - /W(?: *\. *W)*(?: *<.*?>)?(?: *\[ *\])* +W(?: *\[ *\])*( *[=;(,])?/g, // field/method - /W *\(/g, // constructor - /[{}]/g, // scope - /X/g, // multi-line comment - /<.*?>(?= *[WM@])/g, // type variables - /$/g, // end of file - ] - let lastIndex = 0; - let loc = ['base']; - let package_decl = null; - let imports = []; - let modifiers = []; - let types = []; - let invalids = []; - let lastMLC = null; - /** @type {TypeDeclaration[]} */ - let type_stack = [null]; - - for(;;) { - /** @type {{idx:number, d: RegExp, m:RegExpMatchArray}} */ - let best_match = null, next_best = null; - decls.find((d,idx) => { - d.lastIndex = lastIndex; - const m = d.exec(simplified); - if (!m) return; - if (m.index === lastIndex) { - best_match = {idx, d, m}; - return true; - } - if (idx === 0) { - return; - } - if (!next_best || m.index < next_best.m.index) { - next_best = {idx, d, m}; - } - }); - if (!best_match) { - const errorToken = findToken(tokens, lastIndex); - const error = new ParseSyntaxError(lastMLC, modifiers.splice(0), errorToken); - invalids.push(error); - lastMLC = null; - console.log(simplified.slice(lastIndex, lastIndex + 100)); - best_match = next_best; - if (!next_best) { - break; - } - } - - lastIndex = best_match.d.lastIndex; - - function parseToExpressionEnd() { - // parse expression - let re = /[(){};]/g, balance = [0,0]; - re.lastIndex = lastIndex; - for (let m; m = re.exec(simplified);) { - if (m[0] === '{') balance[0]++; - else if (m[0] === '(') balance[1]++; - else if (m[0] === '}') balance[0]--; - else if (m[0] === ')') balance[1]--; - else if (balance[0] <= 0 && balance[1] <= 0) { - break; - } - } - // console.log(simplified.slice(lastIndex, re.lastIndex)); - lastIndex = re.lastIndex; - } - - if (best_match.idx === 1) { - // package - map all the name parts - const nameparts = []; - for (let m, re=/W/g; m = re.exec(best_match.m[0]); ) { - const ident = findToken(tokens, best_match.m.index + m.index); - nameparts.push(ident); - } - const semicolon = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].length - 1) : null; - if (!package_decl) { - package_decl = new PackageDeclaration(lastMLC, modifiers.splice(0), nameparts, semicolon); - } - lastMLC = null; - } - if (best_match.idx === 2) { - // import - map all the name parts - const nameparts = []; - for (let m, re=/W/g; m = re.exec(best_match.m[0]); ) { - const ident = findToken(tokens, best_match.m.index + m.index); - nameparts.push(ident); - } - const static = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].indexOf('M')) : null; - const asterisk = best_match.m[2] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('*')) : null - const semicolon = best_match.m[3] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf(';')) : null; - let import_decl = new ImportDeclaration(lastMLC, modifiers.splice(0), nameparts, static, asterisk, semicolon); - imports.push(import_decl); - lastMLC = null; - } - if (best_match.idx === 3) { - // annotation - const at = findToken(tokens, best_match.m.index); - const name = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W')); - const annotation = new Annotation(at, name); - modifiers.push(annotation); - if (best_match.m[0].endsWith('(')) { - lastIndex = parseToBracketEnd(simplified, lastIndex).end; - } - } - if (best_match.idx === 4) { - // modifier - const modifier = findToken(tokens, best_match.m.index); - modifiers.push(modifier); - } - - if (best_match.idx === 5) { - // type declaration - const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W')); - /** @type {'class'|'interface'|'enum'|'@interface'} */ - // @ts-ignore - const kind = best_match.m[1].replace(/ /g, ''); - const type = new TypeDeclaration(type_stack[0], lastMLC, modifiers.splice(0), kind, name); - lastMLC = null; - types.push(type); - type_stack.unshift(type); - loc.unshift('typedecl'); - } - - if (best_match.idx === 6) { - // extends/implements/throws - const decl_kw = findToken(tokens, best_match.m.index); - const startidx = tokens.indexOf(findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W'))); - const endidx = tokens.indexOf(findToken(tokens,best_match.m.index + best_match.m[0].length - 1)); - const typelist = parseTypeIdentList(tokens.slice(startidx, endidx + 1)); - switch(decl_kw.text) { - case 'throws': - break; - case 'extends': - case 'implements': - if (loc[0] === 'typedecl') { - type_stack[0].super_declarations.push({ decl_kw, typelist }); - } - } - } - - if (best_match.idx === 7) { - // field or method - const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W')); - const typetokens = []; - for (let t = findToken(tokens, best_match.m.index), i = tokens.indexOf(t); t !== name; t = tokens[++i]) { - if (t.simplified_text !== ' ') - typetokens.push(t); - } - let parameters, equals_comma_sc = null; - switch (best_match.m[0].slice(-1)) { - case '(': - // method - let params_source_range = parseToBracketEnd(simplified, lastIndex); - lastIndex = params_source_range.end; - parameters = parseParameters(simplified, tokens, params_source_range, invalids); - break; - case '=': - // initialised field - equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length); - parseToExpressionEnd(); - break; - case ',': - // multi-declaration field - equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length); - throw new Error('not implemented'); - case ';': - // single field - equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length); - break; - default: - // invalid - but treat as a single field - break; - } - if (type_stack[0]) { - const fmc = new FMCDeclaration(type_stack[0], lastMLC, modifiers.splice(0), best_match.m[0].endsWith('(') ? 'method' : 'field', name, new TypeIdent(typetokens), equals_comma_sc, parameters); - type_stack[0].declarations.push(fmc); - } - lastMLC = null; - } - - if (best_match.idx === 8) { - // constructor (if the name matches the type) - let params_source_range = parseToBracketEnd(simplified, lastIndex); - lastIndex = params_source_range.end; - const parameters = parseParameters(simplified, tokens, params_source_range, invalids); - const name = findToken(tokens, best_match.m.index); - if (type_stack[0] && name.text === type_stack[0].name.text) { - const fmc = new FMCDeclaration(type_stack[0], lastMLC, modifiers.splice(0), 'constructor', name, null, null, parameters); - type_stack[0].declarations.push(fmc); - } else { - invalids.push(new ParseSyntaxError(lastMLC, modifiers.splice(0), name)); - } - lastMLC = null; - } - - if (best_match.idx === 9) { - // open/close scope - if (best_match.m[0] === '{') { - if (loc[0] === 'typedecl') loc[0] = 'typebody'; - else if (loc[0] === 'typebody') { - // static initer / method body - let re = /[{}]/g, balance = 1; - re.lastIndex = lastIndex; - for (let m; m = re.exec(simplified);) { - if (m[0] === '{') balance++; - else if (--balance === 0) { - re.lastIndex++; - break; - } - } - lastIndex = re.lastIndex; - } - } else { - // end scope - if (/^type/.test(loc[0])) { - loc.shift(); - type_stack.shift(); - } - } - } - - if (best_match.idx === 10) { - // mlc - lastMLC = findToken(tokens, best_match.m.index); - } - - if (best_match.idx === 11) { - // type parameters - const open = findToken(tokens, best_match.m.index); - const close = findToken(tokens, best_match.m.index + best_match.m[0].length - 1); - modifiers.push(new TypeParameters(open, close)); - } - - if (best_match.idx === 12) { - // end of file - break; - } - } - - return new ParseResult(package_decl, imports, types, invalids); -} - -module.exports = { - Annotation, - Declaration, - FMCDeclaration, - ImportDeclaration, - PackageDeclaration, - parse, - ParseProblem, - ParseResult, - ProblemSeverity, - Token, - TypeDeclaration, - TypeParameters, -} diff --git a/langserver/java/parser9.js b/langserver/java/parser9.js deleted file mode 100644 index c6a30f2..0000000 --- a/langserver/java/parser9.js +++ /dev/null @@ -1,1024 +0,0 @@ -const { TextBlock, TextBlockArray } = require('./parsetypes/textblock'); -const { tokenize, Token } = require('./tokenizer'); - -/** - * Normalises comments, whitespace, string and character literals. - * - * - this makes the regexes used for parsing much simpler - * - we make a note of the MLCs as we need some of them for JavaDocs - * After preprocessing, the source layout should still be the same - spaces - * are used to fill the gaps where necessary - * @param {string} source - */ -function preprocess(source) { - - let mlcs = []; - - const re = /(\/\*[\d\D]*?\*\/)|(\/\/.*)|([^\S\n ]+)|(".*?")|('.')/g; - let lastIndex = 0; - let normalised_source = source.replace(re, (_, mlc, slc, other_ws, str, char) => { - const idx = source.indexOf(_, lastIndex); - lastIndex = idx + _.length; - if (mlc) { - mlcs.push({ - comment: _, - index: idx, - }); - } else if (str) { - // string and character literals are filled with an invalid source character - return `"${'#'.repeat(str.length - 2)}"`; - } else if (char) { - // string and character literals are filled with an invalid source character - return `'#'`; - } - - return _.replace(/./g,' '); - }); - - // also strip out parameters from annotations here - we don't need them to parse the source - // and they make parsing messier. - // at some point, we will add them back in to check them... - normalised_source = stripAnnotationParameters(normalised_source); - - // the normalized source must have the same layout (line-lengths) as the original - // - this is important to preserve token positioning - if (normalised_source.length !== source.length) { - throw new Error('Preprocessing altered source length'); - } - - return { - original: source, - normalised: normalised_source, - mlcs, - } -} - -/** - * Removes parameters from annotations, keeping the annotation identifiers - * - * E.g @-Retention({"source"}) -> @-Retention - * @param {string} source (normalised) source text - */ -function stripAnnotationParameters(source) { - const parameterised_annotations_regex = /(@ *[a-zA-Z_]\w*(?: *\. *\w+)* *\()|(\()|(\))/g; - let annotation_start = null; - for (let m; m = parameterised_annotations_regex.exec(source); ) { - if (!annotation_start) { - if (m[1]) { - annotation_start = { - balance: 1, - idx: m.index + m[0].length - 1, - } - } - continue; - } - // we are inside an annotation and searching for the end - if (m[1] || m[2]) { - // another open bracket inside the annotation parameter - annotation_start.balance++; - } else if (m[3]) { - // close bracket - if (--annotation_start.balance === 0) { - // we've reached the end of the annotation parameters - const paramtext = source.slice(annotation_start.idx, m.index+1); - source = `${source.slice(0, annotation_start.idx)}${paramtext.replace(/./g, ' ')}${source.slice(m.index+1)}`; - annotation_start = null; - } - } - } - - return source; -} - -/** - * @param {string} source (normalised) source text - */ -function scopify(source) { - // \b(class|interface|enum|@ *interface)\b( +(\w+))? - looks for a type declaration with optional name - // (\. *)? - this is used to ignore 'XYZ.class' expressions - const module_scope = { - kind: 'module', - start: 0, - open: null, - end: source.length, - name: null, - inner_scopes: [], - parent: null, - }; - const scope_stack = [module_scope]; - let method_scope = null; - const scopes_regex = /((\. *)?(\bclass|\binterface|\benum|@ *interface)\b(?: +(\w+))?)|(=[^;]*?\{)|(\{)|(\})/g; - for (let m; m = scopes_regex.exec(source); ) { - if (m[1]) { - if (m[2]) { - // ignore type keywords prefixed with . - continue; - } - // start of a new type declaration - const scope = { - kind: m[3].startsWith('@') ? '@interface' : m[3], - start: m.index, - end: null, - name: m[4] || null, - inner_scopes: [], - open: null, - parent: scope_stack[0], - } - scope_stack[0].inner_scopes.push(scope); - scope_stack.unshift(scope); - continue; - } - if (m[5]) { - // equals - // searching for equals is a pain, but is necessary to prevent - // field initialiser expressions like '{"arrinit"}' and 'new X() {}' from - // messing up scoping boundaries - if (method_scope) { - scopes_regex.lastIndex = m.index + 1; - continue; // ignore if we are inside a method - } - // parse the expression until we reach a semicolon, taking into account balanced scopes - const expr_re = /(\{)|(\})|;/g; - expr_re.lastIndex = m.index; - let expr_balance = 0; - for (let m; m = expr_re.exec(source);) { - if (m[1]) expr_balance++; - else if (m[2]) { - if (expr_balance === 0) { - // force a break if there are too many closes - scopes_regex.lastIndex = expr_re.lastIndex - 1; - break; - } - expr_balance--; - } else if (expr_balance === 0) { - // semicolon reached - scopes_regex.lastIndex = expr_re.lastIndex; - break; - } - } - continue; - } - if (m[6]) { - // open brace - if (method_scope) { - method_scope.balance++; - continue; - } - if (scope_stack[0].open === null) { - // the start of the type body - scope_stack[0].open = m.index; - continue; - } - method_scope = { - balance: 1, - }; - continue; - } - // close brace - if (method_scope) { - if (--method_scope.balance === 0) { - method_scope = null; - } - continue; - } - if (scope_stack.length > 1) { - scope_stack[0].end = m.index+1; - scope_stack.shift(); - continue; - } - } - - return module_scope; -} - -function parse2(source) { - console.time('preprocess'); - const preprocessed = preprocess(source); - console.timeEnd('preprocess'); - - // after preprocessing, divide the source into type scopes - // - this allows us to quickly determine what named types are available - // and to eliminate method implementations (which involve more complex parsing later). - console.time('scopify'); - const scopes = scopify(preprocessed.normalised); - console.timeEnd('scopify'); - scopes; - -} - -const markers = { - arrayQualifier: 'A', - blocks: 'B', - constructor: 'C', - dottedIdent: 'D', - initialiser: 'E', - field: 'F', - parameter: 'F', - method: 'G', - typevarInterface: 'H', - boundedTypeVar: 'I', - extends: 'J', - implements:'K', - throws: 'L', - modifier: 'M', - package: 'N', - import: 'O', - primitive: 'P', - annotation: 'Q', - brackets: 'R', - typeArgs: 'T', - enumvalues: 'U', - varDecl: 'V', - ident: 'W', - typeDecl: 'Z', - error: ' ', -} - -/** - * - * @param {TextBlockArray} sourceblocks - * @param {string} id - * @param {RegExp} re - * @param {string} [marker] - * @param {boolean} [recursive] - * @param {{}} [parseClass] - * @param {{time:boolean}} [opts] - */ -function group(sourceblocks, id, re, marker, recursive, parseClass, opts) { - if (opts && opts.time) console.time(id); - let grouped = []; - let sourcemap = sourceblocks.sourcemap(); - if (!re.global) { - throw new Error('regex must have the global flag enabled'); - } - for (;;) { - re.lastIndex = 0; - const matches = []; - for (let m; m = re.exec(sourcemap.simplified); ) { - // every group must start and end on a definite boundary - const start = sourcemap.map[m.index]; - let end = sourcemap.map[m.index + m[0].length - 1]; - if (end === undefined) - end = sourcemap.map[m.index + m[0].length]; - if (start === undefined || end === undefined) { - throw new Error('undefined group boundary') - } - // if no marker is defined, the first capturing group acts like a lookup - const char = marker || markers[m[1]]; - if (!char) { - throw new Error(`Missing marker for ${id}`); - } - const info = { start, end, match: m, replace: char, }; - // unshift so we end up in reverse order - matches.unshift(info); - } - for (let {start, end, match, replace} of matches) { - const shrunk = sourceblocks.shrink(id, start, end-start+1, match, replace, parseClass); - // the blocks are shrunk in reverse order, so unshift to get the correct order - grouped.unshift(shrunk); - } - if (recursive && matches.length) { - sourcemap = sourceblocks.sourcemap(); - continue; - } - break; - } - if (opts && opts.time) console.timeEnd(id); - return grouped; -} - -class DeclarationBlock extends TextBlock { - /** - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified) { - super(section, simplified); - //this.docs_token = section.blocks.filter(b => b.simplified.startsWith('\t')).pop(); - this.modifiers = section.blocks.filter(b => b.simplified.startsWith('M')); - this.annotations = section.blocks.filter(b => b.simplified.startsWith('Q')); - } - - get docs() { - return '';// this.docs_token ? this.docs_token.source : ''; - } -} - -class DeclaredVariableBlock extends DeclarationBlock { - static parseRE = /([MQ](\s*[MQ])*\s+)?(V)( *=[^;MV]*)? *;/g - - /** - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified, match) { - super(section, simplified); - this.decl = section; - const sm = section.sourcemap(); - /** @type {VarDeclBlock} */ - // @ts-ignore - this.varBlock = section.blocks[sm.map[match[1] ? match[1].length : 0]]; - } - - get isVarArgs() { - return !!this.varBlock.varargs_token; - } - - /** - * Return the field name - */ - get name() { - return this.varBlock ? this.varBlock.name : ''; - } - - get name_token() { - return this.varBlock ? this.varBlock.name_token : null; - } - - get type() { - return this.varBlock ? this.varBlock.type : ''; - } - - get typeTokens() { - return this.varBlock ? this.varBlock.typeTokens : []; - } -} - -class FieldBlock extends DeclaredVariableBlock { } - -class EnumValueBlock extends TextBlock { - - static parseRE = /(?<=^\{\s*)[W](\s*=[^,;]*)?(\s*,\s*[W](\s*=[^,;]*)?)*(\s*;)?/g - - /** - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified) { - super(section, simplified); - } - -} - -class ParameterBlock extends DeclaredVariableBlock { - static parseRE = /([MQ](\s*[MQ])*\s+)?(V)/g -} - - -class MCBlock extends DeclarationBlock { - - /** - * - * @param {TextBlockArray} section - * @param {string} simplified - * @param {RegExpMatchArray} match - */ - constructor(section, simplified, match) { - super(section, simplified); - const sm = section.sourcemap(); - this.paramBlock = section.blocks[sm.map[match[0].indexOf('R')]]; - this.typevarsBlock = section.blocks[sm.map[match[0].indexOf('T')]]; - this.parsed = { - typevars: null, - parameters: null, - /** @type {TextBlock[]} */ - errors: [], - } - } - - /** - * @return {ParameterBlock[]} - */ - get parameters() { - this._ensureParsed(); - return this.parsed.parameters; - } - - /** - * Returns the TextBlock associated with the method body (or the semicolon) - */ - body() { - // always the last block atm - const blocks = this.blockArray(); - return blocks.blocks[blocks.blocks.length - 1]; - } - - get name() { - // overriden by subclasses - return ''; - } - - /** - * Return the method name and params, formatted on a single line - */ - get nameAndParams() { - return `${this.name}${this.paramBlock.source}`.replace(/\s+/g, ' '); - } - - get parseErrors() { - this._ensureParsed(); - return this.parsed.errors; - } - - get typeVariables() { - this._ensureParsed(); - return this.parsed.typevars; - } - - _ensureParsed() { - if (this.parsed.parameters) { - return; - } - const param_block = this.paramBlock.blockArray(); - parseArrayTypes(param_block); - parseAnnotations(param_block); - parseTypeArgs(param_block); - const vars = group(param_block, 'var-decl', VarDeclBlock.parseRE, markers.varDecl, false, VarDeclBlock); - this.parsed.parameters = group(param_block, 'param', ParameterBlock.parseRE, markers.parameter, false, ParameterBlock); - // parameters must be a comma-separated list - const sm = param_block.sourcemap(); - if (sm.simplified.search(/^\((\s*F(\s*,\s*F)*)?\s*\)/) !== 0) { - let invalid = sm.simplified.match(/^(\(\s*)(F?)(?:\s*,\s*F)*\s*/); - if (invalid) { - const token_idx = invalid[2] - ? sm.map[invalid[0].length] // there's a problem with a subsequent declaration - : sm.map[invalid[1].length] // there's a problem with the first declaration - const token = param_block.blocks[token_idx]; - if (token) { - this.parsed.errors.push(token); - } - } - } - - // parse type arguments - this.parsed.typevars = this.typevarsBlock ? parseTypeVariables(this.typevarsBlock.blockArray()) : []; - } -} - -class MethodBlock extends MCBlock { - static parseRE = /([MQT](?:\s*[MQT])*\s+)?(V\s*)R(\s*A)?(\s*L)?\s*[B;]/g; - - /** - * - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified, match) { - super(section, simplified, match); - const sm = section.sourcemap(); - /** @type {VarDeclBlock} */ - // @ts-ignore - this.varBlock = section.blocks[sm.map[match[0].lastIndexOf('V')]]; - this.postNameArrToken = section.blocks[sm.map[match[0].lastIndexOf('A')]]; - } - - /** - * Return the method name - */ - get name() { - return this.varBlock.name; - } - - get name_token() { - return this.varBlock.name_token; - } - - get type() { - return this.varBlock.type + (this.postNameArrToken ? this.postNameArrToken.source : ''); - } - - get typeTokens() { - return this.postNameArrToken ? [...this.varBlock.typeTokens, this.postNameArrToken] : this.varBlock.typeTokens; - } -} - -class ConstructorBlock extends MCBlock { - static parseRE = /([MQT](?:\s*[MQT])*\s+)?(W\s*)R(\s*L)?\s*[B;]/g; - - /** - * - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified, match) { - super(section, simplified, match); - const sm = section.sourcemap(); - const name_offset = match[1] ? match[1].length : 0; - /** @type {VarDeclBlock} */ - // @ts-ignore - this.nameBlock = section.blocks[sm.map[name_offset]]; - } - - get name() { - return this.nameBlock ? this.nameBlock.source : ''; - } -} - -class InitialiserBlock extends DeclarationBlock { - static parseRE = /([MQ](?:\s*[MQ])*\s+)?B/g; - - /** - * - * @param {TextBlockArray} section - * @param {string} simplified - */ - constructor(section, simplified, match) { - super(section, simplified); - } - - /** - * Returns the TextBlock associated with the method body - */ - body() { - // always the last block atm - const blocks = this.blockArray(); - return blocks.blocks[blocks.blocks.length - 1]; - } -} - -class TypeDeclBlock extends DeclarationBlock { - static parseRE = /([MQ](\s*[MQ])*\s+)?(class|enum|interface|@ *interface) +W(\s*T)?(\s*[JK])*\s*B/g; - static marker = 'Z'; - - /** - * @param {TextBlockArray} blocks - * @param {string} simplified - */ - constructor(blocks, simplified) { - super(blocks, simplified); - this.decl = blocks; - this.kindToken = this.decl.blocks.find(b => !/^[MQ\s]/.test(b.simplified)); - this.name_token = this.decl.blocks.find(b => b.simplified.startsWith('W')); - this.typevars_token = this.decl.blocks.find(b => b.simplified.startsWith('T')); - this.extends_decl = this.decl.blocks.find(b => b.simplified.startsWith('J')); - this.implements_decl = this.decl.blocks.find(b => b.simplified.startsWith('K')); - /** @type {TypeDeclBlock} */ - this.outer_type = null; - /** @type {ModuleBlock} */ - this.mod = null; - this.parsed = { - /** @type {{name: string, decl:(TextBlock|BoundedTypeVar)}[]} */ - typevars: null, - /** @type {FieldBlock[]} */ - fields: null, - /** @type {MethodBlock[]} */ - methods: null, - /** @type {ConstructorBlock[]} */ - constructors: null, - /** @type {InitialiserBlock[]} */ - initialisers: null, - /** @type {TypeDeclBlock[]} */ - types: null, - /** @type {TextBlock[]} */ - errors: null, - } - } - - /** - * Return the kind of type declared - */ - kind() { - /** @type {'class'|'enum'|'interface'|'@'} */ - // @ts-ignore - const id = this.kindToken.toSource(); - return id === '@' ? '@interface' : id; - } - - get fullyDottedName() { - return this.shortSignature.replace(/[/$]/g, '.'); - } - - get shortSignature() { - if (this.outer_type) { - return `${this.outer_type.shortSignature}$${this.simpleName}` - } - const pkg = this.mod.packageName.replace(/\./g, '/'); - return `${pkg}${pkg ? '/' : ''}${this.simpleName}` - } - - /** - * Return the type name with no type-parameter info - */ - get simpleName() { - return this.name_token ? this.name_token.toSource() : ''; - } - - /** - * Returns the TextBlock associated with the type body - */ - body() { - // always the last block atm - return this.decl.blocks[this.decl.blocks.length - 1]; - } - - get typevars() { - this._ensureParsed(); - return this.parsed.typevars; - } - - get fields() { - this._ensureParsed(); - return this.parsed.fields; - } - - get methods() { - this._ensureParsed(); - return this.parsed.methods; - } - - get constructors() { - this._ensureParsed(); - return this.parsed.constructors; - } - - get initialisers() { - this._ensureParsed(); - return this.parsed.initialisers; - } - - get types() { - this._ensureParsed(); - return this.parsed.types; - } - - get parseErrors() { - this._ensureParsed(); - return this.parsed.errors; - } - - /** - */ - _ensureParsed() { - if (this.parsed.fields) { - return; - } - this.parsed.typevars = this.typevars_token - ? parseTypeVariables(this.typevars_token.blockArray()) - : []; - const body = this.body().blockArray(); - parseArrayTypes(body); - parseTypeArgs(body); - parseAnnotations(body); - parseEITDecls(body); - /** @type {TypeDeclBlock[]} */ - this.parsed.types = parseTypeDecls(body, this, this.mod); - - group(body, 'var-decl', VarDeclBlock.parseRE, markers.varDecl, false, VarDeclBlock); - if (this.kind() === 'enum') { - /** @type {EnumValueBlock[]} */ - this.parsed.enums = group(body, 'enumvalue', EnumValueBlock.parseRE, markers.enumvalues, false, EnumValueBlock); - } - /** @type {FieldBlock[]} */ - this.parsed.fields = group(body, 'field', FieldBlock.parseRE, markers.field, false, FieldBlock); - /** @type {MethodBlock[]} */ - this.parsed.methods = group(body, 'method', MethodBlock.parseRE, markers.method, false, MethodBlock); - /** @type {ConstructorBlock[]} */ - this.parsed.constructors = group(body, 'constructor', ConstructorBlock.parseRE, markers.constructor, false, ConstructorBlock); - /** @type {InitialiserBlock[]} */ - this.parsed.initialisers = group(body, 'initialiser', InitialiserBlock.parseRE, markers.initialiser, false, InitialiserBlock); - // anything other than types, fields, methods, constructors, enums and initialisers are errors - /** @type {TextBlock[]} */ - this.parsed.errors = group(body, 'type-body-error', /[^{}ZFGCEU\s;]+/g, markers.error); - } -} - -class PackageBlock extends DeclarationBlock { - static parseRE = /([Q](\s*[Q])*\s*)?package +[DW] *;/g; - - /** - * - * @param {TextBlockArray} section - * @param {string} simplified - * @param {RegExpMatchArray} match - */ - constructor(section, simplified, match) { - super(section, simplified); - const sm = section.sourcemap(); - this.name_token = section.blocks[sm.map[(match[0].search(/[DW]/))]]; - } - - get name() { - if (!this.name_token) return ''; - if (this.name_token.range instanceof TextBlockArray) { - // dotted ident - strip any intermediate whitespace between the tokens - const filtered = this.name_token.range.blocks.filter(b => !b.simplified.startsWith(' ')); - return filtered.map(b => b.source).join(''); - } - // single ident - return this.name_token.source; - } -} - -class ImportBlock extends DeclarationBlock { - static parseRE = /([Q](\s*[Q])*\s*)?import( +M)? +[DW]( *\.\*)? *;/g - - /** - * @param {TextBlockArray} section - * @param {string} simplified - * @param {RegExpMatchArray} match - */ - constructor(section, simplified, match) { - super(section, simplified); - const sm = section.sourcemap(); - this._static_token = section.blocks[sm.map[(match[0].search(/M/))]]; - this._name_token = section.blocks[sm.map[(match[0].search(/[DW]/))]]; - this._demandload_token = section.blocks[sm.map[(match[0].search(/\*/))]]; - } - - get isStatic() { - return this._static_token ? this._static_token.source === 'static' : false; - } - - get isDemandLoad() { - return !!this._demandload_token; - } - - get name() { - if (!this._name_token) return ''; - if (this._name_token.range instanceof TextBlockArray) { - // dotted ident - strip any intermediate whitespace between the tokens - const filtered = this._name_token.range.blocks.filter(b => !b.simplified.startsWith(' ')); - return filtered.map(b => b.source).join(''); - } - // single ident - return this._name_token.source; - } -} - -class ModuleBlock extends TextBlockArray { - /** - * @param {Token[]} blocks - */ - constructor(blocks) { - super('module', blocks); - this._parsed = null; - - // merge dotted identifiers - group(this, 'dotted-ident', /W(?:\s*\.\s*W)+/g, markers.dottedIdent); - group(this, 'brackets', /\([^()]*\)/g, markers.brackets, true); - group(this, 'block', /\{[^{}]*\}/g, markers.blocks, true); - } - - decls() { - const parsed = this._ensureParsed(); - return [ - ...parsed.packages, - ...parsed.imports, - ...parsed.types, - ].sort((a,b) => a.range.start - b.range.start); - } - - get packageName() { - const pkg_token = this.package; - return pkg_token ? pkg_token.name : ''; - } - - get package() { - return this._ensureParsed().packages[0]; - } - - get packages() { - return this._ensureParsed().packages; - } - - get imports() { - return this._ensureParsed().imports; - } - - get types() { - return this._ensureParsed().types; - } - - get parseErrors() { - return this._ensureParsed().errors; - } - - _ensureParsed() { - if (this._parsed) { - return this._parsed; - } - /** @type {PackageBlock[]} */ - const packages = parsePackages(this); - const imports = parseImports(this); - parseTypeArgs(this); - parseAnnotations(this); - parseEITDecls(this); - const types = parseTypeDecls(this, null, this); - // anything that's not a package, import or type declaration is an error - const errors = group(this, 'module-errors', /[^NOZ;\s]+/g, ' '); - return this._parsed = { - packages, - imports, - types, - errors, - } - } -} - -/** - * @param {TextBlockArray} sourceblocks - * @return {PackageBlock[]} - */ -function parsePackages(sourceblocks) { - return group(sourceblocks, 'package', PackageBlock.parseRE, markers.package, false, PackageBlock); -} - -/** - * @param {TextBlockArray} sourceblocks - * @return {ImportBlock[]} - */ -function parseImports(sourceblocks) { - return group(sourceblocks, 'import', ImportBlock.parseRE, markers.import, false, ImportBlock); -} - -function parseArrayTypes(sourceblocks) { - group(sourceblocks, 'array-type', /\[ *\](( *\[ *\])*)/g, markers.arrayQualifier); -} - -/** - * @param {TextBlockArray} sourceblocks - * @returns {{decl: TextBlock|BoundedTypeVar, name_token: TextBlockArray, name: string}[]} - */ -function parseTypeVariables(sourceblocks) { - // split the token into a list of typevars - // - each type var must be a simple ident (W), a bounded var (I) - // or a wildcard (?) - return sourceblocks.blocks.reduce((arr,b) => { - if (/^[WI?]/.test(b.simplified)) { - arr.push({ - decl: b, - get name_token() { - return this.decl instanceof BoundedTypeVar - ? this.decl.range.blocks[0] - : this.decl - }, - get name() { - return this.name_token.source; - }, - }) - } - return arr; - }, []); - -} - -function parseTypeArgs(sourceblocks) { - // sort out type parameters + type arguments - // re = /< *[PWD?]( *T)?( *A)?( *, *[PWD]( *T)?( *A)?)* *>/g; - // const bounded_re = /[W?] +(extends|super) +[PWD?]( *T)?( *A)?( *& *[PWD?]( *T)?( *A)?)*/g; - - // we must perform a recursive type-args grouping before and after bounded typevars - // to handle things like: - // class X> & X> - // class W> & W> - // -> class W - // -> class W - // -> class W - // -> class WT - const re = /< *[PWDI?]( *T)?( *A)?( *, *[PWDI?]( *T)?( *A)?)* *>/g; - group(sourceblocks, 'type-args', re, markers.typeArgs, true); - - group(sourceblocks, 'typevar-bound-intf', TypeVarBoundInterface.parseRE, markers.typevarInterface, false, TypeVarBoundInterface); - group(sourceblocks, 'bounded-typevar', BoundedTypeVar.parseRE, markers.boundedTypeVar, false, BoundedTypeVar); - - //const re = /< *[PWDI?]( *T)?( *A)?( *, *[PWDI]( *T)?( *A)?)* *>/g; - //const re = /< *[PWD?]( +(extends|super) +[PWD?]( *T)?( *A)?( *& *[PWD?]( *T)?( *A)?)*)?( *T)?( *A)?( *, *[PWD]( +(extends|super) +[PWD?]( *T)?( *A)?( *& *[PWD?]( *T)?( *A)?)*)?( *T)?( *A)?)* *>/g; - //const re = /(?<=[DW]\s*)<[ ]>/g; - const ta2 = group(sourceblocks, 'type-args', re, markers.typeArgs, true); -} - -function parseAnnotations(sourceblocks) { - group(sourceblocks, 'annotation', /@ *[WD]( *R)?/g, markers.annotation); -} - -function parseEITDecls(sourceblocks) { - group(sourceblocks, 'eit-decl', /\b(extends|implements|throws)\s+[WD](\s*[WDT,.])*/g); -} - -/** - * @param {TextBlockArray} sourceblocks - * @param {TypeDeclBlock} outer_type - * @param {ModuleBlock} mod - */ -function parseTypeDecls(sourceblocks, outer_type, mod) { - /** @type {TypeDeclBlock[]} */ - const typedecls = group(sourceblocks, 'type-decl', TypeDeclBlock.parseRE, markers.typeDecl, false, TypeDeclBlock); - typedecls.forEach(td => { - td.outer_type = outer_type; - td.mod = mod; - }); - return typedecls; -} - -/** - * Optional interface bounds that follow a bounded type variable - * e.g - * - * Type - * - * marker: H - */ -class TypeVarBoundInterface extends TextBlock { - static parseRE = /& *([PWD](?: *T)?(?: *\. *[PWD](?: *T)?)*)/g; - - /** - * @param {TextBlockArray} section - * @param {string} simplified - * @param {RegExpMatchArray} match - */ - constructor(section, simplified, match) { - super(section, simplified); - } -} - -/** - * Bounded type variable - * - * marker: I - */ -class BoundedTypeVar extends TextBlock { - // we need the class|enum|interface lookbehind to prevent matches to class declarations with extends - static parseRE = /(? t.source).join(''); - } - - get typeTokens() { - return this.type_tokens; - } -} - -/** - * @param {string} source - */ -function parse(source) { - console.time('tokenize'); - const tokens = tokenize(source); - console.timeEnd('tokenize'); - - const mod = new ModuleBlock(tokens); - return mod; -} - -module.exports = { - parse, - TextBlock, - TextBlockArray, - ModuleBlock, - PackageBlock, - ImportBlock, - TypeDeclBlock, - FieldBlock, - MethodBlock, - ConstructorBlock, - InitialiserBlock, - DeclaredVariableBlock, - ParameterBlock, -}