diff --git a/langserver/java/body-parser.js b/langserver/java/body-parser.js new file mode 100644 index 0000000..e524588 --- /dev/null +++ b/langserver/java/body-parser.js @@ -0,0 +1,429 @@ +const Token = require('./parsetypes/token'); +const Declaration = require('./parsetypes/declaration'); +const TypeIdent = require('./parsetypes/typeident'); +const { parse_expression, ExpressionText, ParsedExpression } = require('../../src/expression/parse'); + +class LocalVariableDeclaration extends Declaration { + /** + * @param {*[]} modifiers + * @param {TypeIdent} typeident + */ + constructor(modifiers, typeident) { + super(null, null, modifiers); + this.typeident = typeident; + /** @type {LocalVariable[]} */ + this.vars = []; + } +} + +class LocalVariable { + /** + * @param {LocalVariableDeclaration} declaration + * @param {Token} name + * @param {Token} arrdims + */ + constructor(declaration, name, arrdims) { + this.declaration = declaration; + this.name = name; + this.arrdims = arrdims; + /** @type {Token} */ + this.equals = null; + /** @type {ParsedExpression} */ + this.expression = null; + /** @type {Token} */ + this.comma = null; + } +} + +/** + * @param {string} text + * @param {number} index + */ +function extractExpression(text, index = 0) { + const e = new ExpressionText(text.slice(index)); + const parsed = parse_expression(e); + console.log(parsed); + let consumed = text.indexOf(e.expr); + return { + parsed, + index: consumed, + } +} + +/** + * Parse a method body + * + * The parser is an inside-out parser. + * It works by tokenizing at the lowest level (comments, whitespace, identifiers, literals and symbols) + * and works its way outward, grouping tokens together in larger and larger chunks that it recognises. + * + * Each stage is forgiving on what it accepts and syntax errors (unexpected or missing tokens) are noted along the way. + * The final parse stage matches a set of statements - the highest-level concept of a method body. + * + * Once the parse is complete, all the complete expressions in the body can be type-resolved and checked. + * + * @param {string} text + * @param {number} text_index + */ +function parseBody(text, text_index = 0) { + const tokens = new TextBlockArray('body'); + + // preprocess - strip any comments and normalise strings + text = text.replace(/(\/\/.*|\/\*[\D\d]*?\*\/)|(".+?")/g, (_,comment,str) => + str ? + `"${' '.repeat(str.length-2)}"` + : _.replace(/[^\r\n]/g,' ') + ); + + const re = /(\s+)|(["'\d]|\b(?:true|false|null)\b)|\b(if|switch|while|else|for|case|default|do|try|finally|catch|return|break|continue)\b|(\bnew\b)|(\w+)|([;{}():])|(.)/g; + for (let m; m = re.exec(text);) { + if (m[1]) { + // ignore ws + comments + continue; + } + if (m[2]) { + // string, character, number, boolean or null literal - parse as an expression + const { parsed, index } = extractExpression(text, m.index); + tokens.blocks.push(new ParsedExpressionBlock(text, m.index, index - m.index, parsed)); + re.lastIndex = index; + continue; + } + if (m[3]) { + // statement keyword + tokens.blocks.push(TextBlock.from(text, m.index, m[0].length)); + continue; + } + if (m[4]) { + // new keyword - need extra handling because of anonymous types + const { parsed, index } = extractExpression(text, m.index); + tokens.blocks.push(new ParsedExpressionBlock(text, m.index, index - m.index, parsed)); + re.lastIndex = index; + } + if (m[5]) { + // word - first check if this looks like a variable declaration + const local_var_re = /(\w+(?: *\. *\w+)*(?: *<.*?>)?(?: *\[ *\])*)( +)(\w+)( *\[ *\])*/g; + local_var_re.lastIndex = m.index; + const local_var_match = local_var_re.exec(text); + if (local_var_match && local_var_match.index === m.index) { + m = local_var_match; + // it looks like a local variable declaration + const typeident = new TypeIdent([new Token(text_index + m.index, m[1], '', null)]); + const local_var_decl = new LocalVariableDeclaration([], typeident); + let name_token = new Token(text_index + m.index + m[1].length + m[2].length, m[3], '', null); + let postarray_token = m[4] ? new Token(name_token.source_idx + m[3].length, m[4], '', null) : null; + const vars = [new LocalVariable(local_var_decl, name_token, postarray_token)]; + + const next = /( *=)|( *, *)(\w+)( *\[ *\])*/g; + let lastIndex = local_var_re.lastIndex; + for (;;) { + next.lastIndex = lastIndex; + let m = next.exec(text); + if (!m || m.index !== lastIndex) { + break; + } + lastIndex = next.lastIndex; + if (m[1]) { + vars[0].equals = new Token(text_index + m.index + m[0].indexOf('='), '=', '', null); + // variable initialiser + const { parsed, index } = extractExpression(text, next.lastIndex); + lastIndex = index; + vars[0].expression = parsed; + } else { + // another variable + vars[0].comma = new Token(text_index + m.index + m[0].indexOf(','), ',', '', null); + name_token = new Token(text_index + m.index + m[2].length, m[3], '', null); + postarray_token = m[4] ? new Token(name_token.source_idx + m[3].length, m[4], '', null) : null; + vars.unshift(new LocalVariable(local_var_decl, name_token, postarray_token)); + } + } + local_var_decl.vars = vars.reverse(); + + tokens.blocks.push(new LocalVariableDeclBlock(text, local_var_match.index, lastIndex, local_var_decl)); + + re.lastIndex = lastIndex; + continue; + } + const { parsed, index } = extractExpression(text, m.index); + tokens.blocks.push(new ParsedExpressionBlock(text, m.index, index - m.index, parsed)); + re.lastIndex = index; + continue; + } + if (m[6]) { + // brackets, scopes or semcolon + tokens.blocks.push(TextBlock.from(text, m.index, m[0].length)); + continue; + } + // anything else is invalid + tokens.blocks.push(new InvalidTextBlock(text, m.index, m[0].length)); + } + tokens; + + // convert the tokens to their simplified form for grouping + let sourcemap = tokens.sourcemap(); + + // convert simple statements and expressions + let chunks = [ + /(for)(\(?)([XY]?)(;?)(X?)(;?)(X?)(\)?)/g, // for -> F + /(if)(\(?)(X?)(\)?)/g, // if -> I + /(while)(\(?)(X?)(\)?)/g, // while -> W + /(switch)(\(?)(X?)(\)?)/g, // switch -> P + /(catch)(\(?)(V?)(\)?)/g, // catch -> C + /(case)(X?)(:?)/g, // single case -> Q + /(default)(:?)/g, // default case -> Q + /(return|break|continue)(X?)(;?)/g, // return/break/continue -> S + /(finally)/g, // finally block -> N + /(else)/g, // else statement -> L + /Y(;?)/g, // variable declaration -> V + /X(;?)/g, // statement expression -> E + ] + + let replacements = 'FIWPCQQSNLVE'; + let ids = 'for_hdr if_hdr while_hdr switch_hdr catch case default rbc finally else localvar expr'.split(' '); + chunks.forEach((re,idx) => { + re.lastIndex = 0; + for (let m; m = re.exec(sourcemap.simplified);) { + let start = sourcemap.map[m.index]; + let end = sourcemap.map[m.index + m[0].length]; + tokens.shrink(ids[idx], start, end - start, replacements[idx]); + sourcemap = tokens.sourcemap(); + re.lastIndex = 0; + } + }) + + chunks = [ + /\{([SBVE;]*)(\})/g, // statement block -> B + /I([SBVE;])L?[SBVE;]/g, // if (Expression) Statement/Block Else -> S + /F[SBVE;]/g, // for loop -> S + /P(\{)(Q+[SBVE]*)*(\}?)/g, // switch(Expression){ Q(caseblock),... } -> S + /try(B)(C?B?)(N?B?)/g, // try, Block, catch/finally -> S + /do(B)(W?)(;?)/g, // do Block While -> S + /(? S - this needs the no-pre-brace check to allow do-while to pair correctly + ] + replacements = 'BSSSSSS'; + ids = 'block if for switch try dowhile while'.split(' '); + for (;;) { + let old = sourcemap.simplified; + chunks.forEach((re,idx) => { + re.lastIndex = 0; + for (let m; m = re.exec(sourcemap.simplified);) { + let start = sourcemap.map[m.index]; + let end = sourcemap.map[m.index + m[0].length]; + tokens.shrink(ids[idx], start, end - start, replacements[idx]); + sourcemap = tokens.sourcemap(); + re.lastIndex = 0; + } + }) + if (old === sourcemap.simplified) break; + } + + return tokens; + +} + +const expressions = [ + '1 for(){}', + 'a', + 'true', + 'null', + `""`, + `'c'`, + // operators + `1 + 2`, + `1 - 2`, + `1 * 2`, + `1 / 2`, + `1 % 2`, + `1 & 2`, + `1 | 2`, + `1 ^ 2`, + `1 < 2`, + `1 <= 2`, + `1 << 2`, + `1 > 2`, + `1 >= 2`, + `1 >> 2`, + `1 == 2`, + `1 instanceof 2`, + // assignment operators + `a += 2`, + `a -= 2`, + `a *= 2`, + `a /= 2`, + `a %= 2`, + `a &= 2`, + `a |= 2`, + `a ^= 2`, + `a <<= 2`, + `a >>= 2`, + // member, array, methodcall + `a.b`, + `a.b.c`, + `a[1]`, + `a[1,2]`, + `a[1][2]`, + `a()`, + `a(b)`, + `a(b, "")`, + `a.b()`, + `a.b()[1]`, +]; +expressions.map(e => { + extractExpression(e); +}) + +const src = +`for (int i=0; i < 10; i++) { + do { + if (i) { + System.out.println("1234"); + } + # + switch(x) { + case 4: + case 5: + return x; + case 6: + default: + return; + } + while (x > 0) true; + } while (i > 0); + while (x > 0) + System.out.println("1234"); +} +` + +class BlockRange { + + get end() { return this.start + this.length } + get text() { return this.source.slice(this.start, this.end) } + /** + * + * @param {string} source + * @param {number} start + * @param {number} length + */ + constructor(source, start, length) { + this.source = source; + this.start = start; + this.length = length; + } +} + +class TextBlock { + /** + * @param {BlockRange|TextBlockArray} range + * @param {string} simplified + */ + constructor(range, simplified) { + this.range = range; + this.simplified = simplified; + } + + /** + * @param {string} source + * @param {number} start + * @param {number} length + * @param {string} [simplified] + */ + static from(source, start, length, simplified) { + const range = new BlockRange(source, start, length); + return new TextBlock(range, simplified || range.text); + } + + toSource() { + return this.range instanceof BlockRange + ? this.range.text + : this.range.toSource() + } +} + +class ParsedExpressionBlock extends TextBlock { + /** + * @param {string} source + * @param {number} start + * @param {number} length + * @param {ParsedExpression} expression + */ + constructor(source, start, length, expression) { + super(new BlockRange(source, start, length), 'X'); + this.expression = expression; + } +} + +class LocalVariableDeclBlock extends TextBlock { + /** + * @param {string} source + * @param {number} start + * @param {number} end + * @param {LocalVariableDeclaration} decl + */ + constructor(source, start, end, decl) { + super(new BlockRange(source, start, end - start), 'Y'); + this.decl = decl; + } +} + +class InvalidTextBlock extends TextBlock { + /** + * @param {string} source + * @param {number} start + * @param {number} length + */ + constructor(source, start, length) { + super(new BlockRange(source, start, length), ''); + } +} + +class TextBlockArray { + /** + * @param {string} id + * @param {TextBlock[]} [blocks] + */ + constructor(id, blocks = []) { + this.id = id; + this.blocks = blocks; + } + + get simplified() { + return this.blocks.map(tb => tb.simplified).join(''); + } + + sourcemap() { + let idx = 0; + const parts = []; + /** @type {number[]} */ + const map = this.blocks.reduce((arr,tb,i) => { + arr[idx] = i; + parts.push(tb.simplified); + idx += tb.simplified.length; + return arr; + }, []); + map[idx] = this.blocks.length; + return { + simplified: parts.join(''), + map, + } + } + + /** + * @param {string} id + * @param {number} start + * @param {number} count + * @param {string} simplified + */ + shrink(id, start, count, simplified) { + if (count <= 0) return; + const collapsed = new TextBlockArray(id, this.blocks.splice(start, count, null)); + this.blocks[start] = new TextBlock(collapsed, simplified); + } + + get source() { return this.toSource() } + + toSource() { + return this.blocks.map(tb => tb.toSource()).join(''); + } +} + + +parseBody(src);