remove old parser files

Dave Holoway
2020-06-29 15:54:12 +01:00
parent d74837158a
commit 9498888ee6
3 changed files with 1 addition and 1523 deletions


@@ -3,7 +3,7 @@ const path = require('path');
 const os = require('os');
 const { CEIType } = require('java-mti');
 const { Settings } = require('./settings');
-const { ParseProblem } = require('./java/parser');
+const ParseProblem = require('./java/parsetypes/parse-problem');
 const { parse } = require('./java/body-parser');
 const { SourceUnit } = require('./java/source-types');
 const { parseMethodBodies } = require('./java/validater');
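
The only functional change in this file is where ParseProblem comes from: it is now required directly from ./java/parsetypes/parse-problem rather than destructured from the old ./java/parser module (whose export list appears at the end of the deleted file below). A minimal sketch of the export shape the new require form implies; the module body here is assumed for illustration, not copied from the repo:

// java/parsetypes/parse-problem.js -- assumed shape
class ParseProblem {
    // fields and methods omitted; only the export shape matters here
}
// The class itself is the export, so callers write:
//   const ParseProblem = require('./java/parsetypes/parse-problem');
// whereas the deleted parser below re-exported it as a named property:
//   module.exports = { /* ... */ ParseProblem, /* ... */ };
//   const { ParseProblem } = require('./java/parser');
module.exports = ParseProblem;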


@@ -1,498 +0,0 @@
const Annotation = require('./parsetypes/annotation');
const Declaration = require('./parsetypes/declaration');
const FMCDeclaration = require('./parsetypes/fmc');
const ImportDeclaration = require('./parsetypes/import');
const PackageDeclaration = require('./parsetypes/package');
const ParameterDeclaration = require('./parsetypes/parameter');
const ParseProblem = require('./parsetypes/parse-problem');
const ParseResult = require('./parsetypes/parse-result');
const ParseSyntaxError = require('./parsetypes/parse-error');
const ProblemSeverity = require('./parsetypes/problem-severity');
const Token = require('./parsetypes/token');
const TypeDeclaration = require('./parsetypes/type');
const TypeIdent = require('./parsetypes/typeident');
const TypeParameters = require('./parsetypes/type-parameters');
/**
* @typedef {import('./parsetypes/modifier')} Modifier
*/
/**
* @param {Token[]} tokens
* @param {number} idx
*/
function findToken(tokens, idx) {
return tokens.find(t => t.simplified_text_idx === idx);
}
/**
* @param {string} simplified
* @param {number} lastIndex
*/
function parseToBracketEnd(simplified, lastIndex) {
// parse until close bracket
let re = /[()]/g, balance = 1;
const start = re.lastIndex = lastIndex;
for (let m; m = re.exec(simplified);) {
if (m[0] === '(') balance++;
else if (--balance === 0) {
re.lastIndex++;
break;
}
}
return {
start,
end: re.lastIndex,
}
}
/**
* @param {string} simplified
* @param {Token[]} tokens
* @param {{start: number, end: number}} simplified_range
* @param {*[]} invalids
*/
function parseParameters(simplified, tokens, simplified_range, invalids) {
const decls = [
/[ X]+/g,
/@ *W( *\. *W)*( *\()?/g,
/M/g,
/W(?: *\. *W)*(?: *<.*?>)?(?: *\[ *\])*(?: +|( *\.\.\. *))W(?: *\[ *\])*( *,)?/g, // parameter decl
/(\)|$)/g, // end of params
];
const parameters = [];
/** @type {Modifier[]} */
const modifiers = [];
let lastIndex = simplified_range.start;
for(;;) {
/** @type {{idx:number, d: RegExp, m:RegExpMatchArray}} */
let best_match = null, next_best = null;
decls.find((d,idx) => {
d.lastIndex = lastIndex;
const m = d.exec(simplified);
if (!m) return;
if (m.index === lastIndex) {
best_match = {idx, d, m};
return true;
}
if (idx === 0) {
return;
}
if (!next_best || m.index < next_best.m.index) {
next_best = {idx, d, m};
}
});
if (!best_match) {
const errorToken = findToken(tokens, lastIndex);
const error = new ParseSyntaxError(null, modifiers.splice(0), errorToken);
invalids.push(error);
best_match = next_best;
if (!next_best) {
break;
}
}
lastIndex = best_match.d.lastIndex;
if (best_match.idx === 1) {
// annotation
const at = findToken(tokens, best_match.m.index);
const name = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W'));
const annotation = new Annotation(at, name);
modifiers.push(annotation);
if (best_match.m[0].endsWith('(')) {
lastIndex = parseToBracketEnd(simplified, lastIndex).end;
}
}
else if (best_match.idx === 2) {
// modifier
const modifier = findToken(tokens, best_match.m.index);
modifiers.push(modifier);
}
else if (best_match.idx === 3) {
// parameter
const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W'));
const varargs = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].indexOf('...')) : null;
const comma = best_match.m[2] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf(',')) : null;
const typetokens = [];
const first_type_token = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W'));
for (let t = first_type_token, i = tokens.indexOf(t); t !== name; t = tokens[++i]) {
if (t.simplified_text !== ' ')
typetokens.push(t);
}
const param = new ParameterDeclaration(modifiers.splice(0), new TypeIdent(typetokens), varargs, name, comma);
parameters.push(param);
} else if (best_match.idx === 4) {
// end of parameters
break;
}
}
return parameters;
}
/**
* @param {Token[]} typelist_tokens
*/
function parseTypeIdentList(typelist_tokens) {
// split the typelist into typetoken chunks, separated by commas
let typeargs_balance = 0, array_balance = 0;
/** @type {Token[][]} */
let types = [[]];
typelist_tokens.forEach(t => {
switch(t.text) {
case ' ':
if (types[0].length === 0) {
return;
}
break;
case ',':
if (typeargs_balance <= 0 && array_balance <= 0) {
while (types[0][types[0].length - 1].text === ' ') {
types[0].pop();
}
typeargs_balance = array_balance = 0;
types.unshift([]);
return;
}
break;
case '<':
typeargs_balance++;
break;
case '>':
typeargs_balance--;
break;
case ']':
array_balance++;
break;
case '[':
array_balance--;
break;
}
types[0].push(t);
});
// remove any blank entries (start comma or sequential commas)
return types.filter(t => t.length).reverse().map(tokens => new TypeIdent(tokens));
}
/**
* @param {string} source
*/
function parse(source) {
const re = /(\/\*[\d\D]*?\*\/)|(\/\*)|(\*\/)|((?:\/\/.*)|(?:\s+))|(".*?")|('.'?)|\b(package|import|class|enum|interface|extends|implements|throws)\b|\b(public|private|protected|static|final|abstract|native|volatile|transient|synchronized|strictfp)\b|(\.{3}|[@{}()<>,;?*\[\].])|\b(super|new)\b|\b([A-Za-z_]\w*)|(\d[\w.]*)/g;
let source_idx = 0, simplified_text_idx = 0;
/** @type {Token[]} */
let tokens = [];
function mapSimplified(
_,
mlc,
unterminated_mlc,
mlc_end,
slc_ws,
string,
char,
decl_keyword,
modifier,
symbol,
kw,
word
/* number, */
) {
if (mlc) return 'X';//mlc.replace(/[^\n]+/g, '') || ' ';
if (unterminated_mlc) return ' ';
if (mlc_end) return ' ';
if (slc_ws) return ' '; //slc_ws.replace(/[^\n]+/g, '').replace(/ +/,' ') || ' ';
if (string) return 'S';
if (char) return 'C';
if (decl_keyword) return decl_keyword;
if (modifier) return 'M';
if (symbol) return symbol;
if (kw) return kw;
if (word) return 'W';
return 'N';
}
const simplified = source.replace(re, (...args) => {
let text = args[0];
let next_idx = source.indexOf(text, source_idx);
simplified_text_idx += (next_idx - source_idx);
source_idx = next_idx;
const simplified_text = mapSimplified.apply(null, args);
tokens.push(new Token(source_idx, text, simplified_text, simplified_text_idx));
source_idx += text.length;
simplified_text_idx += simplified_text.length;
return simplified_text;
});
// console.log(simplified);
const decls = [
/ +/g,
/package +W(?: *\. *W)*( *;)?/g,
/import +(M +)?W(?: *\. *W)*( *\.\*)?( *;)?/g,
/@ *W( *\. *W)*( *\()?/g,
/M/g,
/(class|enum|interface|@ *interface) +W(.+?(?= *[a-z{]))/g, // type declaration
/(implements|extends|throws) +W(.+?(?= *[a-z{]))/g, // decl
/W(?: *\. *W)*(?: *<.*?>)?(?: *\[ *\])* +W(?: *\[ *\])*( *[=;(,])?/g, // field/method
/W *\(/g, // constructor
/[{}]/g, // scope
/X/g, // multi-line comment
/<.*?>(?= *[WM@])/g, // type variables
/$/g, // end of file
]
let lastIndex = 0;
let loc = ['base'];
let package_decl = null;
let imports = [];
let modifiers = [];
let types = [];
let invalids = [];
let lastMLC = null;
/** @type {TypeDeclaration[]} */
let type_stack = [null];
for(;;) {
/** @type {{idx:number, d: RegExp, m:RegExpMatchArray}} */
let best_match = null, next_best = null;
decls.find((d,idx) => {
d.lastIndex = lastIndex;
const m = d.exec(simplified);
if (!m) return;
if (m.index === lastIndex) {
best_match = {idx, d, m};
return true;
}
if (idx === 0) {
return;
}
if (!next_best || m.index < next_best.m.index) {
next_best = {idx, d, m};
}
});
if (!best_match) {
const errorToken = findToken(tokens, lastIndex);
const error = new ParseSyntaxError(lastMLC, modifiers.splice(0), errorToken);
invalids.push(error);
lastMLC = null;
console.log(simplified.slice(lastIndex, lastIndex + 100));
best_match = next_best;
if (!next_best) {
break;
}
}
lastIndex = best_match.d.lastIndex;
function parseToExpressionEnd() {
// parse expression
let re = /[(){};]/g, balance = [0,0];
re.lastIndex = lastIndex;
for (let m; m = re.exec(simplified);) {
if (m[0] === '{') balance[0]++;
else if (m[0] === '(') balance[1]++;
else if (m[0] === '}') balance[0]--;
else if (m[0] === ')') balance[1]--;
else if (balance[0] <= 0 && balance[1] <= 0) {
break;
}
}
// console.log(simplified.slice(lastIndex, re.lastIndex));
lastIndex = re.lastIndex;
}
if (best_match.idx === 1) {
// package - map all the name parts
const nameparts = [];
for (let m, re=/W/g; m = re.exec(best_match.m[0]); ) {
const ident = findToken(tokens, best_match.m.index + m.index);
nameparts.push(ident);
}
const semicolon = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].length - 1) : null;
if (!package_decl) {
package_decl = new PackageDeclaration(lastMLC, modifiers.splice(0), nameparts, semicolon);
}
lastMLC = null;
}
if (best_match.idx === 2) {
// import - map all the name parts
const nameparts = [];
for (let m, re=/W/g; m = re.exec(best_match.m[0]); ) {
const ident = findToken(tokens, best_match.m.index + m.index);
nameparts.push(ident);
}
const static = best_match.m[1] ? findToken(tokens, best_match.m.index + best_match.m[0].indexOf('M')) : null;
const asterisk = best_match.m[2] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('*')) : null
const semicolon = best_match.m[3] ? findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf(';')) : null;
let import_decl = new ImportDeclaration(lastMLC, modifiers.splice(0), nameparts, static, asterisk, semicolon);
imports.push(import_decl);
lastMLC = null;
}
if (best_match.idx === 3) {
// annotation
const at = findToken(tokens, best_match.m.index);
const name = findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W'));
const annotation = new Annotation(at, name);
modifiers.push(annotation);
if (best_match.m[0].endsWith('(')) {
lastIndex = parseToBracketEnd(simplified, lastIndex).end;
}
}
if (best_match.idx === 4) {
// modifier
const modifier = findToken(tokens, best_match.m.index);
modifiers.push(modifier);
}
if (best_match.idx === 5) {
// type declaration
const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W'));
/** @type {'class'|'interface'|'enum'|'@interface'} */
// @ts-ignore
const kind = best_match.m[1].replace(/ /g, '');
const type = new TypeDeclaration(type_stack[0], lastMLC, modifiers.splice(0), kind, name);
lastMLC = null;
types.push(type);
type_stack.unshift(type);
loc.unshift('typedecl');
}
if (best_match.idx === 6) {
// extends/implements/throws
const decl_kw = findToken(tokens, best_match.m.index);
const startidx = tokens.indexOf(findToken(tokens, best_match.m.index + best_match.m[0].indexOf('W')));
const endidx = tokens.indexOf(findToken(tokens,best_match.m.index + best_match.m[0].length - 1));
const typelist = parseTypeIdentList(tokens.slice(startidx, endidx + 1));
switch(decl_kw.text) {
case 'throws':
break;
case 'extends':
case 'implements':
if (loc[0] === 'typedecl') {
type_stack[0].super_declarations.push({ decl_kw, typelist });
}
}
}
if (best_match.idx === 7) {
// field or method
const name = findToken(tokens, best_match.m.index + best_match.m[0].lastIndexOf('W'));
const typetokens = [];
for (let t = findToken(tokens, best_match.m.index), i = tokens.indexOf(t); t !== name; t = tokens[++i]) {
if (t.simplified_text !== ' ')
typetokens.push(t);
}
let parameters, equals_comma_sc = null;
switch (best_match.m[0].slice(-1)) {
case '(':
// method
let params_source_range = parseToBracketEnd(simplified, lastIndex);
lastIndex = params_source_range.end;
parameters = parseParameters(simplified, tokens, params_source_range, invalids);
break;
case '=':
// initialised field
equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length);
parseToExpressionEnd();
break;
case ',':
// multi-declaration field
equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length);
throw new Error('not implemented');
case ';':
// single field
equals_comma_sc = findToken(tokens, best_match.m.index + best_match.m[0].length);
break;
default:
// invalid - but treat as a single field
break;
}
if (type_stack[0]) {
const fmc = new FMCDeclaration(type_stack[0], lastMLC, modifiers.splice(0), best_match.m[0].endsWith('(') ? 'method' : 'field', name, new TypeIdent(typetokens), equals_comma_sc, parameters);
type_stack[0].declarations.push(fmc);
}
lastMLC = null;
}
if (best_match.idx === 8) {
// constructor (if the name matches the type)
let params_source_range = parseToBracketEnd(simplified, lastIndex);
lastIndex = params_source_range.end;
const parameters = parseParameters(simplified, tokens, params_source_range, invalids);
const name = findToken(tokens, best_match.m.index);
if (type_stack[0] && name.text === type_stack[0].name.text) {
const fmc = new FMCDeclaration(type_stack[0], lastMLC, modifiers.splice(0), 'constructor', name, null, null, parameters);
type_stack[0].declarations.push(fmc);
} else {
invalids.push(new ParseSyntaxError(lastMLC, modifiers.splice(0), name));
}
lastMLC = null;
}
if (best_match.idx === 9) {
// open/close scope
if (best_match.m[0] === '{') {
if (loc[0] === 'typedecl') loc[0] = 'typebody';
else if (loc[0] === 'typebody') {
// static initer / method body
let re = /[{}]/g, balance = 1;
re.lastIndex = lastIndex;
for (let m; m = re.exec(simplified);) {
if (m[0] === '{') balance++;
else if (--balance === 0) {
re.lastIndex++;
break;
}
}
lastIndex = re.lastIndex;
}
} else {
// end scope
if (/^type/.test(loc[0])) {
loc.shift();
type_stack.shift();
}
}
}
if (best_match.idx === 10) {
// mlc
lastMLC = findToken(tokens, best_match.m.index);
}
if (best_match.idx === 11) {
// type parameters
const open = findToken(tokens, best_match.m.index);
const close = findToken(tokens, best_match.m.index + best_match.m[0].length - 1);
modifiers.push(new TypeParameters(open, close));
}
if (best_match.idx === 12) {
// end of file
break;
}
}
return new ParseResult(package_decl, imports, types, invalids);
}
module.exports = {
Annotation,
Declaration,
FMCDeclaration,
ImportDeclaration,
PackageDeclaration,
parse,
ParseProblem,
ParseResult,
ProblemSeverity,
Token,
TypeDeclaration,
TypeParameters,
}
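
For reference, the deleted parser's core technique is visible in parse() above: every token in the source is replaced by a one-character class ('W' for a word, 'M' for a modifier, 'N' for a number, with keywords and symbols kept as-is) to build a "simplified" string, and declarations are then recognised by running regexes over that string, with findToken() mapping match positions back to real tokens. Below is a condensed, standalone sketch of that idea with deliberately reduced token classes and regexes; it is an illustration, not the original code.

// Condensed illustration of the removed parser's simplified-string approach.
function simplify(source) {
    const tokens = [];
    let simplifiedIdx = 0;
    const re = /(\s+)|\b(public|private|protected|static|final)\b|\b(class|interface|enum)\b|([A-Za-z_]\w*)|(\d+)|(.)/g;
    const simplified = source.replace(re, (text, ws, mod, kw, word, num, sym) => {
        // map each token to a one-character class, like mapSimplified() above
        const s = ws ? ' ' : mod ? 'M' : kw ? kw : word ? 'W' : num ? 'N' : sym;
        tokens.push({ text, simplifiedIdx });
        simplifiedIdx += s.length;
        return s;
    });
    return { simplified, tokens };
}

// Recognise `class <name>` declarations against the simplified string and map
// each match position back to the original token, as findToken() does above.
function findClassNames(source) {
    const { simplified, tokens } = simplify(source);
    const names = [];
    for (let m, re = /class +W/g; (m = re.exec(simplified)); ) {
        const nameIdx = m.index + m[0].lastIndexOf('W');
        const token = tokens.find(t => t.simplifiedIdx === nameIdx);
        if (token) names.push(token.text);
    }
    return names;
}

console.log(findClassNames('public class Foo { static final int x = 1; } class Bar {}'));
// -> [ 'Foo', 'Bar' ]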

File diff suppressed because it is too large