Extract manifest directly from APK (#74)

* initial support for decoding manifest from the APK

* add support for overriding AndroidManifest.xml file location in launch config

* correct property name in comment
This commit is contained in:
Dave Holoway
2019-08-25 19:37:21 +01:00
committed by GitHub
parent 8f2f7d2fd4
commit 989de8254a
5 changed files with 444 additions and 35 deletions

281
src/apkdecoder.js Normal file
View File

@@ -0,0 +1,281 @@
const START_NAMESPACE_SPEC = {
hdr: '0x0100',
hdrsz: '0x0010',
sz: '0x00000018',
startline: 4,
commentId: 4,
nameId: 4,
valueId: 4,
}
const END_NAMESPACE_SPEC = {
hdr: '0x0101',
hdrsz: '0x0010',
sz: '0x00000018',
startline: 4,
commentId: 4,
nameId: 4,
valueId: 4,
}
const BEGIN_NODE_SPEC = {
hdr: '0x0102',
hdrsz: '0x0010',
sz: 4,
startline: 4,
commentId: 4,
namespaceId: 4,
nameId: 4,
attr: {
offset: 2,
size: 2,
count: 2,
},
id_attr_offset: 2,
cls_attr_offset: 2,
style_attr_offset: 2,
attributes: [{
length: main => main.attr.count,
element_spec: {
ns: 4,
nameId: 4,
commentId: 4,
sz: '0x0008',
zero: '0x00',
type: 1,
value: 4,
}
}]
}
const END_NODE_SPEC = {
hdr: '0x0103',
hdrsz: '0x0010',
sz: 4,
startline: 4,
commentId: 4,
namespaceId: 4,
nameId: 4,
}
function decode_spec_value(o, key, value, buf, idx, main) {
let byteLength;
switch (true) {
case typeof value === 'number': {
// raw integer value
byteLength = value;
o[key] = buf.readIntLE(idx, byteLength);
break;
}
case Array.isArray(value): {
// known-length array of values
const length = value[0].length(main);
byteLength = 0;
o[key] = new Array(length);
for (let i = 0; i < length; i++) {
const bytes = decode_spec_value(o[key], i, value[0].element_spec, buf, idx, main);
idx += bytes;
byteLength += bytes;
}
break;
}
case typeof value === 'object': {
// named sub-spec
o[key] = {};
byteLength = decode_spec(buf, value, o[key], o, idx);
break
}
case /^0x[\da-fA-F]/.test(value): {
// exact integer value
byteLength = (value.length - 2) / 2;
o[key] = buf.readUIntLE(idx, byteLength);
if (parseInt(value) !== o[key]) {
throw new Error(`Bad value. Expected ${value}, got 0x${o[key].toString(16)}`);
}
break;
}
case value === 'length-utf16-null': {
// 2-byte length, utf16 chars, null char
const string_byte_length = buf.readUInt16LE(idx) * 2; // 1 char = 2 bytes
idx += 2;
o[key] = buf.slice(idx, idx + string_byte_length).toString('ucs2');
idx += string_byte_length;
if (buf.readUInt16LE(idx) !== 0) {
throw new Error(`Bad value. Nul char expected but not found.`);
}
byteLength = 2 + string_byte_length + 2;
break;
}
case /^align:\d+$/.test(value): {
// used for arbitrary padding to a specified alignment
const align = parseInt(value.split(':')[1], 10);
byteLength = align - (idx % align);
o[key] = buf.slice(idx, idx + byteLength);
break;
}
default: throw new Error(`Unknown spec value definition: ${value}`);
}
return byteLength;
}
function decode_spec(buf, spec, o = {}, main = o, idx = 0) {
let byteLength = 0;
for (let key of Object.keys(spec)) {
const value = spec[key];
const bytes = decode_spec_value(o, key, value, buf, idx, main);
idx += bytes;
byteLength += bytes;
}
return byteLength;
}
/**
* Converts a binary XML file back into a readable XML document
* @param {Buffer} buf binary XMl content
*/
function decode_binary_xml(buf) {
const xml_spec = {
header: '0x00080003',
headerSize: 4,
stringPool: {
header: '0x0001',
hdrsize: '0x001c',
sz: 4,
stringCount: 4,
styleCount: 4,
flags: 4,
stringStart: 4,
styleStart: 4,
stringOffsets: [{
length: main => main.stringPool.stringCount,
element_spec: 4,
}],
strings: [{
length: main => main.stringPool.stringCount,
element_spec: 'length-utf16-null',
}],
padding: 'align:4',
},
resourceIDPool: {
hdr: '0x0180',
hdrsize: '0x0008',
sz: 4,
resIDs: [{
length: main => (main.resourceIDPool.sz - main.resourceIDPool.hdrsize) / 4,
element_spec: 4,
}]
}
}
const decoded = {};
let idx = decode_spec(buf, xml_spec, decoded);
// after we've extracted the string and id's, it should be time to parse the xml
const node_stack = [{ nodes: [] }];
const namespaces = [];
while (idx < buf.byteLength) {
const id = buf.readUInt16LE(idx);
switch (id) {
case 0x0100: {
// start namespace
const node = {};
idx += decode_spec(buf, START_NAMESPACE_SPEC, node, node, idx);
namespaces.push(node);
break;
}
case 0x0101: {
// end namespace
const node = {};
idx += decode_spec(buf, END_NAMESPACE_SPEC, node, node, idx);
const i = namespaces.findIndex(ns => ns.nameId === node.nameId);
namespaces.splice(i, 1);
break;
}
case 0x0102: {
// begin node
const node = {};
idx += decode_spec(buf, BEGIN_NODE_SPEC, node, node, idx);
node.namespaces = namespaces.slice();
node.namespaces.forEach(ns => {
if (!ns.node) ns.node = node;
});
node.nodes = [];
node_stack[0].nodes.push(node);
node_stack.unshift(node);
break;
}
case 0x0103: {
// end node
const spec = END_NODE_SPEC;
const node = {};
idx += decode_spec(buf, spec, node, node, idx);
node_stack.shift();
break;
}
default: throw new Error(`Unknown XML element ${id.toString(16)}`);
}
}
decoded.nodes = node_stack[0].nodes;
const xml = toXMLDocument(decoded);
return xml;
}
/**
* Convert the decoded binary XML to a readable XML document
* @param {*} decoded
*/
function toXMLDocument(decoded) {
const strings = decoded.stringPool.strings;
const format = {
nodes: (nodes, indent) => {
return nodes.map(node => format.node(node, indent)).join('\n');
},
node: (node, indent) => {
const parts = [indent, '<', strings[node.nameId]];
for (let ns of node.namespaces.filter(ns => ns.node === node)) {
parts.push(' ', `xmlns:${strings[ns.nameId]}="${strings[ns.valueId]}"`);
}
const attr_indent = node.attributes.length > 1 ? `\n${indent} ` : ' ';
for (let attr of node.attributes) {
parts.push(attr_indent, format.attribute(attr, node.namespaces));
}
if (node.nodes.length) {
parts.push('>\n', format.nodes(node.nodes, indent + ' '), '\n', indent, '</', strings[node.nameId], '>');
} else {
parts.push(' />');
}
return parts.join('');
},
attribute: (attr, namespaces) => {
let value = attr.value;
switch (attr.type) {
case 3:
value = strings[value];
break;
case 16:
value |= 0; // convert to signed integer
break;
case 18:
value = value ? true : false;
break;
case 1: // resource id
case 17: // flags
default:
value = '0x' + value.toString(`16`);
break;
}
let ns = '';
if (attr.ns >= 0) {
ns = `${strings[namespaces.find(ns => ns.valueId === attr.ns).nameId]}:`;
}
return `${ns}${strings[attr.nameId]}="${value}"`;
}
}
return '<?xml version="1.0" encoding="utf-8"?>\n' + format.nodes(decoded.nodes, '');
}
module.exports = {
decode_binary_xml,
}

View File

@@ -10,10 +10,12 @@ const dom = require('xmldom').DOMParser;
const fs = require('fs');
const os = require('os');
const path = require('path');
const unzipper = require('unzipper');
const xpath = require('xpath');
// our stuff
const { ADBClient } = require('./adbclient');
const { decode_binary_xml } = require('./apkdecoder');
const { Debugger } = require('./debugger');
const $ = require('./jq-promise');
const { AndroidThread } = require('./threads');
@@ -45,6 +47,8 @@ class AndroidDebugSession extends DebugSession {
this.src_packages = {};
// the device we are debugging
this._device = null;
// the full file path name of the AndroidManifest.xml, taken from the manifestFile launch property
this.manifest_fpn = '';
// the threads (we know about from the last refreshThreads call)
// this is implemented as both a hashmap<threadid,AndroidThread> and an array of AndroidThread objects
@@ -240,6 +244,7 @@ class AndroidDebugSession extends DebugSession {
// app_src_root must end in a path-separator for correct validation of sub-paths
this.app_src_root = ensure_path_end_slash(args.appSrcRoot);
this.apk_fpn = args.apkFile;
this.manifest_fpn = args.manifestFile;
if (typeof args.callStackDisplaySize === 'number' && args.callStackDisplaySize >= 0)
this.callStackDisplaySize = args.callStackDisplaySize|0;
@@ -432,7 +437,7 @@ class AndroidDebugSession extends DebugSession {
h.update(apk_file_data);
done.result.content_hash = h.digest('hex');
// read the manifest
fs.readFile(path.join(this.app_src_root,'AndroidManifest.xml'), 'utf8', (err,manifest) => {
this.readAndroidManifest((err, manifest) => {
if (err) return done.rejectWith(this, [new Error('Manifest read error. ' + err.message)]);
done.result.manifest = manifest;
try {
@@ -460,6 +465,64 @@ class AndroidDebugSession extends DebugSession {
});
return done;
}
readAndroidManifest(cb) {
// Because of manifest merging and build-injected properties, the manifest compiled inside
// the APK is frequently different from the AndroidManifest.xml source file.
// We try to extract the manifest from 3 sources (in priority order):
// 1. The 'manifestFile' launch configuration property
// 2. The decoded manifest from the APK
// 3. The AndroidManifest.xml file from the root of the source tree.
const readAPKManifest = (cb) => {
D(`Reading APK Manifest`);
const apk_manifest_chunks = [];
function cb_once(err, manifest) {
cb && cb(err, manifest);
cb = null;
}
fs.createReadStream(this.apk_fpn)
.pipe(unzipper.ParseOne(/^AndroidManifest\.xml$/))
.on('data', chunk => {
apk_manifest_chunks.push(chunk);
})
.once('error', err => {
cb_once(err);
})
.once('end', () => {
try {
const manifest = decode_binary_xml(Buffer.concat(apk_manifest_chunks));
D(`APK manifest read complete`);
cb_once(null, manifest);
} catch (err) {
D(`APK manifest decode failed: ${err.message}`);
cb_once(err);
}
});
}
const readSourceManifest = (cb) => {
D(`Reading source manifest from ${this.app_src_root}`);
fs.readFile(path.join(this.app_src_root, 'AndroidManifest.xml'), 'utf8', cb);
}
// a value from the manifestFile overrides the default manifest extraction
// note: there's no validation that the file is a valid AndroidManifest.xml file
if (this.manifest_fpn) {
D(`Reading manifest from ${this.manifest_fpn}`);
fs.readFile(this.manifest_fpn, 'utf8', cb);
return;
}
readAPKManifest((err, manifest) => {
if (err) {
// if we fail to read the APK manifest, revert to the source manifest
readSourceManifest(cb)
return;
}
cb(err, manifest);
});
}
scanSourceSync(app_root) {
try {