354 lines
13 KiB
Python
354 lines
13 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import struct
|
|
from enum import IntEnum
|
|
from io import BufferedReader
|
|
from typing import List
|
|
from zipfile import ZipFile
|
|
|
|
|
|
def main() -> None:
|
|
from frida_tools.application import ConsoleApplication
|
|
|
|
class ApkApplication(ConsoleApplication):
|
|
def _usage(self) -> str:
|
|
return "%(prog)s [options] path.apk"
|
|
|
|
def _add_options(self, parser: argparse.ArgumentParser) -> None:
|
|
parser.add_argument("-o", "--output", help="output path", metavar="OUTPUT")
|
|
parser.add_argument("apk", help="apk file")
|
|
|
|
def _needs_device(self) -> bool:
|
|
return False
|
|
|
|
def _initialize(self, parser: argparse.ArgumentParser, options: argparse.Namespace, args: List[str]) -> None:
|
|
self._output_path = options.output
|
|
self._path = options.apk
|
|
|
|
if not self._path.endswith(".apk"):
|
|
parser.error("path must end in .apk")
|
|
|
|
if self._output_path is None:
|
|
self._output_path = self._path.replace(".apk", ".d.apk")
|
|
|
|
def _start(self) -> None:
|
|
try:
|
|
debug(self._path, self._output_path)
|
|
except Exception as e:
|
|
self._update_status(f"Error: {e}")
|
|
self._exit(1)
|
|
self._exit(0)
|
|
|
|
app = ApkApplication()
|
|
app.run()
|
|
|
|
|
|
def debug(path: str, output_path: str) -> None:
|
|
with ZipFile(path, "r") as iz, ZipFile(output_path, "w") as oz:
|
|
for info in iz.infolist():
|
|
with iz.open(info) as f:
|
|
if info.filename == "AndroidManifest.xml":
|
|
manifest = BinaryXML(f)
|
|
|
|
pool = None
|
|
debuggable_index = None
|
|
|
|
size = 8
|
|
for header in manifest.chunk_headers[1:]:
|
|
if header.type == ChunkType.STRING_POOL:
|
|
pool = StringPool(header)
|
|
debuggable_index = pool.append_str("debuggable")
|
|
|
|
if header.type == ChunkType.RESOURCE_MAP:
|
|
# The "debuggable" attribute name is not only a reference to the string pool, but
|
|
# also to the resource map. We need to extend the resource map with a valid entry.
|
|
# refs https://justanapplication.wordpress.com/category/android/android-binary-xml/android-xml-startelement-chunk/
|
|
resource_map = ResourceMap(header)
|
|
resource_map.add_debuggable(debuggable_index)
|
|
|
|
if header.type == ChunkType.START_ELEMENT:
|
|
start = StartElement(header)
|
|
name = pool.get_string(start.name)
|
|
if name == "application":
|
|
start.insert_debuggable(debuggable_index, resource_map)
|
|
|
|
size += header.size
|
|
|
|
header = manifest.chunk_headers[0]
|
|
header_data = bytearray(header.chunk_data)
|
|
header_data[4 : 4 + 4] = struct.pack("<I", size)
|
|
|
|
data = bytearray()
|
|
data.extend(header_data)
|
|
for header in manifest.chunk_headers[1:]:
|
|
data.extend(header.chunk_data)
|
|
|
|
oz.writestr(info.filename, bytes(data), info.compress_type)
|
|
elif info.filename.upper() == "META-INF/MANIFEST.MF":
|
|
# Historically frida-apk deleted META-INF/ entirely, but that breaks some apps.
|
|
# It turns out that v1 signatures (META-INF/MANIFEST.MF) are not validated at all on
|
|
# modern Android versions, so we can keep them in for now.
|
|
# If this doesn't work for you, try to comment out the following line.
|
|
oz.writestr(info.filename, f.read(), info.compress_type)
|
|
else:
|
|
oz.writestr(info.filename, f.read(), info.compress_type)
|
|
|
|
|
|
class BinaryXML:
|
|
def __init__(self, stream: BufferedReader) -> None:
|
|
self.stream = stream
|
|
self.chunk_headers = []
|
|
self.parse()
|
|
|
|
def parse(self) -> None:
|
|
chunk_header = ChunkHeader(self.stream, False)
|
|
if chunk_header.type != ChunkType.XML:
|
|
raise BadHeader()
|
|
self.chunk_headers.append(chunk_header)
|
|
|
|
size = chunk_header.size
|
|
|
|
while self.stream.tell() < size:
|
|
chunk_header = ChunkHeader(self.stream)
|
|
self.chunk_headers.append(chunk_header)
|
|
|
|
|
|
class ChunkType(IntEnum):
|
|
STRING_POOL = 0x001
|
|
XML = 0x003
|
|
START_ELEMENT = 0x102
|
|
RESOURCE_MAP = 0x180
|
|
|
|
|
|
class ResourceType(IntEnum):
|
|
BOOL = 0x12
|
|
|
|
|
|
class StringType(IntEnum):
|
|
UTF8 = 1 << 8
|
|
|
|
|
|
class BadHeader(Exception):
|
|
pass
|
|
|
|
|
|
class ChunkHeader:
|
|
FORMAT = "<HHI"
|
|
|
|
def __init__(self, stream: BufferedReader, consume_data: bool = True) -> None:
|
|
self.stream = stream
|
|
data = self.stream.peek(struct.calcsize(self.FORMAT))
|
|
(self.type, self.header_size, self.size) = struct.unpack_from(self.FORMAT, data)
|
|
if consume_data:
|
|
self.chunk_data = self.stream.read(self.size)
|
|
else:
|
|
self.chunk_data = self.stream.read(struct.calcsize(self.FORMAT))
|
|
|
|
|
|
class StartElement:
|
|
FORMAT = "<HHIIIIIIHHHH"
|
|
ATTRIBUTE_FORMAT = "<IIiHBBi"
|
|
|
|
def __init__(self, header: ChunkHeader) -> None:
|
|
self.header = header
|
|
self.stream = self.header.stream
|
|
self.header_size = struct.calcsize(self.FORMAT)
|
|
|
|
data = struct.unpack_from(self.FORMAT, self.header.chunk_data)
|
|
if data[0] != ChunkType.START_ELEMENT:
|
|
raise BadHeader()
|
|
|
|
self.name = data[6]
|
|
self.attribute_count = data[8]
|
|
|
|
attributes_data = self.header.chunk_data[self.header_size :]
|
|
if len(attributes_data[-20:]) == 20:
|
|
previous_attribute = struct.unpack(self.ATTRIBUTE_FORMAT, attributes_data[-20:])
|
|
self.namespace = previous_attribute[0]
|
|
else:
|
|
# There are no other attributes in the application tag
|
|
self.namespace = -1
|
|
|
|
def insert_debuggable(self, name: int, resource_map: ResourceMap) -> None:
|
|
# TODO: Instead of using the previous attribute to determine the probable
|
|
# namespace for the debuggable tag we could scan the strings section
|
|
# for the AndroidManifest schema tag
|
|
if self.namespace == -1:
|
|
raise BadHeader()
|
|
|
|
chunk_data = bytearray(self.header.chunk_data)
|
|
|
|
resource_size = 8
|
|
resource_type = ResourceType.BOOL
|
|
# Denotes a True value in AXML, 0 is used for False
|
|
resource_data = -1
|
|
|
|
debuggable = struct.pack(
|
|
self.ATTRIBUTE_FORMAT, self.namespace, name, -1, resource_size, 0, resource_type, resource_data
|
|
)
|
|
|
|
# Some parts of Android expect this to be sorted by resource ID.
|
|
attr_offset = None
|
|
for insert_pos in range(self.attribute_count + 1):
|
|
attr_offset = 0x24 + 20 * insert_pos
|
|
idx = int.from_bytes(chunk_data[attr_offset + 4 : attr_offset + 8], "little")
|
|
if resource_map.get_resource(idx) > ResourceMap.DEBUGGING_RESOURCE:
|
|
break
|
|
chunk_data[attr_offset:attr_offset] = debuggable
|
|
|
|
self.header.size = len(chunk_data)
|
|
chunk_data[4 : 4 + 4] = struct.pack("<I", self.header.size)
|
|
|
|
self.attribute_count += 1
|
|
chunk_data[28 : 28 + 2] = struct.pack("<H", self.attribute_count)
|
|
|
|
self.header.chunk_data = bytes(chunk_data)
|
|
|
|
|
|
class ResourceMap:
|
|
DEBUGGING_RESOURCE = 0x101000F
|
|
|
|
def __init__(self, header: ChunkHeader) -> None:
|
|
self.header = header
|
|
|
|
def add_debuggable(self, idx: int) -> None:
|
|
assert idx is not None
|
|
data_size = len(self.header.chunk_data) - 8
|
|
target = (idx + 1) * 4
|
|
self.header.chunk_data += b"\x00" * (target - data_size - 4) + self.DEBUGGING_RESOURCE.to_bytes(4, "little")
|
|
|
|
self.header.size = len(self.header.chunk_data)
|
|
self.header.chunk_data = (
|
|
self.header.chunk_data[:4] + struct.pack("<I", self.header.size) + self.header.chunk_data[8:]
|
|
)
|
|
|
|
def get_resource(self, index: int) -> int:
|
|
offset = index * 4 + 8
|
|
return int.from_bytes(self.header.chunk_data[offset : offset + 4], "little")
|
|
|
|
|
|
class StringPool:
|
|
FORMAT = "<HHIIIIII"
|
|
|
|
def __init__(self, header: ChunkHeader):
|
|
self.header = header
|
|
self.stream = self.header.stream
|
|
self.header_size = struct.calcsize(self.FORMAT)
|
|
|
|
data = struct.unpack_from(self.FORMAT, self.header.chunk_data)
|
|
if data[0] != ChunkType.STRING_POOL:
|
|
raise BadHeader()
|
|
|
|
self.string_count = data[3]
|
|
self.flags = data[5]
|
|
self.strings_offset = data[6]
|
|
self.styles_offset = data[7]
|
|
self.utf8 = (self.flags & StringType.UTF8) != 0
|
|
self.dirty = False
|
|
|
|
offsets_data = self.header.chunk_data[self.header_size : self.header_size + self.string_count * 4]
|
|
self.offsets: List[int] = list(map(lambda f: f[0], struct.iter_unpack("<I", offsets_data)))
|
|
|
|
def get_string(self, index: int) -> str:
|
|
offset = self.offsets[index]
|
|
|
|
# HACK: We subtract 4 because we insert a string offset during append_str
|
|
# but we do not update the original stream and thus it reads stale data.
|
|
if self.dirty:
|
|
offset -= 4
|
|
|
|
position = self.stream.tell()
|
|
self.stream.seek(self.strings_offset + 8 + offset, os.SEEK_SET)
|
|
|
|
string = None
|
|
if self.utf8:
|
|
# Ignore number of characters
|
|
n = struct.unpack("<B", self.stream.read(1))[0]
|
|
if n & 0x80:
|
|
n = ((n & 0x7F) << 8) | struct.unpack("<B", self.stream.read(1))[0]
|
|
|
|
# UTF-8 encoded length
|
|
n = struct.unpack("<B", self.stream.read(1))[0]
|
|
if n & 0x80:
|
|
n = ((n & 0x7F) << 8) | struct.unpack("<B", self.stream.read(1))[0]
|
|
|
|
string = self.stream.read(n).decode("utf-8")
|
|
else:
|
|
n = struct.unpack("<H", self.stream.read(2))[0]
|
|
if n & 0x8000:
|
|
n |= ((n & 0x7FFF) << 16) | struct.unpack("<H", self.stream.read(2))[0]
|
|
|
|
string = self.stream.read(n * 2).decode("utf-16le")
|
|
|
|
self.stream.seek(position, os.SEEK_SET)
|
|
return string
|
|
|
|
def append_str(self, add: str) -> int:
|
|
data_size = len(self.header.chunk_data)
|
|
# Reserve data for our new offset
|
|
data_size += 4
|
|
|
|
chunk_data = bytearray(data_size)
|
|
end = self.header_size + self.string_count * 4
|
|
chunk_data[:end] = self.header.chunk_data[:end]
|
|
chunk_data[end + 4 :] = self.header.chunk_data[end:]
|
|
|
|
# Add 4 since we have added a string offset
|
|
offset = len(chunk_data) - 8 - self.strings_offset + 4
|
|
|
|
if self.utf8:
|
|
assert len(add.encode("utf-8")) < 128 # multi-byte len strings not supported yet
|
|
length_in_characters = len(add)
|
|
length_in_bytes = len(add.encode("utf-8"))
|
|
chunk_data.extend(struct.pack("<BB", length_in_characters, length_in_bytes))
|
|
|
|
chunk_data.extend(add.encode("utf-8"))
|
|
# Insert a UTF-8 NUL
|
|
chunk_data.extend([0])
|
|
else:
|
|
chunk_data.extend(struct.pack("<H", len(add)))
|
|
chunk_data.extend(add.encode("utf-16le"))
|
|
# Insert a UTF-16 NUL
|
|
chunk_data.extend([0, 0])
|
|
|
|
# pad to a multiple of 4 bytes
|
|
if len(chunk_data) % 4 != 0:
|
|
alignment_padding = [0] * (4 - len(chunk_data) % 4)
|
|
chunk_data.extend(alignment_padding)
|
|
|
|
# Insert a new offset at the end of the existing offsets
|
|
chunk_data[end : end + 4] = struct.pack("<I", offset)
|
|
|
|
# Increase the header size since we have inserted a new offset and string
|
|
self.header.size = len(chunk_data)
|
|
chunk_data[4 : 4 + 4] = struct.pack("<I", self.header.size)
|
|
|
|
self.string_count += 1
|
|
chunk_data[8 : 8 + 4] = struct.pack("<I", self.string_count)
|
|
|
|
# Increase strings offset since we have inserted a new offset and thus
|
|
# shifted the offset of the strings
|
|
self.strings_offset += 4
|
|
chunk_data[20 : 20 + 4] = struct.pack("<I", self.strings_offset)
|
|
|
|
# If there are styles, offset them as we have inserted into the strings
|
|
# offsets
|
|
if self.styles_offset != 0:
|
|
self.styles_offset += 4
|
|
chunk_data[24 : 24 + 4] = struct.pack("<I", self.strings_offset)
|
|
|
|
self.header.chunk_data = bytes(chunk_data)
|
|
|
|
self.dirty = True
|
|
|
|
return self.string_count - 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
try:
|
|
main()
|
|
except KeyboardInterrupt:
|
|
pass
|