This repository has been archived on 2024-09-30. You can view files and clone it, but cannot push or open issues/pull-requests.
hook-frida/venv/Lib/site-packages/frida_tools/apk.py

354 lines
13 KiB
Python

from __future__ import annotations
import argparse
import os
import struct
from enum import IntEnum
from io import BufferedReader
from typing import List
from zipfile import ZipFile
def main() -> None:
    """Command-line entry point: rewrite an APK so that it is debuggable.

    Defines a ``ConsoleApplication`` subclass that takes the path of an
    ``.apk`` file (plus an optional ``-o/--output`` path) and runs
    :func:`debug` on it.
    """
    # Imported inside the function, as in the original, so that importing this
    # module does not require frida_tools.application to be importable.
    from frida_tools.application import ConsoleApplication

    class ApkApplication(ConsoleApplication):
        # NOTE(review): the underscore-prefixed methods below are presumably
        # hooks invoked by ConsoleApplication; confirm against its source.

        def _usage(self) -> str:
            return "%(prog)s [options] path.apk"

        def _add_options(self, parser: argparse.ArgumentParser) -> None:
            parser.add_argument("-o", "--output", help="output path", metavar="OUTPUT")
            parser.add_argument("apk", help="apk file")

        def _needs_device(self) -> bool:
            return False

        def _initialize(self, parser: argparse.ArgumentParser, options: argparse.Namespace, args: List[str]) -> None:
            self._output_path = options.output
            self._path = options.apk

            suffix = ".apk"
            if not self._path.endswith(suffix):
                parser.error("path must end in .apk")

            if self._output_path is None:
                # Only swap the trailing extension. str.replace() would also
                # rewrite any ".apk" occurring earlier in the path (for
                # example in a directory name) and point at a bogus location.
                self._output_path = self._path[: -len(suffix)] + ".d.apk"

        def _start(self) -> None:
            try:
                debug(self._path, self._output_path)
            except Exception as e:
                self._update_status(f"Error: {e}")
                self._exit(1)
            else:
                # Report success only when no error occurred, so that a
                # non-raising _exit() cannot overwrite the failure status.
                self._exit(0)

    app = ApkApplication()
    app.run()
def debug(path: str, output_path: str) -> None:
    """Copy the APK at *path* to *output_path* with a debuggable manifest.

    Every archive member is copied as-is (same name and compression type),
    except ``AndroidManifest.xml``, whose binary XML is rewritten so that the
    ``application`` element carries a ``debuggable`` attribute set to true.

    Raises:
        BadHeader: if the manifest is not a binary XML document or lacks the
            chunks required to add the attribute.
    """
    with ZipFile(path, "r") as iz, ZipFile(output_path, "w") as oz:
        for info in iz.infolist():
            with iz.open(info) as f:
                if info.filename == "AndroidManifest.xml":
                    oz.writestr(info.filename, _make_manifest_debuggable(f), info.compress_type)
                elif info.filename.upper() == "META-INF/MANIFEST.MF":
                    # Historically frida-apk deleted META-INF/ entirely, but that breaks some apps.
                    # It turns out that v1 signatures (META-INF/MANIFEST.MF) are not validated at all on
                    # modern Android versions, so we can keep them in for now.
                    # If this doesn't work for you, try to comment out the following line.
                    oz.writestr(info.filename, f.read(), info.compress_type)
                else:
                    oz.writestr(info.filename, f.read(), info.compress_type)


def _make_manifest_debuggable(stream: BufferedReader) -> bytes:
    """Return the binary XML read from *stream* with ``debuggable`` enabled."""
    manifest = BinaryXML(stream)

    pool = None
    debuggable_index = None
    resource_map = None

    # Total document size; starts at 8 for the root XML chunk header, whose
    # size field is rewritten below.
    size = 8

    for header in manifest.chunk_headers[1:]:
        if header.type == ChunkType.STRING_POOL:
            pool = StringPool(header)
            debuggable_index = pool.append_str("debuggable")
        elif header.type == ChunkType.RESOURCE_MAP:
            if debuggable_index is None:
                raise BadHeader("resource map found before the string pool")
            # The "debuggable" attribute name is not only a reference to the string pool, but
            # also to the resource map. We need to extend the resource map with a valid entry.
            # refs https://justanapplication.wordpress.com/category/android/android-binary-xml/android-xml-startelement-chunk/
            resource_map = ResourceMap(header)
            resource_map.add_debuggable(debuggable_index)
        elif header.type == ChunkType.START_ELEMENT:
            if pool is None:
                raise BadHeader("element found before the string pool")
            start = StartElement(header)
            if pool.get_string(start.name) == "application":
                if resource_map is None:
                    # Previously surfaced as an opaque UnboundLocalError.
                    raise BadHeader("manifest has no resource map")
                start.insert_debuggable(debuggable_index, resource_map)

        # Read after any patching above, so grown chunks are accounted for.
        size += header.size

    root = manifest.chunk_headers[0]
    root_data = bytearray(root.chunk_data)
    root_data[4 : 4 + 4] = struct.pack("<I", size)

    return bytes(root_data) + b"".join(header.chunk_data for header in manifest.chunk_headers[1:])
class BinaryXML:
    """A binary XML document split into its chunks.

    ``chunk_headers[0]`` is the root XML chunk header (header bytes only);
    the remaining entries are the chunks that follow it, each holding its
    complete raw data.
    """

    def __init__(self, stream: BufferedReader) -> None:
        self.stream = stream
        self.chunk_headers = []
        self.parse()

    def parse(self) -> None:
        """Read the root header, then every chunk up to the declared size.

        Raises:
            BadHeader: if the first chunk is not of type ``ChunkType.XML``.
        """
        # Only the header of the root chunk is consumed: its "data" is the
        # sequence of chunks parsed below.
        root = ChunkHeader(self.stream, False)
        if root.type != ChunkType.XML:
            raise BadHeader()
        self.chunk_headers.append(root)

        document_size = root.size
        while self.stream.tell() < document_size:
            self.chunk_headers.append(ChunkHeader(self.stream))
class ChunkType(IntEnum):
    """Type identifiers of the binary XML chunks this module handles."""

    STRING_POOL = 0x0001
    XML = 0x0003
    START_ELEMENT = 0x0102
    RESOURCE_MAP = 0x0180
class ResourceType(IntEnum):
    """Data-type tags for attribute values; only the boolean tag is needed here."""

    # Tag written into the injected "debuggable" attribute.
    BOOL = 18
class StringType(IntEnum):
    """Bit flags tested against the ``flags`` field of a string pool header."""

    # Set when the pool stores its strings as UTF-8 rather than UTF-16.
    UTF8 = 0x100
class BadHeader(Exception):
    """Raised when a chunk does not have the expected type or layout."""
class ChunkHeader:
    """Generic header shared by every chunk, optionally with the chunk's data.

    Attributes:
        stream: the stream the chunk was read from.
        type: chunk type identifier.
        header_size: size in bytes of the chunk-specific header.
        size: total size in bytes of the chunk, header included.
        chunk_data: raw bytes of the whole chunk when ``consume_data`` is
            true, otherwise just the bytes of the generic header.
    """

    FORMAT = "<HHI"

    def __init__(self, stream: BufferedReader, consume_data: bool = True) -> None:
        """Read one chunk header (and, by default, the chunk body) from *stream*.

        Raises:
            struct.error: if the stream ends before a full header was read.
            ValueError: if the chunk declares a size smaller than its header.
        """
        self.stream = stream

        # Read the header instead of peeking at it: peek() may legally return
        # fewer bytes than requested and is not available on every binary
        # stream (e.g. io.BytesIO).
        header_length = struct.calcsize(self.FORMAT)
        header_bytes = stream.read(header_length)
        (self.type, self.header_size, self.size) = struct.unpack(self.FORMAT, header_bytes)

        if self.size < header_length:
            # Such a chunk cannot be valid; accepting a size of zero would
            # also make a caller looping over chunks spin forever.
            raise ValueError(f"chunk size {self.size} is smaller than the chunk header")

        if consume_data:
            self.chunk_data = header_bytes + stream.read(self.size - header_length)
        else:
            self.chunk_data = header_bytes
class StartElement:
    """A START_ELEMENT chunk that can be given a ``debuggable`` attribute."""

    # Layout of the chunk up to the first attribute (36 bytes).
    FORMAT = "<HHIIIIIIHHHH"
    # Layout of a single attribute record (20 bytes).
    ATTRIBUTE_FORMAT = "<IIiHBBi"

    def __init__(self, header: ChunkHeader) -> None:
        """Decode the element name, attribute count and a probable namespace.

        Raises:
            BadHeader: if the chunk is not of type ``ChunkType.START_ELEMENT``.
        """
        self.header = header
        self.stream = header.stream
        self.header_size = struct.calcsize(self.FORMAT)

        fields = struct.unpack_from(self.FORMAT, header.chunk_data)
        if fields[0] != ChunkType.START_ELEMENT:
            raise BadHeader()

        # String pool index of the element name.
        self.name = fields[6]
        self.attribute_count = fields[8]

        # Borrow the namespace of the last attribute record, if there is one.
        last_record = header.chunk_data[self.header_size :][-20:]
        if len(last_record) == 20:
            self.namespace = struct.unpack(self.ATTRIBUTE_FORMAT, last_record)[0]
        else:
            # There are no other attributes in the application tag
            self.namespace = -1

    def insert_debuggable(self, name: int, resource_map: ResourceMap) -> None:
        """Insert a boolean-true attribute whose name is string index *name*.

        The chunk's raw data, its size field and its attribute count are all
        updated, and the result is stored back on ``self.header``.

        Raises:
            BadHeader: if no namespace could be derived from an existing
                attribute.
        """
        # TODO: Instead of using the previous attribute to determine the probable
        # namespace for the debuggable tag we could scan the strings section
        # for the AndroidManifest schema tag
        if self.namespace == -1:
            raise BadHeader()

        patched = bytearray(self.header.chunk_data)

        new_attribute = struct.pack(
            self.ATTRIBUTE_FORMAT,
            self.namespace,
            name,
            -1,
            8,  # resource size
            0,
            ResourceType.BOOL,
            -1,  # Denotes a True value in AXML, 0 is used for False
        )

        # Some parts of Android expect this to be sorted by resource ID.
        # Walk the attribute slots (plus the one just past the last record)
        # and stop at the first whose resource ID is above debuggable's.
        first_slot = 0x24
        insert_at = None
        for insert_at in range(first_slot, first_slot + 20 * (self.attribute_count + 1), 20):
            name_index = int.from_bytes(patched[insert_at + 4 : insert_at + 8], "little")
            if resource_map.get_resource(name_index) > ResourceMap.DEBUGGING_RESOURCE:
                break

        patched[insert_at:insert_at] = new_attribute

        # Chunk size field.
        self.header.size = len(patched)
        patched[4:8] = struct.pack("<I", self.header.size)

        # Attribute count field.
        self.attribute_count += 1
        patched[28:30] = struct.pack("<H", self.attribute_count)

        self.header.chunk_data = bytes(patched)
class ResourceMap:
    """A RESOURCE_MAP chunk: an 8-byte header followed by 4-byte resource IDs.

    Entry *i* of the map is the resource ID associated with string pool
    entry *i*.
    """

    # Resource ID registered for the "debuggable" attribute name.
    DEBUGGING_RESOURCE = 0x101000F

    def __init__(self, header: ChunkHeader) -> None:
        self.header = header

    def add_debuggable(self, idx: int) -> None:
        """Map string pool index *idx* to ``DEBUGGING_RESOURCE``.

        The map is zero-padded when it is shorter than *idx* entries. If it
        already has a slot at *idx*, that slot is overwritten; appending in
        that case would store the ID at the wrong index.

        Raises:
            ValueError: if *idx* is ``None``.
        """
        if idx is None:
            raise ValueError("string pool index of the debuggable attribute is unknown")

        entry = self.DEBUGGING_RESOURCE.to_bytes(4, "little")
        entry_offset = 8 + idx * 4

        data = bytearray(self.header.chunk_data)
        if entry_offset < len(data):
            data[entry_offset : entry_offset + 4] = entry
        else:
            data.extend(b"\x00" * (entry_offset - len(data)))
            data.extend(entry)

        # Keep the size field of the chunk header in sync with the data.
        self.header.size = len(data)
        data[4:8] = struct.pack("<I", self.header.size)
        self.header.chunk_data = bytes(data)

    def get_resource(self, index: int) -> int:
        """Return the resource ID at *index*, or 0 when the map has no such entry."""
        offset = index * 4 + 8
        return int.from_bytes(self.header.chunk_data[offset : offset + 4], "little")
class StringPool:
    """A STRING_POOL chunk that can be read from and appended to.

    Strings are decoded from the original stream, while appended strings are
    written to ``header.chunk_data`` only.
    """

    # type, header size, chunk size, string count, style count, flags,
    # strings start, styles start.
    FORMAT = "<HHIIIIII"

    def __init__(self, header: ChunkHeader):
        """Parse the pool header and the table of string offsets.

        Raises:
            BadHeader: if the chunk is not of type ``ChunkType.STRING_POOL``.
        """
        self.header = header
        self.stream = self.header.stream
        self.header_size = struct.calcsize(self.FORMAT)
        data = struct.unpack_from(self.FORMAT, self.header.chunk_data)
        if data[0] != ChunkType.STRING_POOL:
            raise BadHeader()
        self.string_count = data[3]
        self.flags = data[5]
        self.strings_offset = data[6]
        self.styles_offset = data[7]
        self.utf8 = (self.flags & StringType.UTF8) != 0
        # Set once append_str() has shifted the string data by one offset entry.
        self.dirty = False
        offsets_data = self.header.chunk_data[self.header_size : self.header_size + self.string_count * 4]
        self.offsets: List[int] = [entry[0] for entry in struct.iter_unpack("<I", offsets_data)]

    def _read_utf8_length(self) -> int:
        """Read a one- or two-byte length prefix of a UTF-8 pool string."""
        n = struct.unpack("<B", self.stream.read(1))[0]
        if n & 0x80:
            n = ((n & 0x7F) << 8) | struct.unpack("<B", self.stream.read(1))[0]
        return n

    def _read_utf16_length(self) -> int:
        """Read a one- or two-word length prefix of a UTF-16 pool string."""
        n = struct.unpack("<H", self.stream.read(2))[0]
        if n & 0x8000:
            # The marker bit must be dropped and the first word must only
            # supply the high bits; OR-ing into the old value kept stale bits.
            n = ((n & 0x7FFF) << 16) | struct.unpack("<H", self.stream.read(2))[0]
        return n

    def get_string(self, index: int) -> str:
        """Return the string stored at *index* in the original pool.

        The string is read from the underlying stream, whose position is
        restored afterwards, even when decoding fails. Only strings that
        existed before any ``append_str()`` call can be looked up.
        """
        offset = self.offsets[index]

        # HACK: We subtract 4 because we insert a string offset during append_str
        # but we do not update the original stream and thus it reads stale data.
        if self.dirty:
            offset -= 4

        position = self.stream.tell()
        try:
            # NOTE(review): the constant 8 presumably accounts for the root
            # XML header preceding the pool chunk in the stream; confirm.
            self.stream.seek(self.strings_offset + 8 + offset, os.SEEK_SET)

            if self.utf8:
                # Ignore number of characters
                self._read_utf8_length()
                # UTF-8 encoded length
                n = self._read_utf8_length()
                return self.stream.read(n).decode("utf-8")

            n = self._read_utf16_length()
            return self.stream.read(n * 2).decode("utf-16le")
        finally:
            self.stream.seek(position, os.SEEK_SET)

    def append_str(self, add: str) -> int:
        """Append *add* to the pool and return its string index.

        A new offset entry is inserted after the existing string offsets and
        the encoded string is appended to the end of the chunk; the header
        fields (chunk size, string count, strings start and, when present,
        styles start) are updated to match.

        Raises:
            ValueError: if *add* is too long for a single-unit length prefix.
        """
        original = self.header.chunk_data
        end = self.header_size + self.string_count * 4

        # Rebuild the chunk with room for one more offset entry right after
        # the existing string offsets.
        chunk_data = bytearray(original[:end])
        chunk_data.extend(b"\x00" * 4)
        chunk_data.extend(original[end:])

        # The new string goes at the current end of the chunk; its offset is
        # relative to the string data, which itself moves down by 4 bytes.
        offset = len(chunk_data) - (self.strings_offset + 4)

        # Lengths are expressed in UTF-16 code units, not code points.
        utf16_data = add.encode("utf-16le")
        length_in_characters = len(utf16_data) // 2

        if self.utf8:
            utf8_data = add.encode("utf-8")
            length_in_bytes = len(utf8_data)
            if length_in_bytes >= 128:
                raise ValueError("multi-byte len strings not supported yet")
            chunk_data.extend(struct.pack("<BB", length_in_characters, length_in_bytes))
            chunk_data.extend(utf8_data)
            # Insert a UTF-8 NUL
            chunk_data.extend([0])
        else:
            if length_in_characters >= 0x8000:
                raise ValueError("multi-word len strings not supported yet")
            chunk_data.extend(struct.pack("<H", length_in_characters))
            chunk_data.extend(utf16_data)
            # Insert a UTF-16 NUL
            chunk_data.extend([0, 0])

        # pad to a multiple of 4 bytes
        if len(chunk_data) % 4 != 0:
            chunk_data.extend(b"\x00" * (4 - len(chunk_data) % 4))

        # Insert a new offset at the end of the existing offsets
        chunk_data[end : end + 4] = struct.pack("<I", offset)

        # Increase the header size since we have inserted a new offset and string
        self.header.size = len(chunk_data)
        chunk_data[4 : 4 + 4] = struct.pack("<I", self.header.size)

        self.string_count += 1
        chunk_data[8 : 8 + 4] = struct.pack("<I", self.string_count)

        # Increase strings offset since we have inserted a new offset and thus
        # shifted the offset of the strings
        self.strings_offset += 4
        chunk_data[20 : 20 + 4] = struct.pack("<I", self.strings_offset)

        # If there are styles, offset them as we have inserted into the strings
        # offsets
        if self.styles_offset != 0:
            self.styles_offset += 4
            # Write the styles offset itself; storing the strings offset here
            # would make the style data alias the string data.
            # NOTE(review): the new string still lands after the style data
            # when styles are present; strict parsers may reject that layout.
            chunk_data[24 : 24 + 4] = struct.pack("<I", self.styles_offset)

        self.header.chunk_data = bytes(chunk_data)

        self.dirty = True

        return self.string_count - 1
# Script entry point: run the tool, and exit quietly when interrupted.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the tool; suppress the traceback.
        pass