480 lines
15 KiB
Python
480 lines
15 KiB
Python
# -*- coding: utf-8 -*-
|
||
# !/usr/bin/env python
|
||
# -*- coding: latin-1 -*-
|
||
|
||
# <-- removing this magic comment breaks Python 3.4 on Windows
|
||
"""
|
||
1. Dump binary data to the following text format:
|
||
|
||
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||
00000010: 00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
|
||
|
||
It is similar to the one used by:
|
||
Scapy
|
||
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||
00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
|
||
|
||
Far Manager
|
||
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
|
||
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 AA BB CC DD EE FF ?"3DUfwª»ÌÝîÿ
|
||
|
||
|
||
2. Restore binary data from the formats above as well
|
||
as from less exotic strings of raw hex
|
||
|
||
"""
|
||
|
||
__version__ = '3.3'
|
||
__author__ = 'anatoly techtonik <techtonik@gmail.com>'
|
||
__license__ = 'Public Domain'
|
||
|
||
__history__ = \
|
||
"""
|
||
3.3 (2015-01-22)
|
||
* accept input from sys.stdin if "-" is specified
|
||
for both dump and restore (issue #1)
|
||
* new normalize_py() helper to set sys.stdout to
|
||
binary mode on Windows
|
||
|
||
3.2 (2015-07-02)
|
||
* hexdump is now packaged as .zip on all platforms
|
||
(on Linux created archive was tar.gz)
|
||
* .zip is executable! try `python hexdump-3.2.zip`
|
||
* dump() now accepts configurable separator, patch
|
||
by Ian Land (PR #3)
|
||
|
||
3.1 (2014-10-20)
|
||
* implemented workaround against mysterious coding
|
||
issue with Python 3 (see revision 51302cf)
|
||
* fix Python 3 installs for systems where UTF-8 is
|
||
not default (Windows), thanks to George Schizas
|
||
(the problem was caused by reading of README.txt)
|
||
|
||
3.0 (2014-09-07)
|
||
* remove unused int2byte() helper
|
||
* add dehex(text) helper to convert hex string
|
||
to binary data
|
||
* add 'size' argument to dump() helper to specify
|
||
length of chunks
|
||
|
||
2.0 (2014-02-02)
|
||
* add --restore option to command line mode to get
|
||
binary data back from hex dump
|
||
* support saving test output with `--test logfile`
|
||
* restore() from hex strings without spaces
|
||
* restore() now raises TypeError if input data is
|
||
not string
|
||
* hexdump() and dumpgen() now don't return unicode
|
||
strings in Python 2.x when generator is requested
|
||
|
||
1.0 (2013-12-30)
|
||
* length of address is reduced from 10 to 8
|
||
* hexdump() got new 'result' keyword argument, it
|
||
can be either 'print', 'generator' or 'return'
|
||
* actual dumping logic is now in new dumpgen()
|
||
generator function
|
||
* new dump(binary) function that takes binary data
|
||
and returns string like "66 6F 72 6D 61 74"
|
||
* new genchunks(mixed, size) function that chunks
|
||
both sequences and file like objects
|
||
|
||
0.5 (2013-06-10)
|
||
* hexdump is now also a command line utility (no
|
||
restore yet)
|
||
|
||
0.4 (2013-06-09)
|
||
* fix installation with Python 3 for non English
|
||
versions of Windows, thanks to George Schizas
|
||
|
||
0.3 (2013-04-29)
|
||
* fully Python 3 compatible
|
||
|
||
0.2 (2013-04-28)
|
||
* restore() to recover binary data from a hex dump in
|
||
native, Far Manager and Scapy text formats (others
|
||
might work as well)
|
||
* restore() is Python 3 compatible
|
||
|
||
0.1 (2013-04-28)
|
||
* working hexdump() function for Python 2
|
||
"""
|
||
|
||
import binascii # binascii is required for Python 3
|
||
import sys
|
||
|
||
# --- constants
|
||
PY3K = sys.version_info >= (3, 0)
|
||
|
||
|
||
# --- workaround against Python consistency issues
|
||
def normalize_py():
|
||
''' Problem 001 - sys.stdout in Python is by default opened in
|
||
text mode, and writes to this stdout produce corrupted binary
|
||
data on Windows
|
||
|
||
python -c "import sys; sys.stdout.write('_\n_')" > file
|
||
python -c "print(repr(open('file', 'rb').read()))"
|
||
'''
|
||
if sys.platform == "win32":
|
||
# set sys.stdout to binary mode on Windows
|
||
import os, msvcrt
|
||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||
|
||
|
||
# --- - chunking helpers
|
||
def chunks(seq, size):
|
||
'''Generator that cuts sequence (bytes, memoryview, etc.)
|
||
into chunks of given size. If `seq` length is not multiply
|
||
of `size`, the lengh of the last chunk returned will be
|
||
less than requested.
|
||
|
||
>>> list( chunks([1,2,3,4,5,6,7], 3) )
|
||
[[1, 2, 3], [4, 5, 6], [7]]
|
||
'''
|
||
d, m = divmod(len(seq), size)
|
||
for i in range(d):
|
||
yield seq[i * size:(i + 1) * size]
|
||
if m:
|
||
yield seq[d * size:]
|
||
|
||
|
||
def chunkread(f, size):
|
||
'''Generator that reads from file like object. May return less
|
||
data than requested on the last read.'''
|
||
c = f.read(size)
|
||
while len(c):
|
||
yield c
|
||
c = f.read(size)
|
||
|
||
|
||
def genchunks(mixed, size):
|
||
'''Generator to chunk binary sequences or file like objects.
|
||
The size of the last chunk returned may be less than
|
||
requested.'''
|
||
if hasattr(mixed, 'read'):
|
||
return chunkread(mixed, size)
|
||
else:
|
||
return chunks(mixed, size)
|
||
|
||
|
||
# --- - /chunking helpers
|
||
|
||
|
||
def dehex(hextext):
|
||
"""
|
||
Convert from hex string to binary data stripping
|
||
whitespaces from `hextext` if necessary.
|
||
"""
|
||
if PY3K:
|
||
return bytes.fromhex(hextext)
|
||
else:
|
||
hextext = "".join(hextext.split())
|
||
return hextext.decode('hex')
|
||
|
||
|
||
def dump(binary, size=2, sep=' '):
|
||
'''
|
||
Convert binary data (bytes in Python 3 and str in
|
||
Python 2) to hex string like '00 DE AD BE EF'.
|
||
`size` argument specifies length of text chunks
|
||
and `sep` sets chunk separator.
|
||
'''
|
||
hexstr = binascii.hexlify(binary)
|
||
if PY3K:
|
||
hexstr = hexstr.decode('ascii')
|
||
return sep.join(chunks(hexstr.upper(), size))
|
||
|
||
|
||
def dumpgen(data, only_str):
|
||
'''
|
||
Generator that produces strings:
|
||
|
||
'00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................'
|
||
'''
|
||
generator = genchunks(data, 16)
|
||
for addr, d in enumerate(generator):
|
||
line = ""
|
||
if not only_str:
|
||
# 00000000:
|
||
line = '%08X: ' % (addr * 16)
|
||
# 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||
dumpstr = dump(d)
|
||
line += dumpstr[:8 * 3]
|
||
if len(d) > 8: # insert separator if needed
|
||
line += ' ' + dumpstr[8 * 3:]
|
||
# ................
|
||
# calculate indentation, which may be different for the last line
|
||
pad = 2
|
||
if len(d) < 16:
|
||
pad += 3 * (16 - len(d))
|
||
if len(d) <= 8:
|
||
pad += 1
|
||
line += ' ' * pad
|
||
|
||
for byte in d:
|
||
# printable ASCII range 0x20 to 0x7E
|
||
if not PY3K:
|
||
byte = ord(byte)
|
||
if 0x20 <= byte <= 0x7E:
|
||
line += chr(byte)
|
||
else:
|
||
line += '.'
|
||
yield line
|
||
|
||
|
||
def hexdump(data, result='print', only_str=False):
|
||
'''
|
||
Transform binary data to the hex dump text format:
|
||
|
||
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||
|
||
[x] data argument as a binary string
|
||
[x] data argument as a file like object
|
||
|
||
Returns result depending on the `result` argument:
|
||
'print' - prints line by line
|
||
'return' - returns single string
|
||
'generator' - returns generator that produces lines
|
||
'''
|
||
if PY3K and type(data) == str:
|
||
raise TypeError('Abstract unicode data (expected bytes sequence)')
|
||
|
||
gen = dumpgen(data, only_str=only_str)
|
||
if result == 'generator':
|
||
return gen
|
||
elif result == 'return':
|
||
return '\n'.join(gen)
|
||
elif result == 'print':
|
||
for line in gen:
|
||
print(line)
|
||
else:
|
||
raise ValueError('Unknown value of `result` argument')
|
||
|
||
|
||
def restore(dump):
|
||
'''
|
||
Restore binary data from a hex dump.
|
||
[x] dump argument as a string
|
||
[ ] dump argument as a line iterator
|
||
|
||
Supported formats:
|
||
[x] hexdump.hexdump
|
||
[x] Scapy
|
||
[x] Far Manager
|
||
'''
|
||
minhexwidth = 2 * 16 # minimal width of the hex part - 00000... style
|
||
bytehexwidth = 3 * 16 - 1 # min width for a bytewise dump - 00 00 ... style
|
||
|
||
result = bytes() if PY3K else ''
|
||
if type(dump) != str:
|
||
raise TypeError('Invalid data for restore')
|
||
|
||
text = dump.strip() # ignore surrounding empty lines
|
||
for line in text.split('\n'):
|
||
# strip address part
|
||
addrend = line.find(':')
|
||
if 0 < addrend < minhexwidth: # : is not in ascii part
|
||
line = line[addrend + 1:]
|
||
line = line.lstrip()
|
||
# check dump type
|
||
if line[2] == ' ': # 00 00 00 ... type of dump
|
||
# check separator
|
||
sepstart = (2 + 1) * 7 + 2 # ('00'+' ')*7+'00'
|
||
sep = line[sepstart:sepstart + 3]
|
||
if sep[:2] == ' ' and sep[2:] != ' ': # ...00 00 00 00...
|
||
hexdata = line[:bytehexwidth + 1]
|
||
elif sep[2:] == ' ': # ...00 00 | 00 00... - Far Manager
|
||
hexdata = line[:sepstart] + line[sepstart + 3:bytehexwidth + 2]
|
||
else: # ...00 00 00 00... - Scapy, no separator
|
||
hexdata = line[:bytehexwidth]
|
||
line = hexdata
|
||
result += dehex(line)
|
||
return result
|
||
|
||
|
||
def runtest(logfile=None):
|
||
'''Run hexdump tests. Requires hexfile.bin to be in the same
|
||
directory as hexdump.py itself'''
|
||
|
||
class TeeOutput(object):
|
||
def __init__(self, stream1, stream2):
|
||
self.outputs = [stream1, stream2]
|
||
|
||
# -- methods from sys.stdout / sys.stderr
|
||
def write(self, data):
|
||
for stream in self.outputs:
|
||
if PY3K:
|
||
if 'b' in stream.mode:
|
||
data = data.encode('utf-8')
|
||
stream.write(data)
|
||
stream.flush()
|
||
|
||
def tell(self):
|
||
raise IOError
|
||
|
||
def flush(self):
|
||
for stream in self.outputs:
|
||
stream.flush()
|
||
# --/ sys.stdout
|
||
|
||
if logfile:
|
||
openlog = open(logfile, 'wb')
|
||
# copy stdout and stderr streams to log file
|
||
savedstd = sys.stderr, sys.stdout
|
||
sys.stderr = TeeOutput(sys.stderr, openlog)
|
||
sys.stdout = TeeOutput(sys.stdout, openlog)
|
||
|
||
def echo(msg, linefeed=True):
|
||
sys.stdout.write(msg)
|
||
if linefeed:
|
||
sys.stdout.write('\n')
|
||
|
||
expected = '''\
|
||
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||
00000010: 00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........\
|
||
'''
|
||
|
||
# get path to hexfile.bin
|
||
# this doesn't work from .zip
|
||
# import os.path as osp
|
||
# hexfile = osp.dirname(osp.abspath(__file__)) + '/hexfile.bin'
|
||
# this doesn't work either
|
||
# hexfile = osp.dirname(sys.modules[__name__].__file__) + '/hexfile.bin'
|
||
# this works
|
||
import pkgutil
|
||
bin = pkgutil.get_data('hexdump', 'data/hexfile.bin')
|
||
|
||
# varios length of input data
|
||
hexdump(b'zzzz' * 12)
|
||
hexdump(b'o' * 17)
|
||
hexdump(b'p' * 24)
|
||
hexdump(b'q' * 26)
|
||
# allowable character set filter
|
||
hexdump(b'line\nfeed\r\ntest')
|
||
hexdump(b'\x00\x00\x00\x5B\x68\x65\x78\x64\x75\x6D\x70\x5D\x00\x00\x00\x00'
|
||
b'\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\x0A\xBB\xCC\xDD\xEE\xFF')
|
||
print('---')
|
||
# dumping file-like binary object to screen (default behavior)
|
||
hexdump(bin)
|
||
print('return output')
|
||
hexout = hexdump(bin, result='return')
|
||
assert hexout == expected, 'returned hex didn\'t match'
|
||
print('return generator')
|
||
hexgen = hexdump(bin, result='generator')
|
||
assert next(hexgen) == expected.split('\n')[0], 'hex generator 1 didn\'t match'
|
||
assert next(hexgen) == expected.split('\n')[1], 'hex generator 2 didn\'t match'
|
||
|
||
# binary restore test
|
||
bindata = restore(
|
||
'''
|
||
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||
00000010: 00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........
|
||
''')
|
||
echo('restore check ', linefeed=False)
|
||
assert bin == bindata, 'restore check failed'
|
||
echo('passed')
|
||
|
||
far = \
|
||
'''
|
||
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
|
||
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 0A BB CC DD EE FF ?"3DUfwª»ÌÝîÿ
|
||
'''
|
||
echo('restore far format ', linefeed=False)
|
||
assert bin == restore(far), 'far format check failed'
|
||
echo('passed')
|
||
|
||
scapy = '''\
|
||
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||
00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........
|
||
'''
|
||
echo('restore scapy format ', linefeed=False)
|
||
assert bin == restore(scapy), 'scapy format check failed'
|
||
echo('passed')
|
||
|
||
if not PY3K:
|
||
assert restore('5B68657864756D705D') == '[hexdump]', 'no space check failed'
|
||
assert dump('\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'
|
||
else:
|
||
assert restore('5B68657864756D705D') == b'[hexdump]', 'no space check failed'
|
||
assert dump(b'\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'
|
||
|
||
print('---[test file hexdumping]---')
|
||
|
||
import os
|
||
import tempfile
|
||
hexfile = tempfile.NamedTemporaryFile(delete=False)
|
||
try:
|
||
hexfile.write(bin)
|
||
hexfile.close()
|
||
hexdump(open(hexfile.name, 'rb'))
|
||
finally:
|
||
os.remove(hexfile.name)
|
||
if logfile:
|
||
sys.stderr, sys.stdout = savedstd
|
||
openlog.close()
|
||
|
||
|
||
def main():
|
||
from optparse import OptionParser
|
||
parser = OptionParser(usage='''
|
||
%prog [binfile|-]
|
||
%prog -r hexfile
|
||
%prog --test [logfile]''', version=__version__)
|
||
parser.add_option('-r', '--restore', action='store_true',
|
||
help='restore binary from hex dump')
|
||
parser.add_option('--test', action='store_true', help='run hexdump sanity checks')
|
||
|
||
options, args = parser.parse_args()
|
||
|
||
if options.test:
|
||
if args:
|
||
runtest(logfile=args[0])
|
||
else:
|
||
runtest()
|
||
elif not args or len(args) > 1:
|
||
parser.print_help()
|
||
sys.exit(-1)
|
||
else:
|
||
## dump file
|
||
if not options.restore:
|
||
# [x] memory effective dump
|
||
if args[0] == '-':
|
||
if not PY3K:
|
||
hexdump(sys.stdin)
|
||
else:
|
||
hexdump(sys.stdin.buffer)
|
||
else:
|
||
hexdump(open(args[0], 'rb'))
|
||
|
||
## restore file
|
||
else:
|
||
# prepare input stream
|
||
if args[0] == '-':
|
||
instream = sys.stdin
|
||
else:
|
||
if PY3K:
|
||
instream = open(args[0])
|
||
else:
|
||
instream = open(args[0], 'rb')
|
||
|
||
# output stream
|
||
# [ ] memory efficient restore
|
||
if PY3K:
|
||
sys.stdout.buffer.write(restore(instream.read()))
|
||
else:
|
||
# Windows - binary mode for sys.stdout to prevent data corruption
|
||
normalize_py()
|
||
sys.stdout.write(restore(instream.read()))
|
||
|
||
|
||
if __name__ == '__main__':
|
||
main()
|
||
|
||
# [x] file restore from command line utility
|
||
# [ ] write dump with LF on Windows for consistency
|
||
# [ ] encoding param for hexdump()ing Python 3 str if anybody requests that
|
||
|
||
# [ ] document chunking API
|
||
# [ ] document hexdump API
|
||
# [ ] blog about sys.stdout text mode problem on Windows
|