创建 myhexdump.py
parent
27631b814a
commit
649dd5ac89
|
|
@ -0,0 +1,479 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# !/usr/bin/env python
|
||||||
|
# -*- coding: latin-1 -*-
|
||||||
|
|
||||||
|
# <-- removing this magic comment breaks Python 3.4 on Windows
|
||||||
|
"""
|
||||||
|
1. Dump binary data to the following text format:
|
||||||
|
|
||||||
|
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||||||
|
00000010: 00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
|
||||||
|
|
||||||
|
It is similar to the one used by:
|
||||||
|
Scapy
|
||||||
|
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
|
||||||
|
00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
|
||||||
|
|
||||||
|
Far Manager
|
||||||
|
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
|
||||||
|
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 AA BB CC DD EE FF ?"3DUfwª»ÌÝîÿ
|
||||||
|
|
||||||
|
|
||||||
|
2. Restore binary data from the formats above as well
|
||||||
|
as from less exotic strings of raw hex
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = '3.3'
|
||||||
|
__author__ = 'anatoly techtonik <techtonik@gmail.com>'
|
||||||
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
|
__history__ = \
|
||||||
|
"""
|
||||||
|
3.3 (2015-01-22)
|
||||||
|
* accept input from sys.stdin if "-" is specified
|
||||||
|
for both dump and restore (issue #1)
|
||||||
|
* new normalize_py() helper to set sys.stdout to
|
||||||
|
binary mode on Windows
|
||||||
|
|
||||||
|
3.2 (2015-07-02)
|
||||||
|
* hexdump is now packaged as .zip on all platforms
|
||||||
|
(on Linux created archive was tar.gz)
|
||||||
|
* .zip is executable! try `python hexdump-3.2.zip`
|
||||||
|
* dump() now accepts configurable separator, patch
|
||||||
|
by Ian Land (PR #3)
|
||||||
|
|
||||||
|
3.1 (2014-10-20)
|
||||||
|
* implemented workaround against mysterious coding
|
||||||
|
issue with Python 3 (see revision 51302cf)
|
||||||
|
* fix Python 3 installs for systems where UTF-8 is
|
||||||
|
not default (Windows), thanks to George Schizas
|
||||||
|
(the problem was caused by reading of README.txt)
|
||||||
|
|
||||||
|
3.0 (2014-09-07)
|
||||||
|
* remove unused int2byte() helper
|
||||||
|
* add dehex(text) helper to convert hex string
|
||||||
|
to binary data
|
||||||
|
* add 'size' argument to dump() helper to specify
|
||||||
|
length of chunks
|
||||||
|
|
||||||
|
2.0 (2014-02-02)
|
||||||
|
* add --restore option to command line mode to get
|
||||||
|
binary data back from hex dump
|
||||||
|
* support saving test output with `--test logfile`
|
||||||
|
* restore() from hex strings without spaces
|
||||||
|
* restore() now raises TypeError if input data is
|
||||||
|
not string
|
||||||
|
* hexdump() and dumpgen() now don't return unicode
|
||||||
|
strings in Python 2.x when generator is requested
|
||||||
|
|
||||||
|
1.0 (2013-12-30)
|
||||||
|
* length of address is reduced from 10 to 8
|
||||||
|
* hexdump() got new 'result' keyword argument, it
|
||||||
|
can be either 'print', 'generator' or 'return'
|
||||||
|
* actual dumping logic is now in new dumpgen()
|
||||||
|
generator function
|
||||||
|
* new dump(binary) function that takes binary data
|
||||||
|
and returns string like "66 6F 72 6D 61 74"
|
||||||
|
* new genchunks(mixed, size) function that chunks
|
||||||
|
both sequences and file like objects
|
||||||
|
|
||||||
|
0.5 (2013-06-10)
|
||||||
|
* hexdump is now also a command line utility (no
|
||||||
|
restore yet)
|
||||||
|
|
||||||
|
0.4 (2013-06-09)
|
||||||
|
* fix installation with Python 3 for non English
|
||||||
|
versions of Windows, thanks to George Schizas
|
||||||
|
|
||||||
|
0.3 (2013-04-29)
|
||||||
|
* fully Python 3 compatible
|
||||||
|
|
||||||
|
0.2 (2013-04-28)
|
||||||
|
* restore() to recover binary data from a hex dump in
|
||||||
|
native, Far Manager and Scapy text formats (others
|
||||||
|
might work as well)
|
||||||
|
* restore() is Python 3 compatible
|
||||||
|
|
||||||
|
0.1 (2013-04-28)
|
||||||
|
* working hexdump() function for Python 2
|
||||||
|
"""
|
||||||
|
|
||||||
|
import binascii # binascii is required for Python 3
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# --- constants
|
||||||
|
# True when the interpreter's major version is 3 or newer; used to pick
# between the bytes-based (Python 3) and str-based (Python 2) code paths.
PY3K = sys.version_info[0] >= 3
|
||||||
|
|
||||||
|
|
||||||
|
# --- workaround against Python consistency issues
|
||||||
|
def normalize_py():
    r'''Work around platform inconsistencies of the Python runtime.

    Problem 001 - sys.stdout in Python is by default opened in
    text mode, and writes to this stdout produce corrupted binary
    data on Windows:

        python -c "import sys; sys.stdout.write('_\n_')" > file
        python -c "print(repr(open('file', 'rb').read()))"

    When ``sys.platform`` is "win32" the file descriptor behind
    sys.stdout is switched to binary mode. On every other platform
    the function does nothing. Returns None.
    '''
    if sys.platform == "win32":
        # imported lazily because msvcrt is only importable on Windows
        import msvcrt
        import os
        # set sys.stdout to binary mode on Windows
        msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||||
|
|
||||||
|
|
||||||
|
# --- - chunking helpers
|
||||||
|
def chunks(seq, size):
    '''Generator that cuts a sequence (bytes, memoryview, etc.)
    into chunks of the given size. If the length of `seq` is not a
    multiple of `size`, the last chunk returned will be shorter
    than requested.

    `seq` must support len() and slicing; every chunk is a slice of
    it, so chunks have the same type as `seq`.

    >>> list( chunks([1,2,3,4,5,6,7], 3) )
    [[1, 2, 3], [4, 5, 6], [7]]
    '''
    full, remainder = divmod(len(seq), size)
    for index in range(full):
        yield seq[index * size:(index + 1) * size]
    if remainder:
        # trailing partial chunk
        yield seq[full * size:]
|
||||||
|
|
||||||
|
|
||||||
|
def chunkread(f, size):
    '''Generator that reads from a file like object in pieces of up
    to `size` items. May return less data than requested on the last
    read.

    Iteration stops at the first read() that returns an empty (falsy)
    result, which is how file objects signal end of stream.
    '''
    while True:
        block = f.read(size)
        if not block:
            break
        yield block
|
||||||
|
|
||||||
|
|
||||||
|
def genchunks(mixed, size):
    '''Generator to chunk binary sequences or file like objects.
    The size of the last chunk returned may be less than
    requested.

    Anything exposing a `read` attribute is treated as a file like
    object and consumed with chunkread(); everything else is sliced
    with chunks().
    '''
    if hasattr(mixed, 'read'):
        return chunkread(mixed, size)
    return chunks(mixed, size)
|
||||||
|
|
||||||
|
|
||||||
|
# --- - /chunking helpers
|
||||||
|
|
||||||
|
|
||||||
|
def dehex(hextext):
    """
    Convert from hex string to binary data stripping
    whitespaces from `hextext` if necessary.

    All whitespace (spaces, tabs, newlines) is removed before
    decoding on every Python version; bytes.fromhex() alone skipped
    only spaces before Python 3.7. Returns bytes (str on Python 2).
    Raises ValueError (TypeError on Python 2) for odd-length or
    non-hexadecimal input.
    """
    compact = "".join(hextext.split())
    # unhexlify accepts ASCII text on both Python 2 and Python 3.3+
    return binascii.unhexlify(compact)
|
||||||
|
|
||||||
|
|
||||||
|
def dump(binary, size=2, sep=' '):
    '''
    Convert binary data (bytes in Python 3 and str in
    Python 2) to hex string like '00 DE AD BE EF'.
    `size` argument specifies length of text chunks
    and `sep` sets chunk separator.

    `size` counts hex digits, not bytes: the default of 2 puts one
    byte in every chunk. Hex digits are upper case.
    '''
    hexstr = binascii.hexlify(binary)
    if PY3K:
        # hexlify() returns bytes on Python 3; the result must be text
        hexstr = hexstr.decode('ascii')
    return sep.join(chunks(hexstr.upper(), size))
|
||||||
|
|
||||||
|
|
||||||
|
def dumpgen(data, only_str):
    '''
    Generator that produces strings:

    '00000000: 00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  ................'

    `data` is a binary sequence or a file like object and is consumed
    16 bytes at a time. When `only_str` is true the leading
    '00000000: ' address column is left out of every line.
    '''
    for index, chunk in enumerate(genchunks(data, 16)):
        count = len(chunk)
        parts = []
        if not only_str:
            # 00000000:
            parts.append('%08X: ' % (index * 16))

        # 00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
        dumpstr = dump(chunk)
        parts.append(dumpstr[:8 * 3])
        if count > 8:  # insert separator if needed
            parts.append(' ' + dumpstr[8 * 3:])

        # calculate indentation, which may be different for the last line
        pad = 2
        if count < 16:
            # three columns for every missing byte
            pad += 3 * (16 - count)
        if count <= 8:
            # the middle separator column was not emitted
            pad += 1
        parts.append(' ' * pad)

        # ................
        for byte in chunk:
            if not PY3K:
                # iterating a Python 2 str yields 1-char strings
                byte = ord(byte)
            # printable ASCII range 0x20 to 0x7E
            parts.append(chr(byte) if 0x20 <= byte <= 0x7E else '.')
        yield ''.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def hexdump(data, result='print', only_str=False):
    '''
    Transform binary data to the hex dump text format:

    00000000: 00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  ................

      [x] data argument as a binary string
      [x] data argument as a file like object

    Returns result depending on the `result` argument:
      'print'     - prints line by line
      'return'    - returns single string
      'generator' - returns generator that produces lines

    `only_str` is passed to dumpgen(); when true the address column
    is omitted.

    Raises TypeError when given a text string on Python 3 and
    ValueError for an unknown `result` value.
    '''
    if PY3K and isinstance(data, str):
        raise TypeError('Abstract unicode data (expected bytes sequence)')
    if result not in ('print', 'return', 'generator'):
        raise ValueError('Unknown value of `result` argument')

    gen = dumpgen(data, only_str=only_str)
    if result == 'generator':
        return gen
    if result == 'return':
        return '\n'.join(gen)
    for line in gen:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
def restore(dump):
    '''
    Restore binary data from a hex dump.
      [x] dump argument as a string
      [ ] dump argument as a line iterator

    Supported formats:
      [x] hexdump.hexdump
      [x] Scapy
      [x] Far Manager

    Lines of raw hex without separators are accepted as well.
    Returns bytes (str on Python 2). Raises TypeError if `dump` is
    not a string.
    '''
    minhexwidth = 2 * 16       # minimal width of the hex part - 00000... style
    bytehexwidth = 3 * 16 - 1  # min width for a bytewise dump - 00 00 ... style
    sepstart = (2 + 1) * 7 + 2  # ('00'+' ')*7+'00'

    if not isinstance(dump, str):
        raise TypeError('Invalid data for restore')

    pieces = []
    text = dump.strip()  # ignore surrounding empty lines
    for line in text.split('\n'):
        # strip address part
        addrend = line.find(':')
        if 0 < addrend < minhexwidth:  # : is not in ascii part
            line = line[addrend + 1:]
        line = line.lstrip()
        # check dump type; slicing keeps short and empty lines from raising
        if line[2:3] == ' ':  # 00 00 00 ...  type of dump
            # check separator
            sep = line[sepstart:sepstart + 3]
            if sep[:2] == '  ' and sep[2:] != ' ':  # ...00 00  00 00...
                line = line[:bytehexwidth + 1]
            elif sep[2:] == ' ':  # ...00 00 | 00 00...  - Far Manager
                line = line[:sepstart] + line[sepstart + 3:bytehexwidth + 2]
            else:  # ...00 00 00 00... - Scapy, no separator
                line = line[:bytehexwidth]
        pieces.append(dehex(line))
    # b'' is bytes on Python 3 and plain str on Python 2
    return b''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def runtest(logfile=None):
    '''Run hexdump tests. The reference binary is loaded with
    pkgutil.get_data() as 'data/hexfile.bin' from the 'hexdump'
    package.

    If `logfile` is given, everything written to sys.stdout and
    sys.stderr during the run is also copied to that file. The
    original streams are restored and the log is closed even when a
    check fails. Failed checks raise AssertionError.
    '''
    import os
    import pkgutil
    import tempfile

    class TeeOutput(object):
        '''Write-only stream that copies its output to two streams.'''

        def __init__(self, stream1, stream2):
            self.outputs = [stream1, stream2]

        # -- methods from sys.stdout / sys.stderr
        def write(self, data):
            for stream in self.outputs:
                # encode per stream so a text stream that follows a
                # binary one still receives the original text
                payload = data
                if PY3K and 'b' in getattr(stream, 'mode', ''):
                    payload = data.encode('utf-8')
                stream.write(payload)
                stream.flush()

        def tell(self):
            raise IOError

        def flush(self):
            for stream in self.outputs:
                stream.flush()
        # --/ sys.stdout

    def echo(msg, linefeed=True):
        sys.stdout.write(msg)
        if linefeed:
            sys.stdout.write('\n')

    openlog = None
    savedstd = sys.stderr, sys.stdout
    if logfile:
        openlog = open(logfile, 'wb')
        # copy stdout and stderr streams to log file
        sys.stderr = TeeOutput(sys.stderr, openlog)
        sys.stdout = TeeOutput(sys.stdout, openlog)

    try:
        # two spaces between the 8-byte halves and before the ASCII
        # column, exactly as dumpgen() emits them
        expected = '''\
00000000: 00 00 00 5B 68 65 78 64  75 6D 70 5D 00 00 00 00  ...[hexdump]....
00000010: 00 11 22 33 44 55 66 77  88 99 0A BB CC DD EE FF  .."3DUfw........\
'''

        # get path to hexfile.bin
        # resolving it relative to __file__ or sys.modules[__name__]
        # does not work when running from a .zip; pkgutil does
        # NOTE(review): this loads from a package named 'hexdump' -
        # confirm that matches how this module is installed
        sample = pkgutil.get_data('hexdump', 'data/hexfile.bin')

        # various lengths of input data
        hexdump(b'zzzz' * 12)
        hexdump(b'o' * 17)
        hexdump(b'p' * 24)
        hexdump(b'q' * 26)
        # allowable character set filter
        hexdump(b'line\nfeed\r\ntest')
        hexdump(b'\x00\x00\x00\x5B\x68\x65\x78\x64\x75\x6D\x70\x5D\x00\x00\x00\x00'
                b'\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\x0A\xBB\xCC\xDD\xEE\xFF')
        print('---')
        # dumping binary data to screen (default behavior)
        hexdump(sample)
        print('return output')
        hexout = hexdump(sample, result='return')
        assert hexout == expected, 'returned hex didn\'t match'
        print('return generator')
        hexgen = hexdump(sample, result='generator')
        assert next(hexgen) == expected.split('\n')[0], 'hex generator 1 didn\'t match'
        assert next(hexgen) == expected.split('\n')[1], 'hex generator 2 didn\'t match'

        # binary restore test
        bindata = restore(
'''
00000000: 00 00 00 5B 68 65 78 64  75 6D 70 5D 00 00 00 00  ...[hexdump]....
00000010: 00 11 22 33 44 55 66 77  88 99 0A BB CC DD EE FF  .."3DUfw........
''')
        echo('restore check ', linefeed=False)
        assert sample == bindata, 'restore check failed'
        echo('passed')

        far = \
'''
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 0A BB CC DD EE FF ?"3DUfwª»ÌÝîÿ
'''
        echo('restore far format ', linefeed=False)
        assert sample == restore(far), 'far format check failed'
        echo('passed')

        scapy = '''\
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........
'''
        echo('restore scapy format ', linefeed=False)
        assert sample == restore(scapy), 'scapy format check failed'
        echo('passed')

        if not PY3K:
            assert restore('5B68657864756D705D') == '[hexdump]', 'no space check failed'
            assert dump('\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'
        else:
            assert restore('5B68657864756D705D') == b'[hexdump]', 'no space check failed'
            assert dump(b'\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'

        print('---[test file hexdumping]---')

        hexfile = tempfile.NamedTemporaryFile(delete=False)
        try:
            hexfile.write(sample)
            hexfile.close()
            # close the reopened handle before removal; an open file
            # cannot be deleted on Windows
            with open(hexfile.name, 'rb') as reopened:
                hexdump(reopened)
        finally:
            hexfile.close()  # no-op if already closed
            os.remove(hexfile.name)
    finally:
        if openlog is not None:
            sys.stderr, sys.stdout = savedstd
            openlog.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    '''Command line entry point.

    Dumps a binary file (or stdin when the argument is "-") as hex,
    restores binary data from a hex dump when -r/--restore is given,
    or runs the sanity checks with --test. Prints the help text and
    exits with status -1 unless exactly one positional argument is
    given (--test accepts zero or one).
    '''
    from optparse import OptionParser

    parser = OptionParser(usage='''
%prog [binfile|-]
%prog -r hexfile
%prog --test [logfile]''', version=__version__)
    parser.add_option('-r', '--restore', action='store_true',
                      help='restore binary from hex dump')
    parser.add_option('--test', action='store_true',
                      help='run hexdump sanity checks')

    options, args = parser.parse_args()

    if options.test:
        if args:
            runtest(logfile=args[0])
        else:
            runtest()
        return

    if len(args) != 1:
        parser.print_help()
        sys.exit(-1)

    source = args[0]

    ## dump file
    if not options.restore:
        # [x] memory effective dump
        if source == '-':
            # Python 3 exposes the raw byte stream as sys.stdin.buffer
            hexdump(sys.stdin.buffer if PY3K else sys.stdin)
        else:
            with open(source, 'rb') as binfile:
                hexdump(binfile)
        return

    ## restore file
    # prepare input: restore() needs str, which is text mode on
    # Python 3 and a byte string on Python 2
    if source == '-':
        text = sys.stdin.read()
    else:
        with open(source, 'r' if PY3K else 'rb') as instream:
            text = instream.read()

    # output stream
    # [ ] memory efficient restore
    if PY3K:
        sys.stdout.buffer.write(restore(text))
    else:
        # Windows - binary mode for sys.stdout to prevent data corruption
        normalize_py()
        sys.stdout.write(restore(text))
|
||||||
|
|
||||||
|
|
||||||
|
# run the command line interface only when executed as a script
if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
# [x] file restore from command line utility
|
||||||
|
# [ ] write dump with LF on Windows for consistency
|
||||||
|
# [ ] encoding param for hexdump()ing Python 3 str if anybody requests that
|
||||||
|
|
||||||
|
# [ ] document chunking API
|
||||||
|
# [ ] document hexdump API
|
||||||
|
# [ ] blog about sys.stdout text mode problem on Windows
|
||||||
Reference in New Issue