-
Notifications
You must be signed in to change notification settings - Fork 0
/
pcodedmp_extractor.py
235 lines (215 loc) · 8.51 KB
/
pcodedmp_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import itertools
import sys
from struct import unpack_from
"""
Wrapper over the getTheIdentifiers function.
'vba_project_stream' parameter is a bytes object.
"""
def get_all_identifiers(vba_project_stream):
    """Collect the identifiers of a VBA project stream and log them.

    Delegates the parsing to ``getTheIdentifiers`` and prints the resulting
    list to standard output before handing it back to the caller.

    Args:
        vba_project_stream: The stream contents as a bytes object.

    Returns:
        The list produced by ``getTheIdentifiers`` for the given stream.
    """
    identifiers = getTheIdentifiers(vba_project_stream)
    message = f'\t[PCODEDMP] All Identifiers = {identifiers}.'
    print(message)
    return identifiers
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
vbaProjectData parameter is a bytes object.
Added some commented-out debug output at the end of the identifier loop.
"""
def getTheIdentifiers(vbaProjectData):
    """Walk the VBA project data and collect the identifier strings it stores.

    The function validates the magic word, reads the version word, and then
    advances a running ``offset`` past a series of variable-length structures
    until it reaches the identifier area. Every non-empty identifier found
    there is decoded with ``decode`` and appended to the result.

    Args:
        vbaProjectData: The stream contents as a bytes object.

    Returns:
        A list of decoded identifier strings. The list is empty when the
        magic word is not 0x61CC. If any exception is raised while parsing,
        the error is printed to stderr and the identifiers gathered up to
        that point are returned.
    """
    identifiers = []
    try:
        # The data must begin with the little-endian magic word 0x61CC;
        # otherwise nothing is parsed and the empty list is returned.
        magic = getWord(vbaProjectData, 0, '<')
        if magic != 0x61CC:
            return identifiers
        version = getWord(vbaProjectData, 2, '<')
        # Version-dependent flags that decide how lengths are interpreted
        # when skipping references and module names below.
        unicodeRef = (version >= 0x5B) and (not version in [0x60, 0x62, 0x63]) or (version == 0x4E)
        unicodeName = (version >= 0x59) and (not version in [0x60, 0x62, 0x63]) or (version == 0x4E)
        # NOTE(review): the chained comparison `0x5F > version > 0x6B` can
        # never be true (it needs version < 0x5F and version > 0x6B at once),
        # so only the first clause has any effect - confirm the intended range.
        nonUnicodeName = ((version <= 0x59) and (version != 0x4E)) or (0x5F > version > 0x6B)
        # The word read at offset 5 selects the byte order used for every
        # subsequent read: 0x000E means big-endian, anything else little-endian.
        word = getWord(vbaProjectData, 5, '<')
        if word == 0x000E:
            endian = '>'
        else:
            endian = '<'
        offset = 0x1E
        offset, numRefs = getVar(vbaProjectData, offset, endian, False)
        offset += 2
        # Skip numRefs variable-length records (presumably the project
        # references, judging by the variable names - TODO confirm).
        for _ in itertools.repeat(None, numRefs):
            offset, refLength = getVar(vbaProjectData, offset, endian, False)
            if refLength == 0:
                offset += 6
            else:
                if ((unicodeRef and (refLength < 5)) or ((not unicodeRef) and (refLength < 3))):
                    # Too short to contain the marker byte inspected below.
                    offset += refLength
                else:
                    # Pick the marker byte: index 4 when unicodeRef is set,
                    # index 2 otherwise.
                    if unicodeRef:
                        c = vbaProjectData[offset + 4]
                    else:
                        c = vbaProjectData[offset + 2]
                    offset += refLength
                    # `ord` here is this module's own override, which returns
                    # its argument unchanged, so `c` must already be an int
                    # (as produced by indexing a bytes object).
                    if chr(ord(c)) in ['C', 'D']:
                        offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
            offset += 10
            offset, word = getVar(vbaProjectData, offset, endian, False)
            if word:
                offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
                offset, wLength = getVar(vbaProjectData, offset, endian, False)
                if wLength:
                    offset += 2
                offset += wLength + 30
        # Number of entries in the class/user forms table
        offset = skipStructure(vbaProjectData, offset, endian, False, 2, False)
        # Number of compile-time identifier-value pairs
        offset = skipStructure(vbaProjectData, offset, endian, False, 4, False)
        offset += 2
        # Typeinfo typeID
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        # Project description
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        # Project help file name
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        offset += 0x64
        # Skip the module descriptors
        offset, numProjects = getVar(vbaProjectData, offset, endian, False)
        for _ in itertools.repeat(None, numProjects):
            offset, wLength = getVar(vbaProjectData, offset, endian, False)
            # Code module name
            if unicodeName:
                offset += wLength
            if nonUnicodeName:
                # When the first length is non-zero, a second length word
                # follows and replaces it before the name is skipped.
                if wLength:
                    offset, wLength = getVar(vbaProjectData, offset, endian, False)
                offset += wLength
            # Stream time
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            # The value read here is discarded; only the offset advance matters.
            offset, _ = getVar(vbaProjectData, offset, endian, False)
            if version >= 0x6B:
                offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            offset += 2
            if version != 0x51:
                offset += 4
            offset = skipStructure(vbaProjectData, offset, endian, False, 8, False)
            offset += 11
        offset += 6
        # This is the only structure whose length field is read as a dword.
        offset = skipStructure(vbaProjectData, offset, endian, True, 1, False)
        offset += 6
        # Three consecutive words determine how many entries are junk and
        # how many are real identifiers.
        offset, w0 = getVar(vbaProjectData, offset, endian, False)
        offset, numIDs = getVar(vbaProjectData, offset, endian, False)
        offset, w1 = getVar(vbaProjectData, offset, endian, False)
        offset += 4
        numJunkIDs = numIDs + w1 - w0
        numIDs = w0 - w1
        # Skip the junk IDs
        for _ in itertools.repeat(None, numJunkIDs):
            offset += 4
            idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
            offset += 2
            # Entries with the high bit of the type set carry 6 extra bytes.
            if idType > 0x7F:
                offset += 6
            offset += idLength
        # Now offset points to the start of the variable names area
        # `i` and `start_offset` are only used by the commented-out debug
        # print at the bottom of the loop.
        i = 0
        for _ in itertools.repeat(None, numIDs):
            i += 1
            start_offset = offset
            isKwd = False
            ident = ''
            idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
            offset += 2
            # A zero type/length pair is followed by a second header that is
            # read in its place; such entries are flagged via isKwd and do
            # not get the 4 trailing bytes skipped below.
            if (idLength == 0) and (idType == 0):
                offset += 2
                idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
                offset += 2
                isKwd = True
            if idType & 0x80:
                offset += 6
            # Only non-empty names are decoded and recorded.
            if idLength:
                ident = decode(vbaProjectData[offset:offset + idLength])
                identifiers.append(ident)
                offset += idLength
            if not isKwd:
                offset += 4
            # end_offset = offset
            # print(f'[PCODEDMP][IDENTIFIERS] i = {i}: ident = {ident}; isKwd = {isKwd}; '
            # f'idType = {hex(idType)}; idLength = {hex(idLength)}; '
            # f'start_offset = {hex(start_offset)}; end_offset = {hex(end_offset)}.\n'
            # f'{hexdump(vbaProjectData[start_offset:end_offset])}')
    except Exception as e:
        # Best-effort parsing: report the problem and fall through to return
        # whatever identifiers were collected before the failure.
        print('[PCODEDMP] Error: {}.'.format(e), file=sys.stderr)
    return identifiers
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def getWord(buffer, offset, endian):
    """Read one unsigned 16-bit integer from ``buffer``.

    Args:
        buffer: Bytes-like object to read from.
        offset: Byte position at which the value starts.
        endian: struct byte-order prefix, e.g. '<' or '>'.

    Returns:
        The decoded integer.
    """
    fmt = endian + 'H'
    (value,) = unpack_from(fmt, buffer, offset)
    return value
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def getVar(buffer, offset, endian, isDWord):
    """Read a word or a dword and report where the next field begins.

    Args:
        buffer: Bytes-like object to read from.
        offset: Byte position at which the value starts.
        endian: struct byte-order prefix, e.g. '<' or '>'.
        isDWord: When truthy a 4-byte value is read via ``getDWord``;
            otherwise a 2-byte value is read via ``getWord``.

    Returns:
        A ``(new_offset, value)`` tuple, where ``new_offset`` is ``offset``
        advanced by the size of the value that was read.
    """
    reader, width = (getDWord, 4) if isDWord else (getWord, 2)
    value = reader(buffer, offset, endian)
    return offset + width, value
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def getDWord(buffer, offset, endian):
    """Read one unsigned 32-bit integer from ``buffer``.

    Args:
        buffer: Bytes-like object to read from.
        offset: Byte position at which the value starts.
        endian: struct byte-order prefix, e.g. '<' or '>'.

    Returns:
        The decoded integer.
    """
    fmt = endian + 'L'
    (value,) = unpack_from(fmt, buffer, offset)
    return value
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def skipStructure(buffer, offset, endian, isLengthDW, elementSize, checkForMinusOne):
    """Return the offset just past a length-prefixed structure.

    The structure starts with an element count (a dword when ``isLengthDW``
    is truthy, otherwise a word) followed by that many elements of
    ``elementSize`` bytes each.

    Args:
        buffer: Bytes-like object to read from.
        offset: Byte position of the length field.
        endian: struct byte-order prefix, e.g. '<' or '>'.
        isLengthDW: Selects a 4-byte (truthy) or 2-byte (falsy) length field.
        elementSize: Size in bytes of a single element.
        checkForMinusOne: When truthy, an all-ones length (0xFFFF or
            0xFFFFFFFF) means "no payload" and only the length field is
            skipped.

    Returns:
        The offset of the first byte after the structure.
    """
    if isLengthDW:
        read_length, field_width, minus_one = getDWord, 4, 0xFFFFFFFF
    else:
        read_length, field_width, minus_one = getWord, 2, 0xFFFF
    count = read_length(buffer, offset, endian)
    payload_start = offset + field_width
    if checkForMinusOne and count == minus_one:
        return payload_start
    return payload_start + count * elementSize
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def getTypeAndLength(buffer, offset, endian):
    """Read the two-byte (type, length) header found at ``offset``.

    The bytes are indexed directly instead of going through ``ord``:
    indexing a bytes-like object on Python 3 already yields integers, so the
    function no longer depends on the built-in ``ord`` being overridden at
    module level.

    Args:
        buffer: Bytes-like object to read from.
        offset: Byte position of the header.
        endian: '>' means the type byte comes first; any other value is
            treated as little-endian, where the length byte comes first.

    Returns:
        A ``(type, length)`` tuple of the two byte values.

    Raises:
        IndexError: If ``offset`` or ``offset + 1`` is outside ``buffer``.
    """
    first, second = buffer[offset], buffer[offset + 1]
    if endian == '>':
        return first, second
    return second, first
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
codec = 'latin1'  # Assume 'latin1' unless redefined by the 'dir' stream


def decode(x):
    """Turn raw bytes into text using the module-level ``codec``.

    The codec is looked up at call time, so rebinding ``codec`` changes the
    result of later calls. Bytes that cannot be decoded are replaced rather
    than raising an error.

    Args:
        x: The bytes to decode.

    Returns:
        The decoded string.
    """
    return x.decode(encoding=codec, errors='replace')
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
Changed xrange -> range
"""
def hexdump(buffer, length=16):
    """Format ``buffer`` as a hex dump with ``length`` bytes per row.

    Each row holds the row's starting offset as eight upper-case hex digits,
    the bytes as space-separated two-digit hex values (padded to the width
    of a full row), and the bytes rendered as text, with every byte outside
    the printable ASCII range 32..126 shown as '.'.

    Byte values are used directly as integers, so the function does not
    depend on the built-in ``ord`` being overridden at module level, and the
    rows are assembled with a single ``join`` instead of repeated string
    concatenation.

    Args:
        buffer: Bytes-like object to dump.
        length: Number of bytes shown per row.

    Returns:
        The dump as one string, each row terminated by a newline; an empty
        string for an empty buffer.

    Raises:
        ValueError: If ``length`` is 0 (raised by ``range``).
    """
    hex_width = length * 3 - 1
    rows = []
    for offset in range(0, len(buffer), length):
        data = buffer[offset:offset + length]
        hex_part = ' '.join('{:02X}'.format(byte) for byte in data)
        text_part = ''.join(chr(byte) if 31 < byte < 127 else '.' for byte in data)
        rows.append('{:08X} {:{}} {}\n'.format(offset, hex_part, hex_width, text_part))
    return ''.join(rows)
"""
Code from https://github.com/bontchev/pcodedmp
LICENSE: GNU General Public License v3.0
"""
def ord(x):
    """Return ``x`` unchanged.

    This definition shadows the built-in ``ord`` for the whole module. Code
    in this file calls ``ord`` on values obtained by indexing or iterating a
    bytes object; on Python 3 those values are already integers, so no
    conversion is performed here.
    """
    return x