#!/usr/bin/env python

from construct import *
from cStringIO import StringIO

# For each metatype, which attributes are references
# to another type.
# Keys are leaf type names as produced by the leaf_type Enum; values are
# the attribute names that resolve_typerefs/merge_fwdrefs look up on a
# parsed leaf and replace with the referenced type.
type_refs = {
    "LF_ARGLIST": ["arg_type"],
    "LF_ARRAY": ["element_type", "index_type"],
    "LF_ARRAY_ST": ["element_type", "index_type"],
    "LF_BITFIELD": ["base_type"],
    "LF_CLASS": ["fieldlist", "derived", "vshape"],
    "LF_ENUM": ["utype", "fieldlist"],
    "LF_FIELDLIST": [],
    "LF_MFUNCTION": ["return_type", "class_type", "this_type", "arglist"],
    "LF_MODIFIER": ["modified_type"],
    "LF_POINTER": ["utype"],
    "LF_PROCEDURE": ["return_type", "arglist"],
    "LF_STRUCTURE": ["fieldlist", "derived", "vshape"],
    "LF_STRUCTURE_ST": ["fieldlist", "derived", "vshape"],
    "LF_UNION": ["fieldlist"],
    "LF_UNION_ST": ["fieldlist"],
    "LF_VTSHAPE": [],

    # TODO: Unparsed
    "LF_METHODLIST": [],

    # FIELDLIST substructures
    "LF_BCLASS": ["index"],
    "LF_ENUMERATE": [],
    "LF_MEMBER": ["index"],
    "LF_MEMBER_ST": ["index"],
    "LF_METHOD": ["mlist"],
    "LF_NESTTYPE": ["index"],
    "LF_ONEMETHOD": ["index"],
    "LF_VFUNCTAB": ["type"],
}

### Enums for base and leaf types
# Note: python only supports a max of 255 arguments to
# a function, so we have to put it into a dict and then
# call the function with the ** operator
#
# Layout of the table below: most families list a base value followed by
# the P / PF / PH / 32P / 32PF / 64P variants, whose values are the base
# value plus 0x100, 0x200, 0x300, 0x400, 0x500 and 0x600 respectively.
base_types = {
    'T_NOTYPE': 0x00000000,
    'T_ABS': 0x00000001,
    'T_SEGMENT': 0x00000002,
    'T_VOID': 0x00000003,

    'T_HRESULT': 0x00000008,
    'T_32PHRESULT': 0x00000408,
    'T_64PHRESULT': 0x00000608,

    'T_PVOID': 0x00000103,
    'T_PFVOID': 0x00000203,
    'T_PHVOID': 0x00000303,
    'T_32PVOID': 0x00000403,
    'T_32PFVOID': 0x00000503,
    'T_64PVOID': 0x00000603,

    'T_CURRENCY': 0x00000004,
    'T_NBASICSTR': 0x00000005,
    'T_FBASICSTR': 0x00000006,
    'T_NOTTRANS': 0x00000007,
    'T_BIT': 0x00000060,
    'T_PASCHAR': 0x00000061,

    'T_CHAR': 0x00000010,
    'T_PCHAR': 0x00000110,
    'T_PFCHAR': 0x00000210,
    'T_PHCHAR': 0x00000310,
    'T_32PCHAR': 0x00000410,
    'T_32PFCHAR': 0x00000510,
    'T_64PCHAR': 0x00000610,

    'T_UCHAR': 0x00000020,
    'T_PUCHAR': 0x00000120,
    'T_PFUCHAR': 0x00000220,
    'T_PHUCHAR': 0x00000320,
    'T_32PUCHAR': 0x00000420,
    'T_32PFUCHAR': 0x00000520,
    'T_64PUCHAR': 0x00000620,

    'T_RCHAR': 0x00000070,
    'T_PRCHAR': 0x00000170,
    'T_PFRCHAR': 0x00000270,
    'T_PHRCHAR': 0x00000370,
    'T_32PRCHAR': 0x00000470,
    'T_32PFRCHAR': 0x00000570,
    'T_64PRCHAR': 0x00000670,

    'T_WCHAR': 0x00000071,
    'T_PWCHAR': 0x00000171,
    'T_PFWCHAR': 0x00000271,
    'T_PHWCHAR': 0x00000371,
    'T_32PWCHAR': 0x00000471,
    'T_32PFWCHAR': 0x00000571,
    'T_64PWCHAR': 0x00000671,

    'T_INT1': 0x00000068,
    'T_PINT1': 0x00000168,
    'T_PFINT1': 0x00000268,
    'T_PHINT1': 0x00000368,
    'T_32PINT1': 0x00000468,
    'T_32PFINT1': 0x00000568,
    'T_64PINT1': 0x00000668,

    'T_UINT1': 0x00000069,
    'T_PUINT1': 0x00000169,
    'T_PFUINT1': 0x00000269,
    'T_PHUINT1': 0x00000369,
    'T_32PUINT1': 0x00000469,
    'T_32PFUINT1': 0x00000569,
    'T_64PUINT1': 0x00000669,

    'T_SHORT': 0x00000011,
    'T_PSHORT': 0x00000111,
    'T_PFSHORT': 0x00000211,
    'T_PHSHORT': 0x00000311,
    'T_32PSHORT': 0x00000411,
    'T_32PFSHORT': 0x00000511,
    'T_64PSHORT': 0x00000611,

    'T_USHORT': 0x00000021,
    'T_PUSHORT': 0x00000121,
    'T_PFUSHORT': 0x00000221,
    'T_PHUSHORT': 0x00000321,
    'T_32PUSHORT': 0x00000421,
    'T_32PFUSHORT': 0x00000521,
    'T_64PUSHORT': 0x00000621,

    'T_INT2': 0x00000072,
    'T_PINT2': 0x00000172,
    'T_PFINT2': 0x00000272,
    'T_PHINT2': 0x00000372,
    'T_32PINT2': 0x00000472,
    'T_32PFINT2': 0x00000572,
    'T_64PINT2': 0x00000672,

    'T_UINT2': 0x00000073,
    'T_PUINT2': 0x00000173,
    'T_PFUINT2': 0x00000273,
    'T_PHUINT2': 0x00000373,
    'T_32PUINT2': 0x00000473,
    'T_32PFUINT2': 0x00000573,
    'T_64PUINT2': 0x00000673,

    'T_LONG': 0x00000012,
    'T_PLONG': 0x00000112,
    'T_PFLONG': 0x00000212,
    'T_PHLONG': 0x00000312,
    'T_32PLONG': 0x00000412,
    'T_32PFLONG': 0x00000512,
    'T_64PLONG': 0x00000612,

    'T_ULONG': 0x00000022,
    'T_PULONG': 0x00000122,
    'T_PFULONG': 0x00000222,
    'T_PHULONG': 0x00000322,
    'T_32PULONG': 0x00000422,
    'T_32PFULONG': 0x00000522,
    'T_64PULONG': 0x00000622,

    'T_INT4': 0x00000074,
    'T_PINT4': 0x00000174,
    'T_PFINT4': 0x00000274,
    'T_PHINT4': 0x00000374,
    'T_32PINT4': 0x00000474,
    'T_32PFINT4': 0x00000574,
    'T_64PINT4': 0x00000674,

    'T_UINT4': 0x00000075,
    'T_PUINT4': 0x00000175,
    'T_PFUINT4': 0x00000275,
    'T_PHUINT4': 0x00000375,
    'T_32PUINT4': 0x00000475,
    'T_32PFUINT4': 0x00000575,
    'T_64PUINT4': 0x00000675,

    'T_QUAD': 0x00000013,
    'T_PQUAD': 0x00000113,
    'T_PFQUAD': 0x00000213,
    'T_PHQUAD': 0x00000313,
    'T_32PQUAD': 0x00000413,
    'T_32PFQUAD': 0x00000513,
    'T_64PQUAD': 0x00000613,

    'T_UQUAD': 0x00000023,
    'T_PUQUAD': 0x00000123,
    'T_PFUQUAD': 0x00000223,
    'T_PHUQUAD': 0x00000323,
    'T_32PUQUAD': 0x00000423,
    'T_32PFUQUAD': 0x00000523,
    'T_64PUQUAD': 0x00000623,

    'T_INT8': 0x00000076,
    'T_PINT8': 0x00000176,
    'T_PFINT8': 0x00000276,
    'T_PHINT8': 0x00000376,
    'T_32PINT8': 0x00000476,
    'T_32PFINT8': 0x00000576,
    'T_64PINT8': 0x00000676,

    'T_UINT8': 0x00000077,
    'T_PUINT8': 0x00000177,
    'T_PFUINT8': 0x00000277,
    'T_PHUINT8': 0x00000377,
    'T_32PUINT8': 0x00000477,
    'T_32PFUINT8': 0x00000577,
    'T_64PUINT8': 0x00000677,

    'T_OCT': 0x00000014,
    'T_POCT': 0x00000114,
    'T_PFOCT': 0x00000214,
    'T_PHOCT': 0x00000314,
    'T_32POCT': 0x00000414,
    'T_32PFOCT': 0x00000514,
    'T_64POCT': 0x00000614,

    'T_UOCT': 0x00000024,
    'T_PUOCT': 0x00000124,
    'T_PFUOCT': 0x00000224,
    'T_PHUOCT': 0x00000324,
    'T_32PUOCT': 0x00000424,
    'T_32PFUOCT': 0x00000524,
    'T_64PUOCT': 0x00000624,

    'T_INT16': 0x00000078,
    'T_PINT16': 0x00000178,
    'T_PFINT16': 0x00000278,
    'T_PHINT16': 0x00000378,
    'T_32PINT16': 0x00000478,
    'T_32PFINT16': 0x00000578,
    'T_64PINT16': 0x00000678,

    'T_UINT16': 0x00000079,
    'T_PUINT16': 0x00000179,
    'T_PFUINT16': 0x00000279,
    'T_PHUINT16': 0x00000379,
    'T_32PUINT16': 0x00000479,
    'T_32PFUINT16': 0x00000579,
    'T_64PUINT16': 0x00000679,

    'T_REAL32': 0x00000040,
    'T_PREAL32': 0x00000140,
    'T_PFREAL32': 0x00000240,
    'T_PHREAL32': 0x00000340,
    'T_32PREAL32': 0x00000440,
    'T_32PFREAL32': 0x00000540,
    'T_64PREAL32': 0x00000640,

    'T_REAL48': 0x00000044,
    'T_PREAL48': 0x00000144,
    'T_PFREAL48': 0x00000244,
    'T_PHREAL48': 0x00000344,
    'T_32PREAL48': 0x00000444,
    'T_32PFREAL48': 0x00000544,
    'T_64PREAL48': 0x00000644,

    'T_REAL64': 0x00000041,
    'T_PREAL64': 0x00000141,
    'T_PFREAL64': 0x00000241,
    'T_PHREAL64': 0x00000341,
    'T_32PREAL64': 0x00000441,
    'T_32PFREAL64': 0x00000541,
    'T_64PREAL64': 0x00000641,

    'T_REAL80': 0x00000042,
    'T_PREAL80': 0x00000142,
    'T_PFREAL80': 0x00000242,
    'T_PHREAL80': 0x00000342,
    'T_32PREAL80': 0x00000442,
    'T_32PFREAL80': 0x00000542,
    'T_64PREAL80': 0x00000642,

    'T_REAL128': 0x00000043,
    'T_PREAL128': 0x00000143,
    'T_PFREAL128': 0x00000243,
    'T_PHREAL128': 0x00000343,
    'T_32PREAL128': 0x00000443,
    'T_32PFREAL128': 0x00000543,
    'T_64PREAL128': 0x00000643,

    'T_CPLX32': 0x00000050,
    'T_PCPLX32': 0x00000150,
    'T_PFCPLX32': 0x00000250,
    'T_PHCPLX32': 0x00000350,
    'T_32PCPLX32': 0x00000450,
    'T_32PFCPLX32': 0x00000550,
    'T_64PCPLX32': 0x00000650,

    'T_CPLX64': 0x00000051,
    'T_PCPLX64': 0x00000151,
    'T_PFCPLX64': 0x00000251,
    'T_PHCPLX64': 0x00000351,
    'T_32PCPLX64': 0x00000451,
    'T_32PFCPLX64': 0x00000551,
    'T_64PCPLX64': 0x00000651,

    'T_CPLX80': 0x00000052,
    'T_PCPLX80': 0x00000152,
    'T_PFCPLX80': 0x00000252,
    'T_PHCPLX80': 0x00000352,
    'T_32PCPLX80': 0x00000452,
    'T_32PFCPLX80': 0x00000552,
    'T_64PCPLX80': 0x00000652,

    'T_CPLX128': 0x00000053,
    'T_PCPLX128': 0x00000153,
    'T_PFCPLX128': 0x00000253,
    'T_PHCPLX128': 0x00000353,
    'T_32PCPLX128': 0x00000453,
    'T_32PFCPLX128': 0x00000553,
    'T_64PCPLX128': 0x00000653,

    'T_BOOL08': 0x00000030,
    'T_PBOOL08': 0x00000130,
    'T_PFBOOL08': 0x00000230,
    'T_PHBOOL08': 0x00000330,
    'T_32PBOOL08': 0x00000430,
    'T_32PFBOOL08': 0x00000530,
    'T_64PBOOL08': 0x00000630,

    'T_BOOL16': 0x00000031,
    'T_PBOOL16': 0x00000131,
    'T_PFBOOL16': 0x00000231,
    'T_PHBOOL16': 0x00000331,
    'T_32PBOOL16': 0x00000431,
    'T_32PFBOOL16': 0x00000531,
    'T_64PBOOL16': 0x00000631,

    'T_BOOL32': 0x00000032,
    'T_PBOOL32': 0x00000132,
    'T_PFBOOL32': 0x00000232,
    'T_PHBOOL32': 0x00000332,
    'T_32PBOOL32': 0x00000432,
    'T_32PFBOOL32': 0x00000532,
    'T_64PBOOL32': 0x00000632,

    'T_BOOL64': 0x00000033,
    'T_PBOOL64': 0x00000133,
    'T_PFBOOL64': 0x00000233,
    'T_PHBOOL64': 0x00000333,
    'T_32PBOOL64': 0x00000433,
    'T_32PFBOOL64': 0x00000533,
    'T_64PBOOL64': 0x00000633,

    'T_NCVPTR': 0x000001F0,
    'T_FCVPTR': 0x000002F0,
    'T_HCVPTR': 0x000003F0,
    'T_32NCVPTR': 0x000004F0,
    'T_32FCVPTR': 0x000005F0,
    'T_64NCVPTR': 0x000006F0,
}

# Enum over a 16-bit little-endian field; resolve_typerefs uses its
# _decode method to turn numeric base type indices into names.
base_type = Enum(ULInt16("base_type"), **base_types)

# Fewer than 255 values so we're ok here
leaf_type = Enum(ULInt16("leaf_type"),
    LF_MODIFIER_16t = 0x00000001,
    LF_POINTER_16t = 0x00000002,
    LF_ARRAY_16t = 0x00000003,
    LF_CLASS_16t = 0x00000004,
    LF_STRUCTURE_16t = 0x00000005,
    LF_UNION_16t = 0x00000006,
    LF_ENUM_16t = 0x00000007,
    LF_PROCEDURE_16t = 0x00000008,
    LF_MFUNCTION_16t = 0x00000009,
    LF_VTSHAPE = 0x0000000A,
    LF_COBOL0_16t = 0x0000000B,
    LF_COBOL1 = 0x0000000C,
    LF_BARRAY_16t = 0x0000000D,
    LF_LABEL = 0x0000000E,
    LF_NULL = 0x0000000F,
    LF_NOTTRAN = 0x00000010,
    LF_DIMARRAY_16t = 0x00000011,
    LF_VFTPATH_16t = 0x00000012,
    LF_PRECOMP_16t = 0x00000013,
    LF_ENDPRECOMP = 0x00000014,
    LF_OEM_16t = 0x00000015,
    LF_TYPESERVER_ST = 0x00000016,

    LF_SKIP_16t = 0x00000200,
    LF_ARGLIST_16t = 0x00000201,
    LF_DEFARG_16t = 0x00000202,
    LF_LIST = 0x00000203,
    LF_FIELDLIST_16t = 0x00000204,
    LF_DERIVED_16t = 0x00000205,
    LF_BITFIELD_16t = 0x00000206,
    LF_METHODLIST_16t = 0x00000207,
    LF_DIMCONU_16t = 0x00000208,
    LF_DIMCONLU_16t = 0x00000209,
    LF_DIMVARU_16t = 0x0000020A,
    LF_DIMVARLU_16t = 0x0000020B,
    LF_REFSYM = 0x0000020C,

    LF_BCLASS_16t = 0x00000400,
    LF_VBCLASS_16t = 0x00000401,
    LF_IVBCLASS_16t = 0x00000402,
    LF_ENUMERATE_ST = 0x00000403,
    LF_FRIENDFCN_16t = 0x00000404,
    LF_INDEX_16t = 0x00000405,
    LF_MEMBER_16t = 0x00000406,
    LF_STMEMBER_16t = 0x00000407,
    LF_METHOD_16t = 0x00000408,
    LF_NESTTYPE_16t = 0x00000409,
    LF_VFUNCTAB_16t = 0x0000040A,
    LF_FRIENDCLS_16t = 0x0000040B,
    LF_ONEMETHOD_16t = 0x0000040C,
    LF_VFUNCOFF_16t = 0x0000040D,

    LF_TI16_MAX = 0x00001000,
    LF_MODIFIER = 0x00001001,
    LF_POINTER = 0x00001002,
    LF_ARRAY_ST = 0x00001003,
    LF_CLASS_ST = 0x00001004,
    LF_STRUCTURE_ST = 0x00001005,
    LF_UNION_ST = 0x00001006,
    LF_ENUM_ST = 0x00001007,
    LF_PROCEDURE = 0x00001008,
    LF_MFUNCTION = 0x00001009,
    LF_COBOL0 = 0x0000100A,
    LF_BARRAY = 0x0000100B,
    LF_DIMARRAY_ST = 0x0000100C,
    LF_VFTPATH = 0x0000100D,
    LF_PRECOMP_ST = 0x0000100E,
    LF_OEM = 0x0000100F,
    LF_ALIAS_ST = 0x00001010,
    LF_OEM2 = 0x00001011,

    LF_SKIP = 0x00001200,
    LF_ARGLIST = 0x00001201,
    LF_DEFARG_ST = 0x00001202,
    LF_FIELDLIST = 0x00001203,
    LF_DERIVED = 0x00001204,
    LF_BITFIELD = 0x00001205,
    LF_METHODLIST = 0x00001206,
    LF_DIMCONU = 0x00001207,
    LF_DIMCONLU = 0x00001208,
    LF_DIMVARU = 0x00001209,
    LF_DIMVARLU = 0x0000120A,

    LF_BCLASS = 0x00001400,
    LF_VBCLASS = 0x00001401,
    LF_IVBCLASS = 0x00001402,
    LF_FRIENDFCN_ST = 0x00001403,
    LF_INDEX = 0x00001404,
    LF_MEMBER_ST = 0x00001405,
    LF_STMEMBER_ST = 0x00001406,
    LF_METHOD_ST = 0x00001407,
    LF_NESTTYPE_ST = 0x00001408,
    LF_VFUNCTAB = 0x00001409,
    LF_FRIENDCLS = 0x0000140A,
    LF_ONEMETHOD_ST = 0x0000140B,
    LF_VFUNCOFF = 0x0000140C,
    LF_NESTTYPEEX_ST = 0x0000140D,
    LF_MEMBERMODIFY_ST = 0x0000140E,
    LF_MANAGED_ST = 0x0000140F,

    LF_ST_MAX = 0x00001500,
    LF_TYPESERVER = 0x00001501,
    LF_ENUMERATE = 0x00001502,
    LF_ARRAY = 0x00001503,
    LF_CLASS = 0x00001504,
    LF_STRUCTURE = 0x00001505,
    LF_UNION = 0x00001506,
    LF_ENUM = 0x00001507,
    LF_DIMARRAY = 0x00001508,
    LF_PRECOMP = 0x00001509,
    LF_ALIAS = 0x0000150A,
    LF_DEFARG = 0x0000150B,
    LF_FRIENDFCN = 0x0000150C,
    LF_MEMBER = 0x0000150D,
    LF_STMEMBER = 0x0000150E,
    LF_METHOD = 0x0000150F,
    LF_NESTTYPE = 0x00001510,
    LF_ONEMETHOD = 0x00001511,
    LF_NESTTYPEEX = 0x00001512,
    LF_MEMBERMODIFY = 0x00001513,
    LF_MANAGED = 0x00001514,
    LF_TYPESERVER2 = 0x00001515,

    # Values from LF_CHAR upward are what val() treats as "a typed
    # numeric value follows" rather than an inline 16-bit value.
    LF_CHAR = 0x00008000,
    LF_SHORT = 0x00008001,
    LF_USHORT = 0x00008002,
    LF_LONG = 0x00008003,
    LF_ULONG = 0x00008004,
    LF_REAL32 = 0x00008005,
    LF_REAL64 = 0x00008006,
    LF_REAL80 = 0x00008007,
    LF_REAL128 = 0x00008008,
    LF_QUADWORD = 0x00008009,
    LF_UQUADWORD = 0x0000800A,
    LF_REAL48 = 0x0000800B,
    LF_COMPLEX32 = 0x0000800C,
    LF_COMPLEX64 = 0x0000800D,
    LF_COMPLEX80 = 0x0000800E,
    LF_COMPLEX128 = 0x0000800F,
    LF_VARSTRING = 0x00008010,

    LF_OCTWORD = 0x00008017,
    LF_UOCTWORD = 0x00008018,
    LF_DECIMAL = 0x00008019,
    LF_DATE = 0x0000801A,
    LF_UTF8STRING = 0x0000801B,

    LF_PAD0 = 0x000000F0,
    LF_PAD1 = 0x000000F1,
    LF_PAD2 = 0x000000F2,
    LF_PAD3 = 0x000000F3,
    LF_PAD4 = 0x000000F4,
    LF_PAD5 = 0x000000F5,
    LF_PAD6 = 0x000000F6,
    LF_PAD7 = 0x000000F7,
    LF_PAD8 = 0x000000F8,
    LF_PAD9 = 0x000000F9,
    LF_PAD10 = 0x000000FA,
    LF_PAD11 = 0x000000FB,
    LF_PAD12 = 0x000000FC,
    LF_PAD13 = 0x000000FD,
    LF_PAD14 = 0x000000FE,
    LF_PAD15 = 0x000000FF
)

### CodeView bitfields and enums
# NOTE: Construct assumes big-endian
# ordering for BitStructs

# 16-bit field attribute record; total width is 1+1+1+3+2+7+1 bits.
CV_fldattr = BitStruct("fldattr",
    Flag("noconstruct"),
    Flag("noinherit"),
    Flag("pseudo"),
    Enum(BitField("mprop", 3),
        MTvanilla = 0x00,
        MTvirtual = 0x01,
        MTstatic = 0x02,
        MTfriend = 0x03,
        MTintro = 0x04,
        MTpurevirt = 0x05,
        MTpureintro = 0x06,
        _default_ = Pass,
    ),
    Enum(BitField("access", 2),
        private = 1,
        protected = 2,
        public = 3,
        _default_ = Pass,
    ),
    Padding(7),
    Flag("compgenx"),
)

# One-byte calling convention code; unknown values fall through via
# _default_ = Pass instead of failing the parse.
CV_call = Enum(ULInt8("call_conv"),
    NEAR_C = 0x00000000,
    FAR_C = 0x00000001,
    NEAR_PASCAL = 0x00000002,
    FAR_PASCAL = 0x00000003,
    NEAR_FAST = 0x00000004,
    FAR_FAST = 0x00000005,
    SKIPPED = 0x00000006,
    NEAR_STD = 0x00000007,
    FAR_STD = 0x00000008,
    NEAR_SYS = 0x00000009,
    FAR_SYS = 0x0000000A,
    THISCALL = 0x0000000B,
    MIPSCALL = 0x0000000C,
    GENERIC = 0x0000000D,
    ALPHACALL = 0x0000000E,
    PPCCALL = 0x0000000F,
    SHCALL = 0x00000010,
    ARMCALL = 0x00000011,
    AM33CALL = 0x00000012,
    TRICALL = 0x00000013,
    SH5CALL = 0x00000014,
    M32RCALL = 0x00000015,
    RESERVED = 0x00000016,
    _default_ = Pass,
)

# 16-bit property record (8 flags + 7 reserved bits + 1 flag).
# parse_stream reads prop.fwdref to detect forward references.
CV_property = BitStruct("prop",
    Flag("fwdref"),
    Flag("opcast"),
    Flag("opassign"),
    Flag("cnested"),
    Flag("isnested"),
    Flag("ovlops"),
    Flag("ctor"),
    Flag("packed"),
    BitField("reserved", 7, swapped=True),
    Flag("scoped"),
)

def val(name):
    """Build the construct for a numeric value followed by a name.

    The returned Struct is always called "value" and contains:

    _value_name:   the `name` argument, recorded so that fix_value can
                   later store the number under that attribute name
    value_or_type: a 16-bit little-endian integer
    name_or_val:   if value_or_type is below the encoded LF_CHAR value,
                   a CString (value_or_type itself is then the number);
                   otherwise value_or_type is decoded as a leaf type and
                   the matching struct (typed `value` followed by a
                   CString `name`) is parsed

    Only LF_CHAR, LF_SHORT, LF_USHORT, LF_LONG and LF_ULONG are handled;
    the inner Switch has no default case.
    """
    return Struct("value",
        Value("_value_name", lambda ctx: name),
        ULInt16("value_or_type"),
        IfThenElse("name_or_val", lambda ctx: ctx.value_or_type < leaf_type._encode("LF_CHAR",ctx),
            CString("name"),
            Switch("val", lambda ctx: leaf_type._decode(ctx.value_or_type, {}),
                {
                    "LF_CHAR": Struct("char",
                        String("value", 1),
                        CString("name"),
                    ),
                    "LF_SHORT": Struct("short",
                        SLInt16("value"),
                        CString("name"),
                    ),
                    "LF_USHORT": Struct("ushort",
                        ULInt16("value"),
                        CString("name"),
                    ),
                    # NOTE(review): the next two structs are named "char"
                    # although they hold 32-bit values; fix_value only
                    # reads their .value/.name fields, so the struct name
                    # looks cosmetic -- confirm before renaming.
                    "LF_LONG": Struct("char",
                        SLInt32("value"),
                        CString("name"),
                    ),
                    "LF_ULONG": Struct("char",
                        ULInt32("value"),
                        CString("name"),
                    ),
                },
            ),
        ),
    )

# Trailing pad handling shared by the leaf structs. Each user first peeks
# one byte into "_pad"; when that byte is greater than 0xF0, its low
# nibble is used as the number of padding bytes to consume.
PadAlign = If(lambda ctx: ctx._pad > 0xF0,
    Optional(Padding(lambda ctx: ctx._pad & 0x0F))
)

### Leaf types

# One entry of an LF_FIELDLIST: a leaf type tag followed by the matching
# record. The Switch has no default, so only the leaf types listed here
# can be parsed as field list entries.
subStruct = Struct("substructs",
    leaf_type,
    Switch("type_info", lambda ctx: ctx.leaf_type,
        {
            "LF_MEMBER_ST": Struct("lfMemberST",
                CV_fldattr,
                ULInt32("index"),
                ULInt16("offset"),
                PascalString("name"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_MEMBER": Struct("lfMember",
                CV_fldattr,
                ULInt32("index"),
                val("offset"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_ENUMERATE": Struct("lfEnumerate",
                CV_fldattr,
                val("enum_value"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_BCLASS": Struct("lfBClass",
                CV_fldattr,
                ULInt32("index"),
                val("offset"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_VFUNCTAB": Struct("lfVFuncTab",
                Padding(2),
                ULInt32("type"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_ONEMETHOD": Struct("lfOneMethod",
                CV_fldattr,
                ULInt32("index"),
                # Records whose mprop is MTintro/MTpureintro carry an extra
                # 32-bit value before the string; all others only have
                # the string.
                Switch("intro", lambda ctx: ctx.fldattr.mprop,
                    {
                        "MTintro": Struct("value",
                            ULInt32("val"),
                            CString("str_data"),
                        ),
                        "MTpureintro": Struct("value",
                            ULInt32("val"),
                            CString("str_data"),
                        ),
                    },
                    default = CString("str_data"),
                ),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            "LF_METHOD": Struct("lfMethod",
                ULInt16("count"),
                ULInt32("mlist"),
                CString("name"),
                Peek(ULInt8("_pad")),
                PadAlign,
            ),
            # NOTE(review): unlike its siblings this record has no
            # Peek/PadAlign tail -- confirm whether that is intentional.
            "LF_NESTTYPE": Struct("lfNestType",
                Padding(2),
                ULInt32("index"),
                CString("name"),
            ),
        },
    ),
)

# A field list is parsed as zero or more subStruct entries; the results
# are stored under "substructs" (the name of subStruct).
lfFieldList = Struct("lfFieldList",
    OptionalGreedyRange(subStruct)
)

lfEnum = Struct("lfEnum",
    ULInt16("count"),
    CV_property,
    ULInt32("utype"),
    ULInt32("fieldlist"),
    CString("name"),
    Peek(ULInt8("_pad")),
    PadAlign,
)
lfBitfield = Struct("lfBitfield",
    ULInt32("base_type"),
    ULInt8("length"),
    ULInt8("position"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfStructureST = Struct("lfStructureST",
    ULInt16("count"),
    CV_property,
    ULInt32("fieldlist"),
    ULInt32("derived"),
    ULInt32("vshape"),
    ULInt16("size"),
    PascalString("name"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfStructure = Struct("lfStructure",
    ULInt16("count"),
    CV_property,
    ULInt32("fieldlist"),
    ULInt32("derived"),
    ULInt32("vshape"),
    val("size"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

# Classes share the structure layout; only the construct name differs.
lfClass = Rename("lfClass", lfStructure)

lfArray = Struct("lfArray",
    ULInt32("element_type"),
    ULInt32("index_type"),
    val("size"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfArrayST = Struct("lfArray",
    ULInt32("element_type"),
    ULInt32("index_type"),
    ULInt16("size"),
    PascalString("name"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfArgList = Struct("lfArgList",
    ULInt32("count"),
    Array(lambda ctx: ctx.count, ULInt32("arg_type")),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfProcedure = Struct("lfProcedure",
    ULInt32("return_type"),
    CV_call,
    ULInt8("reserved"),
    ULInt16("parm_count"),
    ULInt32("arglist"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfModifier = Struct("lfModifier",
    ULInt32("modified_type"),
    BitStruct("modifier",
        Padding(5),
        Flag("unaligned"),
        Flag("volatile"),
        Flag("const"),
        Padding(8),
    ),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfPointer = Struct("lfPointer",
    ULInt32("utype"),
    BitStruct("ptr_attr",
        Enum(BitField("mode", 3),
            PTR_MODE_PTR = 0x00000000,
            PTR_MODE_REF = 0x00000001,
            PTR_MODE_PMEM = 0x00000002,
            PTR_MODE_PMFUNC = 0x00000003,
            PTR_MODE_RESERVED = 0x00000004,
        ),
        Enum(BitField("type", 5),
            PTR_NEAR = 0x00000000,
            PTR_FAR = 0x00000001,
            PTR_HUGE = 0x00000002,
            PTR_BASE_SEG = 0x00000003,
            PTR_BASE_VAL = 0x00000004,
            PTR_BASE_SEGVAL = 0x00000005,
            PTR_BASE_ADDR = 0x00000006,
            PTR_BASE_SEGADDR = 0x00000007,
            PTR_BASE_TYPE = 0x00000008,
            PTR_BASE_SELF = 0x00000009,
            PTR_NEAR32 = 0x0000000A,
            PTR_FAR32 = 0x0000000B,
            PTR_64 = 0x0000000C,
            PTR_UNUSEDPTR = 0x0000000D,
        ),
        Padding(3),
        Flag("restrict"),
        Flag("unaligned"),
        Flag("const"),
        Flag("volatile"),
        Flag("flat32"),
        Padding(16),
    ),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfUnion = Struct("lfUnion",
    ULInt16("count"),
    CV_property,
    ULInt32("fieldlist"),
    val("size"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfUnionST = Struct("lfUnionST",
    ULInt16("count"),
    CV_property,
    ULInt32("fieldlist"),
    ULInt16("size"),
    PascalString("name"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfMFunc = Struct("lfMFunc",
    ULInt32("return_type"),
    ULInt32("class_type"),
    ULInt32("this_type"),
    CV_call,
    ULInt8("reserved"),
    ULInt16("parm_count"),
    ULInt32("arglist"),
    SLInt32("thisadjust"),
    Peek(ULInt8("_pad")),
    PadAlign,
)

lfVTShape = Struct("lfVTShape",
    ULInt16("count"),
    BitStruct("vt_descriptors",
        # One 4-bit descriptor per entry counted by the enclosing struct.
        Array(lambda ctx: ctx._.count,
            BitField("vt_descriptors", 4)
        ),
        # Needed to align to a byte boundary
        Padding(lambda ctx: (ctx._.count % 2) * 4),
    ),
    Peek(ULInt8("_pad")),
    PadAlign,
)

# A single type record: the leaf type tag followed by the matching record
# body. Leaf types that are not listed parse as Pass (no type_info).
# NOTE(review): the Debugger wrapper presumably drops into an interactive
# debugger when parsing fails -- confirm this is wanted outside of
# development.
Type = Debugger(Struct("type",
    leaf_type,
    Switch("type_info", lambda ctx: ctx.leaf_type,
        {
            "LF_ARGLIST": lfArgList,
            "LF_ARRAY": lfArray,
            "LF_ARRAY_ST": lfArrayST,
            "LF_BITFIELD": lfBitfield,
            "LF_CLASS": lfClass,
            "LF_ENUM": lfEnum,
            "LF_FIELDLIST": lfFieldList,
            "LF_MFUNCTION": lfMFunc,
            "LF_MODIFIER": lfModifier,
            "LF_POINTER": lfPointer,
            "LF_PROCEDURE": lfProcedure,
            "LF_STRUCTURE": lfStructure,
            "LF_STRUCTURE_ST": lfStructureST,
            "LF_UNION": lfUnion,
            "LF_UNION_ST": lfUnionST,
            "LF_VTSHAPE": lfVTShape,
        },
        default = Pass,
    ),
))

# A length-prefixed type record: `length` bytes are read as "type_data"
# and then parsed with Type.
Types = Struct("types",
    ULInt16("length"),
    Tunnel(
        String("type_data", lambda ctx: ctx.length),
        Type,
    ),
)

### Header structures
def OffCb(name):
    """Return a Struct called `name` holding two signed 32-bit fields,
    "off" and "cb"."""
    return Struct(name,
        SLInt32("off"),
        SLInt32("cb"),
    )

TPI = Struct("TPIHash",
    ULInt16("sn"),
    Padding(2),
    SLInt32("HashKey"),
    SLInt32("Buckets"),
    OffCb("HashVals"),
    OffCb("TiOff"),
    OffCb("HashAdj"),
)

Header = Struct("TPIHeader",
    ULInt32("version"),
    SLInt32("hdr_size"),
    ULInt32("ti_min"),
    ULInt32("ti_max"),
    ULInt32("follow_size"),
    TPI,
)

### Stream as a whole
# The header is followed by (ti_max - ti_min) type records.
TPIStream = Struct("TPIStream",
    Header,
    Array(lambda ctx: ctx.TPIHeader.ti_max - ctx.TPIHeader.ti_min, Types),
)

### END PURE CONSTRUCT DATA ###

# FIXME: this should not be necessary if we use the Embed construct
def merge_subcon(parent, subattr):
    """Merge a subcon's fields into its parent.

    parent: the Container into which subattr's fields should be merged
    subattr: the name of the subconstruct

    Nothing happens when `parent` has no such attribute or when the
    attribute is falsy; otherwise every field of the subconstruct is
    copied onto `parent` and the subconstruct attribute is removed.
    """
    subcon = getattr(parent, subattr, None)
    if not subcon:
        return

    for a in subcon:
        setattr(parent, a, getattr(subcon, a))

    delattr(parent, subattr)

def fix_value(leaf):
    """Translate the value member of a leaf node into a nicer form.

    Due to limitations in construct, the initial parsed form of a value is:

    value
     `- _value_name
     `- value_or_type
     `- name_or_val

    OR

    value
     `- _value_name
     `- value_or_type
     `- name_or_val
         `- value
         `- name

    This function normalizes the structure to just the value and the name.
    The value is named according to the string in _value_name.

    Leaves without a 'value' attribute are left untouched.
    """
    if not hasattr(leaf, 'value'):
        return

    value = leaf.value
    if value.value_or_type < leaf_type._encode("LF_CHAR", {}):
        # Small numbers are stored inline: value_or_type is the value and
        # name_or_val is the name string.
        setattr(leaf, 'name', value.name_or_val)
        setattr(leaf, value._value_name, value.value_or_type)
    else:
        # Otherwise value_or_type was a leaf type tag and name_or_val holds
        # the typed value together with the name.
        setattr(leaf, 'name', value.name_or_val.name)
        setattr(leaf, value._value_name, value.name_or_val.value)

    delattr(leaf, 'value')

def resolve_typerefs(leaf, types, min):
    """Resolve the numeric type references in a leaf node.

    For each reference to another type in the leaf node, look up the
    corresponding type (base type or type defined in the TPI stream). The
    dictionary type_refs is used to determine which fields in the leaf node
    are references.

    leaf: the leaf node to convert
    types: a dictionary of index->type mappings
    min: the value of tpi_min; that is, the lowest type index in the stream

    References below `min` are replaced by the base type name; other
    references are replaced by the entry in `types`. A reference that is
    not present in `types` is left as its numeric index, for single
    references and list references alike. Leaf types that have no entry in
    type_refs (for example records the Type construct skipped) are returned
    unchanged instead of raising KeyError.

    Returns the (modified) leaf.
    """
    def _resolve(ref):
        if ref < min:
            return base_type._decode(ref, {})
        try:
            return types[ref]
        except KeyError:
            # Unknown index: keep the raw number rather than failing.
            return ref

    for attr in type_refs.get(leaf.leaf_type, ()):
        ref = getattr(leaf, attr)
        if isinstance(ref, list):
            setattr(leaf, attr, ListContainer([_resolve(r) for r in ref]))
        else:
            setattr(leaf, attr, _resolve(ref))
    return leaf

def merge_fwdrefs(leaf, types, map):
    """Point references to forward-declared types at the real definition.

    leaf: the leaf node whose references should be rewritten
    types: a dictionary of index->type mappings
    map: a dictionary mapping the index of a forward reference to the index
         of the type that defines it

    References whose target is not in `map` (or that are not type objects
    at all, e.g. base type names or unresolved numeric indices) are kept as
    they are. Leaf types without an entry in type_refs are returned
    unchanged.

    Returns the (modified) leaf.
    """
    def _remap(ref):
        try:
            return types[map[ref.tpi_idx]]
        except (KeyError, AttributeError):
            # Not a forward reference we know about, or not a type object.
            return ref

    for attr in type_refs.get(leaf.leaf_type, ()):
        ref = getattr(leaf, attr)
        if isinstance(ref, list):
            setattr(leaf, attr, ListContainer([_remap(r) for r in ref]))
        elif not isinstance(ref, str):
            setattr(leaf, attr, _remap(ref))
    return leaf

def rename_2_7(lf):
    """Strip a trailing "_ST" from the leaf's type name, in place."""
    if lf.leaf_type.endswith("_ST"):
        lf.leaf_type = lf.leaf_type[:-3]

def parse_stream(fp, unnamed_hack=True, elim_fwdrefs=True):
    """Parse a TPI stream.

    fp: a file-like object that holds the type data to be parsed. Must
        support seeking.
    unnamed_hack: when true, types named "__unnamed" or "" are renamed to
        "__unnamed_<hex type index>"
    elim_fwdrefs: when true, forward references that have a definition in
        this stream are replaced by that definition and removed

    Returns the parsed stream container; its `types` attribute is replaced
    by a dictionary mapping type index to the post-processed type.
    """
    tpi_stream = TPIStream.parse_stream(fp)

    # Postprocessing
    # 1. Index the types
    ti_min = tpi_stream.TPIHeader.ti_min
    ti_max = tpi_stream.TPIHeader.ti_max
    types = dict(zip(range(ti_min, ti_max), tpi_stream.types))
    tpi_stream.types = types
    for idx in types:
        types[idx].tpi_idx = idx

    # 2. Flatten type_info and type_data
    for t in types.values():
        merge_subcon(t, 'type_data')
        merge_subcon(t, 'type_info')
        if t.leaf_type == 'LF_FIELDLIST':
            for s in t.substructs:
                merge_subcon(s, 'type_info')

    # 3. Fix up value and name structures
    for t in types.values():
        if t.leaf_type == 'LF_FIELDLIST':
            for s in t.substructs:
                fix_value(s)
        else:
            fix_value(t)

    # 4. Resolve type references
    for i in types:
        if types[i].leaf_type == "LF_FIELDLIST":
            types[i].substructs = ListContainer([
                resolve_typerefs(t, types, ti_min)
                for t in types[i].substructs
            ])
        else:
            types[i] = resolve_typerefs(types[i], types, ti_min)

    # 5. Standardize v2 leaf names to v7 convention
    for i in types:
        rename_2_7(types[i])
        if types[i].leaf_type == "LF_FIELDLIST":
            for s in types[i].substructs:
                rename_2_7(s)

    # 6. Attempt to eliminate forward refs
    # Not possible to eliminate all fwdrefs; some may not be in
    # this PDB file (eg _UNICODE_STRING in ntoskrnl.pdb)
    if elim_fwdrefs:
        # Get list of fwdrefs
        fwdrefs = {}
        for i in types:
            if hasattr(types[i], 'prop') and types[i].prop.fwdref:
                fwdrefs[types[i].name] = i

        # Map them to the real type
        fwdref_map = {}
        for i in types:
            if (hasattr(types[i], 'name') and hasattr(types[i], 'prop') and
                    not types[i].prop.fwdref):
                if types[i].name in fwdrefs:
                    fwdref_map[fwdrefs[types[i].name]] = types[i].tpi_idx

        # Change any references to the fwdref to point to the real type
        for i in types:
            if types[i].leaf_type == "LF_FIELDLIST":
                types[i].substructs = ListContainer([
                    merge_fwdrefs(t, types, fwdref_map)
                    for t in types[i].substructs
                ])
            else:
                types[i] = merge_fwdrefs(types[i], types, fwdref_map)

        # Get rid of the resolved fwdrefs
        for i in fwdref_map:
            del types[i]

    if unnamed_hack:
        for i in types:
            if (hasattr(types[i], 'name') and
                    (types[i].name == "__unnamed" or types[i].name == "")):
                types[i].name = "__unnamed" + ("_%x" % types[i].tpi_idx)

    return tpi_stream

def parse(data, unnamed_hack=True, elim_fwdrefs=True):
    """Parse a TPI stream held in the string `data`; see parse_stream."""
    return parse_stream(StringIO(data), unnamed_hack, elim_fwdrefs)

if __name__ == "__main__":
    import sys
    import time
    st = time.time()
    # The stream is binary data: open it in binary mode and make sure the
    # file is closed once parsing is done.
    with open(sys.argv[1], 'rb') as stream_file:
        tpi_stream = parse_stream(stream_file)
    ed = time.time()
    print("Parsed %d types in %f seconds" % (len(tpi_stream.types), ed - st))

    #for k,v in tpi_stream.types.items():
    #    print k,v