libcxxrt

git clone https://git.neptards.moe/neptards/libcxxrt.git
Log | Files | Refs | README | LICENSE

dwarf_eh.h (16369B)


      1 /* 
      2  * Copyright 2010-2011 PathScale, Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are met:
      6  *
      7  * 1. Redistributions of source code must retain the above copyright notice,
      8  *    this list of conditions and the following disclaimer.
      9  *
     10  * 2. Redistributions in binary form must reproduce the above copyright notice,
     11  *    this list of conditions and the following disclaimer in the documentation
     12  *    and/or other materials provided with the distribution.
     13  * 
     14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
     15  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     16  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     22  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     23  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     24  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 /**
     27  * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
     28  * handling tables.
     29  *
     30  * This file contains various helper functions that are independent of the
     31  * language-specific code.  It can be used in any personality function for the
     32  * Itanium ABI.
     33  */
     34 #include <assert.h>
     35 
     36 // TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
     37 // and arm.h that can be included by this file depending on the target ABI.
     38 
     39 // _GNU_SOURCE must be defined for unwind.h to expose some of the functions
     40 // that we want.  If it isn't, then we define it and undefine it to make sure
     41 // that it doesn't impact the rest of the program.
     42 #ifndef _GNU_SOURCE
     43 #	define _GNU_SOURCE 1
     44 #	include "unwind.h"
     45 #	undef _GNU_SOURCE
     46 #else
     47 #	include "unwind.h"
     48 #endif
     49 
     50 #include <stdint.h>
     51 
     52 /// Type used for pointers into DWARF data
     53 typedef unsigned char *dw_eh_ptr_t;
     54 
     55 // Flag indicating a signed quantity
     56 #define DW_EH_PE_signed 0x08
     57 /// DWARF data encoding types.  
     58 enum dwarf_data_encoding
     59 {
     60 	/// Absolute pointer value
     61 	DW_EH_PE_absptr   = 0x00,
     62 	/// Unsigned, little-endian, base 128-encoded (variable length).
     63 	DW_EH_PE_uleb128 = 0x01,
     64 	/// Unsigned 16-bit integer.
     65 	DW_EH_PE_udata2  = 0x02,
     66 	/// Unsigned 32-bit integer.
     67 	DW_EH_PE_udata4  = 0x03,
     68 	/// Unsigned 64-bit integer.
     69 	DW_EH_PE_udata8  = 0x04,
     70 	/// Signed, little-endian, base 128-encoded (variable length)
     71 	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
     72 	/// Signed 16-bit integer.
     73 	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
     74 	/// Signed 32-bit integer.
     75 	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
     76 	/// Signed 32-bit integer.
     77 	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
     78 };
     79 
     80 /**
     81  * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
     82  * the low four of an octet.  The high four bits store the addressing mode.
     83  */
     84 static inline enum dwarf_data_encoding get_encoding(unsigned char x)
     85 {
     86 	return static_cast<enum dwarf_data_encoding>(x & 0xf);
     87 }
     88 
     89 /**
     90  * DWARF addressing mode constants.  When reading a pointer value from a DWARF
     91  * exception table, you must know how it is stored and what the addressing mode
     92  * is.  The low four bits tell you the encoding, allowing you to decode a
     93  * number.  The high four bits tell you the addressing mode, allowing you to
     94  * turn that number into an address in memory.
     95  */
     96 enum dwarf_data_relative
     97 {
     98 	/// Value is omitted
     99 	DW_EH_PE_omit     = 0xff,
    100 	/// Value relative to program counter
    101 	DW_EH_PE_pcrel    = 0x10,
    102 	/// Value relative to the text segment
    103 	DW_EH_PE_textrel  = 0x20,
    104 	/// Value relative to the data segment
    105 	DW_EH_PE_datarel  = 0x30,
    106 	/// Value relative to the start of the function
    107 	DW_EH_PE_funcrel  = 0x40,
    108 	/// Aligned pointer (Not supported yet - are they actually used?)
    109 	DW_EH_PE_aligned  = 0x50,
    110 	/// Pointer points to address of real value
    111 	DW_EH_PE_indirect = 0x80
    112 };
    113 /**
    114  * Returns the addressing mode component of this encoding.
    115  */
    116 static inline enum dwarf_data_relative get_base(unsigned char x)
    117 {
    118 	return static_cast<enum dwarf_data_relative>(x & 0x70);
    119 }
    120 /**
    121  * Returns whether an encoding represents an indirect address.
    122  */
    123 static int is_indirect(unsigned char x)
    124 {
    125 	return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
    126 }
    127 
    128 /**
    129  * Returns the size of a fixed-size encoding.  This function will abort if
    130  * called with a value that is not a fixed-size encoding.
    131  */
    132 static inline int dwarf_size_of_fixed_size_field(unsigned char type)
    133 {
    134 	switch (get_encoding(type))
    135 	{
    136 		default: abort();
    137 		case DW_EH_PE_sdata2: 
    138 		case DW_EH_PE_udata2: return 2;
    139 		case DW_EH_PE_sdata4:
    140 		case DW_EH_PE_udata4: return 4;
    141 		case DW_EH_PE_sdata8:
    142 		case DW_EH_PE_udata8: return 8;
    143 		case DW_EH_PE_absptr: return sizeof(void*);
    144 	}
    145 }
    146 
    147 /** 
    148  * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
    149  * point to the end of the value.  Stores the number of bits read in the value
    150  * pointed to by b, allowing you to determine the value of the highest bit, and
    151  * therefore the sign of a signed value.
    152  *
    153  * This function is not intended to be called directly.  Use read_sleb128() or
    154  * read_uleb128() for reading signed and unsigned versions, respectively.
    155  */
    156 static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
    157 {
    158 	uint64_t uleb = 0;
    159 	unsigned int bit = 0;
    160 	unsigned char digit = 0;
    161 	// We have to read at least one octet, and keep reading until we get to one
    162 	// with the high bit unset
    163 	do
    164 	{
    165 		// This check is a bit too strict - we should also check the highest
    166 		// bit of the digit.
    167 		assert(bit < sizeof(uint64_t) * 8);
    168 		// Get the base 128 digit 
    169 		digit = (**data) & 0x7f;
    170 		// Add it to the current value
    171 		uleb += digit << bit;
    172 		// Increase the shift value
    173 		bit += 7;
    174 		// Proceed to the next octet
    175 		(*data)++;
    176 		// Terminate when we reach a value that does not have the high bit set
    177 		// (i.e. which was not modified when we mask it with 0x7f)
    178 	} while ((*(*data - 1)) != digit);
    179 	*b = bit;
    180 
    181 	return uleb;
    182 }
    183 
    184 /**
    185  * Reads an unsigned little-endian base-128 value starting at the address
    186  * pointed to by *data.  Updates *data to point to the next byte after the end
    187  * of the variable-length value.
    188  */
    189 static int64_t read_uleb128(dw_eh_ptr_t *data)
    190 {
    191 	int b;
    192 	return read_leb128(data, &b);
    193 }
    194 
    195 /**
    196  * Reads a signed little-endian base-128 value starting at the address pointed
    197  * to by *data.  Updates *data to point to the next byte after the end of the
    198  * variable-length value.
    199  */
    200 static int64_t read_sleb128(dw_eh_ptr_t *data)
    201 {
    202 	int bits;
    203 	// Read as if it's signed
    204 	uint64_t uleb = read_leb128(data, &bits);
    205 	// If the most significant bit read is 1, then we need to sign extend it
    206 	if ((uleb >> (bits-1)) == 1)
    207 	{
    208 		// Sign extend by setting all bits in front of it to 1
    209 		uleb |= static_cast<int64_t>(-1) << bits;
    210 	}
    211 	return static_cast<int64_t>(uleb);
    212 }
    213 /**
    214  * Reads a value using the specified encoding from the address pointed to by
    215  * *data.  Updates the value of *data to point to the next byte after the end
    216  * of the data.
    217  */
    218 static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
    219 {
    220 	enum dwarf_data_encoding type = get_encoding(encoding);
    221 	switch (type)
    222 	{
    223 		// Read fixed-length types
    224 #define READ(dwarf, type) \
    225 		case dwarf:\
    226 		{\
    227 			type t;\
    228 			memcpy(&t, *data, sizeof t);\
    229 			*data += sizeof t;\
    230 			return static_cast<uint64_t>(t);\
    231 		}
    232 		READ(DW_EH_PE_udata2, uint16_t)
    233 		READ(DW_EH_PE_udata4, uint32_t)
    234 		READ(DW_EH_PE_udata8, uint64_t)
    235 		READ(DW_EH_PE_sdata2, int16_t)
    236 		READ(DW_EH_PE_sdata4, int32_t)
    237 		READ(DW_EH_PE_sdata8, int64_t)
    238 		READ(DW_EH_PE_absptr, intptr_t)
    239 #undef READ
    240 		// Read variable-length types
    241 		case DW_EH_PE_sleb128:
    242 			return read_sleb128(data);
    243 		case DW_EH_PE_uleb128:
    244 			return read_uleb128(data);
    245 		default: abort();
    246 	}
    247 }
    248 
    249 /**
    250  * Resolves an indirect value.  This expects an unwind context, an encoding, a
    251  * decoded value, and the start of the region as arguments.  The returned value
    252  * is a pointer to the address identified by the encoded value.
    253  *
    254  * If the encoding does not specify an indirect value, then this returns v.
    255  */
    256 static uint64_t resolve_indirect_value(_Unwind_Context *c,
    257                                        unsigned char encoding,
    258                                        int64_t v,
    259                                        dw_eh_ptr_t start)
    260 {
    261 	switch (get_base(encoding))
    262 	{
    263 		case DW_EH_PE_pcrel:
    264 			v += reinterpret_cast<uint64_t>(start);
    265 			break;
    266 		case DW_EH_PE_textrel:
    267 			v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetTextRelBase(c)));
    268 			break;
    269 		case DW_EH_PE_datarel:
    270 			v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetDataRelBase(c)));
    271 			break;
    272 		case DW_EH_PE_funcrel:
    273 			v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetRegionStart(c)));
    274 		default:
    275 			break;
    276 	}
    277 	// If this is an indirect value, then it is really the address of the real
    278 	// value
    279 	// TODO: Check whether this should really always be a pointer - it seems to
    280 	// be a GCC extensions, so not properly documented...
    281 	if (is_indirect(encoding))
    282 	{
    283 		v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(*reinterpret_cast<void**>(v)));
    284 	}
    285 	return v;
    286 }
    287 
    288 
    289 /**
    290  * Reads an encoding and a value, updating *data to point to the next byte.  
    291  */
    292 static inline void read_value_with_encoding(_Unwind_Context *context,
    293                                             dw_eh_ptr_t *data,
    294                                             uint64_t *out)
    295 {
    296 	dw_eh_ptr_t start = *data;
    297 	unsigned char encoding = *((*data)++);
    298 	// If this value is omitted, skip it and don't touch the output value
    299 	if (encoding == DW_EH_PE_omit) { return; }
    300 
    301 	*out = read_value(encoding, data);
    302 	*out = resolve_indirect_value(context, encoding, *out, start);
    303 }
    304 
    305 /**
    306  * Structure storing a decoded language-specific data area.  Use parse_lsda()
    307  * to generate an instance of this structure from the address returned by the
    308  * generic unwind library.  
    309  *
    310  * You should not need to inspect the fields of this structure directly if you
    311  * are just using this header.  The structure stores the locations of the
    312  * various tables used for unwinding exceptions and is used by the functions
    313  * for reading values from these tables.
    314  */
    315 struct dwarf_eh_lsda
    316 {
    317 	/// The start of the region.  This is a cache of the value returned by
    318 	/// _Unwind_GetRegionStart().
    319 	dw_eh_ptr_t region_start;
    320 	/// The start of the landing pads table.
    321 	dw_eh_ptr_t landing_pads;
    322 	/// The start of the type table.
    323 	dw_eh_ptr_t type_table;
    324 	/// The encoding used for entries in the type tables.
    325 	unsigned char type_table_encoding;
    326 	/// The location of the call-site table.
    327 	dw_eh_ptr_t call_site_table;
    328 	/// The location of the action table.
    329 	dw_eh_ptr_t action_table;
    330 	/// The encoding used for entries in the call-site table.
    331 	unsigned char callsite_encoding;
    332 };
    333 
    334 /**
    335  * Parse the header on the language-specific data area and return a structure
    336  * containing the addresses and encodings of the various tables.
    337  */
    338 static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
    339                                               unsigned char *data)
    340 {
    341 	struct dwarf_eh_lsda lsda;
    342 
    343 	lsda.region_start = reinterpret_cast<dw_eh_ptr_t>(_Unwind_GetRegionStart(context));
    344 
    345 	// If the landing pads are relative to anything other than the start of
    346 	// this region, find out where.  This is @LPStart in the spec, although the
    347 	// encoding that GCC uses does not quite match the spec.
    348 	uint64_t v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(lsda.region_start));
    349 	read_value_with_encoding(context, &data, &v);
    350 	lsda.landing_pads = reinterpret_cast<dw_eh_ptr_t>(static_cast<uintptr_t>(v));
    351 
    352 	// If there is a type table, find out where it is.  This is @TTBase in the
    353 	// spec.  Note: we find whether there is a type table pointer by checking
    354 	// whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
    355 	// spec says, but does seem to be how G++ indicates this.
    356 	lsda.type_table = 0;
    357 	lsda.type_table_encoding = *data++;
    358 	if (lsda.type_table_encoding != DW_EH_PE_omit)
    359 	{
    360 		v = read_uleb128(&data);
    361 		dw_eh_ptr_t type_table = data;
    362 		type_table += v;
    363 		lsda.type_table = type_table;
    364 		//lsda.type_table = (uintptr_t*)(data + v);
    365 	}
    366 #if defined(__arm__) && !defined(__ARM_DWARF_EH__)
    367 	lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
    368 #endif
    369 
    370 	lsda.callsite_encoding = static_cast<enum dwarf_data_encoding>(*(data++));
    371 
    372 	// Action table is immediately after the call site table
    373 	lsda.action_table = data;
    374 	uintptr_t callsite_size = static_cast<uintptr_t>(read_uleb128(&data));
    375 	lsda.action_table = data + callsite_size;
    376 	// Call site table is immediately after the header
    377 	lsda.call_site_table = static_cast<dw_eh_ptr_t>(data);
    378 
    379 
    380 	return lsda;
    381 }
    382 
    383 /**
    384  * Structure representing an action to be performed while unwinding.  This
    385  * contains the address that should be unwound to and the action record that
    386  * provoked this action.
    387  */
    388 struct dwarf_eh_action
    389 {
    390 	/** 
    391 	 * The address that this action directs should be the new program counter
    392 	 * value after unwinding.
    393 	 */
    394 	dw_eh_ptr_t landing_pad;
    395 	/// The address of the action record.
    396 	dw_eh_ptr_t action_record;
    397 };
    398 
    399 /**
    400  * Look up the landing pad that corresponds to the current invoke.
    401  * Returns true if record exists.  The context is provided by the generic
    402  * unwind library and the lsda should be the result of a call to parse_lsda().
    403  *
    404  * The action record is returned via the result parameter.  
    405  */
    406 static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
    407                                    struct dwarf_eh_lsda *lsda,
    408                                    struct dwarf_eh_action *result)
    409 {
    410 	result->action_record = 0;
    411 	result->landing_pad = 0;
    412 	// The current instruction pointer offset within the region
    413 	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
    414 	unsigned char *callsite_table = static_cast<unsigned char*>(lsda->call_site_table);
    415 
    416 	while (callsite_table <= lsda->action_table)
    417 	{
    418 		// Once again, the layout deviates from the spec.
    419 		uint64_t call_site_start, call_site_size, landing_pad, action;
    420 		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
    421 		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
    422 
    423 		// Call site entries are sorted, so if we find a call site that's after
    424 		// the current instruction pointer then there is no action associated
    425 		// with this call and we should unwind straight through this frame
    426 		// without doing anything.
    427 		if (call_site_start > ip) { break; }
    428 
    429 		// Read the address of the landing pad and the action from the call
    430 		// site table.
    431 		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
    432 		action = read_uleb128(&callsite_table);
    433 
    434 		// We should not include the call_site_start (beginning of the region)
    435 		// address in the ip range. For each call site:
    436 		//
    437 		// address1: call proc
    438 		// address2: next instruction
    439 		//
    440 		// The call stack contains address2 and not address1, address1 can be
    441 		// at the end of another EH region.
    442 		if (call_site_start < ip && ip <= call_site_start + call_site_size)
    443 		{
    444 			if (action)
    445 			{
    446 				// Action records are 1-biased so both no-record and zeroth
    447 				// record can be stored.
    448 				result->action_record = lsda->action_table + action - 1;
    449 			}
    450 			// No landing pad means keep unwinding.
    451 			if (landing_pad)
    452 			{
    453 				// Landing pad is the offset from the value in the header
    454 				result->landing_pad = lsda->landing_pads + landing_pad;
    455 			}
    456 			return true;
    457 		}
    458 	}
    459 	return false;
    460 }
    461 
    462 /// Defines an exception class from 8 bytes (endian independent)
    463 #define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
    464 	((static_cast<uint64_t>(a) << 56) +\
    465 	 (static_cast<uint64_t>(b) << 48) +\
    466 	 (static_cast<uint64_t>(c) << 40) +\
    467 	 (static_cast<uint64_t>(d) << 32) +\
    468 	 (static_cast<uint64_t>(e) << 24) +\
    469 	 (static_cast<uint64_t>(f) << 16) +\
    470 	 (static_cast<uint64_t>(g) << 8) +\
    471 	 (static_cast<uint64_t>(h)))
    472 
    473 #define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
    474 	 (static_cast<uint32_t>(e) << 24) +\
    475 	 (static_cast<uint32_t>(f) << 16) +\
    476 	 (static_cast<uint32_t>(g) << 8) +\
    477 	 (static_cast<uint32_t>(h))