/thirdparty/breakpad/common/test_assembler.h

http://github.com/tomahawk-player/tomahawk · C Header · 481 lines · 123 code · 55 blank · 303 comment · 2 complexity · 2404c7321d345de1d92528ab917b80ae MD5 · raw file

  1. // -*- mode: C++ -*-
  2. // Copyright (c) 2010, Google Inc.
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  31. // test_assembler.h: interface to classes for building complex binary streams.
  32. // To test the Breakpad symbol dumper and processor thoroughly, for
  33. // all combinations of host system and minidump processor
  34. // architecture, we need to be able to easily generate complex test
  35. // data like debugging information and minidump files.
  36. //
  37. // For example, if we want our unit tests to provide full code
  38. // coverage for stack walking, it may be difficult to persuade the
  39. // compiler to generate every possible sort of stack walking
  40. // information that we want to support; there are probably DWARF CFI
  41. // opcodes that GCC never emits. Similarly, if we want to test our
  42. // error handling, we will need to generate damaged minidumps or
  43. // debugging information that (we hope) the client or compiler will
  44. // never produce on its own.
  45. //
  46. // The classes in google_breakpad::test_assembler provide a predictable and
  47. // (relatively) simple way to generate complex formatted data streams
  48. // like minidumps and CFI. Furthermore, because these classes are
  49. // portable, developers without access to (say) Visual Studio or a
  50. // SPARC assembler can still work on test data for those targets.
  51. #ifndef PROCESSOR_TEST_ASSEMBLER_H_
  52. #define PROCESSOR_TEST_ASSEMBLER_H_
  53. #include <list>
  54. #include <vector>
  55. #include <string>
  56. #include "google_breakpad/common/breakpad_types.h"
  57. namespace google_breakpad {
  58. using std::list;
  59. using std::string;
  60. using std::vector;
  61. namespace test_assembler {
  62. // A Label represents a value not yet known that we need to store in a
  63. // section. As long as all the labels a section refers to are defined
  64. // by the time we retrieve its contents as bytes, we can use undefined
  65. // labels freely in that section's construction.
  66. //
  67. // A label can be in one of three states:
  68. // - undefined,
  69. // - defined as the sum of some other label and a constant, or
  70. // - a constant.
  71. //
  72. // A label's value never changes, but it can accumulate constraints.
  73. // Adding labels and integers is permitted, and yields a label.
  74. // Subtracting a constant from a label is permitted, and also yields a
  75. // label. Subtracting two labels that have some relationship to each
  76. // other is permitted, and yields a constant.
  77. //
  78. // For example:
  79. //
  80. // Label a; // a's value is undefined
  81. // Label b; // b's value is undefined
  82. // {
  83. // Label c = a + 4; // okay, even though a's value is unknown
  84. // b = c + 4; // also okay; b is now a+8
  85. // }
  86. // Label d = b - 2; // okay; d == a+6, even though c is gone
  87. // d.Value(); // error: d's value is not yet known
  88. // d - a; // is 6, even though their values are not known
  89. // a = 12; // now b == 20, and d == 18
  90. // d.Value(); // 18: no longer an error
  91. // b.Value(); // 20
  92. // d = 10; // error: d is already defined.
  93. //
  94. // Label objects' lifetimes are unconstrained: notice that, in the
  95. // above example, even though a and b are only related through c, and
  96. // c goes out of scope, the assignment to a sets b's value as well. In
  97. // particular, it's not necessary to ensure that a Label lives beyond
  98. // Sections that refer to it.
  class Label {
   public:
    Label();                    // An undefined label.
    Label(u_int64_t value);     // A label with a fixed value.
    Label(const Label &value);  // A label equal to another.
    ~Label();

    // Return this label's value; it must be known.
    //
    // Providing this as a cast operator is nifty, but the conversions
    // happen in unexpected places. In particular, ISO C++ says that
    // Label + size_t becomes ambiguous, because it can't decide whether
    // to convert the Label to a u_int64_t and then to a size_t, or use
    // the overloaded operator that returns a new label, even though the
    // former could fail if the label is not yet defined and the latter
    // won't.
    u_int64_t Value() const;

    // Constrain this label to equal VALUE (a constant or another label).
    Label &operator=(u_int64_t value);
    Label &operator=(const Label &value);

    // Label +/- constant yields a new label; label - label yields the
    // constant offset between the two.
    Label operator+(u_int64_t addend) const;
    Label operator-(u_int64_t subtrahend) const;
    u_int64_t operator-(const Label &subtrahend) const;

    // We could also provide == and != that work on undefined, but
    // related, labels.

    // Return true if this label's value is known. If VALUE_P is given,
    // set *VALUE_P to the known value if returning true.
    bool IsKnownConstant(u_int64_t *value_p = NULL) const;

    // Return true if the offset from LABEL to this label is known. If
    // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
    //
    // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m',
    // except that it also returns a value indicating whether the
    // subtraction is possible given what we currently know of l and m.
    // It can be possible even if we don't know l and m's values. For
    // example:
    //
    //   Label l, m;
    //   m = l + 10;
    //   l.IsKnownConstant();         // false
    //   m.IsKnownConstant();         // false
    //   u_int64_t d;
    //   l.IsKnownOffsetFrom(m, &d);  // true, and sets d to -10 (stored
    //                                // in an unsigned 64-bit variable)
    //   l-m                          // -10
    //   m-l                          // 10
    //   m.Value()                    // error: m's value is not known
    bool IsKnownOffsetFrom(const Label &label,
                           u_int64_t *offset_p = NULL) const;

   private:
    // A label's value, or if that is not yet known, how the value is
    // related to other labels' values. A binding may be:
    // - a known constant,
    // - constrained to be equal to some other binding plus a constant, or
    // - unconstrained, and free to take on any value.
    //
    // Many labels may point to a single binding, and each binding may
    // refer to another, so bindings and labels form trees whose leaves
    // are labels, whose interior nodes (and roots) are bindings, and
    // where links point from children to parents. Bindings are
    // reference counted, allowing labels to be lightweight, copyable,
    // assignable, placed in containers, and so on.
    class Binding {
     public:
      Binding();
      Binding(u_int64_t addend);
      ~Binding();

      // Increment our reference count.
      void Acquire() { ++reference_count_; }

      // Decrement our reference count, and return true if it is zero.
      bool Release() { return --reference_count_ == 0; }

      // Set this binding to be equal to BINDING + ADDEND. If BINDING is
      // NULL, then set this binding to the known constant ADDEND.
      // Update every binding on this binding's chain to point directly
      // to BINDING, or to be a constant, with addends adjusted
      // appropriately.
      void Set(Binding *binding, u_int64_t addend);

      // Return what we know about the value of this binding.
      // - If this binding's value is a known constant, set BASE to
      //   NULL, and set ADDEND to its value.
      // - If this binding is not a known constant but related to other
      //   bindings, set BASE to the binding at the end of the relation
      //   chain (which will always be unconstrained), and set ADDEND to
      //   the value to add to that binding's value to get this binding's
      //   value.
      // - If this binding is unconstrained, set BASE to this, and leave
      //   ADDEND unchanged.
      void Get(Binding **base, u_int64_t *addend);

     private:
      // There are three cases:
      //
      // - A binding representing a known constant value has base_ NULL,
      //   and addend_ equal to the value.
      //
      // - A binding representing a completely unconstrained value has
      //   base_ pointing to this; addend_ is unused.
      //
      // - A binding whose value is related to some other binding's
      //   value has base_ pointing to that other binding, and addend_
      //   set to the amount to add to that binding's value to get this
      //   binding's value. We only represent relationships of the form
      //   x = y+c.
      //
      // Thus, the base_ links form a chain terminating in either a
      // known constant value or a completely unconstrained value. Most
      // operations on bindings do path compression: they change every
      // binding on the chain to point directly to the final value,
      // adjusting addends as appropriate.
      Binding *base_;
      u_int64_t addend_;

      // The number of Labels and Bindings pointing to this binding.
      // (When a binding points to itself, indicating a completely
      // unconstrained binding, that doesn't count as a reference.)
      int reference_count_;
    };

    // This label's value.
    Binding *value_;
  };
  212. inline Label operator+(u_int64_t a, const Label &l) { return l + a; }
  213. // Note that int-Label isn't defined, as negating a Label is not an
  214. // operation we support.
  // Conventions for representing larger numbers as sequences of bytes.
  //
  // The enumerator list deliberately has no trailing comma: a comma after
  // the last enumerator is only accepted from C++11 onwards.
  enum Endianness {
    kBigEndian,     // Big-endian: the most significant byte comes first.
    kLittleEndian,  // Little-endian: the least significant byte comes first.
    kUnsetEndian    // used internally
  };
  221. // A section is a sequence of bytes, constructed by appending bytes
  222. // to the end. Sections have a convenient and flexible set of member
  223. // functions for appending data in various formats: big-endian and
  224. // little-endian signed and unsigned values of different sizes;
  225. // LEB128 and ULEB128 values (see below), and raw blocks of bytes.
  226. //
  227. // If you need to append a value to a section that is not convenient
  228. // to compute immediately, you can create a label, append the
  229. // label's value to the section, and then set the label's value
  230. // later, when it's convenient to do so. Once a label's value is
  231. // known, the section class takes care of updating all previously
  232. // appended references to it.
  233. //
  234. // Once all the labels to which a section refers have had their
  235. // values determined, you can get a copy of the section's contents
  236. // as a string.
  237. //
  238. // Note that the "start of section" label has no built-in meaning. This is
  239. // because there are typically several different meanings for "the
  240. // start of a section": the offset of the section within an object
  241. // file, the address in memory at which the section's content appear,
  242. // and so on. It's up to the code that uses the Section class to
  243. // keep track of these explicitly, as they depend on the application.
  244. class Section {
  245. public:
  246. Section(Endianness endianness = kUnsetEndian)
  247. : endianness_(endianness) { };
  248. ~Section() { };
  249. // Set the default endianness of this section to ENDIANNESS. This
  250. // sets the behavior of the D<N> appending functions. If the
  251. // assembler's default endianness was set, this is the
  252. void set_endianness(Endianness endianness) {
  253. endianness_ = endianness;
  254. }
  255. // Return the default endianness of this section.
  256. Endianness endianness() const { return endianness_; }
  257. // Append the SIZE bytes at DATA or the contents of STRING to the
  258. // end of this section. Return a reference to this section.
  259. Section &Append(const u_int8_t *data, size_t size) {
  260. contents_.append(reinterpret_cast<const char *>(data), size);
  261. return *this;
  262. };
  263. Section &Append(const string &data) {
  264. contents_.append(data);
  265. return *this;
  266. };
  267. // Append SIZE copies of BYTE to the end of this section. Return a
  268. // reference to this section.
  269. Section &Append(size_t size, u_int8_t byte) {
  270. contents_.append(size, (char) byte);
  271. return *this;
  272. }
  273. // Append NUMBER to this section. ENDIANNESS is the endianness to
  274. // use to write the number. SIZE is the length of the number in
  275. // bytes. Return a reference to this section.
  276. Section &Append(Endianness endianness, size_t size, u_int64_t number);
  277. Section &Append(Endianness endianness, size_t size, const Label &label);
  278. // Append SECTION to the end of this section. The labels SECTION
  279. // refers to need not be defined yet.
  280. //
  281. // Note that this has no effect on any Labels' values, or on
  282. // SECTION. If placing SECTION within 'this' provides new
  283. // constraints on existing labels' values, then it's up to the
  284. // caller to fiddle with those labels as needed.
  285. Section &Append(const Section &section);
  286. // Append the contents of DATA as a series of bytes terminated by
  287. // a NULL character.
  288. Section &AppendCString(const string &data) {
  289. Append(data);
  290. contents_ += '\0';
  291. return *this;
  292. }
  293. // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes
  294. // long, pad with '\0' characters.
  295. Section &AppendCString(const string &data, size_t size) {
  296. contents_.append(data, 0, size);
  297. if (data.size() < size)
  298. Append(size - data.size(), 0);
  299. return *this;
  300. }
  301. // Append VALUE or LABEL to this section, with the given bit width and
  302. // endianness. Return a reference to this section.
  303. //
  304. // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
  305. // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
  306. // 'B' (big-endian, most significant byte first), or
  307. // 'D' (default, the section's default endianness)
  308. // <BITWIDTH> is 8, 16, 32, or 64.
  309. //
  310. // Since endianness doesn't matter for a single byte, all the
  311. // <BITWIDTH>=8 functions are equivalent.
  312. //
  313. // These can be used to write both signed and unsigned values, as
  314. // the compiler will properly sign-extend a signed value before
  315. // passing it to the function, at which point the function's
  316. // behavior is the same either way.
  317. Section &L8(u_int8_t value) { contents_ += value; return *this; }
  318. Section &B8(u_int8_t value) { contents_ += value; return *this; }
  319. Section &D8(u_int8_t value) { contents_ += value; return *this; }
  320. Section &L16(u_int16_t), &L32(u_int32_t), &L64(u_int64_t),
  321. &B16(u_int16_t), &B32(u_int32_t), &B64(u_int64_t),
  322. &D16(u_int16_t), &D32(u_int32_t), &D64(u_int64_t);
  323. Section &L8(const Label &label), &L16(const Label &label),
  324. &L32(const Label &label), &L64(const Label &label),
  325. &B8(const Label &label), &B16(const Label &label),
  326. &B32(const Label &label), &B64(const Label &label),
  327. &D8(const Label &label), &D16(const Label &label),
  328. &D32(const Label &label), &D64(const Label &label);
  329. // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
  330. //
  331. // The signed LEB128 representation of an integer N is a variable
  332. // number of bytes:
  333. //
  334. // - If N is between -0x40 and 0x3f, then its signed LEB128
  335. // representation is a single byte whose value is N.
  336. //
  337. // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
  338. // 0x80, followed by the signed LEB128 representation of N / 128,
  339. // rounded towards negative infinity.
  340. //
  341. // In other words, we break VALUE into groups of seven bits, put
  342. // them in little-endian order, and then write them as eight-bit
  343. // bytes with the high bit on all but the last.
  344. //
  345. // Note that VALUE cannot be a Label (we would have to implement
  346. // relaxation).
  347. Section &LEB128(long long value);
  348. // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
  349. //
  350. // The unsigned LEB128 representation of an integer N is a variable
  351. // number of bytes:
  352. //
  353. // - If N is between 0 and 0x7f, then its unsigned LEB128
  354. // representation is a single byte whose value is N.
  355. //
  356. // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
  357. // 0x80, followed by the unsigned LEB128 representation of N /
  358. // 128, rounded towards negative infinity.
  359. //
  360. // Note that VALUE cannot be a Label (we would have to implement
  361. // relaxation).
  362. Section &ULEB128(u_int64_t value);
  363. // Jump to the next location aligned on an ALIGNMENT-byte boundary,
  364. // relative to the start of the section. Fill the gap with PAD_BYTE.
  365. // ALIGNMENT must be a power of two. Return a reference to this
  366. // section.
  367. Section &Align(size_t alignment, u_int8_t pad_byte = 0);
  368. // Clear the contents of this section.
  369. void Clear();
  370. // Return the current size of the section.
  371. size_t Size() const { return contents_.size(); }
  372. // Return a label representing the start of the section.
  373. //
  374. // It is up to the user whether this label represents the section's
  375. // position in an object file, the section's address in memory, or
  376. // what have you; some applications may need both, in which case
  377. // this simple-minded interface won't be enough. This class only
  378. // provides a single start label, for use with the Here and Mark
  379. // member functions.
  380. //
  381. // Ideally, we'd provide this in a subclass that actually knows more
  382. // about the application at hand and can provide an appropriate
  383. // collection of start labels. But then the appending member
  384. // functions like Append and D32 would return a reference to the
  385. // base class, not the derived class, and the chaining won't work.
  386. // Since the only value here is in pretty notation, that's a fatal
  387. // flaw.
  388. Label start() const { return start_; }
  389. // Return a label representing the point at which the next Appended
  390. // item will appear in the section, relative to start().
  391. Label Here() const { return start_ + Size(); }
  392. // Set *LABEL to Here, and return a reference to this section.
  393. Section &Mark(Label *label) { *label = Here(); return *this; }
  394. // If there are no undefined label references left in this
  395. // section, set CONTENTS to the contents of this section, as a
  396. // string, and clear this section. Return true on success, or false
  397. // if there were still undefined labels.
  398. bool GetContents(string *contents);
  399. private:
  400. // Used internally. A reference to a label's value.
  401. struct Reference {
  402. Reference(size_t set_offset, Endianness set_endianness, size_t set_size,
  403. const Label &set_label)
  404. : offset(set_offset), endianness(set_endianness), size(set_size),
  405. label(set_label) { }
  406. // The offset of the reference within the section.
  407. size_t offset;
  408. // The endianness of the reference.
  409. Endianness endianness;
  410. // The size of the reference.
  411. size_t size;
  412. // The label to which this is a reference.
  413. Label label;
  414. };
  415. // The default endianness of this section.
  416. Endianness endianness_;
  417. // The contents of the section.
  418. string contents_;
  419. // References to labels within those contents.
  420. vector<Reference> references_;
  421. // A label referring to the beginning of the section.
  422. Label start_;
  423. };
  424. } // namespace test_assembler
  425. } // namespace google_breakpad
  426. #endif // PROCESSOR_TEST_ASSEMBLER_H_