PageRenderTime 66ms CodeModel.GetById 49ms app.highlight 12ms RepoModel.GetById 0ms app.codeStats 1ms

/thirdparty/breakpad/common/test_assembler.h

http://github.com/tomahawk-player/tomahawk
C++ Header | 481 lines | 123 code | 55 blank | 303 comment | 2 complexity | 2404c7321d345de1d92528ab917b80ae MD5 | raw file
  1// -*- mode: C++ -*-
  2
  3// Copyright (c) 2010, Google Inc.
  4// All rights reserved.
  5//
  6// Redistribution and use in source and binary forms, with or without
  7// modification, are permitted provided that the following conditions are
  8// met:
  9//
 10//     * Redistributions of source code must retain the above copyright
 11// notice, this list of conditions and the following disclaimer.
 12//     * Redistributions in binary form must reproduce the above
 13// copyright notice, this list of conditions and the following disclaimer
 14// in the documentation and/or other materials provided with the
 15// distribution.
 16//     * Neither the name of Google Inc. nor the names of its
 17// contributors may be used to endorse or promote products derived from
 18// this software without specific prior written permission.
 19//
 20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31
 32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 33
// test_assembler.h: interface to classes for building complex binary streams.
 35
 36// To test the Breakpad symbol dumper and processor thoroughly, for
 37// all combinations of host system and minidump processor
 38// architecture, we need to be able to easily generate complex test
 39// data like debugging information and minidump files.
 40// 
 41// For example, if we want our unit tests to provide full code
 42// coverage for stack walking, it may be difficult to persuade the
 43// compiler to generate every possible sort of stack walking
 44// information that we want to support; there are probably DWARF CFI
 45// opcodes that GCC never emits. Similarly, if we want to test our
 46// error handling, we will need to generate damaged minidumps or
 47// debugging information that (we hope) the client or compiler will
 48// never produce on its own.
 49//
// The classes in namespace google_breakpad::test_assembler provide a
// predictable and (relatively) simple way to generate complex
// formatted data streams like minidumps and CFI. Furthermore, because
// they are portable, developers without access to (say) Visual Studio
// or a SPARC assembler can still work on test data for those targets.
 55
 56#ifndef PROCESSOR_TEST_ASSEMBLER_H_
 57#define PROCESSOR_TEST_ASSEMBLER_H_
 58
 59#include <list>
 60#include <vector>
 61#include <string>
 62
 63#include "google_breakpad/common/breakpad_types.h"
 64
 65namespace google_breakpad {
 66
 67using std::list;
 68using std::string;
 69using std::vector;
 70
 71namespace test_assembler {
 72
 73// A Label represents a value not yet known that we need to store in a
 74// section. As long as all the labels a section refers to are defined
 75// by the time we retrieve its contents as bytes, we can use undefined
 76// labels freely in that section's construction.
 77//
 78// A label can be in one of three states:
 79// - undefined,
 80// - defined as the sum of some other label and a constant, or
 81// - a constant.
 82// 
 83// A label's value never changes, but it can accumulate constraints.
 84// Adding labels and integers is permitted, and yields a label.
 85// Subtracting a constant from a label is permitted, and also yields a
 86// label. Subtracting two labels that have some relationship to each
 87// other is permitted, and yields a constant.
 88//
 89// For example:
 90//
 91//   Label a;               // a's value is undefined
 92//   Label b;               // b's value is undefined
 93//   {
 94//     Label c = a + 4;     // okay, even though a's value is unknown
 95//     b = c + 4;           // also okay; b is now a+8
 96//   }
 97//   Label d = b - 2;       // okay; d == a+6, even though c is gone
 98//   d.Value();             // error: d's value is not yet known
 99//   d - a;                 // is 6, even though their values are not known
100//   a = 12;                // now b == 20, and d == 18
101//   d.Value();             // 18: no longer an error
102//   b.Value();             // 20
103//   d = 10;                // error: d is already defined.
104//
105// Label objects' lifetimes are unconstrained: notice that, in the
106// above example, even though a and b are only related through c, and
107// c goes out of scope, the assignment to a sets b's value as well. In
108// particular, it's not necessary to ensure that a Label lives beyond
109// Sections that refer to it.
110class Label {
111 public:
112  Label();                      // An undefined label.
113  Label(u_int64_t value);       // A label with a fixed value
114  Label(const Label &value);    // A label equal to another.
115  ~Label();
116
117  // Return this label's value; it must be known.
118  //
119  // Providing this as a cast operator is nifty, but the conversions
120  // happen in unexpected places. In particular, ISO C++ says that
121  // Label + size_t becomes ambigious, because it can't decide whether
122  // to convert the Label to a u_int64_t and then to a size_t, or use
123  // the overloaded operator that returns a new label, even though the
124  // former could fail if the label is not yet defined and the latter won't.
125  u_int64_t Value() const;
126
127  Label &operator=(u_int64_t value);
128  Label &operator=(const Label &value);
129  Label operator+(u_int64_t addend) const;
130  Label operator-(u_int64_t subtrahend) const;
131  u_int64_t operator-(const Label &subtrahend) const;
132
133  // We could also provide == and != that work on undefined, but
134  // related, labels.
135
136  // Return true if this label's value is known. If VALUE_P is given,
137  // set *VALUE_P to the known value if returning true.
138  bool IsKnownConstant(u_int64_t *value_p = NULL) const;
139
140  // Return true if the offset from LABEL to this label is known. If
141  // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
142  //
143  // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m',
144  // except that it also returns a value indicating whether the
145  // subtraction is possible given what we currently know of l and m.
146  // It can be possible even if we don't know l and m's values. For
147  // example:
148  // 
149  //   Label l, m;
150  //   m = l + 10;
151  //   l.IsKnownConstant();             // false
152  //   m.IsKnownConstant();             // false
153  //   u_int64_t d;                     
154  //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10.
155  //   l-m                              // -10
156  //   m-l                              // 10
157  //   m.Value()                        // error: m's value is not known
158  bool IsKnownOffsetFrom(const Label &label, u_int64_t *offset_p = NULL) const;
159
160 private:
161  // A label's value, or if that is not yet known, how the value is
162  // related to other labels' values. A binding may be:
163  // - a known constant,
164  // - constrained to be equal to some other binding plus a constant, or
165  // - unconstrained, and free to take on any value.
166  //
167  // Many labels may point to a single binding, and each binding may
168  // refer to another, so bindings and labels form trees whose leaves
169  // are labels, whose interior nodes (and roots) are bindings, and
170  // where links point from children to parents. Bindings are
171  // reference counted, allowing labels to be lightweight, copyable,
172  // assignable, placed in containers, and so on.
173  class Binding {
174   public:
175    Binding();
176    Binding(u_int64_t addend);
177    ~Binding();
178
179    // Increment our reference count.
180    void Acquire() { reference_count_++; };
181    // Decrement our reference count, and return true if it is zero.
182    bool Release() { return --reference_count_ == 0; }
183
184    // Set this binding to be equal to BINDING + ADDEND. If BINDING is
185    // NULL, then set this binding to the known constant ADDEND.
186    // Update every binding on this binding's chain to point directly
187    // to BINDING, or to be a constant, with addends adjusted
188    // appropriately.
189    void Set(Binding *binding, u_int64_t value);
190
191    // Return what we know about the value of this binding.
192    // - If this binding's value is a known constant, set BASE to
193    //   NULL, and set ADDEND to its value.
194    // - If this binding is not a known constant but related to other
195    //   bindings, set BASE to the binding at the end of the relation
196    //   chain (which will always be unconstrained), and set ADDEND to the
197    //   value to add to that binding's value to get this binding's
198    //   value.
199    // - If this binding is unconstrained, set BASE to this, and leave
200    //   ADDEND unchanged.
201    void Get(Binding **base, u_int64_t *addend);
202
203   private:
204    // There are three cases:
205    //
206    // - A binding representing a known constant value has base_ NULL,
207    //   and addend_ equal to the value.
208    //
209    // - A binding representing a completely unconstrained value has
210    //   base_ pointing to this; addend_ is unused.
211    //
212    // - A binding whose value is related to some other binding's
213    //   value has base_ pointing to that other binding, and addend_
214    //   set to the amount to add to that binding's value to get this
215    //   binding's value. We only represent relationships of the form
216    //   x = y+c.
217    //
218    // Thus, the bind_ links form a chain terminating in either a
219    // known constant value or a completely unconstrained value. Most
220    // operations on bindings do path compression: they change every
221    // binding on the chain to point directly to the final value,
222    // adjusting addends as appropriate.
223    Binding *base_;
224    u_int64_t addend_;
225
226    // The number of Labels and Bindings pointing to this binding.
227    // (When a binding points to itself, indicating a completely
228    // unconstrained binding, that doesn't count as a reference.)
229    int reference_count_;
230  };
231
232  // This label's value.
233  Binding *value_;
234};
235
236inline Label operator+(u_int64_t a, const Label &l) { return l + a; }
237// Note that int-Label isn't defined, as negating a Label is not an
238// operation we support.
239
// Conventions for representing larger numbers as sequences of bytes.
//
// This stays a plain (unscoped) enum so that the enumerators remain
// visible directly in the enclosing namespace. The last enumerator has
// no trailing comma, which pre-C++11 compilers reject.
enum Endianness {
  kBigEndian,        // Big-endian: the most significant byte comes first.
  kLittleEndian,     // Little-endian: the least significant byte comes first.
  kUnsetEndian       // Used internally.
};
246 
247// A section is a sequence of bytes, constructed by appending bytes
248// to the end. Sections have a convenient and flexible set of member
249// functions for appending data in various formats: big-endian and
250// little-endian signed and unsigned values of different sizes;
251// LEB128 and ULEB128 values (see below), and raw blocks of bytes.
252//
253// If you need to append a value to a section that is not convenient
254// to compute immediately, you can create a label, append the
255// label's value to the section, and then set the label's value
256// later, when it's convenient to do so. Once a label's value is
257// known, the section class takes care of updating all previously
258// appended references to it.
259//
260// Once all the labels to which a section refers have had their
261// values determined, you can get a copy of the section's contents
262// as a string.
263//
264// Note that there is no specified "start of section" label. This is
265// because there are typically several different meanings for "the
266// start of a section": the offset of the section within an object
267// file, the address in memory at which the section's content appear,
268// and so on. It's up to the code that uses the Section class to 
269// keep track of these explicitly, as they depend on the application.
270class Section {
271 public:
272  Section(Endianness endianness = kUnsetEndian)
273      : endianness_(endianness) { };
274  ~Section() { };
275
276  // Set the default endianness of this section to ENDIANNESS. This
277  // sets the behavior of the D<N> appending functions. If the
278  // assembler's default endianness was set, this is the 
279  void set_endianness(Endianness endianness) {
280    endianness_ = endianness;
281  }
282
283  // Return the default endianness of this section.
284  Endianness endianness() const { return endianness_; }
285
286  // Append the SIZE bytes at DATA or the contents of STRING to the
287  // end of this section. Return a reference to this section.
288  Section &Append(const u_int8_t *data, size_t size) {
289    contents_.append(reinterpret_cast<const char *>(data), size);
290    return *this;
291  };
292  Section &Append(const string &data) {
293    contents_.append(data);
294    return *this;
295  };
296
297  // Append SIZE copies of BYTE to the end of this section. Return a
298  // reference to this section.
299  Section &Append(size_t size, u_int8_t byte) {
300    contents_.append(size, (char) byte);
301    return *this;
302  }
303      
304  // Append NUMBER to this section. ENDIANNESS is the endianness to
305  // use to write the number. SIZE is the length of the number in
306  // bytes. Return a reference to this section.
307  Section &Append(Endianness endianness, size_t size, u_int64_t number);
308  Section &Append(Endianness endianness, size_t size, const Label &label);
309
310  // Append SECTION to the end of this section. The labels SECTION
311  // refers to need not be defined yet.
312  //
313  // Note that this has no effect on any Labels' values, or on
314  // SECTION. If placing SECTION within 'this' provides new
315  // constraints on existing labels' values, then it's up to the
316  // caller to fiddle with those labels as needed.
317  Section &Append(const Section &section);
318
319  // Append the contents of DATA as a series of bytes terminated by
320  // a NULL character.
321  Section &AppendCString(const string &data) {
322    Append(data);
323    contents_ += '\0';
324    return *this;
325  }
326
327  // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes
328  // long, pad with '\0' characters.
329  Section &AppendCString(const string &data, size_t size) {
330    contents_.append(data, 0, size);
331    if (data.size() < size)
332      Append(size - data.size(), 0);
333    return *this;
334  }
335
336  // Append VALUE or LABEL to this section, with the given bit width and
337  // endianness. Return a reference to this section.
338  //
339  // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
340  // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
341  //                        'B' (big-endian, most significant byte first), or
342  //                        'D' (default, the section's default endianness)
343  // <BITWIDTH> is 8, 16, 32, or 64.
344  //
345  // Since endianness doesn't matter for a single byte, all the
346  // <BITWIDTH>=8 functions are equivalent.
347  //
348  // These can be used to write both signed and unsigned values, as
349  // the compiler will properly sign-extend a signed value before
350  // passing it to the function, at which point the function's
351  // behavior is the same either way.
352  Section &L8(u_int8_t value) { contents_ += value; return *this; }
353  Section &B8(u_int8_t value) { contents_ += value; return *this; }
354  Section &D8(u_int8_t value) { contents_ += value; return *this; }
355  Section &L16(u_int16_t), &L32(u_int32_t), &L64(u_int64_t),
356          &B16(u_int16_t), &B32(u_int32_t), &B64(u_int64_t),
357          &D16(u_int16_t), &D32(u_int32_t), &D64(u_int64_t);
358  Section &L8(const Label &label),  &L16(const Label &label),
359          &L32(const Label &label), &L64(const Label &label),
360          &B8(const Label &label),  &B16(const Label &label),
361          &B32(const Label &label), &B64(const Label &label),
362          &D8(const Label &label),  &D16(const Label &label),
363          &D32(const Label &label), &D64(const Label &label);
364
365  // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
366  // 
367  // The signed LEB128 representation of an integer N is a variable
368  // number of bytes:
369  //
370  // - If N is between -0x40 and 0x3f, then its signed LEB128
371  //   representation is a single byte whose value is N.
372  // 
373  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
374  //   0x80, followed by the signed LEB128 representation of N / 128,
375  //   rounded towards negative infinity.
376  //
377  // In other words, we break VALUE into groups of seven bits, put
378  // them in little-endian order, and then write them as eight-bit
379  // bytes with the high bit on all but the last.
380  //
381  // Note that VALUE cannot be a Label (we would have to implement
382  // relaxation).
383  Section &LEB128(long long value);
384
385  // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
386  // 
387  // The unsigned LEB128 representation of an integer N is a variable
388  // number of bytes:
389  //
390  // - If N is between 0 and 0x7f, then its unsigned LEB128
391  //   representation is a single byte whose value is N.
392  // 
393  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
394  //   0x80, followed by the unsigned LEB128 representation of N /
395  //   128, rounded towards negative infinity.
396  //
397  // Note that VALUE cannot be a Label (we would have to implement
398  // relaxation).
399  Section &ULEB128(u_int64_t value);
400
401  // Jump to the next location aligned on an ALIGNMENT-byte boundary,
402  // relative to the start of the section. Fill the gap with PAD_BYTE.
403  // ALIGNMENT must be a power of two. Return a reference to this
404  // section.
405  Section &Align(size_t alignment, u_int8_t pad_byte = 0);
406
407  // Clear the contents of this section.
408  void Clear();
409
410  // Return the current size of the section.
411  size_t Size() const { return contents_.size(); }
412
413  // Return a label representing the start of the section.
414  // 
415  // It is up to the user whether this label represents the section's
416  // position in an object file, the section's address in memory, or
417  // what have you; some applications may need both, in which case
418  // this simple-minded interface won't be enough. This class only
419  // provides a single start label, for use with the Here and Mark
420  // member functions.
421  //
422  // Ideally, we'd provide this in a subclass that actually knows more
423  // about the application at hand and can provide an appropriate
424  // collection of start labels. But then the appending member
425  // functions like Append and D32 would return a reference to the
426  // base class, not the derived class, and the chaining won't work.
427  // Since the only value here is in pretty notation, that's a fatal
428  // flaw.
429  Label start() const { return start_; }
430
431  // Return a label representing the point at which the next Appended
432  // item will appear in the section, relative to start().
433  Label Here() const { return start_ + Size(); }
434
435  // Set *LABEL to Here, and return a reference to this section.
436  Section &Mark(Label *label) { *label = Here(); return *this; }
437
438  // If there are no undefined label references left in this
439  // section, set CONTENTS to the contents of this section, as a
440  // string, and clear this section. Return true on success, or false
441  // if there were still undefined labels.
442  bool GetContents(string *contents);
443
444 private:
445  // Used internally. A reference to a label's value.
446  struct Reference {
447    Reference(size_t set_offset, Endianness set_endianness,  size_t set_size,
448              const Label &set_label)
449        : offset(set_offset), endianness(set_endianness), size(set_size),
450          label(set_label) { }
451      
452    // The offset of the reference within the section.
453    size_t offset;
454
455    // The endianness of the reference.
456    Endianness endianness;
457
458    // The size of the reference.
459    size_t size;
460
461    // The label to which this is a reference.
462    Label label;
463  };
464
465  // The default endianness of this section.
466  Endianness endianness_;
467
468  // The contents of the section.
469  string contents_;
470  
471  // References to labels within those contents.
472  vector<Reference> references_;
473
474  // A label referring to the beginning of the section.
475  Label start_;
476};
477
478}  // namespace test_assembler
479}  // namespace google_breakpad
480
481#endif  // PROCESSOR_TEST_ASSEMBLER_H_