PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/extensions/hphp/runtime/base/string_data.h

https://bitbucket.org/asuhan/happy/
C Header | 387 lines | 246 code | 40 blank | 101 comment | 26 complexity | 76b484ab8d77477f7575887dabae5433 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #ifndef __HPHP_STRING_DATA_H__
  17. #define __HPHP_STRING_DATA_H__
  18. #include <runtime/base/types.h>
  19. #include <runtime/base/util/countable.h>
  20. #include <runtime/base/memory/smart_allocator.h>
  21. #include <runtime/base/macros.h>
  22. #include <runtime/base/bstring.h>
  23. #include <util/hash.h>
  24. #include <runtime/base/util/exceptions.h>
  25. namespace HPHP {
  26. class SharedVariant;
  27. class Array;
  28. class String;
  29. ///////////////////////////////////////////////////////////////////////////////
  /**
   * A Slice is a compact way to refer to an extent of array elements: it
   * holds only a pointer and a length and declares no destructor, so it never
   * frees what it points at. This type is designed to be passed around by
   * value.
   *
   * The member functions and the nested iterator typedefs follow the
   * Boost.Range "member functions and nested types" convention, so a Slice
   * can be handed to range-based algorithms as well as used directly.
   */
  template <class T>
  struct Slice {
    // Nested types that range/container-style generic code looks up.
    // begin()/end() return T* even on a const Slice, so iterator and
    // const_iterator are deliberately the same type.
    typedef T value_type;
    typedef T* iterator;
    typedef T* const_iterator;
    typedef uint32_t size_type;

    T* ptr;       // pointer to the first element, not necessarily \0 terminated
    uint32_t len; // number of elements (bytes for the char typedefs below),
                  // not counting a possible \0

    // len is expected to be non-negative: it is stored as uint32_t, so a
    // negative argument would wrap around to a very large length.
    Slice(T* ptr, int len) : ptr(ptr), len(static_cast<uint32_t>(len)) {}

    // Half-open extent [begin(), end()).
    T* begin() const { return ptr; }
    T* end() const { return ptr + len; }

    uint32_t size() const { return len; }
    bool empty() const { return len == 0; }
  };

  // Read-only and writable views over character data.
  typedef Slice<const char> StringSlice;
  typedef Slice<char> MutableSlice;
  46. // const char* points to a string which must remain valid for the lifetime
  47. // of the StringData. It is fragile to rely on StringData.data() returning
  48. // the same pointer after construction -- this invariant will probably be
  49. // deprecated to enable copying of small strings.
      // Tag type: passing AttachLiteral selects the StringData constructor
      // overloads that forward to initLiteral().
  50. enum AttachLiteralMode { AttachLiteral };
  51. // DEPRECATED. const char* points to malloc'd memory that will be freed
  52. // sometime after the StringData is constructed (in case of mutation),
  53. // but no later than when ~StringData is called. Really, the new StringData
  54. // owns data from the point of construction -- see AttachStringMode.
  55. // This is deprecated because callers should assume the buffer can be
  56. // freed immediately when a String[Data] is constructed.
      // Tag type: passing AttachDeprecated selects the StringData constructor
      // overloads that forward to initAttachDeprecated().
  57. enum AttachDeprecatedMode { AttachDeprecated };
  58. // Aggressively copy small strings and free the passed-in buffer immediately;
  59. // otherwise keep the buffer for long strings, and free it when the string
  60. // is mutated or released.
      // Tag type: passing AttachString selects the StringData constructor
      // overloads that forward to initAttach().
  61. enum AttachStringMode { AttachString };
  62. // const char* points to client-owned memory, StringData will copy it
  63. // at construct-time.
      // Tag type: passing CopyString selects the StringData constructor
      // overloads that forward to initCopy().
  64. enum CopyStringMode { CopyString };
  65. /**
  66. * Inner data class for String type. As a coding guideline, String and
  67. * StringOffset classes should delegate real string work to this class,
  68. * although both String and StringOffset classes are more than welcome to test
  69. * nullability to avoid calling this class.
  70. *
  71. * A StringData can be in two formats, small or big. Small format
  72. * stores the string inline by overlapping with some fields, as follows:
  73. *
  74. * small: m_data:8, _count:4, m_len:4, m_hash:4, m_small:44
  75. * big: m_data:8, _count:4, m_len:4, m_hash:4, junk[28], shared:8, cap:8
  76. *
  77. * If the format is IsLiteral or IsShared, we always use the "big" layout.
  78. * Resemblances to fbstring are not accidental.
      *
      * The storage format (see enum Format) is read back from m_big.cap:
      * format() masks it with IsMask (the two most significant bits) and
      * bigCap() masks it with MaxSize to recover the big-format capacity.
  79. */
  80. class StringData {
  81. StringData(const StringData&); // disable copying
  82. StringData& operator=(const StringData&);
      // Storage format tag. The values occupy the two most significant bits of
      // the 64-bit m_big.cap field and are extracted by format() using IsMask.
  83. enum Format {
  84. IsSmall = 0, // short str overlaps m_big
  85. IsLiteral = 0x4000000000000000, // literal string
  86. IsShared = 0x8000000000000000, // shared memory string
      // NOTE(review): this comment used to say "m_big.data", but m_big declares
      // no data member; the only data pointer in this class is m_data.
  87. IsMalloc = 0xC000000000000000, // m_data points to malloc'd memory
  88. IsMask = 0xC000000000000000
  89. };
  90. public:
      // Capacity reported by capacity() (and the length of mutableSlice()) for
      // small-format strings. It is one less than the 44 bytes documented for
      // m_small; presumably the last byte is kept for the \0 -- confirm.
  91. const static uint32_t MaxSmallSize = 43;
      // Mask applied to m_big.cap by bigCap() to obtain the big-format capacity.
  92. const static uint32_t MaxSize = 0x3fffffff; // 2^30-1
  93. /**
  94. * StringData does not formally derive from Countable, however it has a
  95. * _count field and implements all of the methods from Countable.
  96. */
  97. IMPLEMENT_COUNTABLE_METHODS_NO_STATIC
      // Overwrites the reference count directly.
  98. void setRefCount(int32_t n) { _count = n;}
  99. /* Only call preCompute() and setStatic() in a thread-neutral context! */
  100. void preCompute() const;
  101. void setStatic() const;
       // A string is static when its count equals the RefCountStaticValue sentinel.
  102. bool isStatic() const { return _count == RefCountStaticValue; }
  103. /**
  104. * Get the wrapped SharedVariant.
       * Returns m_big.shared when the format is IsShared, NULL otherwise.
  105. */
  106. SharedVariant *getSharedVariant() const {
  107. if (isShared()) return m_big.shared;
  108. return NULL;
  109. }
  110. static StringData *Escalate(StringData *in);
  111. /**
  112. * When we have static StringData in SharedStore, we should avoid directly
  113. * deleting the StringData pointer, but rather call destruct().
       * destruct() deletes this object unless isStatic() is true.
  114. */
  115. void destruct() const { if (!isStatic()) delete this; }
       // Zero-initializes every field. With m_big.cap == 0 the format bits read
       // as IsSmall.
  116. StringData() : m_data(0), _count(0), m_len(0), m_hash(0) {
  117. m_big.shared = 0;
  118. m_big.cap = 0;
  119. }
  120. /**
  121. * Different ways of constructing StringData. The default constructor
  122. * above is actually only for SmartAllocator to pre-allocate the objects.
       *
       * Each constructor forwards to the init*() helper matching its tag
       * argument. The untagged const char* constructor behaves like
       * AttachLiteralMode: both call initLiteral(). The overloads without a
       * len argument forward to the one-argument helpers.
  123. */
  124. StringData(const char* data) {
  125. initLiteral(data);
  126. }
  127. StringData(const char *data, AttachLiteralMode) {
  128. initLiteral(data);
  129. }
  130. StringData(const char *data, AttachDeprecatedMode) {
  131. initAttachDeprecated(data);
  132. }
  133. StringData(const char *data, AttachStringMode) {
  134. initAttach(data);
  135. }
  136. StringData(const char *data, CopyStringMode) {
  137. initCopy(data);
  138. }
  139. StringData(const char *data, int len, AttachLiteralMode) {
  140. initLiteral(data, len);
  141. }
  142. StringData(const char* data, int len, AttachDeprecatedMode) {
  143. initAttachDeprecated(data, len);
  144. }
  145. StringData(const char* data, int len, AttachStringMode) {
  146. initAttach(data, len);
  147. }
  148. StringData(const char* data, int len, CopyStringMode) {
  149. initCopy(data, len);
  150. }
       // Copies the contents of another StringData via its slice().
  151. StringData(const StringData* s, CopyStringMode) {
  152. StringSlice r = s->slice();
  153. initCopy(r.ptr, r.len);
  154. }
  155. StringData(StringSlice r1, CopyStringMode) {
  156. initCopy(r1.ptr, r1.len);
  157. }
  158. // Create a new string by concatenating two existing strings.
       // The const char* overloads measure the second operand with strlen(), so
       // it must be \0 terminated.
  159. StringData(const StringData* s1, const StringData* s2) {
  160. initConcat(s1->slice(), s2->slice());
  161. }
  162. StringData(const StringData* s1, StringSlice s2) {
  163. initConcat(s1->slice(), s2);
  164. }
  165. StringData(const StringData* s1, const char* lit2) {
  166. initConcat(s1->slice(), StringSlice(lit2, strlen(lit2)));
  167. }
  168. StringData(StringSlice s1, StringSlice s2) {
  169. initConcat(s1, s2);
  170. }
  171. StringData(StringSlice s1, const char* lit2) {
  172. initConcat(s1, StringSlice(lit2, strlen(lit2)));
  173. }
  174. /**
  175. * Create a new empty string big enough to hold the requested size,
  176. * not counting the \0 terminator.
  177. */
  178. StringData(int reserve);
  179. public:
       // Slice overload; forwards to append(const char*, int).
  180. void append(StringSlice r) { append(r.ptr, r.len); }
  181. void append(const char *s, int len);
  182. StringData *copy(bool sharedMemory = false) const;
       // Writable view of the whole buffer: m_small with MaxSmallSize for small
       // strings, otherwise m_data with bigCap(). Asserts the string is not
       // immutable.
  183. MutableSlice mutableSlice() {
  184. ASSERT(!isImmutable());
  185. return isSmall() ? MutableSlice(m_small, MaxSmallSize) :
  186. MutableSlice(m_data, bigCap());
  187. }
       // Sets the length to len and writes a \0 terminator at m_data[len].
       // Asserts 0 <= len <= capacity() and that the string is not immutable.
  188. void setSize(int len) {
  189. ASSERT(len >= 0 && len <= capacity() && !isImmutable());
  190. m_data[len] = 0;
  191. m_len = len;
  192. }
  193. ~StringData() { releaseData(); }
  194. /**
  195. * Informational.
  196. */
  197. const char *data() const {
  198. return rawdata();
  199. }
  200. // This method should only be used internally by the String class.
  201. const char *dataIgnoreTaint() const { return rawdata(); }
  202. int size() const { return m_len; }
       // Read-only view of the current contents (m_data, m_len).
  203. StringSlice slice() const {
  204. return StringSlice(m_data, m_len);
  205. }
  206. bool empty() const { return size() == 0;}
  207. bool isLiteral() const { return format() == IsLiteral; }
  208. bool isShared() const { return format() == IsShared; }
  209. bool isSmall() const { return format() == IsSmall; }
       // Literal, shared and static strings are all reported as immutable.
  210. bool isImmutable() const {
  211. Format f = format();
  212. return f == IsLiteral || f == IsShared || isStatic();
  213. }
  214. DataType isNumericWithVal(int64 &lval, double &dval, int allow_errors) const;
  215. bool isNumeric() const;
  216. bool isInteger() const;
       // Delegates to is_strictly_integer() on the string contents, except that
       // a static string whose m_hash is negative returns false immediately.
       // NOTE(review): presumably preCompute() records "not an integer" in the
       // sign bit of m_hash -- confirm against the implementation file.
  217. bool isStrictlyInteger(int64 &res) const {
  218. if (isStatic() && m_hash < 0) return false;
  219. StringSlice s = slice();
  220. return is_strictly_integer(s.ptr, s.len, res);
  221. }
       // True only for the exact one-character string "0".
  222. bool isZero() const { return size() == 1 && rawdata()[0] == '0'; }
  223. bool isValidVariableName() const;
  224. /**
  225. * Mutations.
       * Every set() overload except the CStrRef/CVarRef-keyed ones converts its
       * key to an offset and forwards to setChar(); a bool key maps to 1 or 0.
  226. */
  227. StringData *getChar(int offset) const;
  228. void setChar(int offset, CStrRef substring);
  229. void setChar(int offset, char ch);
  230. void inc();
  231. void negate();
  232. void set(bool key, CStrRef v) { setChar(key ? 1 : 0, v); }
  233. void set(char key, CStrRef v) { setChar(key, v); }
  234. void set(short key, CStrRef v) { setChar(key, v); }
  235. void set(int key, CStrRef v) { setChar(key, v); }
  236. void set(int64 key, CStrRef v) { setChar(key, v); }
  237. void set(double key, CStrRef v) { setChar((int64)key, v); }
  238. void set(CStrRef key, CStrRef v);
  239. void set(CVarRef key, CStrRef v);
  240. /**
  241. * Type conversion functions.
       * toByte/toInt16/toInt32 return toInt64(base) implicitly converted to the
       * narrower return type.
  242. */
  243. bool toBoolean() const;
  244. char toByte (int base = 10) const { return toInt64(base);}
  245. short toInt16 (int base = 10) const { return toInt64(base);}
  246. int toInt32 (int base = 10) const { return toInt64(base);}
  247. int64 toInt64 (int base = 10) const;
  248. double toDouble () const;
  249. DataType toNumeric(int64 &lval, double &dval) const;
       // Returns the stored hash bits (m_hash & STRHASH_MASK) without computing
       // anything. Asserts the string is not shared.
  250. strhash_t getPrecomputedHash() const {
  251. ASSERT(!isShared());
  252. return m_hash & STRHASH_MASK;
  253. }
       // Returns the stored hash bits when they are non-zero; otherwise falls
       // back to hashHelper().
  254. strhash_t hash() const {
  255. strhash_t h = m_hash & STRHASH_MASK;
  256. return h ? h : hashHelper();
  257. }
       // Byte-wise equality: lengths must match, identical pointers short-circuit
       // to true, otherwise the contents are compared with memcmp().
  258. bool same(const StringData *s) const {
  259. ASSERT(s);
  260. if (m_len != s->m_len) return false;
  261. const char* s1 = rawdata();
  262. const char* s2 = s->rawdata();
  263. if (s1 == s2) return true;
  264. return !memcmp(s1, s2, m_len);
  265. }
       // Same structure as same(), but the contents are compared with
       // bstrcasecmp() instead of memcmp().
  266. bool isame(const StringData *s) const {
  267. ASSERT(s);
  268. if (m_len != s->m_len) return false;
  269. const char* s1 = rawdata();
  270. const char* s2 = s->rawdata();
  271. if (s1 == s2) return true;
  272. return !bstrcasecmp(s1, m_len, s2, m_len);
  273. }
  274. /**
  275. * Comparisons.
  276. */
  277. int compare(const StringData *v2) const;
  278. /**
  279. * Memory allocator methods.
  280. */
  281. DECLARE_SMART_ALLOCATION(StringData, SmartAllocatorImpl::NeedSweep);
       // sweep() performs the same releaseData() call as the destructor.
  282. void sweep() { releaseData();}
  283. void dump() const;
  284. std::string toCPPString() const;
  285. static StringData *GetStaticString(const StringData *str);
  286. static StringData *GetStaticString(const std::string &str);
  287. static StringData *GetStaticString(const char *str);
  288. static StringData *GetStaticString(char c);
  289. /**
  290. * The order of the data members is significant. The _count field must
  291. * be exactly FAST_REFCOUNT_OFFSET bytes from the beginning of the object.
  292. */
  293. private:
       // Const and mutable views of the same data pointer.
  294. union {
  295. const char* m_cdata;
  296. char* m_data;
  297. };
  298. protected:
  299. mutable int32_t _count;
  300. private:
  301. // m_len and m_data are not overlapped with small strings because
  302. // they are accessed so frequently that even the inline branch
  303. // measurably slows things down. It's worse for m_len than m_data.
  304. // If frequent callers are refactored to use slice() then we could
  305. // revisit this decision.
  306. uint32_t m_len;
  307. mutable strhash_t m_hash; // precompute hash codes for static strings
       // Big-format bookkeeping overlaid with the inline small-string buffer.
       // Both are packed, and m_small is sized to cover m_big exactly.
  308. union __attribute__((__packed__)) {
  309. struct __attribute__((__packed__)) {
  310. char junk[28];
  311. SharedVariant *shared;
  312. uint64_t cap;
  313. } m_big;
  314. char m_small[sizeof(m_big)];
  315. };
  316. private:
  317. /**
  318. * Helpers.
  319. */
  320. void initLiteral(const char* data);
  321. void initAttachDeprecated(const char* data);
  322. void initAttach(const char* data);
  323. void initCopy(const char* data);
  324. void initLiteral(const char* data, int len);
  325. void initAttachDeprecated(const char* data, int len);
  326. void initAttach(const char* data, int len);
  327. void initCopy(const char* data, int len);
  328. void initConcat(StringSlice r1, StringSlice r2);
  329. void releaseData();
  330. int numericCompare(const StringData *v2) const;
  331. void escalate(); // change to malloc-ed string
  332. void attach(char *data, int len);
  333. strhash_t hashHelper() const NEVER_INLINE;
  334. bool checkSane() const;
  335. const char* rawdata() const { return m_data; }
       // Extracts the format tag from the top two bits of m_big.cap.
  336. Format format() const {
  337. return Format(m_big.cap & IsMask);
  338. }
       // Capacity stored for non-small strings: the low 30 bits of m_big.cap.
  339. int bigCap() const {
  340. ASSERT(!isSmall());
  341. return m_big.cap & MaxSize;
  342. }
       // MaxSmallSize for small strings, bigCap() for every other format.
  343. int capacity() const { return isSmall() ? MaxSmallSize : bigCap(); }
  344. };
  345. ///////////////////////////////////////////////////////////////////////////////
  346. }
  347. #endif // __HPHP_STRING_DATA_H__