PageRenderTime 82ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/SDK/core/Remotery.c

https://gitlab.com/_Infinity_/infinity
C | 5720 lines | 3589 code | 1299 blank | 832 comment | 907 complexity | 52f5f7e2b8cf777215495d273d64de3d MD5 | raw file
Possible License(s): BSD-3-Clause
  1. //
  2. // Copyright 2014 Celtoys Ltd
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. /*
  17. @Contents:
  18. @DEPS: External Dependencies
  19. @TIMERS: Platform-specific timers
  20. @TLS: Thread-Local Storage
  21. @ATOMIC: Atomic Operations
  22. @VMBUFFER: Mirror Buffer using Virtual Memory for auto-wrap
  23. @THREADS: Threads
  24. @SAFEC: Safe C Library excerpts
  25. @OBJALLOC: Reusable Object Allocator
  26. @DYNBUF: Dynamic Buffer
  27. @SOCKETS: Sockets TCP/IP Wrapper
  28. @SHA1: SHA-1 Cryptographic Hash Function
  29. @BASE64: Base-64 encoder
  30. @MURMURHASH: Murmur-Hash 3
  31. @WEBSOCKETS: WebSockets
  32. @MESSAGEQ: Multiple producer, single consumer message queue
  33. @NETWORK: Network Server
  34. @JSON: Basic, text-based JSON serialisation
  35. @SAMPLE: Base Sample Description (CPU by default)
  36. @SAMPLETREE: A tree of samples with their allocator
  37. @TSAMPLER: Per-Thread Sampler
  38. @REMOTERY: Remotery
  39. @CUDA: CUDA event sampling
  40. @D3D11: Direct3D 11 event sampling
  41. @OPENGL: OpenGL event sampling
  42. */
  43. #include "Remotery.h"
  44. #pragma comment(lib, "ws2_32.lib")
  45. #ifdef RMT_ENABLED
  46. /*
  47. ------------------------------------------------------------------------------------------------------------------------
  48. ------------------------------------------------------------------------------------------------------------------------
  49. @DEPS: External Dependencies
  50. ------------------------------------------------------------------------------------------------------------------------
  51. ------------------------------------------------------------------------------------------------------------------------
  52. */
  53. //
  54. // Required CRT dependencies
  55. //
  56. #ifdef RMT_USE_TINYCRT
  57. #include <TinyCRT/TinyCRT.h>
  58. #include <TinyCRT/TinyWinsock.h>
  59. #define CreateFileMapping CreateFileMappingA
  60. #else
  61. #ifdef RMT_PLATFORM_MACOS
  62. #include <mach/mach_time.h>
  63. #include <mach/vm_map.h>
  64. #include <mach/mach.h>
  65. #include <sys/time.h>
  66. #else
  67. #include <malloc.h>
  68. #endif
  69. #include <assert.h>
  70. #ifdef RMT_PLATFORM_WINDOWS
  71. #include <winsock2.h>
  72. #include <intrin.h>
  73. #undef min
  74. #undef max
  75. #endif
  76. #ifdef RMT_PLATFORM_LINUX
  77. #include <time.h>
  78. #endif
  79. #if defined(RMT_PLATFORM_POSIX)
  80. #include <stdlib.h>
  81. #include <pthread.h>
  82. #include <unistd.h>
  83. #include <string.h>
  84. #include <sys/socket.h>
  85. #include <sys/mman.h>
  86. #include <netinet/in.h>
  87. #include <fcntl.h>
  88. #include <errno.h>
  89. #endif
  90. #endif
  91. #ifdef __cplusplus
  92. #define RMT_UNREFERENCED_PARAMETER( x ) ( &reinterpret_cast< const int& >( x ) )
  93. #else
  94. #define RMT_UNREFERENCED_PARAMETER( x ) ( (void)x )
  95. #endif
  96. #ifdef RMT_USE_CUDA
  97. #include <cuda.h>
  98. #endif
// Minimum of two signed 64-bit values.
// NOTE(review): the return type is rmtU64 while the comparison is on signed
// rmtS64 inputs -- a negative winner is implicitly converted to a huge
// unsigned value at the call site. Presumably callers only pass non-negative
// values; confirm before relying on this for signed data.
rmtU64 min(rmtS64 a, rmtS64 b)
{
    return a < b ? a : b;
}
// Maximum of two signed 64-bit values.
// NOTE(review): same signed/unsigned mismatch as min() above -- rmtU64 return
// with rmtS64 operands; confirm callers never feed negative values.
rmtU64 max(rmtS64 a, rmtS64 b)
{
    return a > b ? a : b;
}
// Config
// TODO: Expose to user

// How long to sleep between server updates, hopefully trying to give
// a little CPU back to other threads.
static const rmtU32 MS_SLEEP_BETWEEN_SERVER_UPDATES = 10;

// Size in bytes of the mirrored message queue.
// Will be rounded to page granularity of 64k
static const rmtU32 MESSAGE_QUEUE_SIZE_BYTES = 64 * 1024;

// If the user continuously pushes to the message queue, the server network
// code won't get a chance to update unless there's an upper-limit on how
// many messages can be consumed per loop.
static const rmtU32 MAX_NB_MESSAGES_PER_UPDATE = 100;
  118. /*
  119. ------------------------------------------------------------------------------------------------------------------------
  120. ------------------------------------------------------------------------------------------------------------------------
  121. @TIMERS: Platform-specific timers
  122. ------------------------------------------------------------------------------------------------------------------------
  123. ------------------------------------------------------------------------------------------------------------------------
  124. */
  125. //
  126. // Get millisecond timer value that has only one guarantee: multiple calls are consistently comparable.
  127. // On some platforms, even though this returns milliseconds, the timer may be far less accurate.
  128. //
// Millisecond timer whose values are only meaningful relative to each other.
static rmtU32 msTimer_Get()
{
#ifdef RMT_PLATFORM_WINDOWS
    // Wraps roughly every 49.7 days; callers must only compare deltas
    return (rmtU32)GetTickCount();
#else
    // NOTE(review): clock() measures CPU time, not wall time, on POSIX --
    // confirm that is acceptable for the callers. Also assumes
    // CLOCKS_PER_SEC >= 1000; otherwise the divisor below is zero.
    clock_t time = clock();
    rmtU32 msTime = (rmtU32) (time / (CLOCKS_PER_SEC / 1000));
    return msTime;
#endif
}
  139. //
  140. // Micro-second accuracy high performance counter
  141. //
// On non-Windows platforms a plain 64-bit integer stands in for LARGE_INTEGER
#ifndef RMT_PLATFORM_WINDOWS
typedef rmtU64 LARGE_INTEGER;
#endif

// State for the microsecond timer: a start offset recorded at init plus a
// scale that converts raw counter ticks to microseconds
typedef struct
{
    LARGE_INTEGER counter_start;
    double counter_scale;
} usTimer;
  150. static void usTimer_Init(usTimer* timer)
  151. {
  152. #if defined(RMT_PLATFORM_WINDOWS)
  153. LARGE_INTEGER performance_frequency;
  154. assert(timer != NULL);
  155. // Calculate the scale from performance counter to microseconds
  156. QueryPerformanceFrequency(&performance_frequency);
  157. timer->counter_scale = 1000000.0 / performance_frequency.QuadPart;
  158. // Record the offset for each read of the counter
  159. QueryPerformanceCounter(&timer->counter_start);
  160. #elif defined(RMT_PLATFORM_MACOS)
  161. mach_timebase_info_data_t nsScale;
  162. mach_timebase_info( &nsScale );
  163. const double ns_per_us = 1.0e3;
  164. timer->counter_scale = (double)(nsScale.numer) / ((double)nsScale.denom * ns_per_us);
  165. timer->counter_start = mach_absolute_time();
  166. #elif defined(RMT_PLATFORM_LINUX)
  167. struct timespec tv;
  168. clock_gettime(CLOCK_REALTIME, &tv);
  169. timer->counter_start = tv.tv_nsec;
  170. #endif
  171. }
  172. static rmtU64 usTimer_Get(usTimer* timer)
  173. {
  174. #if defined(RMT_PLATFORM_WINDOWS)
  175. LARGE_INTEGER performance_count;
  176. assert(timer != NULL);
  177. // Read counter and convert to microseconds
  178. QueryPerformanceCounter(&performance_count);
  179. return (rmtU64)((performance_count.QuadPart - timer->counter_start.QuadPart) * timer->counter_scale);
  180. #elif defined(RMT_PLATFORM_MACOS)
  181. rmtU64 curr_time = mach_absolute_time();
  182. return (rmtU64)((curr_time - timer->counter_start) * timer->counter_scale);
  183. #elif defined(RMT_PLATFORM_LINUX)
  184. struct timespec tv;
  185. clock_gettime(CLOCK_REALTIME, &tv);
  186. return tv.tv_nsec - timer->counter_start;
  187. #endif
  188. }
// Put the calling thread to sleep for approximately time_ms milliseconds.
static void msSleep(rmtU32 time_ms)
{
#ifdef RMT_PLATFORM_WINDOWS
    Sleep(time_ms);
#elif defined(RMT_PLATFORM_POSIX)
    // usleep takes microseconds
    usleep(time_ms * 1000);
#endif
}
  197. /*
  198. ------------------------------------------------------------------------------------------------------------------------
  199. ------------------------------------------------------------------------------------------------------------------------
  200. @TLS: Thread-Local Storage
  201. ------------------------------------------------------------------------------------------------------------------------
  202. ------------------------------------------------------------------------------------------------------------------------
  203. */
// Sentinel for a TLS slot that has not been allocated or failed to allocate
#define TLS_INVALID_HANDLE 0xFFFFFFFF

// Platform TLS handle type: a TLS index on Windows, a pthread key elsewhere
#if defined(RMT_PLATFORM_WINDOWS)
typedef rmtU32 rmtTLS;
#else
typedef pthread_key_t rmtTLS;
#endif
  210. static enum rmtError tlsAlloc(rmtTLS* handle)
  211. {
  212. assert(handle != NULL);
  213. #if defined(RMT_PLATFORM_WINDOWS)
  214. *handle = (rmtTLS)TlsAlloc();
  215. if (*handle == TLS_OUT_OF_INDEXES)
  216. {
  217. *handle = TLS_INVALID_HANDLE;
  218. return RMT_ERROR_TLS_ALLOC_FAIL;
  219. }
  220. #elif defined(RMT_PLATFORM_POSIX)
  221. if (pthread_key_create(handle, NULL) != 0)
  222. {
  223. *handle = TLS_INVALID_HANDLE;
  224. return RMT_ERROR_TLS_ALLOC_FAIL;
  225. }
  226. #endif
  227. return RMT_ERROR_NONE;
  228. }
// Release a TLS slot previously obtained from tlsAlloc.
static void tlsFree(rmtTLS handle)
{
    assert(handle != TLS_INVALID_HANDLE);

#if defined(RMT_PLATFORM_WINDOWS)
    TlsFree(handle);
#elif defined(RMT_PLATFORM_POSIX)
    pthread_key_delete((pthread_key_t)handle);
#endif
}
// Store a per-thread value in the given TLS slot.
static void tlsSet(rmtTLS handle, void* value)
{
    assert(handle != TLS_INVALID_HANDLE);

#if defined(RMT_PLATFORM_WINDOWS)
    TlsSetValue(handle, value);
#elif defined(RMT_PLATFORM_POSIX)
    pthread_setspecific((pthread_key_t)handle, value);
#endif
}
// Read the calling thread's value from the given TLS slot (NULL if never set).
static void* tlsGet(rmtTLS handle)
{
    assert(handle != TLS_INVALID_HANDLE);

#if defined(RMT_PLATFORM_WINDOWS)
    return TlsGetValue(handle);
#elif defined(RMT_PLATFORM_POSIX)
    return pthread_getspecific((pthread_key_t)handle);
#endif
}
  256. /*
  257. ------------------------------------------------------------------------------------------------------------------------
  258. ------------------------------------------------------------------------------------------------------------------------
  259. @ATOMIC: Atomic Operations
  260. ------------------------------------------------------------------------------------------------------------------------
  261. ------------------------------------------------------------------------------------------------------------------------
  262. */
// Atomically replace *val with new_val if *val currently equals old_val.
// Returns RMT_TRUE on success, RMT_FALSE if another thread won the race.
// NOTE(review): old_val/new_val are 'long' (64-bit on LP64 Linux) while *val
// is 32 bits, so values are truncated on the POSIX path -- confirm callers
// only pass 32-bit quantities.
static rmtBool AtomicCompareAndSwap(rmtU32 volatile* val, long old_val, long new_val)
{
#if defined(RMT_PLATFORM_WINDOWS)
    return _InterlockedCompareExchange((long volatile*)val, new_val, old_val) == old_val ? RMT_TRUE : RMT_FALSE;
#elif defined(RMT_PLATFORM_POSIX)
    return __sync_bool_compare_and_swap(val, old_val, new_val) ? RMT_TRUE : RMT_FALSE;
#endif
}
// Pointer-sized compare-and-swap: replace *ptr with new_ptr if it equals
// old_ptr, returning RMT_TRUE on success. Selects the 64-bit intrinsic on
// Win64 so the full pointer is exchanged.
static rmtBool AtomicCompareAndSwapPointer(long* volatile* ptr, long* old_ptr, long* new_ptr)
{
#if defined(RMT_PLATFORM_WINDOWS)
#ifdef _WIN64
    return _InterlockedCompareExchange64((__int64 volatile*)ptr, (__int64)new_ptr, (__int64)old_ptr) == (__int64)old_ptr ? RMT_TRUE : RMT_FALSE;
#else
    return _InterlockedCompareExchange((long volatile*)ptr, (long)new_ptr, (long)old_ptr) == (long)old_ptr ? RMT_TRUE : RMT_FALSE;
#endif
#elif defined(RMT_PLATFORM_POSIX)
    return __sync_bool_compare_and_swap(ptr, old_ptr, new_ptr) ? RMT_TRUE : RMT_FALSE;
#endif
}
//
// NOTE: Does not guarantee a memory barrier
// TODO: Make sure all platforms don't insert a memory barrier as this is only for stats
// Alternatively, add strong/weak memory order equivalents
//
// Atomically add 'add' to *value; the previous value is not returned.
static void AtomicAdd(rmtS32 volatile* value, rmtS32 add)
{
#if defined(RMT_PLATFORM_WINDOWS)
    _InterlockedExchangeAdd((long volatile*)value, (long)add);
#elif defined(RMT_PLATFORM_POSIX)
    __sync_fetch_and_add(value, add);
#endif
}
// Atomically subtract 'sub' from *value.
static void AtomicSub(rmtS32 volatile* value, rmtS32 sub)
{
    // Not all platforms have an implementation so just negate and add
    AtomicAdd(value, -sub);
}
// Compiler read/write fences (windows implementation)
// These only stop the COMPILER reordering reads across the call; no CPU fence
// instruction is issued on either path.
static void ReadFence()
{
#ifdef RMT_PLATFORM_WINDOWS
    _ReadBarrier();
#else
    asm volatile ("" : : : "memory");
#endif
}
// Compiler-only write fence: prevents the compiler reordering writes across
// the call; no CPU fence instruction is issued on either path.
static void WriteFence()
{
#ifdef RMT_PLATFORM_WINDOWS
    _WriteBarrier();
#else
    asm volatile ("" : : : "memory");
#endif
}
// Get a shared value with acquire semantics, ensuring the read is complete
// before the function returns.
// NOTE(review): the acquire ordering relies on the x86 memory model per the
// comment below -- confirm before porting to weakly-ordered CPUs (ARM, POWER).
static void* LoadAcquire(void* volatile const* addr)
{
    // Hardware fence is implicit on x86 so only need the compiler fence
    void* v = *addr;
    ReadFence();
    return v;
}
// Set a shared value with release semantics, ensuring any prior writes
// are complete before the value is set.
// NOTE(review): like LoadAcquire, this relies on x86 ordering -- confirm
// before porting to weakly-ordered CPUs.
static void StoreRelease(void* volatile* addr, void* v)
{
    // Hardware fence is implicit on x86 so only need the compiler fence
    WriteFence();
    *addr = v;
}
  335. /*
  336. ------------------------------------------------------------------------------------------------------------------------
  337. ------------------------------------------------------------------------------------------------------------------------
  338. @VMBUFFER: Mirror Buffer using Virtual Memory for auto-wrap
  339. ------------------------------------------------------------------------------------------------------------------------
  340. ------------------------------------------------------------------------------------------------------------------------
  341. */
// Ring buffer whose second half is a virtual-memory mirror of the first, so a
// contiguous read or write of up to 'size' bytes never has to wrap manually.
typedef struct VirtualMirrorBuffer
{
    // Page-rounded size of the buffer without mirroring
    rmtU32 size;

    // Pointer to the first part of the mirror
    // The second part comes directly after at ptr+size bytes
    rmtU8* ptr;

#ifdef RMT_PLATFORM_WINDOWS
    // File mapping object backing both views
    HANDLE file_map_handle;
#endif
} VirtualMirrorBuffer;
  353. static void VirtualMirrorBuffer_Destroy(VirtualMirrorBuffer* buffer)
  354. {
  355. assert(buffer != 0);
  356. #ifdef RMT_PLATFORM_WINDOWS
  357. if (buffer->file_map_handle != NULL)
  358. {
  359. CloseHandle(buffer->file_map_handle);
  360. buffer->file_map_handle = NULL;
  361. }
  362. #endif
  363. #ifdef RMT_PLATFORM_MACOS
  364. if (buffer->ptr != NULL)
  365. vm_deallocate(mach_task_self(), (vm_address_t)buffer->ptr, buffer->size * 2);
  366. #endif
  367. #ifdef RMT_PLATFORM_LINUX
  368. if (buffer->ptr != NULL)
  369. munmap(buffer->ptr, buffer->size * 2);
  370. #endif
  371. buffer->ptr = NULL;
  372. free(buffer);
  373. }
  374. static enum rmtError VirtualMirrorBuffer_Create(VirtualMirrorBuffer** buffer, rmtU32 size, int nb_attempts)
  375. {
  376. static const rmtU32 k_64 = 64 * 1024;
  377. #ifdef RMT_PLATFORM_LINUX
  378. char path[] = "/dev/shm/ring-buffer-XXXXXX";
  379. int file_descriptor;
  380. #endif
  381. assert(buffer != 0);
  382. // Allocate container
  383. *buffer = (VirtualMirrorBuffer*)malloc(sizeof(VirtualMirrorBuffer));
  384. if (*buffer == 0)
  385. return RMT_ERROR_MALLOC_FAIL;
  386. // Round up to page-granulation; the nearest 64k boundary for now
  387. size = (size + k_64 - 1) / k_64 * k_64;
  388. // Set defaults
  389. (*buffer)->size = size;
  390. (*buffer)->ptr = NULL;
  391. #ifdef RMT_PLATFORM_WINDOWS
  392. (*buffer)->file_map_handle = INVALID_HANDLE_VALUE;
  393. #endif
  394. #ifdef RMT_PLATFORM_WINDOWS
  395. // Windows version based on https://gist.github.com/rygorous/3158316
  396. while (nb_attempts-- > 0)
  397. {
  398. rmtU8* desired_addr;
  399. // Create a file mapping for pointing to its physical address with multiple virtual pages
  400. (*buffer)->file_map_handle = CreateFileMapping(
  401. INVALID_HANDLE_VALUE,
  402. 0,
  403. PAGE_READWRITE,
  404. 0,
  405. size,
  406. 0);
  407. if ((*buffer)->file_map_handle == NULL)
  408. break;
  409. // Reserve two contiguous pages of virtual memory
  410. desired_addr = (rmtU8*)VirtualAlloc(0, size * 2, MEM_RESERVE, PAGE_NOACCESS);
  411. if (desired_addr == NULL)
  412. break;
  413. // Release the range immediately but retain the address for the next sequence of code to
  414. // try and map to it. In the mean-time some other OS thread may come along and allocate this
  415. // address range from underneath us so multiple attempts need to be made.
  416. VirtualFree(desired_addr, 0, MEM_RELEASE);
  417. // Immediately try to point both pages at the file mapping
  418. if (MapViewOfFileEx((*buffer)->file_map_handle, FILE_MAP_ALL_ACCESS, 0, 0, size, desired_addr) == desired_addr &&
  419. MapViewOfFileEx((*buffer)->file_map_handle, FILE_MAP_ALL_ACCESS, 0, 0, size, desired_addr + size) == desired_addr + size)
  420. {
  421. (*buffer)->ptr = desired_addr;
  422. break;
  423. }
  424. // Failed to map the virtual pages; cleanup and try again
  425. CloseHandle((*buffer)->file_map_handle);
  426. (*buffer)->file_map_handle = NULL;
  427. }
  428. #endif
  429. #ifdef RMT_PLATFORM_MACOS
  430. //
  431. // Mac version based on https://github.com/mikeash/MAMirroredQueue
  432. //
  433. // Copyright (c) 2010, Michael Ash
  434. // All rights reserved.
  435. //
  436. // Redistribution and use in source and binary forms, with or without modification, are permitted provided that
  437. // the following conditions are met:
  438. //
  439. // Redistributions of source code must retain the above copyright notice, this list of conditions and the following
  440. // disclaimer.
  441. //
  442. // Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
  443. // following disclaimer in the documentation and/or other materials provided with the distribution.
  444. // Neither the name of Michael Ash nor the names of its contributors may be used to endorse or promote products
  445. // derived from this software without specific prior written permission.
  446. //
  447. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  448. // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  449. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  450. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  451. // GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  452. // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  453. // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  454. //
  455. while (nb_attempts-- > 0)
  456. {
  457. vm_prot_t cur_prot, max_prot;
  458. kern_return_t mach_error;
  459. rmtU8* ptr = NULL;
  460. rmtU8* target = NULL;
  461. // Allocate 2 contiguous pages of virtual memory
  462. if (vm_allocate(mach_task_self(), (vm_address_t*)&ptr, size * 2, VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
  463. break;
  464. // Try to deallocate the last page, leaving its virtual memory address free
  465. target = ptr + size;
  466. if (vm_deallocate(mach_task_self(), (vm_address_t)target, size) != KERN_SUCCESS)
  467. {
  468. vm_deallocate(mach_task_self(), (vm_address_t)ptr, size * 2);
  469. break;
  470. }
  471. // Attempt to remap the page just deallocated to the buffer again
  472. mach_error = vm_remap(
  473. mach_task_self(),
  474. (vm_address_t*)&target,
  475. size,
  476. 0, // mask
  477. 0, // anywhere
  478. mach_task_self(),
  479. (vm_address_t)ptr,
  480. 0, //copy
  481. &cur_prot,
  482. &max_prot,
  483. VM_INHERIT_COPY);
  484. if (mach_error == KERN_NO_SPACE)
  485. {
  486. // Failed on this pass, cleanup and make another attempt
  487. if (vm_deallocate(mach_task_self(), (vm_address_t)ptr, size) != KERN_SUCCESS)
  488. break;
  489. }
  490. else if (mach_error == KERN_SUCCESS)
  491. {
  492. // Leave the loop on success
  493. (*buffer)->ptr = ptr;
  494. break;
  495. }
  496. else
  497. {
  498. // Unknown error, can't recover
  499. vm_deallocate(mach_task_self(), (vm_address_t)ptr, size);
  500. break;
  501. }
  502. }
  503. #endif
  504. #ifdef RMT_PLATFORM_LINUX
  505. // Linux version based on now-defunct Wikipedia section http://en.wikipedia.org/w/index.php?title=Circular_buffer&oldid=600431497
  506. // Create a unique temporary filename in the shared memory folder
  507. file_descriptor = mkstemp(path);
  508. if (file_descriptor < 0)
  509. {
  510. VirtualMirrorBuffer_Destroy(*buffer);
  511. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  512. }
  513. // Delete the name
  514. if (unlink(path))
  515. {
  516. VirtualMirrorBuffer_Destroy(*buffer);
  517. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  518. }
  519. // Set the file size to twice the buffer size
  520. // TODO: this 2x behaviour can be avoided with similar solution to Win/Mac
  521. if (ftruncate (file_descriptor, size * 2))
  522. {
  523. VirtualMirrorBuffer_Destroy(*buffer);
  524. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  525. }
  526. // Map 2 contiguous pages
  527. (*buffer)->ptr = mmap(NULL, size * 2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  528. if ((*buffer)->ptr == MAP_FAILED)
  529. {
  530. VirtualMirrorBuffer_Destroy(*buffer);
  531. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  532. }
  533. // Point both pages to the same memory file
  534. if (mmap((*buffer)->ptr, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, file_descriptor, 0) != (*buffer)->ptr ||
  535. mmap((*buffer)->ptr + size, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, file_descriptor, 0) != (*buffer)->ptr + size)
  536. {
  537. VirtualMirrorBuffer_Destroy(*buffer);
  538. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  539. }
  540. #endif
  541. // Cleanup if exceeded number of attempts or failed
  542. if ((*buffer)->ptr == NULL)
  543. {
  544. VirtualMirrorBuffer_Destroy(*buffer);
  545. return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL;
  546. }
  547. return RMT_ERROR_NONE;
  548. }
  549. /*
  550. ------------------------------------------------------------------------------------------------------------------------
  551. ------------------------------------------------------------------------------------------------------------------------
  552. @THREADS: Threads
  553. ------------------------------------------------------------------------------------------------------------------------
  554. ------------------------------------------------------------------------------------------------------------------------
  555. */
// comp.lang.c FAQ 4.13 : http://c-faq.com/ptrs/generic.html
// Should not store a funcptr in a void*, but any funcptr type will do
typedef int(*FuncPtr)();

// Handle and shared state for a worker thread created by Thread_Create
typedef struct
{
    // OS-specific data
#if defined(RMT_PLATFORM_WINDOWS)
    HANDLE handle;
#else
    pthread_t handle;
#endif

    // Callback executed when the thread is created
    FuncPtr callback;

    // Caller-specified parameter passed to Thread_Create
    void* param;

    // Error state returned from callback
    enum rmtError error;

    // External threads can set this to request an exit
    // NOTE(review): plain volatile rather than an atomic -- relies on the flag
    // being a single aligned word; confirm acceptable on all targets.
    volatile rmtBool request_exit;
} Thread;

// Signature of the function run on the new thread
typedef enum rmtError (*ThreadProc)(Thread* thread);
#if defined(RMT_PLATFORM_WINDOWS)
// Win32 thread entry shim: forwards to the user callback and records its error
static DWORD WINAPI ThreadProcWindows(LPVOID lpParameter)
{
    Thread* thread = (Thread*)lpParameter;
    assert(thread != NULL);
    thread->error = ((ThreadProc)thread->callback)(thread);
    return thread->error == RMT_ERROR_NONE ? 1 : 0;
}
#else
// pthread entry shim: forwards to the user callback and records its error
static void* StartFunc( void* pArgs )
{
    Thread* thread = (Thread*)pArgs;
    assert(thread != NULL);
    thread->error = ((ThreadProc)thread->callback)(thread);
    return NULL; // returned error not used; check thread->error.
}
#endif
  594. static void Thread_Destroy(Thread* thread);
  595. static int Thread_Valid(Thread* thread)
  596. {
  597. assert(thread != NULL);
  598. #if defined(RMT_PLATFORM_WINDOWS)
  599. return thread->handle != NULL;
  600. #else
  601. return pthread_equal(thread->handle, pthread_self());
  602. #endif
  603. }
// Allocate a Thread and start it running 'callback' with 'param' attached.
// On failure the allocation is cleaned up, *thread is set to NULL and an
// error code is returned.
static enum rmtError Thread_Create(Thread** thread, ThreadProc callback, void* param)
{
    assert(thread != NULL);

    // Allocate space for the thread data
    *thread = (Thread*)malloc(sizeof(Thread));
    if (*thread == NULL)
        return RMT_ERROR_MALLOC_FAIL;

    (*thread)->callback = (FuncPtr)callback;
    (*thread)->param = param;
    (*thread)->error = RMT_ERROR_NONE;
    (*thread)->request_exit = RMT_FALSE;

    // OS-specific thread creation
#if defined (RMT_PLATFORM_WINDOWS)
    (*thread)->handle = CreateThread(
        NULL, // lpThreadAttributes
        0, // dwStackSize
        ThreadProcWindows, // lpStartAddress
        *thread, // lpParameter
        0, // dwCreationFlags
        NULL); // lpThreadId
    if ((*thread)->handle == NULL)
    {
        Thread_Destroy(*thread);
        *thread = NULL;
        return RMT_ERROR_CREATE_THREAD_FAIL;
    }
#else
    int32_t error = pthread_create( &(*thread)->handle, NULL, StartFunc, *thread );
    if (error)
    {
        // Contents of 'thread' parameter to pthread_create() are undefined after
        // failure call so can't pre-set to invalid value before hand.
        // The creating thread's own ID is the "invalid" sentinel that
        // Thread_Valid tests against.
        (*thread)->handle = pthread_self();
        Thread_Destroy(*thread);
        *thread = NULL;
        return RMT_ERROR_CREATE_THREAD_FAIL;
    }
#endif

    return RMT_ERROR_NONE;
}
// Ask the thread to exit; the thread's main loop is expected to poll
// 'request_exit' and return when it becomes true.
static void Thread_RequestExit(Thread* thread)
{
    // Not really worried about memory barriers or delayed visibility to the target thread
    assert(thread != NULL);
    thread->request_exit = RMT_TRUE;
}
// Block the caller until the thread's entry function has returned.
static void Thread_Join(Thread* thread)
{
    assert(Thread_Valid(thread));

#if defined(RMT_PLATFORM_WINDOWS)
    WaitForSingleObject(thread->handle, INFINITE);
#else
    pthread_join(thread->handle, NULL);
#endif
}
// Stop, join and free a thread created by Thread_Create. Invalid threads
// (failed creation) are simply freed without joining.
static void Thread_Destroy(Thread* thread)
{
    assert(thread != NULL);

    if (Thread_Valid(thread))
    {
        // Shutdown the thread
        Thread_RequestExit(thread);
        Thread_Join(thread);

        // OS-specific release of thread resources
#if defined(RMT_PLATFORM_WINDOWS)
        CloseHandle(thread->handle);
        thread->handle = NULL;
#endif
    }

    free(thread);
}
  675. /*
  676. ------------------------------------------------------------------------------------------------------------------------
  677. ------------------------------------------------------------------------------------------------------------------------
  678. @SAFEC: Safe C Library excerpts
  679. http://sourceforge.net/projects/safeclib/
  680. ------------------------------------------------------------------------------------------------------------------------
  681. ------------------------------------------------------------------------------------------------------------------------
  682. */
  683. /*------------------------------------------------------------------
  684. *
  685. * November 2008, Bo Berry
  686. *
  687. * Copyright (c) 2008-2011 by Cisco Systems, Inc
  688. * All rights reserved.
  689. *
  690. * Permission is hereby granted, free of charge, to any person
  691. * obtaining a copy of this software and associated documentation
  692. * files (the "Software"), to deal in the Software without
  693. * restriction, including without limitation the rights to use,
  694. * copy, modify, merge, publish, distribute, sublicense, and/or
  695. * sell copies of the Software, and to permit persons to whom the
  696. * Software is furnished to do so, subject to the following
  697. * conditions:
  698. *
  699. * The above copyright notice and this permission notice shall be
  700. * included in all copies or substantial portions of the Software.
  701. *
  702. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  703. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  704. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  705. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  706. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  707. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  708. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  709. * OTHER DEALINGS IN THE SOFTWARE.
  710. *------------------------------------------------------------------
  711. */
// NOTE: Microsoft also ships its own versions of these functions, so some
// hacky preprocessor renaming keeps the local implementations out of its way
#define strnlen_s strnlen_s_safe_c
#define strncat_s strncat_s_safe_c

// Upper bound on any string length handled by the safe-c routines
#define RSIZE_MAX_STR (4UL << 10) /* 4KB */

// Pass-through for safeclib's return-value negation hook
#define RCNEGATE(x) x

// Error codes returned by the safe-c routines
#define EOK ( 0 )
#define ESNULLP ( 400 ) /* null ptr */
#define ESZEROL ( 401 ) /* length is zero */
#define ESLEMAX ( 403 ) /* length exceeds max */
#define ESOVRLP ( 404 ) /* overlap undefined */
#define ESNOSPC ( 406 ) /* not enough space for s2 */
#define ESUNTERM ( 407 ) /* unterminated string */
#define ESNOTFND ( 409 ) /* not found */

#ifndef _ERRNO_T_DEFINED
#define _ERRNO_T_DEFINED
typedef int errno_t;
#endif

// rsize_t is provided by the platform headers on Win64 and Apple; define it elsewhere
#if !defined(_WIN64) && !defined(__APPLE__)
typedef unsigned int rsize_t;
#endif
  732. static rsize_t
  733. strnlen_s (const char *dest, rsize_t dmax)
  734. {
  735. rsize_t count;
  736. if (dest == NULL) {
  737. return RCNEGATE(0);
  738. }
  739. if (dmax == 0) {
  740. return RCNEGATE(0);
  741. }
  742. if (dmax > RSIZE_MAX_STR) {
  743. return RCNEGATE(0);
  744. }
  745. count = 0;
  746. while (*dest && dmax) {
  747. count++;
  748. dmax--;
  749. dest++;
  750. }
  751. return RCNEGATE(count);
  752. }
// Safe C Lib substring search: scans 'dest' (at most dmax characters) for the
// first occurrence of 'src' (at most slen characters significant).
// On success *substring points at the match inside dest and EOK is returned;
// otherwise *substring is NULL and an ES* error code is returned.
static errno_t
strstr_s (char *dest, rsize_t dmax,
          const char *src, rsize_t slen, char **substring)
{
    rsize_t len;
    rsize_t dlen;
    int i;

    // Validate the output pointer first so it can be cleared on other errors
    if (substring == NULL) {
        return RCNEGATE(ESNULLP);
    }
    *substring = NULL;

    if (dest == NULL) {
        return RCNEGATE(ESNULLP);
    }
    if (dmax == 0) {
        return RCNEGATE(ESZEROL);
    }
    if (dmax > RSIZE_MAX_STR) {
        return RCNEGATE(ESLEMAX);
    }
    if (src == NULL) {
        return RCNEGATE(ESNULLP);
    }
    if (slen == 0) {
        return RCNEGATE(ESZEROL);
    }
    if (slen > RSIZE_MAX_STR) {
        return RCNEGATE(ESLEMAX);
    }

    /*
     * src points to a string with zero length, or
     * src equals dest, return dest
     */
    if (*src == '\0' || dest == src) {
        *substring = dest;
        return RCNEGATE(EOK);
    }

    // Try to match src at each successive position of dest, bounding the inner
    // comparison by both the remaining dest budget (dlen) and src budget (len)
    while (*dest && dmax) {
        i = 0;
        len = slen;
        dlen = dmax;

        while (src[i] && dlen) {
            /* not a match, not a substring */
            if (dest[i] != src[i]) {
                break;
            }

            /* move to the next char */
            i++;
            len--;
            dlen--;

            if (src[i] == '\0' || !len) {
                *substring = dest;
                return RCNEGATE(EOK);
            }
        }
        dest++;
        dmax--;
    }

    /*
     * substring was not found, return NULL
     */
    *substring = NULL;
    return RCNEGATE(ESNOTFND);
}
/*
 * Safe string append (Safe C Library semantics): appends at most slen
 * characters of src to the null-terminated string in dest, where dmax is
 * the total size of the dest buffer. A copy truncated by slen is null
 * terminated.
 *
 * Returns:
 *   EOK      - appended and terminated successfully
 *   ESNULLP  - dest or src is NULL
 *   ESZEROL  - dmax is zero
 *   ESLEMAX  - dmax or slen exceeds RSIZE_MAX_STR
 *   ESUNTERM - dest is not null terminated within dmax characters
 *   ESOVRLP  - the source and destination ranges overlap
 *   ESNOSPC  - dest has no room for the appended characters
 */
static errno_t
strncat_s (char *dest, rsize_t dmax, const char *src, rsize_t slen)
{
    rsize_t orig_dmax;
    char *orig_dest;
    const char *overlap_bumper;

    if (dest == NULL) {
        return RCNEGATE(ESNULLP);
    }

    if (src == NULL) {
        return RCNEGATE(ESNULLP);
    }

    if (slen > RSIZE_MAX_STR) {
        return RCNEGATE(ESLEMAX);
    }

    if (dmax == 0) {
        return RCNEGATE(ESZEROL);
    }

    if (dmax > RSIZE_MAX_STR) {
        return RCNEGATE(ESLEMAX);
    }

    /* hold base of dest in case src was not copied */
    /* NOTE(review): orig_dmax/orig_dest are unused in this excerpt of the
       upstream safeclib code (the null-slack pass was removed) */
    orig_dmax = dmax;
    orig_dest = dest;

    /* The direction of the overlap check depends on which buffer comes
       first in memory */
    if (dest < src) {
        overlap_bumper = src;

        /* Find the end of dest */
        while (*dest != '\0') {
            if (dest == overlap_bumper) {
                return RCNEGATE(ESOVRLP);
            }
            dest++;
            dmax--;
            if (dmax == 0) {
                return RCNEGATE(ESUNTERM);
            }
        }

        while (dmax > 0) {
            if (dest == overlap_bumper) {
                return RCNEGATE(ESOVRLP);
            }

            /*
             * Copying truncated before the source null is encountered
             */
            if (slen == 0) {
                *dest = '\0';
                return RCNEGATE(EOK);
            }

            *dest = *src;
            if (*dest == '\0') {
                return RCNEGATE(EOK);
            }

            dmax--;
            slen--;
            dest++;
            src++;
        }

    } else {
        overlap_bumper = dest;

        /* Find the end of dest */
        while (*dest != '\0') {
            /*
             * NOTE: no need to check for overlap here since src comes first
             * in memory and we're not incrementing src here.
             */
            dest++;
            dmax--;
            if (dmax == 0) {
                return RCNEGATE(ESUNTERM);
            }
        }

        while (dmax > 0) {
            if (src == overlap_bumper) {
                return RCNEGATE(ESOVRLP);
            }

            /*
             * Copying truncated
             */
            if (slen == 0) {
                *dest = '\0';
                return RCNEGATE(EOK);
            }

            *dest = *src;
            if (*dest == '\0') {
                return RCNEGATE(EOK);
            }

            dmax--;
            slen--;
            dest++;
            src++;
        }
    }

    /*
     * the entire src was not copied, so the string will be nulled.
     */
    return RCNEGATE(ESNOSPC);
}
  914. /*
  915. ------------------------------------------------------------------------------------------------------------------------
  916. ------------------------------------------------------------------------------------------------------------------------
  917. @OBJALLOC: Reusable Object Allocator
  918. ------------------------------------------------------------------------------------------------------------------------
  919. ------------------------------------------------------------------------------------------------------------------------
  920. */
  921. //
  922. // All objects that require free-list-backed allocation need to inherit from this type.
  923. //
//
// All objects that require free-list-backed allocation need to inherit from this type.
//
typedef struct ObjectLink
{
    // Intrusive free-list pointer; volatile because the lock-free
    // push/pop below read and write it from multiple threads
    struct ObjectLink* volatile next;
} ObjectLink;

// Initialise a link as detached (on no free list)
static void ObjectLink_Constructor(ObjectLink* link)
{
    assert(link != NULL);
    link->next = NULL;
}

// Lifetime callbacks, invoked once on first allocation / final destruction
typedef enum rmtError (*ObjConstructor)(void*);
typedef void (*ObjDestructor)(void*);

typedef struct
{
    // Object create/destroy parameters
    rmtU32 object_size;
    ObjConstructor constructor;
    ObjDestructor destructor;

    // Number of objects in the free list
    volatile rmtS32 nb_free;

    // Number of objects used by callers
    volatile rmtS32 nb_inuse;

    // Total allocation count
    volatile rmtS32 nb_allocated;

    // Head of the lock-free free list
    ObjectLink* first_free;
} ObjectAllocator;
  949. static enum rmtError ObjectAllocator_Create(ObjectAllocator** allocator, rmtU32 object_size, ObjConstructor constructor, ObjDestructor destructor)
  950. {
  951. // Allocate space for the allocator
  952. assert(allocator != NULL);
  953. *allocator = (ObjectAllocator*)malloc(sizeof(ObjectAllocator));
  954. if (*allocator == NULL)
  955. return RMT_ERROR_MALLOC_FAIL;
  956. // Construct it
  957. (*allocator)->object_size = object_size;
  958. (*allocator)->constructor = constructor;
  959. (*allocator)->destructor = destructor;
  960. (*allocator)->nb_free = 0;
  961. (*allocator)->nb_inuse = 0;
  962. (*allocator)->nb_allocated = 0;
  963. (*allocator)->first_free = NULL;
  964. return RMT_ERROR_NONE;
  965. }
/*
 * Push the linked range [start, end] onto the front of the allocator's
 * free list. Lock-free: retries until the head compare-and-swap succeeds
 * against concurrent pushers/poppers. The interior next pointers between
 * start and end must already be linked; only end->next is stitched here.
 */
static void ObjectAllocator_Push(ObjectAllocator* allocator, ObjectLink* start, ObjectLink* end)
{
    assert(allocator != NULL);
    assert(start != NULL);
    assert(end != NULL);

    // CAS pop add range to the front of the list
    while (1)
    {
        ObjectLink* old_link = (ObjectLink*)allocator->first_free;
        end->next = old_link;
        if (AtomicCompareAndSwapPointer((long* volatile*)&allocator->first_free, (long*)old_link, (long*)start) == RMT_TRUE)
            break;
    }
}
/*
 * Pop one object from the front of the free list via a lock-free CAS
 * loop. The caller must guarantee the list is non-empty (see assert).
 *
 * NOTE(review): this is the classic lock-free stack pop and reads
 * old_link->next before the CAS, so with multiple concurrent poppers it
 * is exposed to the ABA problem — confirm the usage pattern makes this
 * safe before relying on it under contention.
 */
static ObjectLink* ObjectAllocator_Pop(ObjectAllocator* allocator)
{
    ObjectLink* link;

    assert(allocator != NULL);
    assert(allocator->first_free != NULL);

    // CAS pop from the front of the list
    while (1)
    {
        ObjectLink* old_link = (ObjectLink*)allocator->first_free;
        ObjectLink* next_link = old_link->next;
        if (AtomicCompareAndSwapPointer((long* volatile*)&allocator->first_free, (long*)old_link, (long*)next_link) == RMT_TRUE)
        {
            link = old_link;
            break;
        }
    }

    // Detach the popped object before handing it out
    link->next = NULL;
    return link;
}
/*
 * Hand out an object, reusing a free-list entry when available and
 * malloc+constructing a new one when the list is empty. The constructor
 * runs only once per object (at first allocation); recycled objects are
 * returned with their previous contents.
 *
 * NOTE(review): the empty-list check and the Pop below are not one atomic
 * step — confirm concurrent callers cannot drain the list in between.
 */
static enum rmtError ObjectAllocator_Alloc(ObjectAllocator* allocator, void** object)
{
    // This function only calls the object constructor on initial malloc of an object

    assert(allocator != NULL);
    assert(object != NULL);

    // Has the free list run out?
    if (allocator->first_free == NULL)
    {
        enum rmtError error;

        // Allocate/construct a new object
        void* free_object = malloc(allocator->object_size);
        if (free_object == NULL)
            return RMT_ERROR_MALLOC_FAIL;
        assert(allocator->constructor != NULL);
        error = allocator->constructor(free_object);
        if (error != RMT_ERROR_NONE)
        {
            // Auto-teardown on failure
            assert(allocator->destructor != NULL);
            allocator->destructor(free_object);
            free(free_object);
            return error;
        }

        // Add to the free list
        ObjectAllocator_Push(allocator, (ObjectLink*)free_object, (ObjectLink*)free_object);
        AtomicAdd(&allocator->nb_allocated, 1);
        AtomicAdd(&allocator->nb_free, 1);
    }

    // Pull available objects from the free list
    *object = ObjectAllocator_Pop(allocator);
    AtomicSub(&allocator->nb_free, 1);
    AtomicAdd(&allocator->nb_inuse, 1);

    return RMT_ERROR_NONE;
}
/*
 * Return a single object to the free list for reuse. The destructor is
 * NOT run here; objects are only destructed in ObjectAllocator_Destroy.
 */
static void ObjectAllocator_Free(ObjectAllocator* allocator, void* object)
{
    // Add back to the free-list
    assert(allocator != NULL);
    ObjectAllocator_Push(allocator, (ObjectLink*)object, (ObjectLink*)object);
    AtomicSub(&allocator->nb_inuse, 1);
    AtomicAdd(&allocator->nb_free, 1);
}
/*
 * Return a chain of 'count' objects running from start to end to the free
 * list in a single push. The interior next pointers must already link
 * start to end — Push only attaches end to the current list head.
 */
static void ObjectAllocator_FreeRange(ObjectAllocator* allocator, void* start, void* end, rmtU32 count)
{
    assert(allocator != NULL);
    ObjectAllocator_Push(allocator, (ObjectLink*)start, (ObjectLink*)end);
    AtomicSub(&allocator->nb_inuse, count);
    AtomicAdd(&allocator->nb_free, count);
}
  1048. static void ObjectAllocator_Destroy(ObjectAllocator* allocator)
  1049. {
  1050. // Ensure everything has been released to the allocator
  1051. assert(allocator != NULL);
  1052. assert(allocator->nb_inuse == 0);
  1053. // Destroy all objects released to the allocator
  1054. assert(allocator != NULL);
  1055. while (allocator->first_free != NULL)
  1056. {
  1057. ObjectLink* next = allocator->first_free->next;
  1058. assert(allocator->destructor != NULL);
  1059. allocator->destructor(allocator->first_free);
  1060. free(allocator->first_free);
  1061. allocator->first_free = next;
  1062. }
  1063. free(allocator);
  1064. }
  1065. /*
  1066. ------------------------------------------------------------------------------------------------------------------------
  1067. ------------------------------------------------------------------------------------------------------------------------
  1068. @DYNBUF: Dynamic Buffer
  1069. ------------------------------------------------------------------------------------------------------------------------
  1070. ------------------------------------------------------------------------------------------------------------------------
  1071. */
typedef struct
{
    // Step size (in bytes) by which the allocation is grown
    rmtU32 alloc_granularity;

    // Current capacity of data
    rmtU32 bytes_allocated;

    // Number of bytes written so far
    rmtU32 bytes_used;

    // Heap-allocated storage; NULL until the first write
    rmtU8* data;
} Buffer;
  1079. static enum rmtError Buffer_Create(Buffer** buffer, rmtU32 alloc_granularity)
  1080. {
  1081. assert(buffer != NULL);
  1082. // Allocate and set defaults as nothing allocated
  1083. *buffer = (Buffer*)malloc(sizeof(Buffer));
  1084. if (*buffer == NULL)
  1085. return RMT_ERROR_MALLOC_FAIL;
  1086. (*buffer)->alloc_granularity = alloc_granularity;
  1087. (*buffer)->bytes_allocated = 0;
  1088. (*buffer)->bytes_used = 0;
  1089. (*buffer)->data = NULL;
  1090. return RMT_ERROR_NONE;
  1091. }
  1092. static void Buffer_Destroy(Buffer* buffer)
  1093. {
  1094. assert(buffer != NULL);
  1095. if (buffer->data != NULL)
  1096. {
  1097. free(buffer->data);
  1098. buffer->data = NULL;
  1099. }
  1100. free(buffer);
  1101. }
  1102. static enum rmtError Buffer_Write(Buffer* buffer, void* data, rmtU32 length)
  1103. {
  1104. assert(buffer != NULL);
  1105. // Reallocate the buffer on overflow
  1106. if (buffer->bytes_used + length > buffer->bytes_allocated)
  1107. {
  1108. // Calculate size increase rounded up to the requested allocation granularity
  1109. rmtU32 g = buffer->alloc_granularity;
  1110. rmtU32 a = buffer->bytes_allocated + length;
  1111. a = a + ((g - 1) - ((a - 1) % g));
  1112. buffer->bytes_allocated = a;
  1113. buffer->data = (rmtU8*)realloc(buffer->data, buffer->bytes_allocated);
  1114. if (buffer->data == NULL)
  1115. return RMT_ERROR_MALLOC_FAIL;
  1116. }
  1117. // Copy all bytes
  1118. memcpy(buffer->data + buffer->bytes_used, data, length);
  1119. buffer->bytes_used += length;
  1120. // NULL terminate (if possible) for viewing in debug
  1121. if (buffer->bytes_used < buffer->bytes_allocated)
  1122. buffer->data[buffer->bytes_used] = 0;
  1123. return RMT_ERROR_NONE;
  1124. }
/*
 * Append a null-terminated string to the buffer, excluding the
 * terminator. The measured length is capped at 2048 characters by
 * strnlen_s, silently truncating anything longer.
 */
static enum rmtError Buffer_WriteString(Buffer* buffer, rmtPStr string)
{
    assert(string != NULL);
    return Buffer_Write(buffer, (void*)string, (rmtU32)strnlen_s(string, 2048));
}
  1130. /*
  1131. ------------------------------------------------------------------------------------------------------------------------
  1132. ------------------------------------------------------------------------------------------------------------------------
  1133. @SOCKETS: Sockets TCP/IP Wrapper
  1134. ------------------------------------------------------------------------------------------------------------------------
  1135. ------------------------------------------------------------------------------------------------------------------------
  1136. */
/* Map the BSD socket API onto the WinSock names used throughout this file */
#ifndef RMT_PLATFORM_WINDOWS
typedef int SOCKET;
#define INVALID_SOCKET -1
#define SOCKET_ERROR -1
#define SD_SEND SHUT_WR
#define closesocket close
#endif

// A single TCP socket, used for both the listening server and clients
typedef struct
{
    SOCKET socket;
} TCPSocket;

// Snapshot of a socket's state as reported by a non-blocking select()
typedef struct
{
    rmtBool can_read;
    rmtBool can_write;
    enum rmtError error_state;
} SocketStatus;

//
// Function prototypes
//
static void TCPSocket_Close(TCPSocket* tcp_socket);
static enum rmtError TCPSocket_Destroy(TCPSocket** tcp_socket, enum rmtError error);
  1159. static enum rmtError InitialiseNetwork()
  1160. {
  1161. #ifdef RMT_PLATFORM_WINDOWS
  1162. WSADATA wsa_data;
  1163. if (WSAStartup(MAKEWORD(2, 2), &wsa_data))
  1164. return RMT_ERROR_SOCKET_INIT_NETWORK_FAIL;
  1165. if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2)
  1166. return RMT_ERROR_SOCKET_INIT_NETWORK_FAIL;
  1167. return RMT_ERROR_NONE;
  1168. #else
  1169. return RMT_ERROR_NONE;
  1170. #endif
  1171. }
// Tear down platform networking; pairs with InitialiseNetwork
static void ShutdownNetwork()
{
#ifdef RMT_PLATFORM_WINDOWS
    WSACleanup();
#endif
}
  1178. static enum rmtError TCPSocket_Create(TCPSocket** tcp_socket)
  1179. {
  1180. enum rmtError error;
  1181. assert(tcp_socket != NULL);
  1182. // Allocate and initialise
  1183. *tcp_socket = (TCPSocket*)malloc(sizeof(TCPSocket));
  1184. if (*tcp_socket == NULL)
  1185. return RMT_ERROR_MALLOC_FAIL;
  1186. (*tcp_socket)->socket = INVALID_SOCKET;
  1187. error = InitialiseNetwork();
  1188. if (error != RMT_ERROR_NONE)
  1189. return TCPSocket_Destroy(tcp_socket, error);
  1190. return RMT_ERROR_NONE;
  1191. }
  1192. static enum rmtError TCPSocket_CreateServer(rmtU16 port, TCPSocket** tcp_socket)
  1193. {
  1194. SOCKET s = INVALID_SOCKET;
  1195. struct sockaddr_in sin = { 0 };
  1196. #ifdef RMT_PLATFORM_WINDOWS
  1197. u_long nonblock = 1;
  1198. #endif
  1199. // Create socket container
  1200. enum rmtError error = TCPSocket_Create(tcp_socket);
  1201. if (error != RMT_ERROR_NONE)
  1202. return error;
  1203. // Try to create the socket
  1204. s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  1205. if (s == SOCKET_ERROR)
  1206. return TCPSocket_Destroy(tcp_socket, RMT_ERROR_SOCKET_CREATE_FAIL);
  1207. // Bind the socket to the incoming port
  1208. sin.sin_family = AF_INET;
  1209. sin.sin_addr.s_addr = INADDR_ANY;
  1210. sin.sin_port = htons(port);
  1211. if (bind(s, (struct sockaddr*)&sin, sizeof(sin)) == SOCKET_ERROR)
  1212. return TCPSocket_Destroy(tcp_socket, RMT_ERROR_SOCKET_BIND_FAIL);
  1213. // Connection is valid, remaining code is socket state modification
  1214. (*tcp_socket)->socket = s;
  1215. // Enter a listening state with a backlog of 1 connection
  1216. if (listen(s, 1) == SOCKET_ERROR)
  1217. return TCPSocket_Destroy(tcp_socket, RMT_ERROR_SOCKET_LISTEN_FAIL);
  1218. // Set as non-blocking
  1219. #ifdef RMT_PLATFORM_WINDOWS
  1220. if (ioctlsocket((*tcp_socket)->socket, FIONBIO, &nonblock) == SOCKET_ERROR)
  1221. return TCPSocket_Destroy(tcp_socket, RMT_ERROR_SOCKET_SET_NON_BLOCKING_FAIL);
  1222. #else
  1223. if (fcntl((*tcp_socket)->socket, F_SETFL, O_NONBLOCK) == SOCKET_ERROR)
  1224. return TCPSocket_Destroy(tcp_socket, RMT_ERROR_SOCKET_SET_NON_BLOCKING_FAIL);
  1225. #endif
  1226. return RMT_ERROR_NONE;
  1227. }
/*
 * Close the socket, shut networking down, free the container and NULL the
 * caller's pointer. Returns the error passed in, allowing it to be used
 * as a tail call on failure paths: return TCPSocket_Destroy(s, error);
 */
static enum rmtError TCPSocket_Destroy(TCPSocket** tcp_socket, enum rmtError error)
{
    assert(tcp_socket != NULL);

    TCPSocket_Close(*tcp_socket);
    ShutdownNetwork();

    free(*tcp_socket);
    *tcp_socket = NULL;
    return error;
}
/*
 * Gracefully close a socket: stop sends, attempt to drain the receive
 * side, then close the descriptor. Safe to call when no socket was ever
 * opened.
 */
static void TCPSocket_Close(TCPSocket* tcp_socket)
{
    assert(tcp_socket != NULL);

    if (tcp_socket->socket != INVALID_SOCKET)
    {
        // Shutdown the connection, stopping all sends
        int result = shutdown(tcp_socket->socket, SD_SEND);
        if (result != SOCKET_ERROR)
        {
            // Keep receiving until the peer closes the connection
            // NOTE(review): shutdown() returns 0 on success, so the
            // `result > 0` condition is false on entry and this drain loop
            // never executes — likely a latent bug; changing it would alter
            // I/O behaviour (and could stall on a blocking socket), so it
            // is only flagged here.
            int total = 0;
            char temp_buf[128];
            while (result > 0)
            {
                result = (int)recv(tcp_socket->socket, temp_buf, sizeof(temp_buf), 0);
                total += result;
            }
        }

        // Close the socket and issue a network shutdown request
        closesocket(tcp_socket->socket);
        tcp_socket->socket = INVALID_SOCKET;
    }
}
/*
 * Poll a socket's state without blocking: a zero-timeout select() that
 * reports readability, writability and pending errors in one
 * SocketStatus value.
 */
static SocketStatus TCPSocket_PollStatus(TCPSocket* tcp_socket)
{
    SocketStatus status;
    fd_set fd_read, fd_write, fd_errors;
    struct timeval tv;

    status.can_read = RMT_FALSE;
    status.can_write = RMT_FALSE;
    status.error_state = RMT_ERROR_NONE;

    assert(tcp_socket != NULL);
    if (tcp_socket->socket == INVALID_SOCKET)
    {
        status.error_state = RMT_ERROR_SOCKET_INVALID_POLL;
        return status;
    }

    // Set read/write/error markers for the socket
    FD_ZERO(&fd_read);
    FD_ZERO(&fd_write);
    FD_ZERO(&fd_errors);
    FD_SET(tcp_socket->socket, &fd_read);
    FD_SET(tcp_socket->socket, &fd_write);
    FD_SET(tcp_socket->socket, &fd_errors);

    // Poll socket status without blocking (zero timeout)
    tv.tv_sec = 0;
    tv.tv_usec = 0;
    if (select(((int)tcp_socket->socket)+1, &fd_read, &fd_write, &fd_errors, &tv) == SOCKET_ERROR)
    {
        status.error_state = RMT_ERROR_SOCKET_SELECT_FAIL;
        return status;
    }

    status.can_read = FD_ISSET(tcp_socket->socket, &fd_read) != 0 ? RMT_TRUE : RMT_FALSE;
    status.can_write = FD_ISSET(tcp_socket->socket, &fd_write) != 0 ? RMT_TRUE : RMT_FALSE;
    status.error_state = FD_ISSET(tcp_socket->socket, &fd_errors) != 0 ? RMT_ERROR_SOCKET_POLL_ERRORS : RMT_ERROR_NONE;
    return status;
}
  1294. static enum rmtError TCPSocket_AcceptConnection(TCPSocket* tcp_socket, TCPSocket** client_socket)
  1295. {
  1296. SocketStatus status;
  1297. SOCKET s;
  1298. enum rmtError error;
  1299. // Ensure there is an incoming connection
  1300. assert(tcp_socket != NULL);
  1301. status = TCPSocket_PollStatus(tcp_socket);
  1302. if (status.error_state != RMT_ERROR_NONE || !status.can_read)
  1303. return status.error_state;
  1304. // Accept the connection
  1305. s = accept(tcp_socket->socket, 0, 0);
  1306. if (s == SOCKET_ERROR)
  1307. return RMT_ERROR_SOCKET_ACCEPT_FAIL;
  1308. // Create a client socket for the new connection
  1309. assert(client_socket != NULL);
  1310. error = TCPSocket_Create(client_socket);
  1311. if (error != RMT_ERROR_NONE)
  1312. return error;
  1313. (*client_socket)->socket = s;
  1314. return RMT_ERROR_NONE;
  1315. }
  1316. static int TCPSocketWouldBlock()
  1317. {
  1318. #ifdef RMT_PLATFORM_WINDOWS
  1319. DWORD error = WSAGetLastError();
  1320. return (error == WSAEWOULDBLOCK);
  1321. #else
  1322. int error = errno;
  1323. return (error == EAGAIN || error == EWOULDBLOCK);
  1324. #endif
  1325. }
/*
 * Send length bytes of data, retrying would-block results until
 * everything is written or timeout_ms milliseconds elapse. Returns
 * RMT_ERROR_NONE on success, RMT_ERROR_SOCKET_SEND_TIMEOUT when the peer
 * can't be written to in time (which callers may treat as non-fatal), or
 * a poll/send failure code.
 */
static enum rmtError TCPSocket_Send(TCPSocket* tcp_socket, const void* data, rmtU32 length, rmtU32 timeout_ms)
{
    SocketStatus status;
    char* cur_data = NULL;
    char* end_data = NULL;
    rmtU32 start_ms = 0;
    rmtU32 cur_ms = 0;

    assert(tcp_socket != NULL);

    // Can't send if there are socket errors
    status = TCPSocket_PollStatus(tcp_socket);
    if (status.error_state != RMT_ERROR_NONE)
        return status.error_state;
    if (!status.can_write)
        return RMT_ERROR_SOCKET_SEND_TIMEOUT;

    cur_data = (char*)data;
    end_data = cur_data + length;

    start_ms = msTimer_Get();
    while (cur_data < end_data)
    {
        // Attempt to send the remaining chunk of data
        int bytes_sent = (int)send(tcp_socket->socket, cur_data, (int)(end_data - cur_data), 0);

        if (bytes_sent == SOCKET_ERROR || bytes_sent == 0)
        {
            // Close the connection if sending fails for any other reason other than blocking
            if (bytes_sent != 0 && !TCPSocketWouldBlock())
                return RMT_ERROR_SOCKET_SEND_FAIL;

            // First check for tick-count overflow and reset, giving a slight hitch every 49.7 days
            cur_ms = msTimer_Get();
            if (cur_ms < start_ms)
            {
                start_ms = cur_ms;
                continue;
            }

            //
            // Timeout can happen when:
            //
            //    1) endpoint is no longer there
            //    2) endpoint can't consume quick enough
            //    3) local buffers overflow
            //
            // As none of these are actually errors, we have to pass this timeout back to the caller.
            //
            // TODO: This strategy breaks down if a send partially completes and then times out!
            //
            if (cur_ms - start_ms > timeout_ms)
            {
                return RMT_ERROR_SOCKET_SEND_TIMEOUT;
            }
        }
        else
        {
            // Jump over the data sent
            cur_data += bytes_sent;
        }
    }

    return RMT_ERROR_NONE;
}
/*
 * Receive exactly length bytes into data, retrying would-block results
 * until complete or until timeout_ms milliseconds elapse. Returns
 * RMT_ERROR_SOCKET_RECV_NO_DATA when nothing is pending at all,
 * RMT_ERROR_SOCKET_RECV_TIMEOUT when only part of the data arrives in
 * time, or a poll/recv failure code.
 */
static enum rmtError TCPSocket_Receive(TCPSocket* tcp_socket, void* data, rmtU32 length, rmtU32 timeout_ms)
{
    SocketStatus status;
    char* cur_data = NULL;
    char* end_data = NULL;
    rmtU32 start_ms = 0;
    rmtU32 cur_ms = 0;

    assert(tcp_socket != NULL);

    // Ensure there is data to receive
    status = TCPSocket_PollStatus(tcp_socket);
    if (status.error_state != RMT_ERROR_NONE)
        return status.error_state;
    if (!status.can_read)
        return RMT_ERROR_SOCKET_RECV_NO_DATA;

    cur_data = (char*)data;
    end_data = cur_data + length;

    // Loop until all data has been received
    start_ms = msTimer_Get();
    while (cur_data < end_data)
    {
        int bytes_received = (int)recv(tcp_socket->socket, cur_data, (int)(end_data - cur_data), 0);

        if (bytes_received == SOCKET_ERROR || bytes_received == 0)
        {
            // Close the connection if receiving fails for any other reason other than blocking
            if (bytes_received != 0 && !TCPSocketWouldBlock())
                return RMT_ERROR_SOCKET_RECV_FAILED;

            // First check for tick-count overflow and reset, giving a slight hitch every 49.7 days
            cur_ms = msTimer_Get();
            if (cur_ms < start_ms)
            {
                start_ms = cur_ms;
                continue;
            }

            //
            // Timeout can happen when:
            //
            //    1) data is delayed by sender
            //    2) sender fails to send a complete set of packets
            //
            // As not all of these scenarios are errors, we need to pass this information back to the caller.
            //
            // TODO: This strategy breaks down if a receive partially completes and then times out!
            //
            if (cur_ms - start_ms > timeout_ms)
            {
                return RMT_ERROR_SOCKET_RECV_TIMEOUT;
            }
        }
        else
        {
            // Jump over the data received
            cur_data += bytes_received;
        }
    }

    return RMT_ERROR_NONE;
}
  1439. /*
  1440. ------------------------------------------------------------------------------------------------------------------------
  1441. ------------------------------------------------------------------------------------------------------------------------
  1442. @SHA1: SHA-1 Cryptographic Hash Function
  1443. ------------------------------------------------------------------------------------------------------------------------
  1444. ------------------------------------------------------------------------------------------------------------------------
  1445. */
  1446. //
  1447. // Typed to allow enforced data size specification
  1448. //
typedef struct
{
    // 20-byte (160-bit) SHA-1 digest as written by calc()
    rmtU8 data[20];
} SHA1;
  1453. /*
  1454. Copyright (c) 2011, Micael Hildenborg
  1455. All rights reserved.
  1456. Redistribution and use in source and binary forms, with or without
  1457. modification, are permitted provided that the following conditions are met:
  1458. * Redistributions of source code must retain the above copyright
  1459. notice, this list of conditions and the following disclaimer.
  1460. * Redistributions in binary form must reproduce the above copyright
  1461. notice, this list of conditions and the following disclaimer in the
  1462. documentation and/or other materials provided with the distribution.
  1463. * Neither the name of Micael Hildenborg nor the
  1464. names of its contributors may be used to endorse or promote products
  1465. derived from this software without specific prior written permission.
  1466. THIS SOFTWARE IS PROVIDED BY Micael Hildenborg ''AS IS'' AND ANY
  1467. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  1468. WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  1469. DISCLAIMED. IN NO EVENT SHALL Micael Hildenborg BE LIABLE FOR ANY
  1470. DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  1471. (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  1472. LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  1473. ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  1474. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  1475. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  1476. */
  1477. /*
  1478. Contributors:
  1479. Gustav
  1480. Several members in the gamedev.se forum.
  1481. Gregory Petrosyan
  1482. */
  1483. // Rotate an integer value to left.
  1484. static unsigned int rol(const unsigned int value, const unsigned int steps)
  1485. {
  1486. return ((value << steps) | (value >> (32 - steps)));
  1487. }
  1488. // Sets the first 16 integers in the buffert to zero.
  1489. // Used for clearing the W buffert.
  1490. static void clearWBuffert(unsigned int* buffert)
  1491. {
  1492. int pos;
  1493. for (pos = 16; --pos >= 0;)
  1494. {
  1495. buffert[pos] = 0;
  1496. }
  1497. }
/*
 * SHA-1 compression function: consumes one 512-bit block held in
 * w[0..15] (expanded in place to w[16..79]) and accumulates it into the
 * five-word state in result.
 */
static void innerHash(unsigned int* result, unsigned int* w)
{
    unsigned int a = result[0];
    unsigned int b = result[1];
    unsigned int c = result[2];
    unsigned int d = result[3];
    unsigned int e = result[4];

    int round = 0;

    // One SHA-1 round: mix the round function and constant into the
    // working variables, rotating them along
    #define sha1macro(func,val) \
    { \
        const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \
        e = d; \
        d = c; \
        c = rol(b, 30); \
        b = a; \
        a = t; \
    }

    // Rounds 0-15 use the block data directly
    while (round < 16)
    {
        sha1macro((b & c) | (~b & d), 0x5a827999)
        ++round;
    }

    // Rounds 16-19: same round function, schedule now expanded from w
    while (round < 20)
    {
        w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
        sha1macro((b & c) | (~b & d), 0x5a827999)
        ++round;
    }

    // Rounds 20-39
    while (round < 40)
    {
        w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
        sha1macro(b ^ c ^ d, 0x6ed9eba1)
        ++round;
    }

    // Rounds 40-59
    while (round < 60)
    {
        w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
        sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc)
        ++round;
    }

    // Rounds 60-79
    while (round < 80)
    {
        w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
        sha1macro(b ^ c ^ d, 0xca62c1d6)
        ++round;
    }

    #undef sha1macro

    // Fold the working variables back into the chaining state
    result[0] += a;
    result[1] += b;
    result[2] += c;
    result[3] += d;
    result[4] += e;
}
/*
 * Core SHA-1: hashes bytelength bytes of src into the 20-byte array hash
 * (digest stored big-endian).
 *
 * NOTE(review): bytelength is a signed int and the message bit count is
 * stored in a single 32-bit word (w[15] = bytelength << 3), so inputs
 * must stay well below 2^29 bytes — confirm callers respect this.
 */
static void calc(const void* src, const int bytelength, unsigned char* hash)
{
    int roundPos;
    int lastBlockBytes;
    int hashByte;

    // Init the result array.
    unsigned int result[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 };

    // Cast the void src pointer to be the byte array we can work with.
    const unsigned char* sarray = (const unsigned char*) src;

    // The reusable round buffer
    unsigned int w[80];

    // Loop through all complete 64byte blocks.
    const int endOfFullBlocks = bytelength - 64;
    int endCurrentBlock;
    int currentBlock = 0;

    while (currentBlock <= endOfFullBlocks)
    {
        endCurrentBlock = currentBlock + 64;

        // Init the round buffer with the 64 byte block data.
        for (roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4)
        {
            // This line will swap endian on big endian and keep endian on little endian.
            w[roundPos++] = (unsigned int) sarray[currentBlock + 3]
                | (((unsigned int) sarray[currentBlock + 2]) << 8)
                | (((unsigned int) sarray[currentBlock + 1]) << 16)
                | (((unsigned int) sarray[currentBlock]) << 24);
        }

        innerHash(result, w);
    }

    // Handle the last and not full 64 byte block if existing.
    endCurrentBlock = bytelength - currentBlock;
    clearWBuffert(w);
    lastBlockBytes = 0;

    // Pack the remaining bytes big-endian into w, four per word
    for (;lastBlockBytes < endCurrentBlock; ++lastBlockBytes)
    {
        w[lastBlockBytes >> 2] |= (unsigned int) sarray[lastBlockBytes + currentBlock] << ((3 - (lastBlockBytes & 3)) << 3);
    }

    // Append the mandatory 0x80 padding byte after the message
    w[lastBlockBytes >> 2] |= 0x80 << ((3 - (lastBlockBytes & 3)) << 3);

    // No room left for the length in this block: flush and start another
    if (endCurrentBlock >= 56)
    {
        innerHash(result, w);
        clearWBuffert(w);
    }

    // Store the message length in bits and run the final compression
    w[15] = bytelength << 3;
    innerHash(result, w);

    // Store hash in result pointer, and make sure we get in in the correct order on both endian models.
    for (hashByte = 20; --hashByte >= 0;)
    {
        hash[hashByte] = (result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
    }
}
/*
 * Convenience wrapper: compute the SHA-1 digest of length bytes at src,
 * returned by value. Asserts that length fits in a non-negative signed
 * int, as required by calc().
 */
static SHA1 SHA1_Calculate(const void* src, unsigned int length)
{
    SHA1 hash;
    assert((int)length >= 0);
    calc(src, length, hash.data);
    return hash;
}
  1609. /*
  1610. ------------------------------------------------------------------------------------------------------------------------
  1611. ------------------------------------------------------------------------------------------------------------------------
  1612. @BASE64: Base-64 encoder
  1613. ------------------------------------------------------------------------------------------------------------------------
  1614. ------------------------------------------------------------------------------------------------------------------------
  1615. */
// Output alphabet for base-64 encoding (the standard RFC 4648 table)
static const char* b64_encoding_table =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
  1620. static rmtU32 Base64_CalculateEncodedLength(rmtU32 length)
  1621. {
  1622. // ceil(l * 4/3)
  1623. return 4 * ((length + 2) / 3);
  1624. }
  1625. static void Base64_Encode(const rmtU8* in_bytes, rmtU32 length, rmtU8* out_bytes)
  1626. {
  1627. rmtU32 i;
  1628. rmtU32 encoded_length;
  1629. rmtU32 remaining_bytes;
  1630. rmtU8* optr = out_bytes;
  1631. for (i = 0; i < length; )
  1632. {
  1633. // Read input 3 values at a time, null terminating
  1634. rmtU32 c0 = i < length ? in_bytes[i++] : 0;
  1635. rmtU32 c1 = i < length ? in_bytes[i++] : 0;
  1636. rmtU32 c2 = i < length ? in_bytes[i++] : 0;
  1637. // Encode 4 bytes for ever 3 input bytes
  1638. rmtU32 triple = (c0 << 0x10) + (c1 << 0x08) + c2;
  1639. *optr++ = b64_encoding_table[(triple >> 3 * 6) & 0x3F];
  1640. *optr++ = b64_encoding_table[(triple >> 2 * 6) & 0x3F];
  1641. *optr++ = b64_encoding_table[(triple >> 1 * 6) & 0x3F];
  1642. *optr++ = b64_encoding_table[(triple >> 0 * 6) & 0x3F];
  1643. }
  1644. // Pad output to multiple of 3 bytes with terminating '='
  1645. encoded_length = Base64_CalculateEncodedLength(length);
  1646. remaining_bytes = (3 - ((length + 2) % 3)) - 1;
  1647. for (i = 0; i < remaining_bytes; i++)
  1648. out_bytes[encoded_length - 1 - i] = '=';
  1649. // Null terminate
  1650. out_bytes[encoded_length] = 0;
  1651. }
  1652. /*
  1653. ------------------------------------------------------------------------------------------------------------------------
  1654. ------------------------------------------------------------------------------------------------------------------------
  1655. @MURMURHASH: MurmurHash3
  1656. https://code.google.com/p/smhasher
  1657. ------------------------------------------------------------------------------------------------------------------------
  1658. ------------------------------------------------------------------------------------------------------------------------
  1659. */
  1660. //-----------------------------------------------------------------------------
  1661. // MurmurHash3 was written by Austin Appleby, and is placed in the public
  1662. // domain. The author hereby disclaims copyright to this source code.
  1663. //-----------------------------------------------------------------------------
// Rotate x left by r bits.
// NOTE(review): r must be in [1,31] — a shift by 32 (r == 0) is undefined
// behaviour in C. All callers in this file pass 13 or 15.
static rmtU32 rotl32(rmtU32 x, rmtS8 r)
{
    return (x << r) | (x >> (32 - r));
}
  1668. // Block read - if your platform needs to do endian-swapping or can only
  1669. // handle aligned reads, do the conversion here
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here.
// 'i' may be negative: MurmurHash3_x86_32 passes a pointer to the end of the
// block range and indexes backwards from it.
static rmtU32 getblock32(const rmtU32* p, int i)
{
    return p[i];
}
  1674. // Finalization mix - force all bits of a hash block to avalanche
// Finalization mix - force all bits of a hash block to avalanche.
// The two multiply constants are the standard MurmurHash3 finalizer values.
static rmtU32 fmix32(rmtU32 h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}
// 32-bit MurmurHash3 of 'len' bytes at 'key', mixed with 'seed'.
// Faithful port of the public-domain reference implementation; kept
// structurally identical so it can be diffed against upstream.
static rmtU32 MurmurHash3_x86_32(const void* key, int len, rmtU32 seed)
{
    const rmtU8* data = (const rmtU8*)key;
    const int nblocks = len / 4;

    rmtU32 h1 = seed;

    const rmtU32 c1 = 0xcc9e2d51;
    const rmtU32 c2 = 0x1b873593;

    int i;

    // 'blocks' points one past the last whole 4-byte block; the body loop
    // walks it with negative indices. 'tail' is the first of the 0-3 bytes
    // left over after the whole blocks.
    const rmtU32 * blocks = (const rmtU32 *)(data + nblocks*4);
    const rmtU8 * tail = (const rmtU8*)(data + nblocks*4);

    rmtU32 k1 = 0;

    //----------
    // body
    for (i = -nblocks; i; i++)
    {
        rmtU32 k2 = getblock32(blocks,i);

        k2 *= c1;
        k2 = rotl32(k2,15);
        k2 *= c2;

        h1 ^= k2;
        h1 = rotl32(h1,13);
        h1 = h1*5+0xe6546b64;
    }

    //----------
    // tail
    // Fold the 1-3 remaining bytes into k1; the case fallthrough is
    // intentional and matches the reference implementation.
    switch(len & 3)
    {
    case 3: k1 ^= tail[2] << 16; /* fallthrough */
    case 2: k1 ^= tail[1] << 8; /* fallthrough */
    case 1: k1 ^= tail[0];
        k1 *= c1;
        k1 = rotl32(k1,15);
        k1 *= c2;
        h1 ^= k1;
    };

    //----------
    // finalization
    h1 ^= len;
    h1 = fmix32(h1);

    return h1;
}
  1725. /*
  1726. ------------------------------------------------------------------------------------------------------------------------
  1727. ------------------------------------------------------------------------------------------------------------------------
  1728. @WEBSOCKETS: WebSockets
  1729. ------------------------------------------------------------------------------------------------------------------------
  1730. ------------------------------------------------------------------------------------------------------------------------
  1731. */
// Payload type sent in WebSocket data frames; the values double as the
// frame opcode written into the frame header low bits.
enum WebSocketMode
{
    WEBSOCKET_NONE = 0,
    WEBSOCKET_TEXT = 1,
    WEBSOCKET_BINARY = 2,
};

// A WebSocket connection layered over a plain TCP socket
typedef struct
{
    TCPSocket* tcp_socket;

    // Opcode used for outgoing frames
    enum WebSocketMode mode;

    // Bytes left to consume in the frame currently being received
    rmtU32 frame_bytes_remaining;

    // Rolling index into data_mask as payload bytes are unmasked
    rmtU32 mask_offset;

    // 4-byte client masking key from the current frame header (all zero when
    // the frame is unmasked)
    rmtU8 data_mask[4];
} WebSocket;

static void WebSocket_Destroy(WebSocket* web_socket);
  1747. static char* GetField(char* buffer, rsize_t buffer_length, rmtPStr field_name)
  1748. {
  1749. char* field = NULL;
  1750. char* buffer_end = buffer + buffer_length - 1;
  1751. rsize_t field_length = strnlen_s(field_name, buffer_length);
  1752. if (field_length == 0)
  1753. return NULL;
  1754. // Search for the start of the field
  1755. if (strstr_s(buffer, buffer_length, field_name, field_length, &field) != EOK)
  1756. return NULL;
  1757. // Field name is now guaranteed to be in the buffer so its safe to jump over it without hitting the bounds
  1758. field += strlen(field_name);
  1759. // Skip any trailing whitespace
  1760. while (*field == ' ')
  1761. {
  1762. if (field >= buffer_end)
  1763. return NULL;
  1764. field++;
  1765. }
  1766. return field;
  1767. }
// Magic GUID appended to the client's Sec-WebSocket-Key before hashing,
// as required by the WebSocket handshake (RFC 6455, section 4.2.2)
static const char websocket_guid[] = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";

// Fixed prefix of the 101 Switching Protocols response; the base-64 encoded
// SHA-1 accept key is concatenated after this
static const char websocket_response[] =
    "HTTP/1.1 101 Switching Protocols\r\n"
    "Upgrade: websocket\r\n"
    "Connection: Upgrade\r\n"
    "Sec-WebSocket-Accept: ";
// Performs the server side of the WebSocket opening handshake on a freshly
// accepted TCP socket: reads the client's HTTP upgrade request, validates it
// (optionally restricting the Host header to 'limit_host'), and replies with
// the 101 Switching Protocols response carrying the computed accept key.
// Returns RMT_ERROR_NONE on success or a handshake/socket error otherwise.
static enum rmtError WebSocketHandshake(TCPSocket* tcp_socket, rmtPStr limit_host)
{
    rmtU32 start_ms, now_ms;

    // Parsing scratchpad
    char buffer[1024];
    char* buffer_ptr = buffer;
    int buffer_len = sizeof(buffer) - 1;
    char* buffer_end = buffer + buffer_len;

    char response_buffer[256];
    int response_buffer_len = sizeof(response_buffer) - 1;

    char* version;
    char* host;
    char* key;
    char* key_end;
    SHA1 hash;

    assert(tcp_socket != NULL);

    start_ms = msTimer_Get();

    // Really inefficient way of receiving the handshake data from the browser
    // Not really sure how to do this any better, as the termination requirement is \r\n\r\n
    while (buffer_ptr - buffer < buffer_len)
    {
        // One byte at a time so the \r\n\r\n terminator can be detected
        enum rmtError error = TCPSocket_Receive(tcp_socket, buffer_ptr, 1, 20);
        if (error == RMT_ERROR_SOCKET_RECV_FAILED)
            return error;

        // If there's a stall receiving the data, check for a handshake timeout
        if (error == RMT_ERROR_SOCKET_RECV_NO_DATA || error == RMT_ERROR_SOCKET_RECV_TIMEOUT)
        {
            now_ms = msTimer_Get();
            if (now_ms - start_ms > 1000)
                return RMT_ERROR_SOCKET_RECV_TIMEOUT;

            continue;
        }

        // Just in case new enums are added...
        assert(error == RMT_ERROR_NONE);

        // Stop at the blank line that ends the HTTP request header
        if (buffer_ptr - buffer >= 4)
        {
            if (*(buffer_ptr - 3) == '\r' &&
                *(buffer_ptr - 2) == '\n' &&
                *(buffer_ptr - 1) == '\r' &&
                *(buffer_ptr - 0) == '\n')
                break;
        }

        buffer_ptr++;
    }
    *buffer_ptr = 0;

    // HTTP GET instruction
    if (memcmp(buffer, "GET", 3) != 0)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_NOT_GET;

    // Look for the version number and verify that it's supported
    // Accepts version 8 or 13 — the '1'/'3' check matches the two
    // characters of "13"
    version = GetField(buffer, buffer_len, "Sec-WebSocket-Version:");
    if (version == NULL)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_VERSION;
    if (buffer_end - version < 2 || (version[0] != '8' && (version[0] != '1' || version[1] != '3')))
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_VERSION;

    // Make sure this connection comes from a known host
    host = GetField(buffer, buffer_len, "Host:");
    if (host == NULL)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_HOST;
    if (limit_host != NULL)
    {
        // A substring match is sufficient: reject only when limit_host does
        // not appear anywhere in the Host header value
        rsize_t limit_host_len = strnlen_s(limit_host, 128);
        char* found = NULL;
        if (strstr_s(host, buffer_end - host, limit_host, limit_host_len, &found) != EOK)
            return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_HOST;
    }

    // Look for the key start and null-terminate it within the receive buffer
    key = GetField(buffer, buffer_len, "Sec-WebSocket-Key:");
    if (key == NULL)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_KEY;
    if (strstr_s(key, buffer_end - key, "\r\n", 2, &key_end) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_KEY;
    *key_end = 0;

    // Concatenate the browser's key with the WebSocket Protocol GUID and base64 encode
    // the hash, to prove to the browser that this is a bonafide WebSocket server
    // NOTE(review): sizeof(websocket_guid) includes the null terminator —
    // presumably strncat_s stops at the source null; verify against the Safe C
    // excerpt used by this file
    buffer[0] = 0;
    if (strncat_s(buffer, buffer_len, key, key_end - key) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL;
    if (strncat_s(buffer, buffer_len, websocket_guid, sizeof(websocket_guid)) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL;
    hash = SHA1_Calculate(buffer, (rmtU32)strnlen_s(buffer, buffer_len));
    Base64_Encode(hash.data, sizeof(hash.data), (rmtU8*)buffer);

    // Send the response back to the server with a longer timeout than usual
    response_buffer[0] = 0;
    if (strncat_s(response_buffer, response_buffer_len, websocket_response, sizeof(websocket_response)) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL;
    if (strncat_s(response_buffer, response_buffer_len, buffer, buffer_len) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL;
    if (strncat_s(response_buffer, response_buffer_len, "\r\n\r\n", 4) != EOK)
        return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL;

    return TCPSocket_Send(tcp_socket, response_buffer, (rmtU32)strnlen_s(response_buffer, response_buffer_len), 1000);
}
  1865. static enum rmtError WebSocket_Create(WebSocket** web_socket)
  1866. {
  1867. *web_socket = (WebSocket*)malloc(sizeof(WebSocket));
  1868. if (*web_socket == NULL)
  1869. return RMT_ERROR_MALLOC_FAIL;
  1870. // Set default state
  1871. (*web_socket)->tcp_socket = NULL;
  1872. (*web_socket)->mode = WEBSOCKET_NONE;
  1873. (*web_socket)->frame_bytes_remaining = 0;
  1874. (*web_socket)->mask_offset = 0;
  1875. (*web_socket)->data_mask[0] = 0;
  1876. (*web_socket)->data_mask[1] = 0;
  1877. (*web_socket)->data_mask[2] = 0;
  1878. (*web_socket)->data_mask[3] = 0;
  1879. return RMT_ERROR_NONE;
  1880. }
  1881. static enum rmtError WebSocket_CreateServer(rmtU32 port, enum WebSocketMode mode, WebSocket** web_socket)
  1882. {
  1883. enum rmtError error;
  1884. assert(web_socket != NULL);
  1885. error = WebSocket_Create(web_socket);
  1886. if (error != RMT_ERROR_NONE)
  1887. return error;
  1888. (*web_socket)->mode = mode;
  1889. // Create the server's listening socket
  1890. error = TCPSocket_CreateServer((rmtU16)port, &(*web_socket)->tcp_socket);
  1891. if (error != RMT_ERROR_NONE)
  1892. {
  1893. WebSocket_Destroy(*web_socket);
  1894. *web_socket = NULL;
  1895. return error;
  1896. }
  1897. return RMT_ERROR_NONE;
  1898. }
  1899. static void WebSocket_Close(WebSocket* web_socket)
  1900. {
  1901. assert(web_socket != NULL);
  1902. if (web_socket->tcp_socket != NULL)
  1903. {
  1904. TCPSocket_Destroy(&web_socket->tcp_socket, RMT_ERROR_NONE);
  1905. web_socket->tcp_socket = NULL;
  1906. }
  1907. }
// Closes any open connection and releases the WebSocket itself
static void WebSocket_Destroy(WebSocket* web_socket)
{
    assert(web_socket != NULL);
    WebSocket_Close(web_socket);
    free(web_socket);
}
// Thin pass-through to the TCP layer: reports readability/writability and
// any pending error on the underlying socket
static SocketStatus WebSocket_PollStatus(WebSocket* web_socket)
{
    assert(web_socket != NULL);
    return TCPSocket_PollStatus(web_socket->tcp_socket);
}
  1919. static enum rmtError WebSocket_AcceptConnection(WebSocket* web_socket, WebSocket** client_socket)
  1920. {
  1921. TCPSocket* tcp_socket = NULL;
  1922. enum rmtError error;
  1923. // Is there a waiting connection?
  1924. assert(web_socket != NULL);
  1925. error = TCPSocket_AcceptConnection(web_socket->tcp_socket, &tcp_socket);
  1926. if (error != RMT_ERROR_NONE || tcp_socket == NULL)
  1927. return error;
  1928. // Need a successful handshake between client/server before allowing the connection
  1929. // TODO: Specify limit_host
  1930. error = WebSocketHandshake(tcp_socket, NULL);
  1931. if (error != RMT_ERROR_NONE)
  1932. return error;
  1933. // Allocate and return a new client socket
  1934. assert(client_socket != NULL);
  1935. error = WebSocket_Create(client_socket);
  1936. if (error != RMT_ERROR_NONE)
  1937. return error;
  1938. (*client_socket)->tcp_socket = tcp_socket;
  1939. (*client_socket)->mode = web_socket->mode;
  1940. return RMT_ERROR_NONE;
  1941. }
  1942. static void WriteSize(rmtU32 size, rmtU8* dest, rmtU32 dest_size, rmtU32 dest_offset)
  1943. {
  1944. int size_size = dest_size - dest_offset;
  1945. rmtU32 i;
  1946. for (i = 0; i < dest_size; i++)
  1947. {
  1948. int j = i - dest_offset;
  1949. dest[i] = (j < 0) ? 0 : (size >> ((size_size - j - 1) * 8)) & 0xFF;
  1950. }
  1951. }
// Sends 'length' bytes as a single, final, unmasked WebSocket frame (server
// frames are unmasked per the protocol), using the socket's configured
// text/binary opcode. Fails fast if the socket has errors or isn't writable.
static enum rmtError WebSocket_Send(WebSocket* web_socket, const void* data, rmtU32 length, rmtU32 timeout_ms)
{
    enum rmtError error;
    SocketStatus status;
    rmtU8 final_fragment, frame_type, frame_header[10];
    rmtU32 frame_header_size;

    assert(web_socket != NULL);

    // Can't send if there are socket errors
    status = WebSocket_PollStatus(web_socket);
    if (status.error_state != RMT_ERROR_NONE)
        return status.error_state;
    if (!status.can_write)
        return RMT_ERROR_SOCKET_SEND_TIMEOUT;

    // FIN bit set: every message is sent as one unfragmented frame
    final_fragment = 0x1 << 7;
    frame_type = (rmtU8)web_socket->mode;
    frame_header[0] = final_fragment | frame_type;

    // Construct the frame header, correctly applying the narrowest size:
    // <=125 inline, <=65535 as a 16-bit extension, otherwise a 64-bit
    // extension whose top 4 bytes are zero
    frame_header_size = 0;
    if (length <= 125)
    {
        frame_header_size = 2;
        frame_header[1] = (rmtU8)length;
    }
    else if (length <= 65535)
    {
        frame_header_size = 2 + 2;
        frame_header[1] = 126;
        WriteSize(length, frame_header + 2, 2, 0);
    }
    else
    {
        frame_header_size = 2 + 8;
        frame_header[1] = 127;
        WriteSize(length, frame_header + 2, 8, 4);
    }

    // Send frame header followed by data
    assert(data != NULL);
    error = TCPSocket_Send(web_socket->tcp_socket, frame_header, frame_header_size, timeout_ms);
    if (error != RMT_ERROR_NONE)
        return error;
    return TCPSocket_Send(web_socket->tcp_socket, data, length, timeout_ms);
}
  1994. static enum rmtError ReceiveFrameHeader(WebSocket* web_socket)
  1995. {
  1996. // TODO: Specify infinite timeout?
  1997. enum rmtError error;
  1998. rmtU8 msg_header[2] = { 0, 0 };
  1999. int msg_length, size_bytes_remaining, i;
  2000. rmtBool mask_present;
  2001. assert(web_socket != NULL);
  2002. // Get message header
  2003. error = TCPSocket_Receive(web_socket->tcp_socket, msg_header, 2, 20);
  2004. if (error != RMT_ERROR_NONE)
  2005. return error;
  2006. // Check for WebSocket Protocol disconnect
  2007. if (msg_header[0] == 0x88)
  2008. return RMT_ERROR_WEBSOCKET_DISCONNECTED;
  2009. // Check that the client isn't sending messages we don't understand
  2010. if (msg_header[0] != 0x81 && msg_header[0] != 0x82)
  2011. return RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER;
  2012. // Get message length and check to see if it's a marker for a wider length
  2013. msg_length = msg_header[1] & 0x7F;
  2014. size_bytes_remaining = 0;
  2015. switch (msg_length)
  2016. {
  2017. case 126: size_bytes_remaining = 2; break;
  2018. case 127: size_bytes_remaining = 8; break;
  2019. }
  2020. if (size_bytes_remaining > 0)
  2021. {
  2022. // Receive the wider bytes of the length
  2023. rmtU8 size_bytes[4];
  2024. error = TCPSocket_Receive(web_socket->tcp_socket, size_bytes, size_bytes_remaining, 20);
  2025. if (error != RMT_ERROR_NONE)
  2026. return RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER_SIZE;
  2027. // Calculate new length, MSB first
  2028. msg_length = 0;
  2029. for (i = 0; i < size_bytes_remaining; i++)
  2030. msg_length |= size_bytes[i] << ((size_bytes_remaining - 1 - i) * 8);
  2031. }
  2032. // Receive any message data masks
  2033. mask_present = (msg_header[1] & 0x80) != 0 ? RMT_TRUE : RMT_FALSE;
  2034. if (mask_present)
  2035. {
  2036. error = TCPSocket_Receive(web_socket->tcp_socket, web_socket->data_mask, 4, 20);
  2037. if (error != RMT_ERROR_NONE)
  2038. return error;
  2039. }
  2040. web_socket->frame_bytes_remaining = msg_length;
  2041. web_socket->mask_offset = 0;
  2042. return RMT_ERROR_NONE;
  2043. }
  2044. static enum rmtError WebSocket_Receive(WebSocket* web_socket, void* data, rmtU32 length, rmtU32 timeout_ms)
  2045. {
  2046. SocketStatus status;
  2047. char* cur_data;
  2048. char* end_data;
  2049. rmtU32 start_ms, now_ms;
  2050. rmtU32 bytes_to_read;
  2051. enum rmtError error;
  2052. assert(web_socket != NULL);
  2053. // Ensure there is data to receive
  2054. status = WebSocket_PollStatus(web_socket);
  2055. if (status.error_state != RMT_ERROR_NONE)
  2056. return status.error_state;
  2057. if (!status.can_read)
  2058. return RMT_ERROR_SOCKET_RECV_NO_DATA;
  2059. cur_data = (char*)data;
  2060. end_data = cur_data + length;
  2061. start_ms = msTimer_Get();
  2062. while (cur_data < end_data)
  2063. {
  2064. // Get next WebSocket frame if we've run out of data to read from the socket
  2065. if (web_socket->frame_bytes_remaining == 0)
  2066. {
  2067. error = ReceiveFrameHeader(web_socket);
  2068. if (error != RMT_ERROR_NONE)
  2069. return error;
  2070. }
  2071. // Read as much required data as possible
  2072. bytes_to_read = web_socket->frame_bytes_remaining < length ? web_socket->frame_bytes_remaining : length;
  2073. error = TCPSocket_Receive(web_socket->tcp_socket, cur_data, bytes_to_read, 20);
  2074. if (error == RMT_ERROR_SOCKET_RECV_FAILED)
  2075. return error;
  2076. // If there's a stall receiving the data, check for timeout
  2077. if (error == RMT_ERROR_SOCKET_RECV_NO_DATA || error == RMT_ERROR_SOCKET_RECV_TIMEOUT)
  2078. {
  2079. now_ms = msTimer_Get();
  2080. if (now_ms - start_ms > timeout_ms)
  2081. return RMT_ERROR_SOCKET_RECV_TIMEOUT;
  2082. continue;
  2083. }
  2084. // Apply data mask
  2085. if (*(rmtU32*)web_socket->data_mask != 0)
  2086. {
  2087. rmtU32 i;
  2088. for (i = 0; i < bytes_to_read; i++)
  2089. {
  2090. *((rmtU8*)cur_data + i) ^= web_socket->data_mask[web_socket->mask_offset & 3];
  2091. web_socket->mask_offset++;
  2092. }
  2093. }
  2094. cur_data += bytes_to_read;
  2095. web_socket->frame_bytes_remaining -= bytes_to_read;
  2096. }
  2097. return RMT_ERROR_NONE;
  2098. }
  2099. /*
  2100. ------------------------------------------------------------------------------------------------------------------------
  2101. ------------------------------------------------------------------------------------------------------------------------
  2102. @MESSAGEQ: Multiple producer, single consumer message queue
  2103. ------------------------------------------------------------------------------------------------------------------------
  2104. ------------------------------------------------------------------------------------------------------------------------
  2105. */
// Identifies the payload carried by a queued Message
typedef enum MessageID
{
    MsgID_NotReady,    // Slot allocated but not yet committed by its producer
    MsgID_LogText,
    MsgID_SampleTree,
} MessageID;

// Header for a variable-sized message stored inline in the queue buffer
typedef struct Message
{
    MessageID id;

    // Size of the payload that follows this header
    rmtU32 payload_size;

    // For telling which thread the message came from in the debugger
    struct ThreadSampler* thread_sampler;

    // First byte of the payload; the rest extends past the end of the struct
    rmtU8 payload[1];
} Message;

// Multiple producer, single consumer message queue that uses its own data buffer
// to store the message data.
typedef struct MessageQueue
{
    rmtU32 size;

    // The physical address of this data buffer is pointed to by two sequential
    // virtual memory pages, allowing automatic wrap-around of any reads or writes
    // that exceed the limits of the buffer.
    VirtualMirrorBuffer* data;

    // Read/write position never wrap allowing trivial overflow checks
    // with easier debugging
    rmtU32 read_pos;
    rmtU32 write_pos;
} MessageQueue;
  2134. static void MessageQueue_Destroy(MessageQueue* queue)
  2135. {
  2136. assert(queue != NULL);
  2137. if (queue->data != NULL)
  2138. {
  2139. VirtualMirrorBuffer_Destroy(queue->data);
  2140. queue->data = NULL;
  2141. }
  2142. free(queue);
  2143. }
  2144. static enum rmtError MessageQueue_Create(MessageQueue** queue, rmtU32 size)
  2145. {
  2146. enum rmtError error;
  2147. assert(queue != NULL);
  2148. // Allocate the container
  2149. *queue = (MessageQueue*)malloc(sizeof(MessageQueue));
  2150. if (*queue == NULL)
  2151. return RMT_ERROR_MALLOC_FAIL;
  2152. // Set defaults
  2153. (*queue)->size = 0;
  2154. (*queue)->data = NULL;
  2155. (*queue)->read_pos = 0;
  2156. (*queue)->write_pos = 0;
  2157. error = VirtualMirrorBuffer_Create(&(*queue)->data, size, 10);
  2158. if (error != RMT_ERROR_NONE)
  2159. {
  2160. MessageQueue_Destroy(*queue);
  2161. *queue = NULL;
  2162. return error;
  2163. }
  2164. // The mirror buffer needs to be page-aligned and will change the requested
  2165. // size to match that.
  2166. (*queue)->size = (*queue)->data->size;
  2167. // Set the entire buffer to not ready message
  2168. memset((*queue)->data->ptr, MsgID_NotReady, (*queue)->size);
  2169. return RMT_ERROR_NONE;
  2170. }
// Lock-free, multi-producer allocation of a message slot with room for
// 'payload_size' payload bytes. Returns NULL if the queue is full. The
// returned message is not visible to the consumer until
// MessageQueue_CommitMessage is called.
static Message* MessageQueue_AllocMessage(MessageQueue* queue, rmtU32 payload_size, struct ThreadSampler* thread_sampler)
{
    Message* msg;
    rmtU32 write_size = sizeof(Message) + payload_size;

    assert(queue != NULL);

    while (1)
    {
        // Check for potential overflow
        // NOTE(review): 's - write_size' is unsigned; if write_size ever
        // exceeded the queue size this wraps huge and the check passes —
        // callers are assumed to request payloads far smaller than the queue
        rmtU32 s = queue->size;
        rmtU32 r = queue->read_pos;
        rmtU32 w = queue->write_pos;
        if ((int)(w - r) > s - write_size)
            return NULL;

        // Point to the newly allocated space
        // (size is a power of two, so '& (s - 1)' wraps into the mirror buffer)
        msg = (Message*)(queue->data->ptr + (w & (s - 1)));

        // Increment the write position, leaving the loop if this is the thread that succeeded
        if (AtomicCompareAndSwap(&queue->write_pos, w, w + write_size) == RMT_TRUE)
        {
            // Safe to set payload size after thread claims ownership of this allocated range
            msg->payload_size = payload_size;
            msg->thread_sampler = thread_sampler;
            break;
        }
    }

    return msg;
}
// Publishes a previously allocated message to the consumer by writing its ID.
// The write fence guarantees the payload is fully visible before the ID flips
// away from MsgID_NotReady.
static void MessageQueue_CommitMessage(MessageQueue* queue, Message* message, MessageID id)
{
    assert(queue != NULL);
    assert(message != NULL);

    // Ensure message writes complete before commit
    WriteFence();

    // Setting the message ID signals to the consumer that the message is ready
    assert(message->id == MsgID_NotReady);
    message->id = id;
}
// Single-consumer peek: returns the oldest committed message without
// consuming it, or NULL if the queue is empty or the head message hasn't
// been committed yet.
Message* MessageQueue_PeekNextMessage(MessageQueue* queue)
{
    Message* ptr;
    rmtU32 r;

    assert(queue != NULL);

    // First check that there are bytes queued
    if (queue->write_pos - queue->read_pos == 0)
        return NULL;

    // Messages are in the queue but may not have been commit yet
    // Messages behind this one may have been commit but it's not reachable until
    // the next one in the queue is ready.
    r = queue->read_pos & (queue->size - 1);
    ptr = (Message*)(queue->data->ptr + r);
    if (ptr->id != MsgID_NotReady)
        return ptr;

    return NULL;
}
// Single-consumer release of a message obtained from PeekNextMessage: wipes
// the slot back to MsgID_NotReady and advances the read position so producers
// can reuse the space.
static void MessageQueue_ConsumeNextMessage(MessageQueue* queue, Message* message)
{
    rmtU32 message_size;

    assert(queue != NULL);
    assert(message != NULL);

    // Setting the message ID to "not ready" serves as a marker to the consumer that even though
    // space has been allocated for a message, the message isn't ready to be consumed
    // yet.
    //
    // We can't do that when allocating the message because multiple threads will be fighting for
    // the same location. Instead, clear out any messages just read by the consumer before advancing
    // the read position so that a winning thread's allocation will inherit the "not ready" state.
    //
    // This costs some write bandwidth and has the potential to flush cache to other cores.
    message_size = sizeof(Message) + message->payload_size;
    memset(message, MsgID_NotReady, message_size);

    // Ensure clear completes before advancing the read position
    WriteFence();
    queue->read_pos += message_size;
}
  2244. /*
  2245. ------------------------------------------------------------------------------------------------------------------------
  2246. ------------------------------------------------------------------------------------------------------------------------
  2247. @NETWORK: Network Server
  2248. ------------------------------------------------------------------------------------------------------------------------
  2249. ------------------------------------------------------------------------------------------------------------------------
  2250. */
// Remotery network server: one listening WebSocket plus at most one
// connected client at a time
typedef struct
{
    WebSocket* listen_socket;

    // Currently connected client, or NULL
    WebSocket* client_socket;

    // Time of the last keep-alive ping sent to the client (ms)
    rmtU32 last_ping_time;

    // Port to (re)create the listening socket on
    rmtU16 port;
} Server;

static void Server_Destroy(Server* server);
  2259. static enum rmtError Server_Create(rmtU16 port, Server** server)
  2260. {
  2261. enum rmtError error;
  2262. assert(server != NULL);
  2263. *server = (Server*)malloc(sizeof(Server));
  2264. if (*server == NULL)
  2265. return RMT_ERROR_MALLOC_FAIL;
  2266. // Initialise defaults
  2267. (*server)->listen_socket = NULL;
  2268. (*server)->client_socket = NULL;
  2269. (*server)->last_ping_time = 0;
  2270. (*server)->port = port;
  2271. // Create the listening WebSocket
  2272. error = WebSocket_CreateServer(port, WEBSOCKET_TEXT, &(*server)->listen_socket);
  2273. if (error != RMT_ERROR_NONE)
  2274. {
  2275. Server_Destroy(*server);
  2276. *server = NULL;
  2277. return error;
  2278. }
  2279. return RMT_ERROR_NONE;
  2280. }
  2281. static void Server_Destroy(Server* server)
  2282. {
  2283. assert(server != NULL);
  2284. if (server->client_socket != NULL)
  2285. WebSocket_Destroy(server->client_socket);
  2286. if (server->listen_socket != NULL)
  2287. WebSocket_Destroy(server->listen_socket);
  2288. free(server);
  2289. }
// Returns RMT_TRUE when a client WebSocket is currently attached
static rmtBool Server_IsClientConnected(Server* server)
{
    assert(server != NULL);
    return server->client_socket != NULL ? RMT_TRUE : RMT_FALSE;
}
  2295. static enum rmtError Server_Send(Server* server, const void* data, rmtU32 length, rmtU32 timeout)
  2296. {
  2297. assert(server != NULL);
  2298. if (Server_IsClientConnected(server))
  2299. {
  2300. enum rmtError error = WebSocket_Send(server->client_socket, data, length, timeout);
  2301. if (error == RMT_ERROR_SOCKET_SEND_FAIL)
  2302. {
  2303. WebSocket_Destroy(server->client_socket);
  2304. server->client_socket = NULL;
  2305. }
  2306. return error;
  2307. }
  2308. return RMT_ERROR_NONE;
  2309. }
  2310. static void Server_Update(Server* server)
  2311. {
  2312. rmtU32 cur_time;
  2313. assert(server != NULL);
  2314. // Recreate the listening socket if it's been destroyed earlier
  2315. if (server->listen_socket == NULL)
  2316. WebSocket_CreateServer(server->port, WEBSOCKET_TEXT, &server->listen_socket);
  2317. if (server->listen_socket != NULL && server->client_socket == NULL)
  2318. {
  2319. // Accept connections as long as there is no client connected
  2320. WebSocket* client_socket = NULL;
  2321. enum rmtError error = WebSocket_AcceptConnection(server->listen_socket, &client_socket);
  2322. if (error == RMT_ERROR_NONE)
  2323. {
  2324. server->client_socket = client_socket;
  2325. }
  2326. else
  2327. {
  2328. // Destroy the listen socket on failure to accept
  2329. // It will get recreated in another update
  2330. WebSocket_Destroy(server->listen_socket);
  2331. server->listen_socket = NULL;
  2332. }
  2333. }
  2334. else
  2335. {
  2336. // Check for any incoming messages
  2337. char message_first_byte;
  2338. enum rmtError error = WebSocket_Receive(server->client_socket, &message_first_byte, 1, 0);
  2339. if (error == RMT_ERROR_NONE)
  2340. {
  2341. // data available to read
  2342. }
  2343. else if (error == RMT_ERROR_SOCKET_RECV_NO_DATA)
  2344. {
  2345. // no data available
  2346. }
  2347. else if (error == RMT_ERROR_SOCKET_RECV_TIMEOUT)
  2348. {
  2349. // data not available yet, can afford to ignore as we're only reading the first byte
  2350. }
  2351. else
  2352. {
  2353. // Anything else is an error that may have closed the connection
  2354. // NULL the variable before destroying the socket
  2355. WebSocket* client_socket = server->client_socket;
  2356. server->client_socket = NULL;
  2357. WebSocket_Destroy(client_socket);
  2358. }
  2359. }
  2360. // Send pings to the client every second
  2361. cur_time = msTimer_Get();
  2362. if (cur_time - server->last_ping_time > 1000)
  2363. {
  2364. rmtPStr ping_message = "{ \"id\": \"PING\" }";
  2365. Server_Send(server, ping_message, (rmtU32)strlen(ping_message), 20);
  2366. server->last_ping_time = cur_time;
  2367. }
  2368. }
  2369. /*
  2370. ------------------------------------------------------------------------------------------------------------------------
  2371. ------------------------------------------------------------------------------------------------------------------------
  2372. @JSON: Basic, text-based JSON serialisation
  2373. ------------------------------------------------------------------------------------------------------------------------
  2374. ------------------------------------------------------------------------------------------------------------------------
  2375. */
//
// Simple macro for hopefully making the serialisation a little clearer by hiding the error handling
// Evaluates 'stmt' once, storing into a local 'error' that the caller must declare,
// and returns early on any failure
//
#define JSON_ERROR_CHECK(stmt) { error = stmt; if (error != RMT_ERROR_NONE) return error; }
// Emit '{' — start of a JSON object
static enum rmtError json_OpenObject(Buffer* buffer)
{
    return Buffer_Write(buffer, (void*)"{", 1);
}

// Emit '}' — end of a JSON object
static enum rmtError json_CloseObject(Buffer* buffer)
{
    return Buffer_Write(buffer, (void*)"}", 1);
}

// Emit ',' between object fields or array elements
static enum rmtError json_Comma(Buffer* buffer)
{
    return Buffer_Write(buffer, (void*)",", 1);
}

// Emit ':' between a field name and its value
static enum rmtError json_Colon(Buffer* buffer)
{
    return Buffer_Write(buffer, (void*)":", 1);
}
  2396. static enum rmtError json_String(Buffer* buffer, rmtPStr string)
  2397. {
  2398. enum rmtError error;
  2399. JSON_ERROR_CHECK(Buffer_Write(buffer, (void*)"\"", 1));
  2400. JSON_ERROR_CHECK(Buffer_WriteString(buffer, string));
  2401. return Buffer_Write(buffer, (void*)"\"", 1);
  2402. }
  2403. static enum rmtError json_FieldStr(Buffer* buffer, rmtPStr name, rmtPStr value)
  2404. {
  2405. enum rmtError error;
  2406. JSON_ERROR_CHECK(json_String(buffer, name));
  2407. JSON_ERROR_CHECK(json_Colon(buffer));
  2408. return json_String(buffer, value);
  2409. }
  2410. static enum rmtError json_FieldU64(Buffer* buffer, rmtPStr name, rmtU64 value)
  2411. {
  2412. static char temp_buf[32];
  2413. char* end;
  2414. char* tptr;
  2415. json_String(buffer, name);
  2416. json_Colon(buffer);
  2417. if (value == 0)
  2418. return Buffer_Write(buffer, (void*)"0", 1);
  2419. // Null terminate and start at the end
  2420. end = temp_buf + sizeof(temp_buf) - 1;
  2421. *end = 0;
  2422. tptr = end;
  2423. // Loop through the value with radix 10
  2424. do
  2425. {
  2426. rmtU64 next_value = value / 10;
  2427. *--tptr = (char)('0' + (value - next_value * 10));
  2428. value = next_value;
  2429. } while (value);
  2430. return Buffer_Write(buffer, tptr, (rmtU32)(end - tptr));
  2431. }
  2432. static enum rmtError json_OpenArray(Buffer* buffer, rmtPStr name)
  2433. {
  2434. enum rmtError error;
  2435. JSON_ERROR_CHECK(json_String(buffer, name));
  2436. JSON_ERROR_CHECK(json_Colon(buffer));
  2437. return Buffer_Write(buffer, (void*)"[", 1);
  2438. }
// Emit ']' — end of a JSON array
static enum rmtError json_CloseArray(Buffer* buffer)
{
    return Buffer_Write(buffer, (void*)"]", 1);
}
  2443. /*
  2444. ------------------------------------------------------------------------------------------------------------------------
  2445. ------------------------------------------------------------------------------------------------------------------------
  2446. @SAMPLE: Base Sample Description for CPU by default
  2447. ------------------------------------------------------------------------------------------------------------------------
  2448. ------------------------------------------------------------------------------------------------------------------------
  2449. */
// Origin of a sample's timing data; each type gets its own per-thread sample tree
enum SampleType
{
    SampleType_CPU,
    SampleType_CUDA,
    SampleType_D3D11,
    SampleType_OpenGL,
    SampleType_Count,
};

// A single profiled region, linked into a per-thread tree of samples
typedef struct Sample
{
    // Inherit so that samples can be quickly allocated
    ObjectLink ObjectLink;

    enum SampleType type;

    // Used to anonymously copy sample data without knowing its type
    rmtU32 size_bytes;

    // Sample name and unique hash
    rmtPStr name;
    rmtU32 name_hash;

    // Unique, persistent ID among all samples
    rmtU32 unique_id;

    // Links to related samples in the tree
    struct Sample* parent;
    struct Sample* first_child;
    struct Sample* last_child;
    struct Sample* next_sibling;

    // Keep track of child count to distinguish from repeated calls to the same function at the same stack level
    // This is also mixed with the callstack hash to allow consistent addressing of any point in the tree
    rmtU32 nb_children;

    // Start and end of the sample in microseconds
    rmtU64 us_start;
    rmtU64 us_end;
} Sample;
// ObjectAllocator constructor: puts a freshly-allocated Sample into its default
// empty state. Runs once per allocation, not per reuse (see Sample_Prepare).
static enum rmtError Sample_Constructor(Sample* sample)
{
    assert(sample != NULL);

    ObjectLink_Constructor((ObjectLink*)sample);

    sample->type = SampleType_CPU;
    sample->size_bytes = sizeof(Sample);
    sample->name = NULL;
    sample->name_hash = 0;
    sample->unique_id = 0;
    sample->parent = NULL;
    sample->first_child = NULL;
    sample->last_child = NULL;
    sample->next_sibling = NULL;
    sample->nb_children = 0;
    sample->us_start = 0;
    sample->us_end = 0;

    return RMT_ERROR_NONE;
}
// ObjectAllocator destructor: Samples own no resources, so nothing to release
static void Sample_Destructor(Sample* sample)
{
    RMT_UNREFERENCED_PARAMETER(sample);
}
// Re-initialises a (possibly reused) sample for a new profiled region.
// Note: deliberately leaves 'type' and 'size_bytes' alone - those are fixed at
// construction time by the type-specific constructor.
static void Sample_Prepare(Sample* sample, rmtPStr name, rmtU32 name_hash, Sample* parent)
{
    sample->name = name;
    sample->name_hash = name_hash;
    sample->unique_id = 0;
    sample->parent = parent;
    sample->first_child = NULL;
    sample->last_child = NULL;
    sample->next_sibling = NULL;
    sample->nb_children = 0;
    sample->us_start = 0;
    sample->us_end = 0;
}
  2517. static enum rmtError json_SampleArray(Buffer* buffer, Sample* first_sample, rmtPStr name);
  2518. static enum rmtError json_Sample(Buffer* buffer, Sample* sample)
  2519. {
  2520. enum rmtError error;
  2521. assert(sample != NULL);
  2522. JSON_ERROR_CHECK(json_OpenObject(buffer));
  2523. JSON_ERROR_CHECK(json_FieldStr(buffer, "name", sample->name));
  2524. JSON_ERROR_CHECK(json_Comma(buffer));
  2525. JSON_ERROR_CHECK(json_FieldU64(buffer, "id", sample->unique_id));
  2526. JSON_ERROR_CHECK(json_Comma(buffer));
  2527. JSON_ERROR_CHECK(json_FieldU64(buffer, "us_start", sample->us_start));
  2528. JSON_ERROR_CHECK(json_Comma(buffer));
  2529. JSON_ERROR_CHECK(json_FieldU64(buffer, "us_length", max(sample->us_end - sample->us_start, 0)));
  2530. if (sample->first_child != NULL)
  2531. {
  2532. JSON_ERROR_CHECK(json_Comma(buffer));
  2533. JSON_ERROR_CHECK(json_SampleArray(buffer, sample->first_child, "children"));
  2534. }
  2535. return json_CloseObject(buffer);
  2536. }
  2537. static enum rmtError json_SampleArray(Buffer* buffer, Sample* first_sample, rmtPStr name)
  2538. {
  2539. enum rmtError error;
  2540. Sample* sample;
  2541. JSON_ERROR_CHECK(json_OpenArray(buffer, name));
  2542. for (sample = first_sample; sample != NULL; sample = sample->next_sibling)
  2543. {
  2544. JSON_ERROR_CHECK(json_Sample(buffer, sample));
  2545. if (sample->next_sibling != NULL)
  2546. JSON_ERROR_CHECK(json_Comma(buffer));
  2547. }
  2548. return json_CloseArray(buffer);
  2549. }
  2550. /*
  2551. ------------------------------------------------------------------------------------------------------------------------
  2552. ------------------------------------------------------------------------------------------------------------------------
  2553. @SAMPLETREE: A tree of samples with their allocator
  2554. ------------------------------------------------------------------------------------------------------------------------
  2555. ------------------------------------------------------------------------------------------------------------------------
  2556. */
// Per-thread, per-sample-type tree of samples plus the allocator they come from
typedef struct SampleTree
{
    // Allocator for all samples
    ObjectAllocator* allocator;

    // Root sample for all samples created by this thread
    Sample* root;

    // Most recently pushed sample
    Sample* current_parent;
} SampleTree;
  2566. static void SampleTree_Destroy(SampleTree* tree);
  2567. static enum rmtError SampleTree_Create(SampleTree** tree, rmtU32 sample_size, ObjConstructor constructor, ObjDestructor destructor)
  2568. {
  2569. enum rmtError error;
  2570. assert(tree != NULL);
  2571. *tree = (SampleTree*)malloc(sizeof(SampleTree));
  2572. if (*tree == NULL)
  2573. return RMT_ERROR_MALLOC_FAIL;
  2574. (*tree)->allocator = NULL;
  2575. (*tree)->root = NULL;
  2576. (*tree)->current_parent = NULL;
  2577. // Create the sample allocator
  2578. error = ObjectAllocator_Create(&(*tree)->allocator, sample_size, constructor, destructor);
  2579. if (error != RMT_ERROR_NONE)
  2580. {
  2581. SampleTree_Destroy(*tree);
  2582. *tree = NULL;
  2583. return error;
  2584. }
  2585. // Create a root sample that's around for the lifetime of the thread
  2586. error = ObjectAllocator_Alloc((*tree)->allocator, (void**)&(*tree)->root);
  2587. if (error != RMT_ERROR_NONE)
  2588. {
  2589. SampleTree_Destroy(*tree);
  2590. *tree = NULL;
  2591. return error;
  2592. }
  2593. Sample_Prepare((*tree)->root, "<Root Sample>", 0, NULL);
  2594. (*tree)->current_parent = (*tree)->root;
  2595. return RMT_ERROR_NONE;
  2596. }
// Releases a sample tree: root sample first (back to the allocator), then the
// allocator itself, then the container. Safe to call on a partially-created
// tree whose members are NULL.
static void SampleTree_Destroy(SampleTree* tree)
{
    assert(tree != NULL);

    if (tree->root != NULL)
    {
        ObjectAllocator_Free(tree->allocator, tree->root);
        tree->root = NULL;
    }

    if (tree->allocator != NULL)
    {
        ObjectAllocator_Destroy(tree->allocator);
        tree->allocator = NULL;
    }

    free(tree);
}
  2612. rmtU32 HashCombine(rmtU32 hash_a, rmtU32 hash_b)
  2613. {
  2614. // A sequence of 32 uniformly random bits so that each bit of the combined hash is changed on application
  2615. // Derived from the golden ratio: UINT_MAX / ((1 + sqrt(5)) / 2)
  2616. // In reality it's just an arbitrary value which happens to work well, avoiding mapping all zeros to zeros.
  2617. // http://burtleburtle.net/bob/hash/doobs.html
  2618. static rmtU32 random_bits = 0x9E3779B9;
  2619. hash_a ^= hash_b + random_bits + (hash_a << 6) + (hash_a >> 2);
  2620. return hash_a;
  2621. }
// Pushes a new sample under the tree's current parent and makes it the new
// current parent. The sample is allocated from the tree's object allocator;
// allocation failure is passed straight back to the caller.
static enum rmtError SampleTree_Push(SampleTree* tree, rmtPStr name, rmtU32 name_hash, Sample** sample)
{
    Sample* parent;
    enum rmtError error;
    rmtU32 unique_id;

    // As each tree has a root sample node allocated, a parent must always be present
    assert(tree != NULL);
    assert(tree->current_parent != NULL);
    parent = tree->current_parent;

    if (parent->last_child != NULL && parent->last_child->name_hash == name_hash)
    {
        // TODO: Collapse siblings with flag exception?
        // Note that above check is not enough - requires a linear search
    }
    if (parent->name_hash == name_hash)
    {
        // TODO: Collapse recursion on flag?
    }

    // Allocate a new sample
    error = ObjectAllocator_Alloc(tree->allocator, (void**)sample);
    if (error != RMT_ERROR_NONE)
        return error;
    Sample_Prepare(*sample, name, name_hash, parent);

    // Generate a unique ID for this sample in the tree by mixing the parent's
    // ID with this sample's name hash and its ordinal among its siblings
    unique_id = parent->unique_id;
    unique_id = HashCombine(unique_id, (*sample)->name_hash);
    unique_id = HashCombine(unique_id, parent->nb_children);
    (*sample)->unique_id = unique_id;

    // Add sample to its parent
    parent->nb_children++;
    if (parent->first_child == NULL)
    {
        parent->first_child = *sample;
        parent->last_child = *sample;
    }
    else
    {
        assert(parent->last_child != NULL);
        parent->last_child->next_sibling = *sample;
        parent->last_child = *sample;
    }

    // Make this sample the new parent of any newly created samples
    tree->current_parent = *sample;

    return RMT_ERROR_NONE;
}
// Closes 'sample' by restoring its parent as the tree's current parent.
// The root sample is permanent and must never be popped.
static void SampleTree_Pop(SampleTree* tree, Sample* sample)
{
    assert(tree != NULL);
    assert(sample != NULL);
    assert(sample != tree->root);
    tree->current_parent = sample->parent;
}
// Recursively converts the hierarchy rooted at 'sample' into a flat,
// depth-first singly-linked chain via the samples' embedded ObjectLinks,
// incrementing *nb_samples for every sample visited (including 'sample').
// Child pointers are cleared as the tree is flattened. Returns the last link
// in the flattened chain so the caller can free the whole range in one go.
static ObjectLink* FlattenSampleTree(Sample* sample, rmtU32* nb_samples)
{
    Sample* child;
    ObjectLink* cur_link = &sample->ObjectLink;

    assert(sample != NULL);
    assert(nb_samples != NULL);

    *nb_samples += 1;

    // This sample's chain continues with its first child (NULL if it's a leaf)
    sample->ObjectLink.next = (ObjectLink*)sample->first_child;

    // Link all children together: each child subtree's last link is spliced to
    // the next sibling, so the chain walks the tree in depth-first order
    for (child = sample->first_child; child != NULL; child = child->next_sibling)
    {
        ObjectLink* last_link = FlattenSampleTree(child, nb_samples);
        last_link->next = (ObjectLink*)child->next_sibling;
        cur_link = last_link;
    }

    // Clear child info
    sample->first_child = NULL;
    sample->last_child = NULL;
    sample->nb_children = 0;

    return cur_link;
}
// Returns an entire sample tree to its allocator, flattening it first so the
// whole range can be released with a single ranged free when there are children.
static void FreeSampleTree(Sample* sample, ObjectAllocator* allocator)
{
    // Chain all samples together in a flat list
    rmtU32 nb_cleared_samples = 0;
    ObjectLink* last_link = FlattenSampleTree(sample, &nb_cleared_samples);

    // Release the complete sample memory range; a lone sample (no children,
    // next == NULL after flattening) takes the single-object path
    if (sample->ObjectLink.next != NULL)
        ObjectAllocator_FreeRange(allocator, sample, last_link, nb_cleared_samples);
    else
        ObjectAllocator_Free(allocator, sample);
}
// Payload for a MsgID_SampleTree message: a completed sample tree plus the
// allocator it must eventually be returned to and the owning thread's name
typedef struct Msg_SampleTree
{
    Sample* root_sample;
    ObjectAllocator* allocator;
    rmtPStr thread_name;
} Msg_SampleTree;
  2712. static void AddSampleTreeMessage(MessageQueue* queue, Sample* sample, ObjectAllocator* allocator, rmtPStr thread_name, struct ThreadSampler* thread_sampler)
  2713. {
  2714. Msg_SampleTree* payload;
  2715. // Attempt to allocate a message for sending the tree to the viewer
  2716. Message* message = MessageQueue_AllocMessage(queue, sizeof(Msg_SampleTree), thread_sampler);
  2717. if (message == NULL)
  2718. {
  2719. // Discard the tree on failure
  2720. FreeSampleTree(sample, allocator);
  2721. return;
  2722. }
  2723. // Populate and commit
  2724. payload = (Msg_SampleTree*)message->payload;
  2725. payload->root_sample = sample;
  2726. payload->allocator = allocator;
  2727. payload->thread_name = thread_name;
  2728. MessageQueue_CommitMessage(queue, message, MsgID_SampleTree);
  2729. }
  2730. /*
  2731. ------------------------------------------------------------------------------------------------------------------------
  2732. ------------------------------------------------------------------------------------------------------------------------
  2733. @TSAMPLER: Per-Thread Sampler
  2734. ------------------------------------------------------------------------------------------------------------------------
  2735. ------------------------------------------------------------------------------------------------------------------------
  2736. */
// Per-thread sampling state, created lazily the first time a thread samples
typedef struct ThreadSampler
{
    // Name to assign to the thread in the viewer
    rmtS8 name[64];

    // Store a unique sample tree for each type
    SampleTree* sample_trees[SampleType_Count];

    // Microsecond accuracy timer for CPU timestamps
    usTimer timer;

    // Next in the global list of active thread samplers
    struct ThreadSampler* volatile next;
} ThreadSampler;
static void ThreadSampler_Destroy(ThreadSampler* ts);

// Allocates and initialises a per-thread sampler. Only the CPU sample tree is
// created here; GPU trees are created on demand because they need API context.
static enum rmtError ThreadSampler_Create(ThreadSampler** thread_sampler)
{
    enum rmtError error;
    int i;

    // Allocate space for the thread sampler
    *thread_sampler = (ThreadSampler*)malloc(sizeof(ThreadSampler));
    if (*thread_sampler == NULL)
        return RMT_ERROR_MALLOC_FAIL;

    // Set defaults
    for (i = 0; i < SampleType_Count; i++)
        (*thread_sampler)->sample_trees[i] = NULL;
    (*thread_sampler)->next = NULL;

    // Set the initial name based on the unique thread sampler address
    // NOTE(review): this base64-encodes the pointer value stored at
    // 'thread_sampler' (i.e. the freshly allocated address), which is unique
    // per sampler instance - confirm that is the intent, not the object bytes
    Base64_Encode((rmtU8*)thread_sampler, sizeof(thread_sampler), (rmtU8*)(*thread_sampler)->name);

    // Create the CPU sample tree only - the rest are created on-demand as they need
    // extra context information to function correctly.
    error = SampleTree_Create(&(*thread_sampler)->sample_trees[SampleType_CPU], sizeof(Sample), (ObjConstructor)Sample_Constructor, (ObjDestructor)Sample_Destructor);
    if (error != RMT_ERROR_NONE)
    {
        ThreadSampler_Destroy(*thread_sampler);
        *thread_sampler = NULL;
        return error;
    }

    // Kick-off the timer
    usTimer_Init(&(*thread_sampler)->timer);

    return RMT_ERROR_NONE;
}
  2776. static void ThreadSampler_Destroy(ThreadSampler* ts)
  2777. {
  2778. int i;
  2779. assert(ts != NULL);
  2780. for (i = 0; i < SampleType_Count; i++)
  2781. {
  2782. if (ts->sample_trees[i] != NULL)
  2783. {
  2784. SampleTree_Destroy(ts->sample_trees[i]);
  2785. ts->sample_trees[i] = NULL;
  2786. }
  2787. }
  2788. free(ts);
  2789. }
// Thin forwarding wrapper: opens a new sample in the given tree.
// The ThreadSampler parameter is currently unused but kept for interface symmetry.
static enum rmtError ThreadSampler_Push(ThreadSampler* ts, SampleTree* tree, rmtPStr name, rmtU32 name_hash, Sample** sample)
{
    RMT_UNREFERENCED_PARAMETER(ts);
    return SampleTree_Push(tree, name, name_hash, sample);
}
// Closes 'sample' in its tree. When the pop returns the tree to the root,
// the completed frame of samples is detached from the root and queued for the
// Remotery thread to serialise and send to the viewer.
static void ThreadSampler_Pop(ThreadSampler* ts, MessageQueue* queue, Sample* sample)
{
    SampleTree* tree = ts->sample_trees[sample->type];
    SampleTree_Pop(tree, sample);

    // Are we back at the root?
    if (tree->current_parent == tree->root)
    {
        // Disconnect all samples from the root and pack in the chosen message queue
        Sample* root = tree->root;
        root->first_child = NULL;
        root->last_child = NULL;
        root->nb_children = 0;
        AddSampleTreeMessage(queue, sample, tree->allocator, ts->name, ts);
    }
}
  2810. /*
  2811. ------------------------------------------------------------------------------------------------------------------------
  2812. ------------------------------------------------------------------------------------------------------------------------
  2813. @REMOTERY: Remotery
  2814. ------------------------------------------------------------------------------------------------------------------------
  2815. ------------------------------------------------------------------------------------------------------------------------
  2816. */
#ifdef RMT_USE_OPENGL

// Minimal local declarations of the OpenGL types and timer-query entry points
// needed for GPU timestamp sampling, avoiding a dependency on GL headers.

#ifndef APIENTRY
#  if defined(__MINGW32__) || defined(__CYGWIN__)
#    define APIENTRY __stdcall
#  elif (_MSC_VER >= 800) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__)
#    define APIENTRY __stdcall
#  else
#    define APIENTRY
#  endif
#endif

#ifndef GLAPI
#  if defined(__MINGW32__) || defined(__CYGWIN__)
#    define GLAPI extern
#  else
#    define GLAPI WINGDIAPI
#  endif
#endif

#ifndef GLAPIENTRY
#define GLAPIENTRY APIENTRY
#endif

typedef rmtU32 GLenum;
typedef rmtU32 GLuint;
typedef rmtS32 GLint;
typedef rmtS32 GLsizei;
typedef rmtU64 GLuint64;
typedef rmtS64 GLint64;
typedef unsigned char GLubyte;

// Function-pointer types for the query-object API, loaded at runtime
typedef void (GLAPIENTRY * PFNGLGENQUERIESPROC) (GLsizei n, GLuint* ids);
typedef void (GLAPIENTRY * PFNGLDELETEQUERIESPROC) (GLsizei n, const GLuint* ids);
typedef void (GLAPIENTRY * PFNGLBEGINQUERYPROC) (GLenum target, GLuint id);
typedef void (GLAPIENTRY * PFNGLENDQUERYPROC) (GLenum target);
typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint* params);
typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTUIVPROC) (GLuint id, GLenum pname, GLuint* params);
typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTI64VPROC) (GLuint id, GLenum pname, GLint64* params);
typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTUI64VPROC) (GLuint id, GLenum pname, GLuint64* params);
typedef void (GLAPIENTRY * PFNGLQUERYCOUNTERPROC) (GLuint id, GLenum target);

GLAPI GLenum GLAPIENTRY glGetError(void);

#endif
#ifdef RMT_USE_D3D11
// Opaque D3D11 sampling state; defined later in the D3D11 section
typedef struct D3D11 D3D11;
static enum rmtError D3D11_Create(D3D11** d3d11);
static void D3D11_Destroy(D3D11* d3d11);
#endif
// The single global Remotery instance: server, message queue, per-API state
struct Remotery
{
    Server* server;

    // TLS slot storing each thread's ThreadSampler
    rmtTLS thread_sampler_tls_handle;

    // Linked list of all known threads being sampled
    ThreadSampler* volatile first_thread_sampler;

    // Queue between clients and main remotery thread
    MessageQueue* mq_to_rmt_thread;

    // A dynamically-sized buffer used for encoding the sample tree as JSON and sending to the client
    Buffer* json_buf;

    // The main server thread
    Thread* thread;

#ifdef RMT_USE_CUDA
    rmtCUDABind cuda;
#endif

#ifdef RMT_USE_D3D11
    D3D11* d3d11;
#endif

#ifdef RMT_USE_OPENGL
    // Runtime-loaded GL timer-query entry points
    PFNGLGENQUERIESPROC __glGenQueries;
    PFNGLDELETEQUERIESPROC __glDeleteQueries;
    PFNGLBEGINQUERYPROC __glBeginQuery;
    PFNGLENDQUERYPROC __glEndQuery;
    PFNGLGETQUERYOBJECTIVPROC __glGetQueryObjectiv;
    PFNGLGETQUERYOBJECTUIVPROC __glGetQueryObjectuiv;
    PFNGLGETQUERYOBJECTI64VPROC __glGetQueryObjecti64v;
    PFNGLGETQUERYOBJECTUI64VPROC __glGetQueryObjectui64v;
    PFNGLQUERYCOUNTERPROC __glQueryCounter;

    // An allocator separate to the samples themselves so that OpenGL resource lifetime can be controlled
    // outside of the Remotery thread.
    ObjectAllocator* opengl_timestamp_allocator;

    // Queue to the OpenGL main update thread
    // Given that BeginSample/EndSample need to be called from the same thread that does the update, there
    // is really no need for this to be a thread-safe queue. I'm using it for its convenience.
    MessageQueue* mq_to_opengl_main;

    // Mark the first time so that remaining timestamps are offset from this
    rmtU64 opengl_first_timestamp;
#endif
};
//
// Global remotery context
//
static Remotery* g_Remotery = NULL;

//
// This flag marks the EXE/DLL that created the global remotery instance. We want to allow
// only the creating EXE/DLL to destroy the remotery instance.
//
static rmtBool g_RemoteryCreated = RMT_FALSE;

// Forward declarations for functions used before their definitions
static void Remotery_Destroy(Remotery* rmt);
static void Remotery_DestroyThreadSamplers(Remotery* rmt);
  2910. static void GetSampleDigest(Sample* sample, rmtU32* digest_hash, rmtU32* nb_samples)
  2911. {
  2912. Sample* child;
  2913. assert(sample != NULL);
  2914. assert(digest_hash != NULL);
  2915. assert(nb_samples != NULL);
  2916. // Concatenate this sample
  2917. (*nb_samples)++;
  2918. *digest_hash = MurmurHash3_x86_32(&sample->unique_id, sizeof(sample->unique_id), *digest_hash);
  2919. // Concatenate children
  2920. for (child = sample->first_child; child != NULL; child = child->next_sibling)
  2921. GetSampleDigest(child, digest_hash, nb_samples);
  2922. }
// Forwards a log-text message payload straight to the connected viewer.
static enum rmtError Remotery_SendLogTextMessage(Remotery* rmt, Message* message)
{
    assert(rmt != NULL);
    assert(message != NULL);
    // 20: send timeout passed to Server_Send (presumably milliseconds,
    // matching the other Server_Send call sites - TODO confirm)
    return Server_Send(rmt->server, message->payload, message->payload_size, 20);
}
// Serialises a complete sample-tree message into 'buffer' as one JSON object:
// { "id":"SAMPLES", "thread_name":..., "nb_samples":..., "sample_digest":...,
//   "samples":[...] }. The buffer is reset to empty before writing.
static enum rmtError json_SampleTree(Buffer* buffer, Msg_SampleTree* msg)
{
    Sample* root_sample;
    char thread_name[64];
    rmtU32 digest_hash = 0, nb_samples = 0;
    enum rmtError error;

    assert(buffer != NULL);
    assert(msg != NULL);

    // Get the message root sample
    root_sample = msg->root_sample;
    assert(root_sample != NULL);

    // Reset the buffer position to the start
    buffer->bytes_used = 0;

    // Add any sample types as a thread name post-fix to ensure they get their own viewer
    // (bounded Annex-K string functions; over-long names are silently truncated)
    thread_name[0] = 0;
    strncat_s(thread_name, sizeof(thread_name), msg->thread_name, strnlen_s(msg->thread_name, 64));
    if (root_sample->type == SampleType_CUDA)
        strncat_s(thread_name, sizeof(thread_name), " (CUDA)", 7);
    if (root_sample->type == SampleType_D3D11)
        strncat_s(thread_name, sizeof(thread_name), " (D3D11)", 8);
    if (root_sample->type == SampleType_OpenGL)
        strncat_s(thread_name, sizeof(thread_name), " (OpenGL)", 9);

    // Get digest hash of samples so that viewer can efficiently rebuild its tables
    GetSampleDigest(root_sample, &digest_hash, &nb_samples);

    // Build the sample data
    JSON_ERROR_CHECK(json_OpenObject(buffer));
    JSON_ERROR_CHECK(json_FieldStr(buffer, "id", "SAMPLES"));
    JSON_ERROR_CHECK(json_Comma(buffer));
    JSON_ERROR_CHECK(json_FieldStr(buffer, "thread_name", thread_name));
    JSON_ERROR_CHECK(json_Comma(buffer));
    JSON_ERROR_CHECK(json_FieldU64(buffer, "nb_samples", nb_samples));
    JSON_ERROR_CHECK(json_Comma(buffer));
    JSON_ERROR_CHECK(json_FieldU64(buffer, "sample_digest", digest_hash));
    JSON_ERROR_CHECK(json_Comma(buffer));
    JSON_ERROR_CHECK(json_SampleArray(buffer, root_sample, "samples"));
    JSON_ERROR_CHECK(json_CloseObject(buffer));

    return RMT_ERROR_NONE;
}
#ifdef RMT_USE_CUDA
static rmtBool AreCUDASamplesReady(Sample* sample);
static rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample);
#endif

// Serialises a queued sample tree to JSON and sends it to the viewer, then
// returns the samples to their allocator. CUDA trees whose GPU events haven't
// resolved yet are re-queued (not freed) and retried on a later update.
static enum rmtError Remotery_SendSampleTreeMessage(Remotery* rmt, Message* message)
{
    Msg_SampleTree* sample_tree;
    enum rmtError error = RMT_ERROR_NONE;
    Sample* sample;

    assert(rmt != NULL);
    assert(message != NULL);

    // Get the message root sample
    sample_tree = (Msg_SampleTree*)message->payload;
    sample = sample_tree->root_sample;
    assert(sample != NULL);

#ifdef RMT_USE_CUDA
    if (sample->type == SampleType_CUDA)
    {
        // If these CUDA samples aren't ready yet, stick them to the back of the queue and continue
        rmtBool are_samples_ready;
        rmt_BeginCPUSample(AreCUDASamplesReady);
        are_samples_ready = AreCUDASamplesReady(sample);
        rmt_EndCPUSample();
        if (!are_samples_ready)
        {
            AddSampleTreeMessage(rmt->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->thread_name, message->thread_sampler);
            return RMT_ERROR_NONE;
        }

        // Retrieve timing of all CUDA samples
        rmt_BeginCPUSample(GetCUDASampleTimes);
        GetCUDASampleTimes(sample->parent, sample);
        rmt_EndCPUSample();
    }
#endif

    // Serialise the sample tree and send to the viewer
    error = json_SampleTree(rmt->json_buf, sample_tree);
    if (error == RMT_ERROR_NONE)
        error = Server_Send(rmt->server, rmt->json_buf->data, rmt->json_buf->bytes_used, 20);

    // Release the sample tree back to its allocator
    FreeSampleTree(sample, sample_tree->allocator);

    return error;
}
  3009. static enum rmtError Remotery_ConsumeMessageQueue(Remotery* rmt)
  3010. {
  3011. rmtU32 nb_messages_sent = 0;
  3012. assert(rmt != NULL);
  3013. // Absorb as many messages in the queue while disconnected
  3014. if (Server_IsClientConnected(rmt->server) == RMT_FALSE)
  3015. return RMT_ERROR_NONE;
  3016. // Loop reading the max number of messages for this update
  3017. while (nb_messages_sent++ < MAX_NB_MESSAGES_PER_UPDATE)
  3018. {
  3019. enum rmtError error = RMT_ERROR_NONE;
  3020. Message* message = MessageQueue_PeekNextMessage(rmt->mq_to_rmt_thread);
  3021. if (message == NULL)
  3022. break;
  3023. switch (message->id)
  3024. {
  3025. // This shouldn't be possible
  3026. case MsgID_NotReady:
  3027. assert(RMT_FALSE);
  3028. break;
  3029. // Dispatch to message handler
  3030. case MsgID_LogText:
  3031. error = Remotery_SendLogTextMessage(rmt, message);
  3032. break;
  3033. case MsgID_SampleTree:
  3034. error = Remotery_SendSampleTreeMessage(rmt, message);
  3035. break;
  3036. }
  3037. // Consume the message before reacting to any errors
  3038. MessageQueue_ConsumeNextMessage(rmt->mq_to_rmt_thread, message);
  3039. if (error != RMT_ERROR_NONE)
  3040. return error;
  3041. }
  3042. return RMT_ERROR_NONE;
  3043. }
  3044. static void Remotery_FlushMessageQueue(Remotery* rmt)
  3045. {
  3046. assert(rmt != NULL);
  3047. // Loop reading all remaining messages
  3048. while (1)
  3049. {
  3050. Message* message = MessageQueue_PeekNextMessage(rmt->mq_to_rmt_thread);
  3051. if (message == NULL)
  3052. break;
  3053. switch (message->id)
  3054. {
  3055. // These can be safely ignored
  3056. case MsgID_NotReady:
  3057. case MsgID_LogText:
  3058. break;
  3059. // Release all samples back to their allocators
  3060. case MsgID_SampleTree:
  3061. {
  3062. Msg_SampleTree* sample_tree = (Msg_SampleTree*)message->payload;
  3063. FreeSampleTree(sample_tree->root_sample, sample_tree->allocator);
  3064. break;
  3065. }
  3066. }
  3067. MessageQueue_ConsumeNextMessage(rmt->mq_to_rmt_thread, message);
  3068. }
  3069. }
// Entry point of the Remotery server thread: updates the websocket server and
// drains the message queue each iteration until asked to exit, then flushes
// any samples left in the queue so they return to their allocators.
static enum rmtError Remotery_ThreadMain(Thread* thread)
{
    Remotery* rmt = (Remotery*)thread->param;
    assert(rmt != NULL);

    rmt_SetCurrentThreadName("Remotery");

    while (thread->request_exit == RMT_FALSE)
    {
        rmt_BeginCPUSample(Wakeup);

        rmt_BeginCPUSample(ServerUpdate);
        Server_Update(rmt->server);
        rmt_EndCPUSample();

        rmt_BeginCPUSample(ConsumeMessageQueue);
        Remotery_ConsumeMessageQueue(rmt);
        rmt_EndCPUSample();

        rmt_EndCPUSample();

        //
        // [NOTE-A]
        //
        // Possible sequence of user events at this point:
        //
        //    1. Add samples to the queue.
        //    2. Shutdown remotery.
        //
        // This loop will exit with unrelease samples.
        //
        msSleep(MS_SLEEP_BETWEEN_SERVER_UPDATES);
    }

    // Release all samples to their allocators as a consequence of [NOTE-A]
    Remotery_FlushMessageQueue(rmt);

    return RMT_ERROR_NONE;
}
// Creates and fully initialises the global Remotery instance: TLS slot, server,
// message queue, JSON buffer, optional GPU state, then the server thread.
// Order matters: g_Remotery must be published (with a write fence) before the
// thread starts, because the thread samples itself through the global.
// On any failure everything created so far is destroyed and *rmt is NULL.
static enum rmtError Remotery_Create(Remotery** rmt)
{
    enum rmtError error;

    assert(rmt != NULL);

    *rmt = (Remotery*)malloc(sizeof(Remotery));
    if (*rmt == NULL)
        return RMT_ERROR_MALLOC_FAIL;

    // Set default state
    (*rmt)->server = NULL;
    (*rmt)->thread_sampler_tls_handle = TLS_INVALID_HANDLE;
    (*rmt)->first_thread_sampler = NULL;
    (*rmt)->mq_to_rmt_thread = NULL;
    (*rmt)->json_buf = NULL;
    (*rmt)->thread = NULL;

    // Allocate a TLS handle for the thread sampler
    error = tlsAlloc(&(*rmt)->thread_sampler_tls_handle);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }

    // Create the server listening on port 0x4597 (17815)
    error = Server_Create(0x4597, &(*rmt)->server);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }

    // Create the main message thread with only one page
    error = MessageQueue_Create(&(*rmt)->mq_to_rmt_thread, MESSAGE_QUEUE_SIZE_BYTES);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }

    // Create the JSON serialisation buffer (grows on demand from 4KB)
    error = Buffer_Create(&(*rmt)->json_buf, 4096);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }

#ifdef RMT_USE_CUDA
    // CUDA bindings are provided later by the user via rmt_BindCUDA
    (*rmt)->cuda.CtxSetCurrent = NULL;
    (*rmt)->cuda.EventCreate = NULL;
    (*rmt)->cuda.EventDestroy = NULL;
    (*rmt)->cuda.EventElapsedTime = NULL;
    (*rmt)->cuda.EventQuery = NULL;
    (*rmt)->cuda.EventRecord = NULL;
#endif

#ifdef RMT_USE_D3D11
    (*rmt)->d3d11 = NULL;
    error = D3D11_Create(&(*rmt)->d3d11);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }
#endif

#ifdef RMT_USE_OPENGL
    // GL entry points are loaded later, on the thread that owns the GL context
    (*rmt)->__glGenQueries = NULL;
    (*rmt)->__glDeleteQueries = NULL;
    (*rmt)->__glBeginQuery = NULL;
    (*rmt)->__glEndQuery = NULL;
    (*rmt)->__glGetQueryObjectiv = NULL;
    (*rmt)->__glGetQueryObjectuiv = NULL;
    (*rmt)->__glGetQueryObjecti64v = NULL;
    (*rmt)->__glGetQueryObjectui64v = NULL;
    (*rmt)->__glQueryCounter = NULL;
    (*rmt)->opengl_timestamp_allocator = NULL;
    (*rmt)->mq_to_opengl_main = NULL;
    (*rmt)->opengl_first_timestamp = 0;
    error = MessageQueue_Create(&(*rmt)->mq_to_opengl_main, MESSAGE_QUEUE_SIZE_BYTES);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }
#endif

    // Set as the global instance before creating any threads that uses it for sampling itself
    assert(g_Remotery == NULL);
    g_Remotery = *rmt;
    g_RemoteryCreated = RMT_TRUE;

    // Ensure global instance writes complete before other threads get a chance to use it
    WriteFence();

    // Create the main update thread once everything has been defined for the global remotery object
    error = Thread_Create(&(*rmt)->thread, Remotery_ThreadMain, *rmt);
    if (error != RMT_ERROR_NONE)
    {
        Remotery_Destroy(*rmt);
        *rmt = NULL;
        return error;
    }

    return RMT_ERROR_NONE;
}
// Tears down a Remotery instance in reverse order of creation.
// NOTE(review): the order here matters - the update thread is joined first
// (it profiles itself), then the global pointer is cleared before any of the
// subsystems it references are destroyed.
static void Remotery_Destroy(Remotery* rmt)
{
    assert(rmt != NULL);

    // Join the remotery thread before clearing the global object as the thread is profiling itself
    if (rmt->thread != NULL)
    {
        Thread_Destroy(rmt->thread);
        rmt->thread = NULL;
    }

    // Ensure this is the module that created it
    assert(g_RemoteryCreated == RMT_TRUE);
    assert(g_Remotery == rmt);
    g_Remotery = NULL;
    g_RemoteryCreated = RMT_FALSE;

#ifdef RMT_USE_D3D11
    // D3D11 state owns its own allocator/queue; destroyed as a unit
    if (rmt->d3d11 != NULL)
    {
        D3D11_Destroy(rmt->d3d11);
        rmt->d3d11 = NULL;
    }
#endif

#ifdef RMT_USE_OPENGL
    // OpenGL state lives directly on the Remotery object, unlike D3D11
    if (rmt->opengl_timestamp_allocator != NULL)
    {
        ObjectAllocator_Destroy(rmt->opengl_timestamp_allocator);
        rmt->opengl_timestamp_allocator = NULL;
    }
    if (rmt->mq_to_opengl_main != NULL)
    {
        MessageQueue_Destroy(rmt->mq_to_opengl_main);
        rmt->mq_to_opengl_main = NULL;
    }
#endif

    if (rmt->json_buf != NULL)
    {
        Buffer_Destroy(rmt->json_buf);
        rmt->json_buf = NULL;
    }

    if (rmt->mq_to_rmt_thread != NULL)
    {
        MessageQueue_Destroy(rmt->mq_to_rmt_thread);
        rmt->mq_to_rmt_thread = NULL;
    }

    // Pop and free every per-thread sampler still on the global list
    Remotery_DestroyThreadSamplers(rmt);

    if (rmt->server != NULL)
    {
        Server_Destroy(rmt->server);
        rmt->server = NULL;
    }

    if (rmt->thread_sampler_tls_handle != TLS_INVALID_HANDLE)
    {
        tlsFree(rmt->thread_sampler_tls_handle);
        rmt->thread_sampler_tls_handle = 0;
    }

    free(rmt);
}
  3258. static enum rmtError Remotery_GetThreadSampler(Remotery* rmt, ThreadSampler** thread_sampler)
  3259. {
  3260. ThreadSampler* ts;
  3261. // Is there a thread sampler associated with this thread yet?
  3262. assert(rmt != NULL);
  3263. ts = (ThreadSampler*)tlsGet(rmt->thread_sampler_tls_handle);
  3264. if (ts == NULL)
  3265. {
  3266. // Allocate on-demand
  3267. enum rmtError error = ThreadSampler_Create(thread_sampler);
  3268. if (error != RMT_ERROR_NONE)
  3269. return error;
  3270. ts = *thread_sampler;
  3271. // Add to the beginning of the global linked list of thread samplers
  3272. while (1)
  3273. {
  3274. ThreadSampler* old_ts = rmt->first_thread_sampler;
  3275. ts->next = old_ts;
  3276. // If the old value is what we expect it to be then no other thread has
  3277. // changed it since this thread sampler was used as a candidate first list item
  3278. if (AtomicCompareAndSwapPointer((long* volatile*)&rmt->first_thread_sampler, (long*)old_ts, (long*)ts) == RMT_TRUE)
  3279. break;
  3280. }
  3281. tlsSet(rmt->thread_sampler_tls_handle, ts);
  3282. }
  3283. assert(thread_sampler != NULL);
  3284. *thread_sampler = ts;
  3285. return RMT_ERROR_NONE;
  3286. }
// Destroys every ThreadSampler on the global linked list, popping each head
// with a CAS so that it remains safe even if other threads are (erroneously)
// still creating samplers during shutdown.
static void Remotery_DestroyThreadSamplers(Remotery* rmt)
{
    // If the handle failed to create in the first place then it shouldn't be possible to create thread samplers
    assert(rmt != NULL);
    if (rmt->thread_sampler_tls_handle == TLS_INVALID_HANDLE)
    {
        assert(rmt->first_thread_sampler == NULL);
        return;
    }

    // Keep popping thread samplers off the linked list until they're all gone
    // This does not make any assumptions, making it possible for thread samplers to be created while they're all
    // deleted. While this is erroneous calling code, this will prevent a confusing crash.
    while (rmt->first_thread_sampler != NULL)
    {
        ThreadSampler* ts;

        // CAS-pop the head; retry if another thread won the race
        while (1)
        {
            ThreadSampler* old_ts = rmt->first_thread_sampler;
            ThreadSampler* next_ts = old_ts->next;
            if (AtomicCompareAndSwapPointer((long* volatile*)&rmt->first_thread_sampler, (long*)old_ts, (long*)next_ts) == RMT_TRUE)
            {
                ts = old_ts;
                break;
            }
        }

        // Release the thread sampler
        ThreadSampler_Destroy(ts);
    }
}
// Public API: creates a Remotery instance and returns it via 'remotery'.
// Creating the Remotery instance also records it as the global instance.
enum rmtError _rmt_CreateGlobalInstance(Remotery** remotery)
{
    assert(remotery != NULL);
    return Remotery_Create(remotery);
}
  3322. void _rmt_DestroyGlobalInstance(Remotery* remotery)
  3323. {
  3324. if (remotery != NULL)
  3325. Remotery_Destroy(remotery);
  3326. }
// Public API: overrides the global instance pointer used by all rmt_* entry
// points in this module. Performs no other setup or teardown.
void _rmt_SetGlobalInstance(Remotery* remotery)
{
    g_Remotery = remotery;
}
// Public API: returns the current global instance (may be NULL if none was
// created or set).
Remotery* _rmt_GetGlobalInstance(void)
{
    return g_Remotery;
}
#ifdef RMT_PLATFORM_WINDOWS
// Structure passed to the debugger via RaiseException(0x406D1388); the layout
// is fixed by Visual Studio's thread-naming convention and must not change.
#pragma pack(push,8)
typedef struct tagTHREADNAME_INFO
{
    DWORD dwType; // Must be 0x1000.
    LPCSTR szName; // Pointer to name (in user addr space).
    DWORD dwThreadID; // Thread ID (-1=caller thread).
    DWORD dwFlags; // Reserved for future use, must be zero.
} THREADNAME_INFO;
#pragma pack(pop)
#endif
  3346. static void SetDebuggerThreadName(const char* name)
  3347. {
  3348. #ifdef RMT_PLATFORM_WINDOWS
  3349. THREADNAME_INFO info;
  3350. info.dwType = 0x1000;
  3351. info.szName = name;
  3352. info.dwThreadID = (DWORD)-1;
  3353. info.dwFlags = 0;
  3354. __try
  3355. {
  3356. RaiseException(0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info);
  3357. }
  3358. __except(1 /* EXCEPTION_EXECUTE_HANDLER */)
  3359. {
  3360. }
  3361. #endif
  3362. }
// Public API: records a human-readable name for the calling thread, stored on
// its ThreadSampler (truncated to the sampler's name buffer) and also passed
// to any attached debugger.
void _rmt_SetCurrentThreadName(rmtPStr thread_name)
{
    ThreadSampler* ts;
    rsize_t slen;

    if (g_Remotery == NULL)
        return;

    // Get data for this thread
    if (Remotery_GetThreadSampler(g_Remotery, &ts) != RMT_ERROR_NONE)
        return;

    // Use strcat to strcpy the thread name over
    slen = strnlen_s(thread_name, sizeof(ts->name));
    ts->name[0] = 0;
    strncat_s(ts->name, sizeof(ts->name), thread_name, slen);

    // Apply to the debugger
    SetDebuggerThreadName(thread_name);
}
// Terminates the JSON text in 'text' (closing quote, closing brace and NUL)
// and posts it to 'queue' as a MsgID_LogText message.
// Returns RMT_FALSE if a message could not be allocated from the queue.
// NOTE(review): callers must guarantee at least 3 spare bytes in 'text'
// beyond 'size' for the terminator characters written here.
static rmtBool QueueLine(MessageQueue* queue, char* text, rmtU32 size, struct ThreadSampler* thread_sampler)
{
    Message* message;

    assert(queue != NULL);

    // String/JSON block/null terminate
    text[size++] = '\"';
    text[size++] = '}';
    text[size] = 0;

    // Allocate some space for the line
    message = MessageQueue_AllocMessage(queue, size, thread_sampler);
    if (message == NULL)
        return RMT_FALSE;

    // Copy the text and commit the message
    // (the trailing NUL is intentionally not part of 'size')
    memcpy(message->payload, text, size);
    MessageQueue_CommitMessage(queue, message, MsgID_LogText);

    return RMT_TRUE;
}
// JSON prefix for viewer log messages; QueueLine() appends the closing
// quote/brace when each line is flushed
static const char log_message[] = "{ \"id\": \"LOG\", \"text\": \"";
  3397. void _rmt_LogText(rmtPStr text)
  3398. {
  3399. int start_offset, prev_offset, i;
  3400. char line_buffer[1024] = { 0 };
  3401. ThreadSampler* ts;
  3402. if (g_Remotery == NULL)
  3403. return;
  3404. Remotery_GetThreadSampler(g_Remotery, &ts);
  3405. // Start the line buffer off with the JSON message markup
  3406. strncat_s(line_buffer, sizeof(line_buffer), log_message, sizeof(log_message));
  3407. start_offset = (int)strnlen_s(line_buffer, sizeof(line_buffer) - 1);
  3408. // There might be newlines in the buffer, so split them into multiple network calls
  3409. prev_offset = start_offset;
  3410. for (i = 0; text[i] != 0; i++)
  3411. {
  3412. char c = text[i];
  3413. // Line wrap when too long or newline encountered
  3414. if (prev_offset == sizeof(line_buffer) - 3 || c == '\n')
  3415. {
  3416. if (QueueLine(g_Remotery->mq_to_rmt_thread, line_buffer, prev_offset, ts) == RMT_FALSE)
  3417. return;
  3418. // Restart line
  3419. prev_offset = start_offset;
  3420. }
  3421. // Safe to insert 2 characters here as previous check would split lines if not enough space left
  3422. switch (c)
  3423. {
  3424. // Skip newline, dealt with above
  3425. case '\n':
  3426. break;
  3427. // Escape these
  3428. case '\\':
  3429. line_buffer[prev_offset++] = '\\';
  3430. line_buffer[prev_offset++] = '\\';
  3431. break;
  3432. case '\"':
  3433. line_buffer[prev_offset++] = '\\';
  3434. line_buffer[prev_offset++] = '\"';
  3435. break;
  3436. // Add the rest
  3437. default:
  3438. line_buffer[prev_offset++] = c;
  3439. break;
  3440. }
  3441. }
  3442. // Send the last line
  3443. if (prev_offset > start_offset)
  3444. {
  3445. assert(prev_offset < sizeof(line_buffer) - 3);
  3446. QueueLine(g_Remotery->mq_to_rmt_thread, line_buffer, prev_offset, ts);
  3447. }
  3448. }
  3449. static rmtU32 GetNameHash(rmtPStr name, rmtU32* hash_cache)
  3450. {
  3451. // Hash cache provided?
  3452. if (hash_cache != NULL)
  3453. {
  3454. // Calculate the hash first time round only
  3455. if (*hash_cache == 0)
  3456. {
  3457. assert(name != NULL);
  3458. *hash_cache = MurmurHash3_x86_32(name, (int)strnlen_s(name, 256), 0);
  3459. }
  3460. return *hash_cache;
  3461. }
  3462. // Have to recalculate every time when no cache storage exists
  3463. return MurmurHash3_x86_32(name, (int)strnlen_s(name, 256), 0);
  3464. }
// Public API: opens a named CPU sample on the calling thread, pushing it as a
// child of the currently-open sample and stamping its start time.
void _rmt_BeginCPUSample(rmtPStr name, rmtU32* hash_cache)
{
    // 'hash_cache' stores a pointer to a sample name's hash value. Internally this is used to identify unique callstacks and it
    // would be ideal that it's not recalculated each time the sample is used. This can be statically cached at the point
    // of call or stored elsewhere when dynamic names are required.
    //
    // If 'hash_cache' is NULL then this call becomes more expensive, as it has to recalculate the hash of the name.
    ThreadSampler* ts;

    if (g_Remotery == NULL)
        return;

    // TODO: Time how long the bits outside here cost and subtract them from the parent
    if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
    {
        Sample* sample;
        rmtU32 name_hash = GetNameHash(name, hash_cache);
        // Only stamp the start time if the push succeeded
        if (ThreadSampler_Push(ts, ts->sample_trees[SampleType_CPU], name, name_hash, &sample) == RMT_ERROR_NONE)
            sample->us_start = usTimer_Get(&ts->timer);
    }
}
// Public API: closes the currently-open CPU sample on the calling thread,
// stamping its end time. When the root sample closes, ThreadSampler_Pop sends
// the completed tree to the Remotery update thread.
// NOTE(review): assumes a matching _rmt_BeginCPUSample was made - there is no
// underflow guard here.
void _rmt_EndCPUSample(void)
{
    ThreadSampler* ts;

    if (g_Remotery == NULL)
        return;

    if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
    {
        // current_parent is the sample being closed
        Sample* sample = ts->sample_trees[SampleType_CPU]->current_parent;
        sample->us_end = usTimer_Get(&ts->timer);
        ThreadSampler_Pop(ts, g_Remotery->mq_to_rmt_thread, sample);
    }
}
  3496. /*
  3497. ------------------------------------------------------------------------------------------------------------------------
  3498. ------------------------------------------------------------------------------------------------------------------------
  3499. @CUDA: CUDA event sampling
  3500. ------------------------------------------------------------------------------------------------------------------------
  3501. ------------------------------------------------------------------------------------------------------------------------
  3502. */
  3503. #ifdef RMT_USE_CUDA
// A Sample specialised for CUDA: the base Sample plus the pair of CUDA events
// that bracket the GPU work being measured.
typedef struct CUDASample
{
    // IS-A inheritance relationship
    Sample Sample;

    // Pair of events that wrap the sample
    CUevent event_start;
    CUevent event_end;
} CUDASample;
  3512. static enum rmtError MapCUDAResult(CUresult result)
  3513. {
  3514. switch (result)
  3515. {
  3516. case CUDA_SUCCESS: return RMT_ERROR_NONE;
  3517. case CUDA_ERROR_DEINITIALIZED: return RMT_ERROR_CUDA_DEINITIALIZED;
  3518. case CUDA_ERROR_NOT_INITIALIZED: return RMT_ERROR_CUDA_NOT_INITIALIZED;
  3519. case CUDA_ERROR_INVALID_CONTEXT: return RMT_ERROR_CUDA_INVALID_CONTEXT;
  3520. case CUDA_ERROR_INVALID_VALUE: return RMT_ERROR_CUDA_INVALID_VALUE;
  3521. case CUDA_ERROR_INVALID_HANDLE: return RMT_ERROR_CUDA_INVALID_HANDLE;
  3522. case CUDA_ERROR_OUT_OF_MEMORY: return RMT_ERROR_CUDA_OUT_OF_MEMORY;
  3523. case CUDA_ERROR_NOT_READY: return RMT_ERROR_ERROR_NOT_READY;
  3524. default: return RMT_ERROR_CUDA_UNKNOWN;
  3525. }
  3526. }
// Declares a typed local function pointer 'name' initialised from the untyped
// entry of the same name in the user-supplied binding table (g_Remotery->cuda)
#define CUDA_MAKE_FUNCTION(name, params) \
    typedef CUresult (CUDAAPI *name##Ptr) params; \
    name##Ptr name = (name##Ptr)g_Remotery->cuda.name;

// Evaluates 'call' and returns its error from the enclosing function on failure
#define CUDA_GUARD(call) \
{ \
    enum rmtError error = call; \
    if (error != RMT_ERROR_NONE) \
        return error; \
}
  3536. // Wrappers around CUDA driver functions that manage the active context.
// Makes 'context' the calling thread's current CUDA context via the bound
// cuCtxSetCurrent entry point.
static enum rmtError CUDASetContext(void* context)
{
    CUDA_MAKE_FUNCTION(CtxSetCurrent, (CUcontext ctx));
    assert(CtxSetCurrent != NULL);
    return MapCUDAResult(CtxSetCurrent((CUcontext)context));
}
// Retrieves the calling thread's current CUDA context via the bound
// cuCtxGetCurrent entry point.
static enum rmtError CUDAGetContext(void** context)
{
    CUDA_MAKE_FUNCTION(CtxGetCurrent, (CUcontext* ctx));
    assert(CtxGetCurrent != NULL);
    return MapCUDAResult(CtxGetCurrent((CUcontext*)context));
}
// Ensures the context recorded at bind-time is current on the calling thread,
// switching to it only when a different context is active.
static enum rmtError CUDAEnsureContext()
{
    void* current_context;
    CUDA_GUARD(CUDAGetContext(&current_context));

    assert(g_Remotery != NULL);
    if (current_context != g_Remotery->cuda.context)
        CUDA_GUARD(CUDASetContext(g_Remotery->cuda.context));

    return RMT_ERROR_NONE;
}
  3558. // Wrappers around CUDA driver functions that manage events
// cuEventCreate wrapper that first makes the bound context current.
static enum rmtError CUDAEventCreate(CUevent* phEvent, unsigned int Flags)
{
    CUDA_MAKE_FUNCTION(EventCreate, (CUevent *phEvent, unsigned int Flags));
    CUDA_GUARD(CUDAEnsureContext());
    return MapCUDAResult(EventCreate(phEvent, Flags));
}
// cuEventDestroy wrapper that first makes the bound context current.
static enum rmtError CUDAEventDestroy(CUevent hEvent)
{
    CUDA_MAKE_FUNCTION(EventDestroy, (CUevent hEvent));
    CUDA_GUARD(CUDAEnsureContext());
    return MapCUDAResult(EventDestroy(hEvent));
}
// cuEventRecord wrapper: queues 'hEvent' on 'hStream' after making the bound
// context current.
static enum rmtError CUDAEventRecord(CUevent hEvent, void* hStream)
{
    CUDA_MAKE_FUNCTION(EventRecord, (CUevent hEvent, CUstream hStream));
    CUDA_GUARD(CUDAEnsureContext());
    return MapCUDAResult(EventRecord(hEvent, (CUstream)hStream));
}
// cuEventQuery wrapper: returns RMT_ERROR_NONE when the event has completed,
// RMT_ERROR_ERROR_NOT_READY (via MapCUDAResult) while still pending.
static enum rmtError CUDAEventQuery(CUevent hEvent)
{
    CUDA_MAKE_FUNCTION(EventQuery, (CUevent hEvent));
    CUDA_GUARD(CUDAEnsureContext());
    return MapCUDAResult(EventQuery(hEvent));
}
// cuEventElapsedTime wrapper: returns the elapsed milliseconds between two
// completed events.
static enum rmtError CUDAEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd)
{
    CUDA_MAKE_FUNCTION(EventElapsedTime, (float *pMilliseconds, CUevent hStart, CUevent hEnd));
    CUDA_GUARD(CUDAEnsureContext());
    return MapCUDAResult(EventElapsedTime(pMilliseconds, hStart, hEnd));
}
// Object-allocator constructor for CUDASample: initialises the base Sample
// and creates the start/end CUDA events. On failure the partially-created
// events are released by CUDASample_Destructor.
static enum rmtError CUDASample_Constructor(CUDASample* sample)
{
    enum rmtError error;

    assert(sample != NULL);

    // Chain to sample constructor
    Sample_Constructor((Sample*)sample);
    sample->Sample.type = SampleType_CUDA;
    sample->Sample.size_bytes = sizeof(CUDASample);
    sample->event_start = NULL;
    sample->event_end = NULL;

    // Create non-blocking events with timing
    assert(g_Remotery != NULL);
    error = CUDAEventCreate(&sample->event_start, CU_EVENT_DEFAULT);
    if (error == RMT_ERROR_NONE)
        error = CUDAEventCreate(&sample->event_end, CU_EVENT_DEFAULT);
    return error;
}
  3606. static void CUDASample_Destructor(CUDASample* sample)
  3607. {
  3608. assert(sample != NULL);
  3609. // Destroy events
  3610. if (sample->event_start != NULL)
  3611. CUDAEventDestroy(sample->event_start);
  3612. if (sample->event_end != NULL)
  3613. CUDAEventDestroy(sample->event_end);
  3614. Sample_Destructor((Sample*)sample);
  3615. }
  3616. static rmtBool AreCUDASamplesReady(Sample* sample)
  3617. {
  3618. enum rmtError error;
  3619. Sample* child;
  3620. CUDASample* cuda_sample = (CUDASample*)sample;
  3621. assert(sample->type == SampleType_CUDA);
  3622. // Check to see if both of the CUDA events have been processed
  3623. error = CUDAEventQuery(cuda_sample->event_start);
  3624. if (error != RMT_ERROR_NONE)
  3625. return RMT_FALSE;
  3626. error = CUDAEventQuery(cuda_sample->event_end);
  3627. if (error != RMT_ERROR_NONE)
  3628. return RMT_FALSE;
  3629. // Check child sample events
  3630. for (child = sample->first_child; child != NULL; child = child->next_sibling)
  3631. {
  3632. if (!AreCUDASamplesReady(child))
  3633. return RMT_FALSE;
  3634. }
  3635. return RMT_TRUE;
  3636. }
// Converts the event pair of 'sample' (and recursively its children) into
// microsecond start/end times relative to the root sample's start event.
// Returns RMT_FALSE if any elapsed-time query fails.
static rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample)
{
    Sample* child;

    CUDASample* cuda_root_sample = (CUDASample*)root_sample;
    CUDASample* cuda_sample = (CUDASample*)sample;

    float ms_start, ms_end;

    assert(root_sample != NULL);
    assert(sample != NULL);

    // Get millisecond timing of each sample event, relative to initial root sample
    if (CUDAEventElapsedTime(&ms_start, cuda_root_sample->event_start, cuda_sample->event_start) != RMT_ERROR_NONE)
        return RMT_FALSE;
    if (CUDAEventElapsedTime(&ms_end, cuda_root_sample->event_start, cuda_sample->event_end) != RMT_ERROR_NONE)
        return RMT_FALSE;

    // Convert to microseconds and add to the sample
    sample->us_start = (rmtU64)(ms_start * 1000);
    sample->us_end = (rmtU64)(ms_end * 1000);

    // Get child sample times
    for (child = sample->first_child; child != NULL; child = child->next_sibling)
    {
        if (!GetCUDASampleTimes(root_sample, child))
            return RMT_FALSE;
    }

    return RMT_TRUE;
}
  3661. void _rmt_BindCUDA(const rmtCUDABind* bind)
  3662. {
  3663. assert(bind != NULL);
  3664. if (g_Remotery != NULL)
  3665. g_Remotery->cuda = *bind;
  3666. }
  3667. void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream)
  3668. {
  3669. ThreadSampler* ts;
  3670. if (g_Remotery == NULL)
  3671. return;
  3672. if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
  3673. {
  3674. Sample* sample;
  3675. rmtU32 name_hash = GetNameHash(name, hash_cache);
  3676. // Create the CUDA tree on-demand as the tree needs an up-front-created root.
  3677. // This is not possible to create on initialisation as a CUDA binding is not yet available.
  3678. SampleTree** cuda_tree = &ts->sample_trees[SampleType_CUDA];
  3679. if (*cuda_tree == NULL)
  3680. {
  3681. CUDASample* root_sample;
  3682. enum rmtError error = SampleTree_Create(cuda_tree, sizeof(CUDASample), (ObjConstructor)CUDASample_Constructor, (ObjDestructor)CUDASample_Destructor);
  3683. if (error != RMT_ERROR_NONE)
  3684. return;
  3685. // Record an event once on the root sample, used to measure absolute sample
  3686. // times since this point
  3687. root_sample = (CUDASample*)(*cuda_tree)->root;
  3688. error = CUDAEventRecord(root_sample->event_start, stream);
  3689. if (error != RMT_ERROR_NONE)
  3690. return;
  3691. }
  3692. // Push the same and record its event
  3693. if (ThreadSampler_Push(ts, *cuda_tree, name, name_hash, &sample) == RMT_ERROR_NONE)
  3694. {
  3695. CUDASample* cuda_sample = (CUDASample*)sample;
  3696. CUDAEventRecord(cuda_sample->event_start, stream);
  3697. }
  3698. }
  3699. }
// Public API: closes the currently-open CUDA sample, recording its end event
// on 'stream' and popping it from the tree.
// NOTE(review): assumes a matching _rmt_BeginCUDASample succeeded - there is
// no underflow or NULL-tree guard here.
void _rmt_EndCUDASample(void* stream)
{
    ThreadSampler* ts;

    if (g_Remotery == NULL)
        return;

    if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
    {
        CUDASample* sample = (CUDASample*)ts->sample_trees[SampleType_CUDA]->current_parent;
        CUDAEventRecord(sample->event_end, stream);
        ThreadSampler_Pop(ts, g_Remotery->mq_to_rmt_thread, (Sample*)sample);
    }
}
  3712. #endif // RMT_USE_CUDA
  3713. /*
  3714. ------------------------------------------------------------------------------------------------------------------------
  3715. ------------------------------------------------------------------------------------------------------------------------
  3716. @D3D11: Direct3D 11 event sampling
  3717. ------------------------------------------------------------------------------------------------------------------------
  3718. ------------------------------------------------------------------------------------------------------------------------
  3719. */
  3720. #ifdef RMT_USE_D3D11
  3721. // As clReflect has no way of disabling C++ compile mode, this forces C interfaces everywhere...
  3722. #define CINTERFACE
  3723. // ...unfortunately these C++ helpers aren't wrapped by the same macro but they can be disabled individually
  3724. #define D3D11_NO_HELPERS
  3725. // Allow use of the D3D11 helper macros for accessing the C-style vtable
  3726. #define COBJMACROS
  3727. #include <d3d11.h>
// Per-instance Direct3D 11 sampling state, owned by the Remotery object.
typedef struct D3D11
{
    // Context set by user
    ID3D11Device* device;
    ID3D11DeviceContext* context;

    // Last HRESULT reported by a failed query operation
    HRESULT last_error;

    // An allocator separate to the samples themselves so that D3D resource lifetime can be controlled
    // outside of the Remotery thread.
    ObjectAllocator* timestamp_allocator;

    // Queue to the D3D 11 main update thread
    // Given that BeginSample/EndSample need to be called from the same thread that does the update, there
    // is really no need for this to be a thread-safe queue. I'm using it for its convenience.
    MessageQueue* mq_to_d3d11_main;

    // Mark the first time so that remaining timestamps are offset from this
    rmtU64 first_timestamp;
} D3D11;
  3744. static enum rmtError D3D11_Create(D3D11** d3d11)
  3745. {
  3746. enum rmtError error;
  3747. assert(d3d11 != NULL);
  3748. // Allocate space for the D3D11 data
  3749. *d3d11 = (D3D11*)malloc(sizeof(D3D11));
  3750. if (*d3d11 == NULL)
  3751. return RMT_ERROR_MALLOC_FAIL;
  3752. // Set defaults
  3753. (*d3d11)->device = NULL;
  3754. (*d3d11)->context = NULL;
  3755. (*d3d11)->last_error = S_OK;
  3756. (*d3d11)->timestamp_allocator = NULL;
  3757. (*d3d11)->mq_to_d3d11_main = NULL;
  3758. (*d3d11)->first_timestamp = 0;
  3759. error = MessageQueue_Create(&(*d3d11)->mq_to_d3d11_main, MESSAGE_QUEUE_SIZE_BYTES);
  3760. if (error != RMT_ERROR_NONE)
  3761. {
  3762. D3D11_Destroy(*d3d11);
  3763. *d3d11 = NULL;
  3764. return error;
  3765. }
  3766. return RMT_ERROR_NONE;
  3767. }
  3768. static void D3D11_Destroy(D3D11* d3d11)
  3769. {
  3770. assert(d3d11 != NULL);
  3771. if (d3d11->timestamp_allocator != NULL)
  3772. {
  3773. ObjectAllocator_Destroy(d3d11->timestamp_allocator);
  3774. d3d11->timestamp_allocator = NULL;
  3775. }
  3776. if (d3d11->mq_to_d3d11_main != NULL)
  3777. {
  3778. MessageQueue_Destroy(d3d11->mq_to_d3d11_main);
  3779. d3d11->mq_to_d3d11_main = NULL;
  3780. }
  3781. free(d3d11);
  3782. }
// A pooled pair of D3D11 timestamp queries bracketing one sample, plus the
// disjoint query needed to read the GPU clock frequency.
typedef struct D3D11Timestamp
{
    // Inherit so that timestamps can be quickly allocated
    ObjectLink ObjectLink;

    // Pair of timestamp queries that wrap the sample
    ID3D11Query* query_start;
    ID3D11Query* query_end;

    // A disjoint to measure frequency/stability
    // TODO: Does *each* sample need one of these?
    ID3D11Query* query_disjoint;
} D3D11Timestamp;
// Object-allocator constructor for D3D11Timestamp: creates the two timestamp
// queries and the disjoint query on the currently-bound device. Any HRESULT
// failure is recorded in the global d3d11->last_error.
static enum rmtError D3D11Timestamp_Constructor(D3D11Timestamp* stamp)
{
    D3D11_QUERY_DESC timestamp_desc;
    D3D11_QUERY_DESC disjoint_desc;
    ID3D11Device* device;
    HRESULT* last_error;

    assert(stamp != NULL);

    ObjectLink_Constructor((ObjectLink*)stamp);

    // Set defaults
    stamp->query_start = NULL;
    stamp->query_end = NULL;
    stamp->query_disjoint = NULL;

    assert(g_Remotery != NULL);
    assert(g_Remotery->d3d11 != NULL);
    device = g_Remotery->d3d11->device;
    last_error = &g_Remotery->d3d11->last_error;

    // Create start/end timestamp queries
    timestamp_desc.Query = D3D11_QUERY_TIMESTAMP;
    timestamp_desc.MiscFlags = 0;
    *last_error = ID3D11Device_CreateQuery(device, &timestamp_desc, &stamp->query_start);
    if (*last_error != S_OK)
        return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY;
    *last_error = ID3D11Device_CreateQuery(device, &timestamp_desc, &stamp->query_end);
    if (*last_error != S_OK)
        return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY;

    // Create disjoint query
    disjoint_desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
    disjoint_desc.MiscFlags = 0;
    *last_error = ID3D11Device_CreateQuery(device, &disjoint_desc, &stamp->query_disjoint);
    if (*last_error != S_OK)
        return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY;

    return RMT_ERROR_NONE;
}
  3827. static void D3D11Timestamp_Destructor(D3D11Timestamp* stamp)
  3828. {
  3829. assert(stamp != NULL);
  3830. // Destroy queries
  3831. if (stamp->query_disjoint != NULL)
  3832. ID3D11Query_Release(stamp->query_disjoint);
  3833. if (stamp->query_end != NULL)
  3834. ID3D11Query_Release(stamp->query_end);
  3835. if (stamp->query_start != NULL)
  3836. ID3D11Query_Release(stamp->query_start);
  3837. }
// Opens the disjoint query and issues the start timestamp on 'context'.
// (Timestamp queries use End() only; Begin() applies to the disjoint query.)
static void D3D11Timestamp_Begin(D3D11Timestamp* stamp, ID3D11DeviceContext* context)
{
    assert(stamp != NULL);

    // Start of disjoint and first query
    ID3D11DeviceContext_Begin(context, (ID3D11Asynchronous*)stamp->query_disjoint);
    ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_start);
}
// Issues the end timestamp and closes the disjoint query on 'context',
// completing the pair opened by D3D11Timestamp_Begin.
static void D3D11Timestamp_End(D3D11Timestamp* stamp, ID3D11DeviceContext* context)
{
    assert(stamp != NULL);

    // End of disjoint and second query
    ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_end);
    ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_disjoint);
}
// Polls the three queries of 'stamp' without flushing. Returns a non-S_OK
// HRESULT (typically S_FALSE) while any query result is still pending, in
// which case the caller should retry later. On success, converts the raw GPU
// ticks to microseconds relative to 'first_timestamp' (recording the very
// first tick seen in '*out_first_timestamp').
// NOTE(review): when the interval is disjoint the outputs are left unwritten
// yet S_OK is still returned - callers see stale/zero times; confirm intended.
static HRESULT D3D11Timestamp_GetData(D3D11Timestamp* stamp, ID3D11DeviceContext* context, rmtU64 first_timestamp, rmtU64* out_start, rmtU64* out_end, rmtU64* out_first_timestamp)
{
    ID3D11Asynchronous* query_start;
    ID3D11Asynchronous* query_end;
    ID3D11Asynchronous* query_disjoint;
    HRESULT result;

    UINT64 start;
    UINT64 end;
    D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;

    assert(stamp != NULL);
    query_start = (ID3D11Asynchronous*)stamp->query_start;
    query_end = (ID3D11Asynchronous*)stamp->query_end;
    query_disjoint = (ID3D11Asynchronous*)stamp->query_disjoint;

    // Check to see if all queries are ready
    // If any fail to arrive, wait until later
    result = ID3D11DeviceContext_GetData(context, query_start, &start, sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH);
    if (result != S_OK)
        return result;
    result = ID3D11DeviceContext_GetData(context, query_end, &end, sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH);
    if (result != S_OK)
        return result;
    result = ID3D11DeviceContext_GetData(context, query_disjoint, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH);
    if (result != S_OK)
        return result;

    if (disjoint.Disjoint == FALSE)
    {
        // Frequency is in ticks/second; scale to ticks/microsecond
        double frequency = disjoint.Frequency / 1000000.0;

        // Mark the first timestamp
        assert(out_first_timestamp != NULL);
        if (*out_first_timestamp == 0)
            *out_first_timestamp = start;

        // Calculate start and end timestamps from the disjoint info
        *out_start = (rmtU64)((start - first_timestamp) / frequency);
        *out_end = (rmtU64)((end - first_timestamp) / frequency);
    }

    return S_OK;
}
// A Sample specialised for D3D11: the base Sample plus the (pool-allocated)
// query timestamp bracketing the GPU work.
typedef struct D3D11Sample
{
    // IS-A inheritance relationship
    Sample Sample;

    // NULL when timestamp allocation failed for this sample
    D3D11Timestamp* timestamp;
} D3D11Sample;
  3895. static enum rmtError D3D11Sample_Constructor(D3D11Sample* sample)
  3896. {
  3897. assert(sample != NULL);
  3898. // Chain to sample constructor
  3899. Sample_Constructor((Sample*)sample);
  3900. sample->Sample.type = SampleType_D3D11;
  3901. sample->Sample.size_bytes = sizeof(D3D11Sample);
  3902. sample->timestamp = NULL;
  3903. return RMT_ERROR_NONE;
  3904. }
// Object-allocator destructor for D3D11Sample: the timestamp is owned by the
// separate timestamp allocator, so only the base Sample is torn down here.
static void D3D11Sample_Destructor(D3D11Sample* sample)
{
    Sample_Destructor((Sample*)sample);
}
  3909. void _rmt_BindD3D11(void* device, void* context)
  3910. {
  3911. if (g_Remotery != NULL)
  3912. {
  3913. assert(g_Remotery->d3d11 != NULL);
  3914. assert(device != NULL);
  3915. g_Remotery->d3d11->device = (ID3D11Device*)device;
  3916. assert(context != NULL);
  3917. g_Remotery->d3d11->context = (ID3D11DeviceContext*)context;
  3918. }
  3919. }
  3920. static void FreeD3D11TimeStamps(Sample* sample)
  3921. {
  3922. Sample* child;
  3923. D3D11Sample* d3d_sample = (D3D11Sample*)sample;
  3924. assert(g_Remotery != NULL);
  3925. assert(g_Remotery->d3d11 != NULL);
  3926. assert(d3d_sample->timestamp != NULL);
  3927. ObjectAllocator_Free(g_Remotery->d3d11->timestamp_allocator, (void*)d3d_sample->timestamp);
  3928. d3d_sample->timestamp = NULL;
  3929. for (child = sample->first_child; child != NULL; child = child->next_sibling)
  3930. FreeD3D11TimeStamps(child);
  3931. }
  3932. void _rmt_UnbindD3D11(void)
  3933. {
  3934. if (g_Remotery != NULL)
  3935. {
  3936. D3D11* d3d11 = g_Remotery->d3d11;
  3937. assert(d3d11 != NULL);
  3938. // Inform sampler to not add any more samples
  3939. d3d11->device = NULL;
  3940. d3d11->context = NULL;
  3941. // Flush the main queue of allocated D3D timestamps
  3942. while (1)
  3943. {
  3944. Msg_SampleTree* sample_tree;
  3945. Sample* sample;
  3946. Message* message = MessageQueue_PeekNextMessage(d3d11->mq_to_d3d11_main);
  3947. if (message == NULL)
  3948. break;
  3949. // There's only one valid message type in this queue
  3950. assert(message->id == MsgID_SampleTree);
  3951. sample_tree = (Msg_SampleTree*)message->payload;
  3952. sample = sample_tree->root_sample;
  3953. assert(sample->type == SampleType_D3D11);
  3954. FreeD3D11TimeStamps(sample);
  3955. FreeSampleTree(sample, sample_tree->allocator);
  3956. MessageQueue_ConsumeNextMessage(d3d11->mq_to_d3d11_main, message);
  3957. }
  3958. // Free all allocated D3D resources
  3959. ObjectAllocator_Destroy(d3d11->timestamp_allocator);
  3960. d3d11->timestamp_allocator = NULL;
  3961. }
  3962. }
  3963. void _rmt_BeginD3D11Sample(rmtPStr name, rmtU32* hash_cache)
  3964. {
  3965. ThreadSampler* ts;
  3966. D3D11* d3d11;
  3967. if (g_Remotery == NULL)
  3968. return;
  3969. // Has D3D11 been unbound?
  3970. d3d11 = g_Remotery->d3d11;
  3971. assert(d3d11 != NULL);
  3972. if (d3d11->device == NULL || d3d11->context == NULL)
  3973. return;
  3974. if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
  3975. {
  3976. enum rmtError error;
  3977. Sample* sample;
  3978. rmtU32 name_hash = GetNameHash(name, hash_cache);
  3979. // Create the D3D11 tree on-demand as the tree needs an up-front-created root.
  3980. // This is not possible to create on initialisation as a D3D11 binding is not yet available.
  3981. SampleTree** d3d_tree = &ts->sample_trees[SampleType_D3D11];
  3982. if (*d3d_tree == NULL)
  3983. {
  3984. error = SampleTree_Create(d3d_tree, sizeof(D3D11Sample), (ObjConstructor)D3D11Sample_Constructor, (ObjDestructor)D3D11Sample_Destructor);
  3985. if (error != RMT_ERROR_NONE)
  3986. return;
  3987. }
  3988. // Also create the timestamp allocator on-demand to keep the D3D11 code localised to the same file section
  3989. if (d3d11->timestamp_allocator == NULL)
  3990. error = ObjectAllocator_Create(&d3d11->timestamp_allocator, sizeof(D3D11Timestamp), (ObjConstructor)D3D11Timestamp_Constructor, (ObjDestructor)D3D11Timestamp_Destructor);
  3991. // Push the sample
  3992. if (ThreadSampler_Push(ts, *d3d_tree, name, name_hash, &sample) == RMT_ERROR_NONE)
  3993. {
  3994. D3D11Sample* d3d_sample = (D3D11Sample*)sample;
  3995. // Allocate a timestamp for the sample and activate it
  3996. assert(d3d_sample->timestamp == NULL);
  3997. error = ObjectAllocator_Alloc(d3d11->timestamp_allocator, (void**)&d3d_sample->timestamp);
  3998. if (error == RMT_ERROR_NONE)
  3999. D3D11Timestamp_Begin(d3d_sample->timestamp, d3d11->context);
  4000. }
  4001. }
  4002. }
  4003. void _rmt_EndD3D11Sample(void)
  4004. {
  4005. ThreadSampler* ts;
  4006. D3D11* d3d11;
  4007. if (g_Remotery == NULL)
  4008. return;
  4009. // Has D3D11 been unbound?
  4010. d3d11 = g_Remotery->d3d11;
  4011. assert(d3d11 != NULL);
  4012. if (d3d11->device == NULL || d3d11->context == NULL)
  4013. return;
  4014. if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
  4015. {
  4016. // Close the timestamp
  4017. D3D11Sample* d3d_sample = (D3D11Sample*)ts->sample_trees[SampleType_D3D11]->current_parent;
  4018. if (d3d_sample->timestamp != NULL)
  4019. D3D11Timestamp_End(d3d_sample->timestamp, d3d11->context);
  4020. // Send to the update loop for ready-polling
  4021. ThreadSampler_Pop(ts, d3d11->mq_to_d3d11_main, (Sample*)d3d_sample);
  4022. }
  4023. }
  4024. static rmtBool GetD3D11SampleTimes(Sample* sample, rmtU64 first_timestamp, rmtU64* out_first_timestamp)
  4025. {
  4026. Sample* child;
  4027. D3D11Sample* d3d_sample = (D3D11Sample*)sample;
  4028. assert(sample != NULL);
  4029. if (d3d_sample->timestamp != NULL)
  4030. {
  4031. HRESULT result;
  4032. D3D11* d3d11 = g_Remotery->d3d11;
  4033. assert(d3d11 != NULL);
  4034. result = D3D11Timestamp_GetData(
  4035. d3d_sample->timestamp,
  4036. d3d11->context,
  4037. first_timestamp,
  4038. &sample->us_start,
  4039. &sample->us_end,
  4040. out_first_timestamp);
  4041. if (result != S_OK)
  4042. {
  4043. d3d11->last_error = result;
  4044. return RMT_FALSE;
  4045. }
  4046. }
  4047. // Get child sample times
  4048. for (child = sample->first_child; child != NULL; child = child->next_sibling)
  4049. {
  4050. if (!GetD3D11SampleTimes(child, first_timestamp, out_first_timestamp))
  4051. return RMT_FALSE;
  4052. }
  4053. return RMT_TRUE;
  4054. }
// Called once per frame from the thread that owns the D3D11 context.
// Polls every sample tree queued for GPU readback: trees whose query results
// have all arrived are forwarded to the Remotery thread for sending to the
// viewer; the rest are re-queued to be polled again next frame.
void _rmt_UpdateD3D11Frame(void)
{
    Message* first_message = NULL;
    D3D11* d3d11;

    if (g_Remotery == NULL)
        return;

    d3d11 = g_Remotery->d3d11;
    assert(d3d11 != NULL);

    rmt_BeginCPUSample(rmt_UpdateD3D11Frame);

    // Process all messages in the D3D queue
    while (1)
    {
        Msg_SampleTree* sample_tree;
        Sample* sample;
        rmtU64 first_timestamp;
        rmtBool are_samples_ready;

        Message* message = MessageQueue_PeekNextMessage(d3d11->mq_to_d3d11_main);
        if (message == NULL)
            break;

        // Keep track of the first message encountered during this loop and leave when it's
        // encountered again. This means the loop has had a good attempt at trying to get
        // timing data for all messages in the queue, while re-queued not-ready trees
        // (see below) can't make this pass spin forever.
        if (first_message == NULL)
            first_message = message;
        else if (first_message == message)
            break;

        // There's only one valid message type in this queue
        assert(message->id == MsgID_SampleTree);
        sample_tree = (Msg_SampleTree*)message->payload;
        sample = sample_tree->root_sample;
        assert(sample->type == SampleType_D3D11);

        // Retrieve timing of all D3D11 samples; the shared first_timestamp is
        // latched/updated so every sample is reported relative to it
        first_timestamp = d3d11->first_timestamp;
        are_samples_ready = GetD3D11SampleTimes(sample, first_timestamp, &d3d11->first_timestamp);

        // If the samples are ready, pass them onto the remotery thread for sending to the viewer
        if (are_samples_ready)
        {
            FreeD3D11TimeStamps(sample);
            AddSampleTreeMessage(g_Remotery->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->thread_name, message->thread_sampler);
        }
        else
        {
            // Otherwise just put them to the back of the queue
            AddSampleTreeMessage(d3d11->mq_to_d3d11_main, sample, sample_tree->allocator, sample_tree->thread_name, message->thread_sampler);
        }

        MessageQueue_ConsumeNextMessage(d3d11->mq_to_d3d11_main, message);
    }

    rmt_EndCPUSample();
}
  4104. #endif // RMT_USE_D3D11
  4105. /*
  4106. ------------------------------------------------------------------------------------------------------------------------
  4107. ------------------------------------------------------------------------------------------------------------------------
  4108. @OpenGL: OpenGL event sampling
  4109. ------------------------------------------------------------------------------------------------------------------------
  4110. ------------------------------------------------------------------------------------------------------------------------
  4111. */
  4112. #ifdef RMT_USE_OPENGL
  4113. #define GL_NO_ERROR 0
  4114. #define GL_QUERY_RESULT 0x8866
  4115. #define GL_QUERY_RESULT_AVAILABLE 0x8867
  4116. #define GL_TIME_ELAPSED 0x88BF
  4117. #define GL_TIMESTAMP 0x8E28
  4118. // Not sure which platforms we need
  4119. #if defined(_WIN32)
  4120. # define rmtGetProcAddress(name) wglGetProcAddress((LPCSTR)name)
  4121. #elif defined(__APPLE__) && !defined(GLEW_APPLE_GLX)
  4122. # define rmtGetProcAddress(name) NSGLGetProcAddress(name)
  4123. #elif defined(__sgi) || defined(__sun)
  4124. # define rmtGetProcAddress(name) dlGetProcAddress(name)
  4125. #elif defined(__ANDROID__)
  4126. # define rmtGetProcAddress(name) NULL /* TODO */
  4127. #elif defined(__native_client__)
  4128. # define rmtGetProcAddress(name) NULL /* TODO */
  4129. #else /* __linux */
  4130. # define rmtGetProcAddress(name) (*glXGetProcAddressARB)(name)
  4131. #endif
  4132. #define RMT_GL_GET_FUN(x) g_Remotery->x
  4133. #define glGenQueries RMT_GL_GET_FUN(__glGenQueries)
  4134. #define glDeleteQueries RMT_GL_GET_FUN(__glDeleteQueries)
  4135. #define glBeginQuery RMT_GL_GET_FUN(__glBeginQuery)
  4136. #define glEndQuery RMT_GL_GET_FUN(__glEndQuery)
  4137. #define glGetQueryObjectiv RMT_GL_GET_FUN(__glGetQueryObjectiv)
  4138. #define glGetQueryObjectuiv RMT_GL_GET_FUN(__glGetQueryObjectuiv)
  4139. #define glGetQueryObjecti64v RMT_GL_GET_FUN(__glGetQueryObjecti64v)
  4140. #define glGetQueryObjectui64v RMT_GL_GET_FUN(__glGetQueryObjectui64v)
  4141. #define glQueryCounter RMT_GL_GET_FUN(__glQueryCounter)
// A pair of GL timer queries that bracket one GPU sample.
typedef struct OpenGLTimestamp
{
    // Inherit so that timestamps can be quickly allocated
    ObjectLink ObjectLink;

    // Pair of timestamp queries that wrap the sample: [0] = start, [1] = end
    GLuint queries[2];
} OpenGLTimestamp;
  4149. static enum rmtError OpenGLTimestamp_Constructor(OpenGLTimestamp* stamp)
  4150. {
  4151. assert(stamp != NULL);
  4152. ObjectLink_Constructor((ObjectLink*)stamp);
  4153. // Set defaults
  4154. stamp->queries[0] = stamp->queries[1] = 0;
  4155. // Create start/end timestamp queries
  4156. assert(g_Remotery != NULL);
  4157. assert(g_Remotery->__glGenQueries != NULL);
  4158. glGenQueries(2, stamp->queries);
  4159. int error = glGetError();
  4160. if (error != GL_NO_ERROR)
  4161. return RMT_ERROR_OPENGL_ERROR;
  4162. return RMT_ERROR_NONE;
  4163. }
  4164. static void OpenGLTimestamp_Destructor(OpenGLTimestamp* stamp)
  4165. {
  4166. assert(stamp != NULL);
  4167. // Destroy queries
  4168. if (stamp->queries[0] != 0)
  4169. {
  4170. assert(g_Remotery->__glDeleteQueries != NULL);
  4171. glDeleteQueries(2, stamp->queries);
  4172. int error = glGetError();
  4173. assert(error == GL_NO_ERROR);
  4174. }
  4175. }
  4176. static void OpenGLTimestamp_Begin(OpenGLTimestamp* stamp)
  4177. {
  4178. assert(stamp != NULL);
  4179. // Start of disjoint and first query
  4180. assert(g_Remotery != NULL);
  4181. assert(g_Remotery->__glQueryCounter != NULL);
  4182. glQueryCounter(stamp->queries[0], GL_TIMESTAMP);
  4183. int error = glGetError();
  4184. assert(error == GL_NO_ERROR);
  4185. }
  4186. static void OpenGLTimestamp_End(OpenGLTimestamp* stamp)
  4187. {
  4188. assert(stamp != NULL);
  4189. // End of disjoint and second query
  4190. assert(g_Remotery != NULL);
  4191. assert(g_Remotery->__glQueryCounter != NULL);
  4192. glQueryCounter(stamp->queries[1], GL_TIMESTAMP);
  4193. int error = glGetError();
  4194. assert(error == GL_NO_ERROR);
  4195. }
// Non-blocking readback of a sample's start/end timestamps.
// Returns RMT_FALSE if either query result is not yet available so the caller
// can retry on a later frame. On success writes microsecond start/end times
// (relative to first_timestamp) to out_start/out_end, and latches the very
// first observed start time into *out_first_timestamp if it is still zero.
static rmtBool OpenGLTimestamp_GetData(OpenGLTimestamp* stamp, rmtU64 first_timestamp, rmtU64* out_start, rmtU64* out_end, rmtU64* out_first_timestamp)
{
    GLuint64 start = 0, end = 0;
    GLint startAvailable = 0, endAvailable = 0;
    int error;

    assert(g_Remotery != NULL);
    assert(stamp != NULL);
    assert(stamp->queries[0] != 0 && stamp->queries[1] != 0);
    assert(g_Remotery->__glGetQueryObjectiv != NULL);
    assert(g_Remotery->__glGetQueryObjectui64v != NULL);

    // Check to see if all queries are ready
    // If any fail to arrive, wait until later
    glGetQueryObjectiv(stamp->queries[0], GL_QUERY_RESULT_AVAILABLE, &startAvailable);
    error = glGetError();
    assert(error == GL_NO_ERROR);
    if (!startAvailable)
        return RMT_FALSE;
    glGetQueryObjectiv(stamp->queries[1], GL_QUERY_RESULT_AVAILABLE, &endAvailable);
    error = glGetError();
    assert(error == GL_NO_ERROR);
    if (!endAvailable)
        return RMT_FALSE;

    // Both results are guaranteed available now; fetch them
    glGetQueryObjectui64v(stamp->queries[0], GL_QUERY_RESULT, &start);
    error = glGetError();
    assert(error == GL_NO_ERROR);
    glGetQueryObjectui64v(stamp->queries[1], GL_QUERY_RESULT, &end);
    error = glGetError();
    assert(error == GL_NO_ERROR);

    // Mark the first timestamp
    assert(out_first_timestamp != NULL);
    if (*out_first_timestamp == 0)
        *out_first_timestamp = start;

    // Calculate start and end timestamps (we want us, the queries give us ns)
    // NOTE(review): assumes start/end >= first_timestamp; a timestamp recorded
    // earlier than the latched base would wrap the unsigned subtraction — confirm
    // ordering guarantees between samples.
    *out_start = (rmtU64)(start - first_timestamp) / 1000ULL;
    *out_end = (rmtU64)(end - first_timestamp) / 1000ULL;

    return RMT_TRUE;
}
// Sample specialisation that carries GPU timing via a pair of timer queries.
typedef struct OpenGLSample
{
    // IS-A inheritance relationship
    Sample Sample;

    // GPU start/end queries for this sample; NULL until allocated in
    // _rmt_BeginOpenGLSample and returned to the allocator by FreeOpenGLTimeStamps
    OpenGLTimestamp* timestamp;
} OpenGLSample;
  4239. static enum rmtError OpenGLSample_Constructor(OpenGLSample* sample)
  4240. {
  4241. assert(sample != NULL);
  4242. // Chain to sample constructor
  4243. Sample_Constructor((Sample*)sample);
  4244. sample->Sample.type = SampleType_OpenGL;
  4245. sample->Sample.size_bytes = sizeof(OpenGLSample);
  4246. sample->timestamp = NULL;
  4247. return RMT_ERROR_NONE;
  4248. }
// Sample-tree destructor: chains to the base Sample destructor. The timestamp
// is not released here; it is returned to the allocator by FreeOpenGLTimeStamps.
static void OpenGLSample_Destructor(OpenGLSample* sample)
{
    Sample_Destructor((Sample*)sample);
}
  4253. void _rmt_BindOpenGL()
  4254. {
  4255. if (g_Remotery != NULL)
  4256. {
  4257. g_Remotery->__glGenQueries = (PFNGLGENQUERIESPROC)rmtGetProcAddress((const GLubyte*)"glGenQueries");
  4258. g_Remotery->__glDeleteQueries = (PFNGLDELETEQUERIESPROC)rmtGetProcAddress((const GLubyte*)"glDeleteQueries");
  4259. g_Remotery->__glBeginQuery = (PFNGLBEGINQUERYPROC)rmtGetProcAddress((const GLubyte*)"glBeginQuery");
  4260. g_Remotery->__glEndQuery = (PFNGLENDQUERYPROC)rmtGetProcAddress((const GLubyte*)"glEndQuery");
  4261. g_Remotery->__glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)rmtGetProcAddress((const GLubyte*)"glGetQueryObjectiv");
  4262. g_Remotery->__glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC)rmtGetProcAddress((const GLubyte*)"glGetQueryObjectuiv");
  4263. g_Remotery->__glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC)rmtGetProcAddress((const GLubyte*)"glGetQueryObjecti64v");
  4264. g_Remotery->__glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC)rmtGetProcAddress((const GLubyte*)"glGetQueryObjectui64v");
  4265. g_Remotery->__glQueryCounter = (PFNGLQUERYCOUNTERPROC)rmtGetProcAddress((const GLubyte*)"glQueryCounter");
  4266. }
  4267. }
// Recursively returns every sample's timestamp in the tree to the global
// OpenGL timestamp allocator and clears the pointers.
static void FreeOpenGLTimeStamps(Sample* sample)
{
    Sample* child;
    OpenGLSample* ogl_sample = (OpenGLSample*)sample;

    // NOTE(review): asserts every sample has a timestamp, yet _rmt_BeginOpenGLSample
    // can leave timestamp NULL when ObjectAllocator_Alloc fails — confirm that such
    // a tree can never reach this function.
    assert(ogl_sample->timestamp != NULL);
    ObjectAllocator_Free(g_Remotery->opengl_timestamp_allocator, (void*)ogl_sample->timestamp);
    ogl_sample->timestamp = NULL;

    for (child = sample->first_child; child != NULL; child = child->next_sibling)
        FreeOpenGLTimeStamps(child);
}
// Severs the OpenGL binding: drains every sample tree still queued for GPU
// readback (freeing its timestamps and tree memory), then destroys the
// timestamp allocator so a later re-bind starts from scratch.
void _rmt_UnbindOpenGL(void)
{
    if (g_Remotery != NULL)
    {
        // Flush the main queue of allocated OpenGL timestamps
        while (1)
        {
            Msg_SampleTree* sample_tree;
            Sample* sample;

            Message* message = MessageQueue_PeekNextMessage(g_Remotery->mq_to_opengl_main);
            if (message == NULL)
                break;

            // There's only one valid message type in this queue
            assert(message->id == MsgID_SampleTree);
            sample_tree = (Msg_SampleTree*)message->payload;
            sample = sample_tree->root_sample;
            assert(sample->type == SampleType_OpenGL);
            FreeOpenGLTimeStamps(sample);
            FreeSampleTree(sample, sample_tree->allocator);

            MessageQueue_ConsumeNextMessage(g_Remotery->mq_to_opengl_main, message);
        }

        // Free all allocated OpenGL resources
        ObjectAllocator_Destroy(g_Remotery->opengl_timestamp_allocator);
        g_Remotery->opengl_timestamp_allocator = NULL;
    }
}
  4304. void _rmt_BeginOpenGLSample(rmtPStr name, rmtU32* hash_cache)
  4305. {
  4306. ThreadSampler* ts;
  4307. if (g_Remotery == NULL)
  4308. return;
  4309. if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
  4310. {
  4311. enum rmtError error;
  4312. Sample* sample;
  4313. rmtU32 name_hash = GetNameHash(name, hash_cache);
  4314. // Create the OpenGL tree on-demand as the tree needs an up-front-created root.
  4315. // This is not possible to create on initialisation as a OpenGL binding is not yet available.
  4316. SampleTree** ogl_tree = &ts->sample_trees[SampleType_OpenGL];
  4317. if (*ogl_tree == NULL)
  4318. {
  4319. error = SampleTree_Create(ogl_tree, sizeof(OpenGLSample), (ObjConstructor)OpenGLSample_Constructor, (ObjDestructor)OpenGLSample_Destructor);
  4320. if (error != RMT_ERROR_NONE)
  4321. return;
  4322. }
  4323. // Also create the timestamp allocator on-demand to keep the OpenGL code localised to the same file section
  4324. if (g_Remotery->opengl_timestamp_allocator == NULL)
  4325. error = ObjectAllocator_Create(&g_Remotery->opengl_timestamp_allocator, sizeof(OpenGLTimestamp), (ObjConstructor)OpenGLTimestamp_Constructor, (ObjDestructor)OpenGLTimestamp_Destructor);
  4326. // Push the sample
  4327. if (ThreadSampler_Push(ts, *ogl_tree, name, name_hash, &sample) == RMT_ERROR_NONE)
  4328. {
  4329. OpenGLSample* ogl_sample = (OpenGLSample*)sample;
  4330. // Allocate a timestamp for the sample and activate it
  4331. assert(ogl_sample->timestamp == NULL);
  4332. error = ObjectAllocator_Alloc(g_Remotery->opengl_timestamp_allocator, (void**)&ogl_sample->timestamp);
  4333. if (error == RMT_ERROR_NONE)
  4334. OpenGLTimestamp_Begin(ogl_sample->timestamp);
  4335. }
  4336. }
  4337. }
  4338. void _rmt_EndOpenGLSample(void)
  4339. {
  4340. ThreadSampler* ts;
  4341. if (g_Remotery == NULL)
  4342. return;
  4343. if (Remotery_GetThreadSampler(g_Remotery, &ts) == RMT_ERROR_NONE)
  4344. {
  4345. // Close the timestamp
  4346. OpenGLSample* ogl_sample = (OpenGLSample*)ts->sample_trees[SampleType_OpenGL]->current_parent;
  4347. if (ogl_sample->timestamp != NULL)
  4348. OpenGLTimestamp_End(ogl_sample->timestamp);
  4349. // Send to the update loop for ready-polling
  4350. ThreadSampler_Pop(ts, g_Remotery->mq_to_opengl_main, (Sample*)ogl_sample);
  4351. }
  4352. }
  4353. static rmtBool GetOpenGLSampleTimes(Sample* sample, rmtU64 first_timestamp, rmtU64* out_first_timestamp)
  4354. {
  4355. Sample* child;
  4356. OpenGLSample* ogl_sample = (OpenGLSample*)sample;
  4357. assert(sample != NULL);
  4358. if (ogl_sample->timestamp != NULL)
  4359. {
  4360. if (!OpenGLTimestamp_GetData(ogl_sample->timestamp, first_timestamp, &sample->us_start, &sample->us_end, out_first_timestamp))
  4361. return RMT_FALSE;
  4362. }
  4363. // Get child sample times
  4364. for (child = sample->first_child; child != NULL; child = child->next_sibling)
  4365. {
  4366. if (!GetOpenGLSampleTimes(child, first_timestamp, out_first_timestamp))
  4367. return RMT_FALSE;
  4368. }
  4369. return RMT_TRUE;
  4370. }
// Called once per frame from the thread that owns the GL context.
// Polls every sample tree queued for GPU readback: trees whose query results
// have all arrived are forwarded to the Remotery thread for sending to the
// viewer; the rest are re-queued to be polled again next frame.
void _rmt_UpdateOpenGLFrame(void)
{
    Message* first_message = NULL;

    if (g_Remotery == NULL)
        return;

    rmt_BeginCPUSample(rmt_UpdateOpenGLFrame);

    // Process all messages in the OpenGL queue
    while (1)
    {
        Msg_SampleTree* sample_tree;
        Sample* sample;
        rmtU64 first_timestamp;
        rmtBool are_samples_ready;

        Message* message = MessageQueue_PeekNextMessage(g_Remotery->mq_to_opengl_main);
        if (message == NULL)
            break;

        // Keep track of the first message encountered during this loop and leave when it's
        // encountered again. This means the loop has had a good attempt at trying to get
        // timing data for all messages in the queue, while re-queued not-ready trees
        // (see below) can't make this pass spin forever.
        if (first_message == NULL)
            first_message = message;
        else if (first_message == message)
            break;

        // There's only one valid message type in this queue
        assert(message->id == MsgID_SampleTree);
        sample_tree = (Msg_SampleTree*)message->payload;
        sample = sample_tree->root_sample;
        assert(sample->type == SampleType_OpenGL);

        // Retrieve timing of all OpenGL samples; the shared first_timestamp is
        // latched/updated so every sample is reported relative to it
        first_timestamp = g_Remotery->opengl_first_timestamp;
        are_samples_ready = GetOpenGLSampleTimes(sample, first_timestamp, &g_Remotery->opengl_first_timestamp);

        // If the samples are ready, pass them onto the remotery thread for sending to the viewer
        if (are_samples_ready)
        {
            FreeOpenGLTimeStamps(sample);
            AddSampleTreeMessage(g_Remotery->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->thread_name, message->thread_sampler);
        }
        else
        {
            // Otherwise just put them to the back of the queue
            AddSampleTreeMessage(g_Remotery->mq_to_opengl_main, sample, sample_tree->allocator, sample_tree->thread_name, message->thread_sampler);
        }

        MessageQueue_ConsumeNextMessage(g_Remotery->mq_to_opengl_main, message);
    }

    rmt_EndCPUSample();
}
  4417. #endif // RMT_USE_OPENGL
  4418. #endif // RMT_ENABLED