PageRenderTime 31ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/libredis/parser.c

https://github.com/CorCornelisse/libredis
C | 406 lines | 339 code | 15 blank | 52 comment | 92 complexity | 6e25a23b3702d4763c77100392544d87 MD5 | raw file
  1. /**
  2. * Copyright (C) 2010, Hyves (Startphone Ltd.)
  3. *
  4. * This module is part of Libredis (http://github.com/toymachine/libredis) and is released under
  5. * the New BSD License: http://www.opensource.org/licenses/bsd-license.php
  6. *
  7. */
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <ctype.h>
  11. #include <assert.h>
  12. #include <stdlib.h>
  13. #include "common.h"
  14. #include "alloc.h"
  15. #include "parser.h"
  16. #define MARK rp->mark = rp->p
  17. struct _ReplyParser
  18. {
  19. size_t p; //position
  20. int cs; //state
  21. int bulk_count; //number of chars to read for current binary safe bulk-value
  22. int multibulk_count; //the number of bulk replies to read for the current multibulk reply
  23. Reply *multibulk_reply;
  24. size_t mark; //helper to mark start of interesting data
  25. };
  26. void ReplyParser_reset(ReplyParser *rp)
  27. {
  28. rp->p = 0;
  29. rp->cs = 0;
  30. rp->bulk_count = 0;
  31. rp->mark = 0;
  32. rp->multibulk_count = 0;
  33. rp->multibulk_reply = NULL;
  34. }
  35. ReplyParser *ReplyParser_new()
  36. {
  37. DEBUG(("alloc ReplyParser\n"));
  38. ReplyParser *rp = Alloc_alloc_T(ReplyParser);
  39. if(rp == NULL) {
  40. Module_set_error(GET_MODULE(), "Out of memory while allocating ReplyParser");
  41. return NULL;
  42. }
  43. ReplyParser_reset(rp);
  44. return rp;
  45. }
  46. void ReplyParser_free(ReplyParser *rp)
  47. {
  48. if(rp == NULL) {
  49. return;
  50. }
  51. DEBUG(("dealloc ReplyParser\n"));
  52. Alloc_free_T(rp, ReplyParser);
  53. }
  54. /**
  55. * A State machine for parsing Redis replies.
  56. * State is kept in the ReplyParser instance rp. The execute method can be called over and over
  57. * again parsing evermore Replies from the given buffer.
  58. * The method returns with RPR_ERROR if there is an error in the stream,
  59. * RPR_MORE if it is not in an end-state, but the buffer ran out, indicating that more
  60. * data needs to be read.
  61. * Finally it returns RPR_REPLY everytime a valid Redis reply is parsed from the buffer, returning an instance
  62. * of Reply in the 'reply' out parameter.
  63. * 0 is the initial state, and after reading a valid reply, the machine will return to this state, ready to parse
  64. * a new reply.
  65. * states: 0->1->2 => single line positive reply (+OK\r\n)
  66. * 0->3->4 => single line negative reply (-Some error msg\r\n)
  67. * 0->5->6->7->8 => nil bulk reply ($-1\r\n)
  68. * 0->5->9->10->11->12 => bulk reply ($5\r\nblaat\r\n)
  69. * 0->13->14->15->16 => nil multibulk reply (*-1\r\n)
  70. * 0->13->17->16 => nil multibulk reply (*0\r\n)
  71. * 0->13->17->18 => multibulk reply (*3\r\n (... bulk replies ...)
  72. * 0->19->20 => integer reply (:42\r\n)
  73. * Note that it is not a 'pure' state machine (from a language theory perspective), e.g. some additional state is kept to
  74. * keep track of the number of chars to still read in a bulk reply, and some state to keep track of bulk replies that
  75. * belong to a multibulk reply.
  76. */
  77. ReplyParserResult ReplyParser_execute(ReplyParser *rp, Buffer *buffer, size_t len, Reply **reply)
  78. {
  79. DEBUG(("enter rp exec, rp->p: %d, len: %d, cs: %d\n", rp->p, len, rp->cs));
  80. assert(rp->p <= len);
  81. while((rp->p) < len) {
  82. *reply = NULL;
  83. Byte c = Buffer_data(buffer)[rp->p];
  84. //printf("cs: %d, char: %d\n", rp->cs, c);
  85. switch(rp->cs) {
  86. case 0: { //initial state
  87. if(c == '$') { //possible start of bulk-reply
  88. rp->p++;
  89. rp->cs = 5;
  90. continue;
  91. }
  92. else if(rp->multibulk_count == 0) {
  93. //the replies below only match when we are NOT in a multibulk reply.
  94. if(c == '+') {
  95. //possible start of positive single line server reply (e.g. +OK\r\n)
  96. rp->p++;
  97. rp->cs = 1;
  98. MARK;
  99. continue;
  100. }
  101. else if(c == '-') { //negative
  102. //possible start of negative single line server reply (e.g. -Some error message\r\n)
  103. rp->p++;
  104. rp->cs = 3;
  105. MARK;
  106. continue;
  107. }
  108. else if(c == '*') {
  109. //possible start of multibulk reply
  110. rp->p++;
  111. rp->cs = 13;
  112. continue;
  113. }
  114. else if(c == ':') {
  115. //possible start of integer reply
  116. rp->p++;
  117. rp->cs = 19;
  118. MARK;
  119. continue;
  120. }
  121. }
  122. break;
  123. }
  124. //term CRLF of single line reply
  125. case 1: {
  126. if(c == CR) {
  127. rp->p++;
  128. rp->cs = 2;
  129. continue;
  130. }
  131. else {
  132. rp->p++;
  133. continue;
  134. }
  135. break;
  136. }
  137. case 2: {
  138. if(c == LF) {
  139. rp->p++;
  140. rp->cs = 0;
  141. //report line data
  142. *reply = Reply_new(RT_OK, buffer, rp->mark, rp->p - rp->mark - 2);
  143. return RPR_REPLY;
  144. }
  145. break;
  146. }
  147. //end term CRLF of single line reply
  148. //term CRLF of error line reply
  149. case 3: {
  150. if(c == CR) {
  151. rp->p++;
  152. rp->cs = 4;
  153. continue;
  154. }
  155. else {
  156. rp->p++;
  157. continue;
  158. }
  159. break;
  160. }
  161. case 4: {
  162. if(c == LF) {
  163. rp->p++;
  164. rp->cs = 0;
  165. //report error line data
  166. *reply = Reply_new(RT_ERROR, buffer, rp->mark, rp->p - rp->mark - 2);
  167. return RPR_REPLY;
  168. }
  169. break;
  170. }
  171. //end term CRLF of single line reply
  172. //start bulk reply
  173. case 5: {
  174. if(c == '-') { //nill bulk reply
  175. rp->p++;
  176. rp->cs = 6;
  177. continue;
  178. }
  179. else if(isdigit(c)) { //normal bulk reply
  180. MARK;
  181. rp->p++;
  182. rp->cs = 9;
  183. continue;
  184. }
  185. break;
  186. }
  187. //start nil bulk reply
  188. case 6: {
  189. if(c == '1') {
  190. rp->p++;
  191. rp->cs = 7;
  192. continue;
  193. }
  194. break;
  195. }
  196. case 7: {
  197. if(c == CR) {
  198. rp->p++;
  199. rp->cs = 8;
  200. continue;
  201. }
  202. break;
  203. }
  204. case 8: {
  205. if(c == LF) {
  206. rp->p++;
  207. rp->cs = 0;
  208. *reply = Reply_new(RT_BULK_NIL, buffer, 0, 0);
  209. if(rp->multibulk_count > 0) {
  210. rp->multibulk_count -= 1;
  211. assert(rp->multibulk_reply != NULL);
  212. Reply_add_child(rp->multibulk_reply, *reply);
  213. if(rp->multibulk_count == 0) {
  214. *reply = rp->multibulk_reply;
  215. rp->multibulk_reply = NULL;
  216. return RPR_REPLY;
  217. }
  218. else {
  219. continue;
  220. }
  221. }
  222. else {
  223. return RPR_REPLY;
  224. }
  225. }
  226. break;
  227. }
  228. //end nil bulk reply
  229. //start normal bulk reply
  230. case 9: {
  231. if(c == CR) { //end of digits
  232. rp->bulk_count = atoi(Buffer_data(buffer) + rp->mark);
  233. rp->p++;
  234. rp->cs = 10;
  235. continue;
  236. }
  237. else if(isdigit(c)) { //one more digit
  238. rp->p++;
  239. continue;
  240. }
  241. break;
  242. }
  243. case 10: {
  244. if(c == LF) {
  245. rp->p++;
  246. rp->cs = 11;
  247. MARK;
  248. continue;
  249. }
  250. break;
  251. }
  252. case 11: { //reading of bulk_count chars
  253. int n = MIN(rp->bulk_count, len - rp->p);
  254. //printf("n=%d\n", n);
  255. if(n == 0 && c == CR) {
  256. rp->p++;
  257. rp->cs = 12;
  258. continue;
  259. }
  260. else if(n > 0) {
  261. rp->p += n;
  262. rp->bulk_count -= n;
  263. continue;
  264. }
  265. break;
  266. }
  267. case 12: {
  268. if(c == LF) {
  269. assert(rp->bulk_count == 0);
  270. rp->p++;
  271. rp->cs = 0;
  272. *reply = Reply_new(RT_BULK, buffer, rp->mark, rp->p - rp->mark - 2);
  273. if(rp->multibulk_count > 0) {
  274. rp->multibulk_count -= 1;
  275. assert(rp->multibulk_reply != NULL);
  276. Reply_add_child(rp->multibulk_reply, *reply);
  277. if(rp->multibulk_count == 0) {
  278. *reply = rp->multibulk_reply;
  279. rp->multibulk_reply = NULL;
  280. return RPR_REPLY;
  281. }
  282. else {
  283. continue;
  284. }
  285. }
  286. else {
  287. return RPR_REPLY;
  288. }
  289. }
  290. break;
  291. }
  292. //start multibulk reply
  293. case 13: {
  294. if(c == '-') { //nil multibulk reply
  295. rp->p++;
  296. rp->cs = 14;
  297. continue;
  298. }
  299. else if(isdigit(c)) { //normal multibulk reply
  300. MARK;
  301. rp->p++;
  302. rp->cs = 17;
  303. continue;
  304. }
  305. break;
  306. }
  307. //start nil multibulk reply
  308. case 14: {
  309. if(c == '1') {
  310. rp->p++;
  311. rp->cs = 15;
  312. continue;
  313. }
  314. break;
  315. }
  316. case 15: {
  317. if(c == CR) {
  318. rp->p++;
  319. rp->cs = 16;
  320. continue;
  321. }
  322. break;
  323. }
  324. case 16: {
  325. if(c == LF) {
  326. rp->p++;
  327. rp->cs = 0;
  328. *reply = Reply_new(RT_MULTIBULK_NIL, NULL, 0, 0);
  329. return RPR_REPLY;
  330. }
  331. break;
  332. }
  333. //start normal multibulk reply
  334. case 17: {
  335. if(c == CR) { //end of digits
  336. rp->multibulk_count = atoi(Buffer_data(buffer) + rp->mark);
  337. if(rp->multibulk_count == 0) {
  338. rp->p++;
  339. rp->cs = 16;
  340. }
  341. else {
  342. rp->multibulk_reply = Reply_new(RT_MULTIBULK, NULL, 0, rp->multibulk_count);
  343. rp->p++;
  344. rp->cs = 18;
  345. }
  346. continue;
  347. }
  348. else if(isdigit(c)) { //one more digit
  349. rp->p++;
  350. continue;
  351. }
  352. break;
  353. }
  354. case 18: {
  355. if(c == LF) {
  356. rp->p++;
  357. rp->cs = 0;
  358. continue;
  359. }
  360. break;
  361. }
  362. //integer reply
  363. case 19: {
  364. if(c == CR) {
  365. rp->p++;
  366. rp->cs = 20;
  367. continue;
  368. }
  369. else {
  370. rp->p++;
  371. continue;
  372. }
  373. break;
  374. }
  375. case 20: {
  376. if(c == LF) {
  377. rp->p++;
  378. rp->cs = 0;
  379. //report integer data
  380. *reply = Reply_new(RT_INTEGER, buffer, rp->mark, rp->p - rp->mark - 2);
  381. return RPR_REPLY;
  382. }
  383. break;
  384. }
  385. }
  386. return RPR_ERROR;
  387. }
  388. DEBUG(("exit rp pos: %d len: %d cs: %d\n", rp->p, len, rp->cs));
  389. assert(rp->p == len);
  390. return RPR_MORE;
  391. }