PageRenderTime 62ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/ext/date/date_strptime.c

https://github.com/idletekz/ruby
C | 699 lines | 585 code | 106 blank | 8 comment | 113 complexity | 6492e309d06d4488f772f1ffdfd1f35b MD5 | raw file
  1. /*
  2. date_strptime.c: Coded by Tadayoshi Funaba 2011
  3. */
  4. #include "ruby.h"
  5. #include "ruby/encoding.h"
  6. #include "ruby/re.h"
  7. #include <ctype.h>
  /*
   * Literal name tables used by the directive parser.  Entries are tried in
   * array order and the first case-insensitive prefix match wins, so every
   * full name is listed before its abbreviation.
   *
   * Both the characters and the pointers are const: the tables are read-only
   * lookup data.
   */

  /* Weekday names; index % 7 is the weekday number (Sunday == 0). */
  static const char *const day_names[] = {
      "Sunday", "Monday", "Tuesday", "Wednesday",
      "Thursday", "Friday", "Saturday",
      "Sun", "Mon", "Tue", "Wed",
      "Thu", "Fri", "Sat"
  };

  /* Month names; (index % 12) + 1 is the month number (January == 1). */
  static const char *const month_names[] = {
      "January", "February", "March", "April",
      "May", "June", "July", "August", "September",
      "October", "November", "December",
      "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };

  /* Meridian indicators; even indexes are "am" forms, odd indexes "pm" forms. */
  static const char *const merid_names[] = {
      "am", "pm",
      "a.m.", "p.m."
  };

  /* Extended zone directives (%:z, %::z, %:::z); index == extra colons. */
  static const char *const extz_pats[] = {
      ":z",
      "::z",
      ":::z"
  };
  /* Element count of a true array (must not be applied to a pointer). */
  #define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

  /*
   * Thin wrappers that dispatch a Ruby method on a VALUE through rb_funcall.
   * Single-character operators pass the character itself as the method ID;
   * multi-character names are looked up with rb_intern.  Every argument is
   * parenthesized so that arbitrary expressions can be passed safely.
   */
  #define f_negate(x) rb_funcall((x), rb_intern("-@"), 0)
  #define f_add(x,y) rb_funcall((x), '+', 1, (y))
  #define f_sub(x,y) rb_funcall((x), '-', 1, (y))
  #define f_mul(x,y) rb_funcall((x), '*', 1, (y))
  #define f_div(x,y) rb_funcall((x), '/', 1, (y))
  #define f_idiv(x,y) rb_funcall((x), rb_intern("div"), 1, (y))
  #define f_mod(x,y) rb_funcall((x), '%', 1, (y))
  #define f_expt(x,y) rb_funcall((x), rb_intern("**"), 1, (y))

  /* Comparisons; the result is whatever VALUE the Ruby method returns. */
  #define f_lt_p(x,y) rb_funcall((x), '<', 1, (y))
  #define f_gt_p(x,y) rb_funcall((x), '>', 1, (y))
  #define f_le_p(x,y) rb_funcall((x), rb_intern("<="), 1, (y))
  #define f_ge_p(x,y) rb_funcall((x), rb_intern(">="), 1, (y))

  /* Regexp / MatchData helpers. */
  #define f_match(r,s) rb_funcall((r), rb_intern("match"), 1, (s))
  #define f_aref(o,i) rb_funcall((o), rb_intern("[]"), 1, (i))
  #define f_end(o,i) rb_funcall((o), rb_intern("end"), 1, (i))

  /* True when c is an explicit sign character. */
  #define issign(c) ((c) == '-' || (c) == '+')
  /*
   * Report whether the format text at `s` starts with something that will
   * consume digits: either a literal digit, or a '%' directive (optionally
   * carrying an 'E' or 'O' modifier) whose conversion character is numeric.
   *
   * The parser uses this to decide whether a variable-width numeric field
   * must be limited to a fixed width because another number follows directly.
   *
   * `s` must point at a NUL-terminated string.  Returns 1 or 0.
   */
  static int
  num_pattern_p(const char *s)
  {
      /* <ctype.h> functions require a value representable as unsigned char;
         passing a negative plain char is undefined behaviour. */
      if (isdigit((unsigned char)*s))
          return 1;
      if (*s != '%')
          return 0;

      s++;
      if (*s == 'E' || *s == 'O')
          s++;
      /* Guard against the terminator: strchr() would "find" '\0'. */
      if (*s == '\0')
          return 0;

      return strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) != NULL
          || isdigit((unsigned char)*s);
  }

  /* Does the format directly after the current directive consume digits? */
  #define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
  63. static long
  64. read_digits(const char *s, VALUE *n, size_t width)
  65. {
  66. size_t l;
  67. l = strspn(s, "0123456789");
  68. if (l == 0)
  69. return 0;
  70. if (width < l)
  71. l = width;
  72. if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
  73. const char *os = s;
  74. long v;
  75. v = 0;
  76. while ((size_t)(s - os) < l) {
  77. v *= 10;
  78. v += *s - '0';
  79. s++;
  80. }
  81. if (os == s)
  82. return 0;
  83. *n = LONG2NUM(v);
  84. return l;
  85. }
  86. else {
  87. char *s2 = ALLOCA_N(char, l + 1);
  88. memcpy(s2, s, l);
  89. s2[l] = '\0';
  90. *n = rb_cstr_to_inum(s2, 10, 0);
  91. return l;
  92. }
  93. }
  /*
   * Accessors for the result hash.  They expect a VALUE named `hash` in the
   * enclosing scope; `k` is a C string that is interned into a Symbol key.
   */
  #define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), (v))
  #define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
  #define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))

  /*
   * Mark the parse as failed and leave the enclosing function with 0.
   * Wrapped in do/while(0) so that `fail();` is a single statement and is
   * safe inside an unbraced if/else.
   */
  #define fail() \
  do { \
      set_hash("_fail", Qtrue); \
      return 0; \
  } while (0)

  /* True once a failure has been recorded in `hash`. */
  #define fail_p() (!NIL_P(ref_hash("_fail")))

  /*
   * Read at most `w` digits at str[si] into the VALUE variable `n` and
   * advance `si`; fails the parse when no digit is present.  Expects `str`
   * and `si` in the enclosing scope.
   *
   * This deliberately stays a bare brace block: existing call sites invoke
   * it without a trailing semicolon directly before `else`, which a
   * do/while(0) wrapper would break.
   */
  #define READ_DIGITS(n,w) \
  { \
      size_t l; \
      l = read_digits(&str[si], &(n), (w)); \
      if (l == 0) \
          fail(); \
      si += l; \
  }

  /* Read as many digits as are available. */
  #define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
  112. static int
  113. valid_range_p(VALUE v, int a, int b)
  114. {
  115. if (FIXNUM_P(v)) {
  116. int vi = FIX2INT(v);
  117. return !(vi < a || vi > b);
  118. }
  119. return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
  120. }
  121. #define recur(fmt) \
  122. { \
  123. size_t l; \
  124. l = date__strptime_internal(&str[si], slen - si, \
  125. fmt, sizeof fmt - 1, hash); \
  126. if (fail_p()) \
  127. return 0; \
  128. si += l; \
  129. }
  130. VALUE date_zone_to_diff(VALUE);
  131. static size_t
  132. date__strptime_internal(const char *str, size_t slen,
  133. const char *fmt, size_t flen, VALUE hash)
  134. {
  135. size_t si, fi;
  136. int c;
  137. si = fi = 0;
  138. while (fi < flen) {
  139. switch (fmt[fi]) {
  140. case '%':
  141. again:
  142. fi++;
  143. c = fmt[fi];
  144. switch (c) {
  145. case 'E':
  146. if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
  147. goto again;
  148. fi--;
  149. goto ordinal;
  150. case 'O':
  151. if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
  152. goto again;
  153. fi--;
  154. goto ordinal;
  155. case ':':
  156. {
  157. int i;
  158. for (i = 0; i < (int)sizeof_array(extz_pats); i++)
  159. if (strncmp(extz_pats[i], &fmt[fi],
  160. strlen(extz_pats[i])) == 0) {
  161. fi += i;
  162. goto again;
  163. }
  164. fail();
  165. }
  166. case 'A':
  167. case 'a':
  168. {
  169. int i;
  170. for (i = 0; i < (int)sizeof_array(day_names); i++) {
  171. size_t l = strlen(day_names[i]);
  172. if (strncasecmp(day_names[i], &str[si], l) == 0) {
  173. si += l;
  174. set_hash("wday", INT2FIX(i % 7));
  175. goto matched;
  176. }
  177. }
  178. fail();
  179. }
  180. case 'B':
  181. case 'b':
  182. case 'h':
  183. {
  184. int i;
  185. for (i = 0; i < (int)sizeof_array(month_names); i++) {
  186. size_t l = strlen(month_names[i]);
  187. if (strncasecmp(month_names[i], &str[si], l) == 0) {
  188. si += l;
  189. set_hash("mon", INT2FIX((i % 12) + 1));
  190. goto matched;
  191. }
  192. }
  193. fail();
  194. }
  195. case 'C':
  196. {
  197. VALUE n;
  198. if (NUM_PATTERN_P())
  199. READ_DIGITS(n, 2)
  200. else
  201. READ_DIGITS_MAX(n)
  202. set_hash("_cent", n);
  203. goto matched;
  204. }
  205. case 'c':
  206. recur("%a %b %e %H:%M:%S %Y");
  207. goto matched;
  208. case 'D':
  209. recur("%m/%d/%y");
  210. goto matched;
  211. case 'd':
  212. case 'e':
  213. {
  214. VALUE n;
  215. if (str[si] == ' ') {
  216. si++;
  217. READ_DIGITS(n, 1);
  218. } else {
  219. READ_DIGITS(n, 2);
  220. }
  221. if (!valid_range_p(n, 1, 31))
  222. fail();
  223. set_hash("mday", n);
  224. goto matched;
  225. }
  226. case 'F':
  227. recur("%Y-%m-%d");
  228. goto matched;
  229. case 'G':
  230. {
  231. VALUE n;
  232. if (NUM_PATTERN_P())
  233. READ_DIGITS(n, 4)
  234. else
  235. READ_DIGITS_MAX(n)
  236. set_hash("cwyear", n);
  237. goto matched;
  238. }
  239. case 'g':
  240. {
  241. VALUE n;
  242. READ_DIGITS(n, 2);
  243. if (!valid_range_p(n, 0, 99))
  244. fail();
  245. set_hash("cwyear",n);
  246. set_hash("_cent",
  247. INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
  248. goto matched;
  249. }
  250. case 'H':
  251. case 'k':
  252. {
  253. VALUE n;
  254. if (str[si] == ' ') {
  255. si++;
  256. READ_DIGITS(n, 1);
  257. } else {
  258. READ_DIGITS(n, 2);
  259. }
  260. if (!valid_range_p(n, 0, 24))
  261. fail();
  262. set_hash("hour", n);
  263. goto matched;
  264. }
  265. case 'I':
  266. case 'l':
  267. {
  268. VALUE n;
  269. if (str[si] == ' ') {
  270. si++;
  271. READ_DIGITS(n, 1);
  272. } else {
  273. READ_DIGITS(n, 2);
  274. }
  275. if (!valid_range_p(n, 1, 12))
  276. fail();
  277. set_hash("hour", n);
  278. goto matched;
  279. }
  280. case 'j':
  281. {
  282. VALUE n;
  283. READ_DIGITS(n, 3);
  284. if (!valid_range_p(n, 1, 366))
  285. fail();
  286. set_hash("yday", n);
  287. goto matched;
  288. }
  289. case 'L':
  290. case 'N':
  291. {
  292. VALUE n;
  293. int sign = 1;
  294. size_t osi;
  295. if (issign(str[si])) {
  296. if (str[si] == '-')
  297. sign = -1;
  298. si++;
  299. }
  300. osi = si;
  301. if (NUM_PATTERN_P())
  302. READ_DIGITS(n, c == 'L' ? 3 : 9)
  303. else
  304. READ_DIGITS_MAX(n)
  305. if (sign == -1)
  306. n = f_negate(n);
  307. set_hash("sec_fraction",
  308. rb_rational_new2(n,
  309. f_expt(INT2FIX(10),
  310. ULONG2NUM(si - osi))));
  311. goto matched;
  312. }
  313. case 'M':
  314. {
  315. VALUE n;
  316. READ_DIGITS(n, 2);
  317. if (!valid_range_p(n, 0, 59))
  318. fail();
  319. set_hash("min", n);
  320. goto matched;
  321. }
  322. case 'm':
  323. {
  324. VALUE n;
  325. READ_DIGITS(n, 2);
  326. if (!valid_range_p(n, 1, 12))
  327. fail();
  328. set_hash("mon", n);
  329. goto matched;
  330. }
  331. case 'n':
  332. case 't':
  333. recur(" ");
  334. goto matched;
  335. case 'P':
  336. case 'p':
  337. {
  338. int i;
  339. for (i = 0; i < 4; i++) {
  340. size_t l = strlen(merid_names[i]);
  341. if (strncasecmp(merid_names[i], &str[si], l) == 0) {
  342. si += l;
  343. set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
  344. goto matched;
  345. }
  346. }
  347. fail();
  348. }
  349. case 'Q':
  350. {
  351. VALUE n;
  352. int sign = 1;
  353. if (str[si] == '-') {
  354. sign = -1;
  355. si++;
  356. }
  357. READ_DIGITS_MAX(n);
  358. if (sign == -1)
  359. n = f_negate(n);
  360. set_hash("seconds",
  361. rb_rational_new2(n,
  362. f_expt(INT2FIX(10),
  363. INT2FIX(3))));
  364. goto matched;
  365. }
  366. case 'R':
  367. recur("%H:%M");
  368. goto matched;
  369. case 'r':
  370. recur("%I:%M:%S %p");
  371. goto matched;
  372. case 'S':
  373. {
  374. VALUE n;
  375. READ_DIGITS(n, 2);
  376. if (!valid_range_p(n, 0, 60))
  377. fail();
  378. set_hash("sec", n);
  379. goto matched;
  380. }
  381. case 's':
  382. {
  383. VALUE n;
  384. int sign = 1;
  385. if (str[si] == '-') {
  386. sign = -1;
  387. si++;
  388. }
  389. READ_DIGITS_MAX(n);
  390. if (sign == -1)
  391. n = f_negate(n);
  392. set_hash("seconds", n);
  393. goto matched;
  394. }
  395. case 'T':
  396. recur("%H:%M:%S");
  397. goto matched;
  398. case 'U':
  399. case 'W':
  400. {
  401. VALUE n;
  402. READ_DIGITS(n, 2);
  403. if (!valid_range_p(n, 0, 53))
  404. fail();
  405. set_hash(c == 'U' ? "wnum0" : "wnum1", n);
  406. goto matched;
  407. }
  408. case 'u':
  409. {
  410. VALUE n;
  411. READ_DIGITS(n, 1);
  412. if (!valid_range_p(n, 1, 7))
  413. fail();
  414. set_hash("cwday", n);
  415. goto matched;
  416. }
  417. case 'V':
  418. {
  419. VALUE n;
  420. READ_DIGITS(n, 2);
  421. if (!valid_range_p(n, 1, 53))
  422. fail();
  423. set_hash("cweek", n);
  424. goto matched;
  425. }
  426. case 'v':
  427. recur("%e-%b-%Y");
  428. goto matched;
  429. case 'w':
  430. {
  431. VALUE n;
  432. READ_DIGITS(n, 1);
  433. if (!valid_range_p(n, 0, 6))
  434. fail();
  435. set_hash("wday", n);
  436. goto matched;
  437. }
  438. case 'X':
  439. recur("%H:%M:%S");
  440. goto matched;
  441. case 'x':
  442. recur("%m/%d/%y");
  443. goto matched;
  444. case 'Y':
  445. {
  446. VALUE n;
  447. int sign = 1;
  448. if (issign(str[si])) {
  449. if (str[si] == '-')
  450. sign = -1;
  451. si++;
  452. }
  453. if (NUM_PATTERN_P())
  454. READ_DIGITS(n, 4)
  455. else
  456. READ_DIGITS_MAX(n)
  457. if (sign == -1)
  458. n = f_negate(n);
  459. set_hash("year", n);
  460. goto matched;
  461. }
  462. case 'y':
  463. {
  464. VALUE n;
  465. int sign = 1;
  466. READ_DIGITS(n, 2);
  467. if (!valid_range_p(n, 0, 99))
  468. fail();
  469. if (sign == -1)
  470. n = f_negate(n);
  471. set_hash("year", n);
  472. set_hash("_cent",
  473. INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
  474. goto matched;
  475. }
  476. case 'Z':
  477. case 'z':
  478. {
  479. static const char pat_source[] =
  480. "\\A("
  481. "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
  482. "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b"
  483. "|[[:alpha:]]+(?:\\s+dst)?\\b"
  484. ")";
  485. static VALUE pat = Qnil;
  486. VALUE m, b;
  487. if (NIL_P(pat)) {
  488. pat = rb_reg_new(pat_source, sizeof pat_source - 1,
  489. ONIG_OPTION_IGNORECASE);
  490. rb_gc_register_mark_object(pat);
  491. }
  492. b = rb_backref_get();
  493. rb_match_busy(b);
  494. m = f_match(pat, rb_usascii_str_new2(&str[si]));
  495. if (!NIL_P(m)) {
  496. VALUE s, l, o;
  497. s = rb_reg_nth_match(1, m);
  498. l = f_end(m, INT2FIX(0));
  499. o = date_zone_to_diff(s);
  500. si += NUM2LONG(l);
  501. set_hash("zone", s);
  502. set_hash("offset", o);
  503. rb_backref_set(b);
  504. goto matched;
  505. }
  506. rb_backref_set(b);
  507. fail();
  508. }
  509. case '%':
  510. if (str[si] != '%')
  511. fail();
  512. si++;
  513. goto matched;
  514. case '+':
  515. recur("%a %b %e %H:%M:%S %Z %Y");
  516. goto matched;
  517. default:
  518. if (str[si] != '%')
  519. fail();
  520. si++;
  521. if (fi < flen)
  522. if (str[si] != fmt[fi])
  523. fail();
  524. si++;
  525. goto matched;
  526. }
  527. case ' ':
  528. case '\t':
  529. case '\n':
  530. case '\v':
  531. case '\f':
  532. case '\r':
  533. while (isspace(str[si]))
  534. si++;
  535. fi++;
  536. break;
  537. default:
  538. ordinal:
  539. if (str[si] != fmt[fi])
  540. fail();
  541. si++;
  542. fi++;
  543. break;
  544. matched:
  545. fi++;
  546. break;
  547. }
  548. }
  549. {
  550. VALUE s;
  551. if (slen > si) {
  552. s = rb_usascii_str_new(&str[si], slen - si);
  553. set_hash("leftover", s);
  554. }
  555. }
  556. return si;
  557. }
  558. VALUE
  559. date__strptime(const char *str, size_t slen,
  560. const char *fmt, size_t flen, VALUE hash)
  561. {
  562. VALUE cent, merid;
  563. date__strptime_internal(str, slen, fmt, flen, hash);
  564. if (fail_p())
  565. return Qnil;
  566. cent = ref_hash("_cent");
  567. if (!NIL_P(cent)) {
  568. VALUE year;
  569. year = ref_hash("cwyear");
  570. if (!NIL_P(year))
  571. set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
  572. year = ref_hash("year");
  573. if (!NIL_P(year))
  574. set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
  575. del_hash("_cent");
  576. }
  577. merid = ref_hash("_merid");
  578. if (!NIL_P(merid)) {
  579. VALUE hour;
  580. hour = ref_hash("hour");
  581. if (!NIL_P(hour)) {
  582. hour = f_mod(hour, INT2FIX(12));
  583. set_hash("hour", f_add(hour, merid));
  584. }
  585. del_hash("_merid");
  586. }
  587. return hash;
  588. }
  589. /*
  590. Local variables:
  591. c-file-style: "ruby"
  592. End:
  593. */