PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/inet.c

https://github.com/leia/dfsch
C | 659 lines | 555 code | 84 blank | 20 comment | 115 complexity | 78c0b04bc0ccd5e037dc7282d4d3e5bc MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. * dfsch - dfox's quick and dirty scheme implementation
  3. * Internet data handling
  4. * Copyright (C) 2005-2008 Ales Hakl
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. *
  20. */
  21. #include "dfsch/lib/inet.h"
  22. #include <dfsch/strings.h>
  23. #include <dfsch/hash.h>
  24. #include <string.h>
  25. static dfsch_object_t* urldecode_to_string(char* buf, size_t len){
  26. dfsch_strbuf_t sb;
  27. sb.ptr = buf;
  28. sb.len = len;
  29. return dfsch_make_string_nocopy(dfsch_inet_urldecode(&sb));
  30. }
  31. dfsch_object_t* dfsch_http_split_path(char* path){
  32. char* pos;
  33. dfsch_object_t* head;
  34. dfsch_object_t* tail;
  35. dfsch_object_t* tmp;
  36. head = tail = NULL;
  37. while (1){
  38. while (*path == '/'){
  39. path++;
  40. }
  41. if (*path == 0){
  42. tmp = dfsch_cons(NULL, NULL);
  43. if (tail){
  44. dfsch_set_cdr(tail, tmp);
  45. tail = tmp;
  46. } else {
  47. head = tail = tmp;
  48. }
  49. break;
  50. } else {
  51. pos = strchr(path, '/');
  52. if (!pos){
  53. tmp = dfsch_cons(urldecode_to_string(path, strlen(path)), NULL);
  54. if (tail){
  55. dfsch_set_cdr(tail, tmp);
  56. tail = tmp;
  57. } else {
  58. head = tail = tmp;
  59. }
  60. break;
  61. } else {
  62. tmp = dfsch_cons(urldecode_to_string(path, pos - path), NULL);
  63. path = pos;
  64. if (tail){
  65. dfsch_set_cdr(tail, tmp);
  66. tail = tmp;
  67. } else {
  68. head = tail = tmp;
  69. }
  70. }
  71. }
  72. }
  73. return head;
  74. }
  75. dfsch_object_t* dfsch_http_query_2_hash(char* query){
  76. size_t delim;
  77. char* value;
  78. dfsch_object_t* hash;
  79. hash = dfsch_hash_make(DFSCH_HASH_EQUAL);
  80. while (*query){
  81. query += strspn(query, "&;");
  82. delim = strcspn(query, "&;");
  83. value = memchr(query, '=', delim);
  84. if (value){
  85. value++;
  86. dfsch_hash_set(hash,
  87. urldecode_to_string(query, value-query-1),
  88. urldecode_to_string(value, (query+delim) - value));
  89. } else {
  90. dfsch_hash_set(hash,
  91. urldecode_to_string(query, delim),
  92. NULL);
  93. }
  94. query += delim;
  95. }
  96. return hash;
  97. }
  98. dfsch_object_t* dfsch_http_query_2_alist(char* query){
  99. size_t delim;
  100. char* value;
  101. dfsch_object_t* tmp;
  102. dfsch_object_t* head = NULL;
  103. dfsch_object_t* tail;
  104. while (*query){
  105. query += strspn(query, "&;");
  106. delim = strcspn(query, "&;");
  107. value = memchr(query, '=', delim);
  108. if (value){
  109. value++;
  110. tmp = dfsch_cons(dfsch_list(2,
  111. urldecode_to_string(query,
  112. value - query - 1),
  113. urldecode_to_string(value,
  114. (query+delim) - value)),
  115. NULL);
  116. } else {
  117. tmp = dfsch_cons(dfsch_list(1,
  118. urldecode_to_string(query, delim)),
  119. NULL);
  120. }
  121. if (head){
  122. dfsch_set_cdr(tail, tmp);
  123. tail = tmp;
  124. } else {
  125. head = tail = tmp;
  126. }
  127. query += delim;
  128. }
  129. return head;
  130. }
  131. static int xdigit_to_num(char digit){
  132. switch(digit){
  133. case '0':
  134. return 0;
  135. case '1':
  136. return 1;
  137. case '2':
  138. return 2;
  139. case '3':
  140. return 3;
  141. case '4':
  142. return 4;
  143. case '5':
  144. return 5;
  145. case '6':
  146. return 6;
  147. case '7':
  148. return 7;
  149. case '8':
  150. return 8;
  151. case '9':
  152. return 9;
  153. case 'A':
  154. case 'a':
  155. return 10;
  156. case 'B':
  157. case 'b':
  158. return 11;
  159. case 'C':
  160. case 'c':
  161. return 12;
  162. case 'D':
  163. case 'd':
  164. return 13;
  165. case 'E':
  166. case 'e':
  167. return 14;
  168. case 'F':
  169. case 'f':
  170. return 15;
  171. default:
  172. return 255;
  173. }
  174. }
  175. dfsch_strbuf_t* dfsch_inet_urldecode(dfsch_strbuf_t* strbuf){
  176. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  177. size_t i;
  178. char* out;
  179. res->len = 0;
  180. i = 0;
  181. while (i < strbuf->len){
  182. if (strbuf->ptr[i] == '%'){
  183. i++;
  184. res->len++;
  185. if (i < strbuf->len && isxdigit(strbuf->ptr[i])){
  186. i++;
  187. if (i < strbuf->len && isxdigit(strbuf->ptr[i])){
  188. i++;
  189. } else {
  190. i--;
  191. }
  192. }
  193. } else {
  194. res->len++;
  195. i++;
  196. }
  197. }
  198. res->ptr = out = GC_MALLOC_ATOMIC(res->len+1);
  199. i = 0;
  200. while (i < strbuf->len){
  201. if (strbuf->ptr[i] == '%'){
  202. i++;
  203. if (i < strbuf->len && isxdigit(strbuf->ptr[i])){
  204. *out = xdigit_to_num(strbuf->ptr[i]) << 4;
  205. i++;
  206. if (i < strbuf->len && isxdigit(strbuf->ptr[i])){
  207. *out |= xdigit_to_num(strbuf->ptr[i]);
  208. i++;
  209. out++;
  210. } else {
  211. i--;
  212. }
  213. } else {
  214. *out++ = '%';
  215. }
  216. } else if (strbuf->ptr[i] == '+'){
  217. i++;
  218. *out++ = ' ';
  219. } else {
  220. *out++ = strbuf->ptr[i++];
  221. }
  222. }
  223. *out = 0;
  224. return res;
  225. }
  226. static char hex_digits[] = "0123456789abcdef";
  227. dfsch_strbuf_t* dfsch_inet_urlencode(dfsch_strbuf_t* strbuf){
  228. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  229. size_t i;
  230. char* out;
  231. i = 0;
  232. while (i < strbuf->len){
  233. if (isalpha(strbuf->ptr[i]) ||
  234. isdigit(strbuf->ptr[i]) ||
  235. strchr("-_.~", strbuf->ptr[i])){
  236. res->len++;
  237. i++;
  238. } else if (strbuf->ptr[i] == ' ') {
  239. res->len++;
  240. i++;
  241. } else {
  242. res->len+=3;
  243. i++;
  244. }
  245. }
  246. res->ptr = out = GC_MALLOC_ATOMIC(res->len + 1);
  247. i = 0;
  248. while (i < strbuf->len){
  249. if (isalpha(strbuf->ptr[i]) ||
  250. isdigit(strbuf->ptr[i]) ||
  251. strchr("-_.~", strbuf->ptr[i])){
  252. *out++ = strbuf->ptr[i++];
  253. } else if (strbuf->ptr[i] == ' ') {
  254. *out++ = '+';
  255. i++;
  256. }else {
  257. *out++ = '%';
  258. *out++ = hex_digits[((unsigned char)strbuf->ptr[i]) >> 4];
  259. *out++ = hex_digits[strbuf->ptr[i] & 0xf];
  260. i++;
  261. }
  262. }
  263. return res;
  264. }
  265. static char base64_chars[] =
  266. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  267. static int base64char_value(char ch){
  268. if ((ch >= 'A') && (ch <= 'Z')){
  269. return ch - 'A';
  270. } else if ((ch >= 'a') && (ch <= 'z')){
  271. return ch - 'a' + 26;
  272. } else if ((ch >= '0') && (ch <= '9')){
  273. return ch - '0' + 52;
  274. } else if (ch == '+'){
  275. return 62;
  276. } else if (ch == '/'){
  277. return 63;
  278. }
  279. return -1;
  280. }
  281. static char ubase64_chars[] =
  282. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-";
  283. static int ubase64char_value(char ch){
  284. if ((ch >= 'A') && (ch <= 'Z')){
  285. return ch - 'A';
  286. } else if ((ch >= 'a') && (ch <= 'z')){
  287. return ch - 'a' + 26;
  288. } else if ((ch >= '0') && (ch <= '9')){
  289. return ch - '0' + 52;
  290. } else if (ch == '.'){
  291. return 62;
  292. } else if (ch == '-'){
  293. return 63;
  294. }
  295. return -1;
  296. }
  297. dfsch_strbuf_t* dfsch_inet_base64_decode(dfsch_strbuf_t* str_buf){
  298. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  299. size_t i;
  300. size_t valid;
  301. uint32_t tmp;
  302. int val;
  303. char* out;
  304. valid = 0;
  305. for (i = 0; i < str_buf->len; i++){
  306. if (base64char_value(str_buf->ptr[i]) >= 0){
  307. valid++;
  308. }
  309. }
  310. switch (valid % 4){
  311. case 0:
  312. case 1:
  313. res->len = valid / 4 * 3;
  314. break;
  315. case 2:
  316. res->len = valid / 4 * 3 + 1;
  317. break;
  318. case 3:
  319. res->len = valid / 4 * 3 + 2;
  320. break;
  321. }
  322. tmp = 0;
  323. valid = 0;
  324. res->ptr = out = GC_MALLOC_ATOMIC(res->len+1);
  325. for (i = 0; i < str_buf->len; i++){
  326. val = base64char_value(str_buf->ptr[i]);
  327. if (val >= 0){
  328. tmp <<= 6;
  329. tmp |= val;
  330. valid++;
  331. if (valid == 4){
  332. valid = 0;
  333. *out++ = (tmp >> 16) & 0xff;
  334. *out++ = (tmp >> 8) & 0xff;
  335. *out++ = (tmp) & 0xff;
  336. tmp = 0;
  337. }
  338. }
  339. }
  340. switch (valid){
  341. case 2:
  342. tmp <<= 12;
  343. *out++ = (tmp >> 16) & 0xff;
  344. case 3:
  345. tmp <<= 6;
  346. *out++ = (tmp >> 16) & 0xff;
  347. *out++ = (tmp >> 8) & 0xff;
  348. }
  349. *out = 0;
  350. return res;
  351. }
  352. dfsch_strbuf_t* dfsch_inet_base64_encode(dfsch_strbuf_t* str_buf,
  353. int wrap,
  354. int pad){
  355. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  356. size_t i;
  357. char* out;
  358. uint32_t tmp;
  359. switch (str_buf->len % 3){
  360. case 0:
  361. res->len = str_buf->len / 3 * 4;
  362. break;
  363. case 1:
  364. res->len = str_buf->len / 3 * 4 + (pad ? 4 : 2);
  365. break;
  366. case 2:
  367. res->len = str_buf->len / 3 * 4 + (pad ? 4 : 3);
  368. break;
  369. }
  370. if (wrap){
  371. res->len += (res->len / 64) * 2;
  372. }
  373. res->ptr = out = GC_MALLOC_ATOMIC(res->len+1);
  374. for (i = 0; i + 2 < str_buf->len; i+=3){
  375. tmp = ((((unsigned)str_buf->ptr[i]) & 0xff) << 16) |
  376. ((((unsigned)str_buf->ptr[i+1]) & 0xff) << 8) |
  377. (((unsigned)str_buf->ptr[i+2]) & 0xff);
  378. *out++ = base64_chars[(tmp >> 18) & 0x3f];
  379. *out++ = base64_chars[(tmp >> 12) & 0x3f];
  380. *out++ = base64_chars[(tmp >> 6) & 0x3f];
  381. *out++ = base64_chars[tmp & 0x3f];
  382. if (wrap && i % 48 == 45){
  383. *out++ = '\r';
  384. *out++ = '\n';
  385. }
  386. }
  387. switch (str_buf->len - i){
  388. case 1:
  389. tmp = ((((unsigned)str_buf->ptr[i]) & 0xff) << 16);
  390. *out++ = base64_chars[(tmp >> 18) & 0x3f];
  391. *out++ = base64_chars[(tmp >> 12) & 0x3f];
  392. if (pad){
  393. *out++ = '=';
  394. *out++ = '=';
  395. }
  396. break;
  397. case 2:
  398. tmp = ((((unsigned)str_buf->ptr[i]) & 0xff) << 16) |
  399. ((((unsigned)str_buf->ptr[i+1]) & 0xff)<< 8);
  400. *out++ = base64_chars[(tmp >> 18) & 0x3f];
  401. *out++ = base64_chars[(tmp >> 12) & 0x3f];
  402. *out++ = base64_chars[(tmp >> 6) & 0x3f];
  403. if (pad){
  404. *out++ = '=';
  405. }
  406. break;
  407. }
  408. *out = 0;
  409. return res;
  410. }
  411. dfsch_strbuf_t* dfsch_inet_uri_base64_decode(dfsch_strbuf_t* str_buf){
  412. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  413. size_t i;
  414. size_t valid;
  415. uint32_t tmp;
  416. int val;
  417. char* out;
  418. valid = 0;
  419. for (i = 0; i < str_buf->len; i++){
  420. if (ubase64char_value(str_buf->ptr[i]) >= 0){
  421. valid++;
  422. }
  423. }
  424. switch (valid % 4){
  425. case 0:
  426. case 1:
  427. res->len = valid / 4 * 3;
  428. break;
  429. case 2:
  430. res->len = valid / 4 * 3 + 1;
  431. break;
  432. case 3:
  433. res->len = valid / 4 * 3 + 2;
  434. break;
  435. }
  436. tmp = 0;
  437. valid = 0;
  438. res->ptr = out = GC_MALLOC_ATOMIC(res->len+1);
  439. for (i = 0; i < str_buf->len; i++){
  440. val = ubase64char_value(str_buf->ptr[i]);
  441. if (val >= 0){
  442. tmp <<= 6;
  443. tmp |= val;
  444. valid++;
  445. if (valid == 4){
  446. valid = 0;
  447. *out++ = (tmp >> 16) & 0xff;
  448. *out++ = (tmp >> 8) & 0xff;
  449. *out++ = (tmp) & 0xff;
  450. }
  451. }
  452. }
  453. switch (valid){
  454. case 2:
  455. tmp <<= 12;
  456. *out++ = (tmp >> 16) & 0xff;
  457. case 3:
  458. tmp <<= 6;
  459. *out++ = (tmp >> 16) & 0xff;
  460. *out++ = (tmp >> 8) & 0xff;
  461. }
  462. *out = 0;
  463. return res;
  464. }
  465. dfsch_strbuf_t* dfsch_inet_uri_base64_encode(dfsch_strbuf_t* str_buf){
  466. dfsch_strbuf_t* res = GC_NEW(dfsch_strbuf_t);
  467. size_t i;
  468. char* out;
  469. uint32_t tmp;
  470. switch (str_buf->len % 3){
  471. case 0:
  472. res->len = str_buf->len / 3 * 4;
  473. break;
  474. case 1:
  475. res->len = str_buf->len / 3 * 4 + 2;
  476. break;
  477. case 2:
  478. res->len = str_buf->len / 3 * 4 + 3;
  479. break;
  480. }
  481. res->ptr = out = GC_MALLOC_ATOMIC(res->len+1);
  482. for (i = 0; i + 2 < str_buf->len; i+=3){
  483. tmp = ((((unsigned)str_buf->ptr[i]) & 0xff) << 16) |
  484. ((((unsigned)str_buf->ptr[i+1]) & 0xff) << 8) |
  485. (((unsigned)str_buf->ptr[i+2]) & 0xff);
  486. *out++ = ubase64_chars[(tmp >> 18) & 0x3f];
  487. *out++ = ubase64_chars[(tmp >> 12) & 0x3f];
  488. *out++ = ubase64_chars[(tmp >> 6) & 0x3f];
  489. *out++ = ubase64_chars[tmp & 0x3f];
  490. }
  491. switch (str_buf->len - i){
  492. case 1:
  493. tmp = (((unsigned)str_buf->ptr[i]) << 16) & 0xff;
  494. *out++ = ubase64_chars[(tmp >> 18) & 0x3f];
  495. *out++ = ubase64_chars[(tmp >> 12) & 0x3f];
  496. break;
  497. case 2:
  498. tmp = ((((unsigned)str_buf->ptr[i]) & 0xff) << 16) |
  499. ((((unsigned)str_buf->ptr[i+1]) & 0xff) << 8);
  500. *out++ = ubase64_chars[(tmp >> 18) & 0x3f];
  501. *out++ = ubase64_chars[(tmp >> 12) & 0x3f];
  502. *out++ = ubase64_chars[(tmp >> 6) & 0x3f];
  503. break;
  504. }
  505. *out = 0;
  506. return res;
  507. }
  508. char* dfsch_inet_xml_escape(char* str){
  509. char* res;
  510. size_t len;
  511. size_t i;
  512. char* out;
  513. len = 0;
  514. for (i = 0; str[i]; i++){
  515. switch (str[i]){
  516. case '<':
  517. case '>':
  518. len += 4;
  519. break;
  520. case '\"':
  521. case '\'':
  522. len += 6;
  523. break;
  524. case '&':
  525. len += 5;
  526. break;
  527. default:
  528. len++;
  529. }
  530. }
  531. res = out = GC_MALLOC_ATOMIC(len + 1);
  532. for (i = 0; str[i]; i++){
  533. switch (str[i]){
  534. case '<':
  535. *out++ = '&';
  536. *out++ = 'l';
  537. *out++ = 't';
  538. *out++ = ';';
  539. break;
  540. case '>':
  541. *out++ = '&';
  542. *out++ = 'g';
  543. *out++ = 't';
  544. *out++ = ';';
  545. break;
  546. case '\"':
  547. *out++ = '&';
  548. *out++ = 'q';
  549. *out++ = 'u';
  550. *out++ = 'o';
  551. *out++ = 't';
  552. *out++ = ';';
  553. break;
  554. case '\'':
  555. *out++ = '&';
  556. *out++ = 'a';
  557. *out++ = 'p';
  558. *out++ = 'o';
  559. *out++ = 's';
  560. *out++ = ';';
  561. break;
  562. case '&':
  563. *out++ = '&';
  564. *out++ = 'a';
  565. *out++ = 'm';
  566. *out++ = 'p';
  567. *out++ = ';';
  568. break;
  569. default:
  570. *out++ = str[i];
  571. }
  572. }
  573. *out = 0;
  574. return res;
  575. }