PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/src/ssfossil/fossil/src/encode.c

https://github.com/paulfitz/coopy
C | 510 lines | 344 code | 30 blank | 136 comment | 68 complexity | ef7ee51f91804f77afceed7a2133b077 MD5 | raw file
  1. /*
  2. ** Copyright (c) 2006 D. Richard Hipp
  3. **
  4. ** This program is free software; you can redistribute it and/or
  5. ** modify it under the terms of the Simplified BSD License (also
  6. ** known as the "2-Clause License" or "FreeBSD License".)
  7. ** This program is distributed in the hope that it will be useful,
  8. ** but without any warranty; without even the implied warranty of
  9. ** merchantability or fitness for a particular purpose.
  10. **
  11. ** Author contact information:
  12. ** drh@hwaci.com
  13. ** http://www.hwaci.com/drh/
  14. **
  15. *******************************************************************************
  16. **
  17. ** Routines for encoding and decoding text.
  18. */
  19. #include "config.h"
  20. #include "encode.h"
  /*
  ** Make the given string safe for HTML by converting every "<" into "&lt;",
  ** every ">" into "&gt;", every "&" into "&amp;" and every double-quote
  ** into "&quot;" (so that the result can appear as an argument to markup).
  **
  ** At most n bytes of zIn are examined and processing also stops at the
  ** first NUL byte.  If n is negative, strlen(zIn) is used instead.  A null
  ** zIn is treated as an empty string.
  **
  ** Return a pointer to a new NUL-terminated string obtained from malloc(),
  ** or 0 if the allocation fails.  The caller must free() the result.
  */
  char *htmlize(const char *zIn, int n){
    int c;
    int i;
    int count = 0;
    char *zOut;

    if( zIn==0 ){
      n = 0;
    }else if( n<0 ){
      n = (int)strlen(zIn);
    }

    /* Pass 1: compute the size of the escaped text. */
    for(i=0; i<n && (c = zIn[i])!=0; i++){
      switch( c ){
        case '<':
        case '>':  count += 4;  break;
        case '&':  count += 5;  break;
        case '"':  count += 6;  break;
        default:   count++;     break;
      }
    }
    zOut = malloc( count+1 );
    if( zOut==0 ) return 0;

    /* Pass 2: write the escaped text.  The entity strings used here must
    ** have exactly the lengths counted in pass 1. */
    count = 0;
    for(i=0; i<n && (c = zIn[i])!=0; i++){
      const char *zEntity;
      switch( c ){
        case '<':  zEntity = "&lt;";    break;
        case '>':  zEntity = "&gt;";    break;
        case '&':  zEntity = "&amp;";   break;
        case '"':  zEntity = "&quot;";  break;
        default:   zEntity = 0;         break;
      }
      if( zEntity ){
        while( *zEntity ) zOut[count++] = *zEntity++;
      }else{
        zOut[count++] = (char)c;
      }
    }
    zOut[count] = 0;
    return zOut;
  }
  /*
  ** Encode a string for HTTP.  Letters, digits and the characters ".", "$",
  ** "~", "-" and "_" are copied unchanged, each space becomes "+", and
  ** every other byte is written as "%HH" where HH is the byte value as two
  ** upper-case hex digits.  The "/" character is copied unchanged when
  ** encodeSlash is zero and is written as "%2F" otherwise.
  **
  ** At most n bytes of zIn are examined and processing also stops at the
  ** first NUL byte.  If n is negative, strlen(zIn) is used instead.
  **
  ** Return a new string obtained from malloc(), or 0 if zIn is a null
  ** pointer or the allocation fails.
  **
  ** This is the opposite of dehttpize() below.
  */
  static char *EncodeHttp(const char *zIn, int n, int encodeSlash){
    static const char zHex[] = "0123456789ABCDEF";
    /* Bytes are read as unsigned char: handing a negative value (a high-bit
    ** byte where plain char is signed) to isalnum() is undefined behavior. */
    const unsigned char *zByte = (const unsigned char*)zIn;
    int c;
    int i;
    int count = 0;
    char *zOut;
    int other;
  # define IsSafeChar(X)  \
       (isalnum((unsigned char)(X)) || (X)=='.' || (X)=='$' \
        || (X)=='~' || (X)=='-' || (X)=='_' || (X)==other)

    if( zIn==0 ) return 0;
    if( n<0 ) n = (int)strlen(zIn);

    /* "other" is one extra character to pass through unchanged.  When "/"
    ** must be encoded it is set to 'a', which is already safe, so that the
    ** extra comparison has no effect. */
    other = encodeSlash ? 'a' : '/';

    /* Pass 1: compute the size of the encoded text. */
    for(i=0; i<n && (c = zByte[i])!=0; i++){
      count += ( IsSafeChar(c) || c==' ' ) ? 1 : 3;
    }
    zOut = malloc( count+1 );
    if( zOut==0 ) return 0;

    /* Pass 2: write the encoded text. */
    count = 0;
    for(i=0; i<n && (c = zByte[i])!=0; i++){
      if( IsSafeChar(c) ){
        zOut[count++] = (char)c;
      }else if( c==' ' ){
        zOut[count++] = '+';
      }else{
        zOut[count++] = '%';
        zOut[count++] = zHex[(c>>4)&0xf];
        zOut[count++] = zHex[c&0xf];
      }
    }
    zOut[count] = 0;
    return zOut;
  }
  /*
  ** Return a copy of the first n bytes of z (or of all of z when n is
  ** negative) rewritten so that it can be used as a token in the HTTP
  ** protocol.  Spaces become '+' and special characters become "%HH",
  ** where HH is the two-digit hexadecimal value of the character.
  ** The "/" character is encoded too, as "%2F".
  **
  ** All of the work is done by EncodeHttp().
  */
  char *httpize(const char *z, int n){
    const int encodeSlash = 1;    /* "/" must come out as "%2F" */
    return EncodeHttp(z, n, encodeSlash);
  }
  /*
  ** Return a copy of the first n bytes of z (or of all of z when n is
  ** negative) rewritten so that it can be used as a token in the HTTP
  ** protocol.  Spaces become '+' and special characters become "%HH",
  ** where HH is the two-digit hexadecimal value of the character.
  ** Unlike httpize(), this routine leaves the "/" character unencoded.
  **
  ** All of the work is done by EncodeHttp().
  */
  char *urlize(const char *z, int n){
    const int encodeSlash = 0;    /* "/" passes through unchanged */
    return EncodeHttp(z, n, encodeSlash);
  }
  /*
  ** Convert a single HEX digit to an integer.  Upper and lower case
  ** letters are both accepted.  A character that is not a hex digit
  ** converts to 0.
  */
  static int AsciiToHex(int c){
    if( c>='0' && c<='9' ) return c - '0';
    if( c>='a' && c<='f' ) return c - 'a' + 10;
    if( c>='A' && c<='F' ) return c - 'A' + 10;
    return 0;
  }
  /*
  ** Remove the HTTP encodings from a string: each "+" becomes a space and
  ** each "%HH" becomes the single byte whose hex value is HH.  A "%" that
  ** is not followed by at least two more characters is copied through
  ** unchanged.  The conversion is done in-place.
  **
  ** Return the length of the string after conversion.  A null pointer is
  ** treated as a zero-length string.
  */
  int dehttpize(char *z){
    int i, j;

    if( !z ) return 0;
    for(i=j=0; z[i]; i++, j++){
      switch( z[i] ){
        case '%':
          if( z[i+1] && z[i+2] ){
            z[j] = (char)((AsciiToHex(z[i+1])<<4) | AsciiToHex(z[i+2]));
            i += 2;
          }else{
            /* Truncated escape at the end of the input.  Something must
            ** be stored here because j advances regardless; otherwise
            ** z[j] would keep a stale byte left over from the input
            ** whenever earlier escapes have made j fall behind i. */
            z[j] = '%';
          }
          break;
        case '+':
          z[j] = ' ';
          break;
        default:
          z[j] = z[i];
          break;
      }
    }
    z[j] = 0;
    return j;
  }
  /*
  ** Return the letter that follows the backslash in the fossilize encoding
  ** of byte c, or 0 if c is passed through unchanged.
  */
  static int fossilEscapeCode(int c){
    switch( c ){
      case 0:     return '0';
      case ' ':   return 's';
      case '\t':  return 't';
      case '\n':  return 'n';
      case '\r':  return 'r';
      case '\f':  return 'f';
      case '\v':  return 'v';
      case '\\':  return '\\';
      default:    return 0;
    }
  }
  /*
  ** The "fossilize" encoding is used in the headers of records
  ** (aka "content files") to escape special characters.  The
  ** fossilize encoding passes most characters through unchanged.
  ** The changes are these:
  **
  **        space    ->   \s
  **        tab      ->   \t
  **        newline  ->   \n
  **        cr       ->   \r
  **        formfeed ->   \f
  **        vtab     ->   \v
  **        nul      ->   \0
  **        \        ->   \\
  **
  ** The fossilize() routine encodes the first nIn bytes of zIn, including
  ** any embedded NUL bytes.  If nIn is negative, strlen(zIn) is used
  ** instead.  A null zIn is treated as empty input.
  **
  ** Return a pointer to the NUL-terminated encoding in space obtained from
  ** malloc, or 0 if the allocation fails.
  */
  char *fossilize(const char *zIn, int nIn){
    int n, i, j;
    char *zOut;

    if( zIn==0 ){
      nIn = 0;
    }else if( nIn<0 ){
      nIn = (int)strlen(zIn);
    }

    /* Pass 1: every escaped byte needs one extra byte of output. */
    for(i=0, n=nIn; i<nIn; i++){
      if( fossilEscapeCode(zIn[i]) ) n++;
    }
    zOut = malloc( n+1 );
    if( zOut==0 ) return 0;

    /* Pass 2: both passes must agree on exactly which bytes are escaped, so
    ** both use fossilEscapeCode().  A locale-dependent test such as
    ** isspace() here could accept bytes that pass 1 did not count and
    ** write past the end of zOut. */
    for(i=j=0; i<nIn; i++){
      int esc = fossilEscapeCode(zIn[i]);
      if( esc ){
        zOut[j++] = '\\';
        zOut[j++] = (char)esc;
      }else{
        zOut[j++] = zIn[i];
      }
    }
    zOut[j] = 0;
    return zOut;
  }
  /*
  ** Decode a fossilized string in-place.
  **
  ** A backslash followed by n, s, t, r, v, f or 0 becomes newline, space,
  ** tab, carriage return, vertical tab, formfeed or NUL respectively.  A
  ** backslash followed by any other character becomes that character.  A
  ** backslash that is the last character of the string is kept as is.
  */
  void defossilize(char *z){
    const char *zRead = z;     /* Next byte of encoded input */
    char *zWrite = z;          /* Where the next decoded byte goes */

    while( *zRead ){
      char ch = *zRead++;
      if( ch=='\\' && *zRead ){
        char esc = *zRead++;
        if( esc=='n' ){
          ch = '\n';
        }else if( esc=='s' ){
          ch = ' ';
        }else if( esc=='t' ){
          ch = '\t';
        }else if( esc=='r' ){
          ch = '\r';
        }else if( esc=='v' ){
          ch = '\v';
        }else if( esc=='f' ){
          ch = '\f';
        }else if( esc=='0' ){
          ch = 0;
        }else{
          ch = esc;            /* Covers "\\" and any unknown escape */
        }
      }
      *zWrite++ = ch;
    }
    /* Store the terminator only if one is not already there, so that a
    ** string which contained no escapes is never written to at this point. */
    if( *zWrite ) *zWrite = 0;
  }
  /*
  ** The characters used for HTTP base64 encoding.
  */
  static unsigned char zBase[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  /*
  ** Encode the first nData bytes of zData using base-64, padding the
  ** output with "=" to a multiple of four characters.  If nData is zero
  ** or negative, strlen(zData) is used as the input length.
  ** The encoding can be reversed using the decode64() function.
  **
  ** Space to hold the NUL-terminated result comes from malloc().  Returns
  ** 0 if the allocation fails.
  */
  char *encode64(const char *zData, int nData){
    /* Bytes are read as unsigned char.  Shifting a plain char that happens
    ** to be negative (any byte with the high bit set, where char is signed)
    ** to the left is undefined behavior. */
    const unsigned char *zByte = (const unsigned char*)zData;
    char *z64;
    int i, n;

    if( nData<=0 ){
      nData = (int)strlen(zData);
    }
    z64 = malloc( (nData*4)/3 + 8 );
    if( z64==0 ) return 0;

    /* Each full group of 3 input bytes yields 4 output digits. */
    for(i=n=0; i+2<nData; i+=3){
      z64[n++] = zBase[ zByte[i]>>2 ];
      z64[n++] = zBase[ ((zByte[i]<<4) & 0x30) | (zByte[i+1]>>4) ];
      z64[n++] = zBase[ ((zByte[i+1]<<2) & 0x3c) | (zByte[i+2]>>6) ];
      z64[n++] = zBase[ zByte[i+2] & 0x3f ];
    }

    /* One or two leftover bytes yield 2 or 3 digits plus "=" padding. */
    if( i+1<nData ){
      z64[n++] = zBase[ zByte[i]>>2 ];
      z64[n++] = zBase[ ((zByte[i]<<4) & 0x30) | (zByte[i+1]>>4) ];
      z64[n++] = zBase[ (zByte[i+1]<<2) & 0x3c ];
      z64[n++] = '=';
    }else if( i<nData ){
      z64[n++] = zBase[ zByte[i]>>2 ];
      z64[n++] = zBase[ (zByte[i]<<4) & 0x30 ];
      z64[n++] = '=';
      z64[n++] = '=';
    }
    z64[n] = 0;
    return z64;
  }
  318. /*
  319. ** COMMAND: test-encode64
  320. ** Usage: %fossil test-encode64 STRING
  321. */
  322. void test_encode64_cmd(void){
  323. char *z;
  324. int i;
  325. for(i=2; i<g.argc; i++){
  326. z = encode64(g.argv[i], -1);
  327. printf("%s\n", z);
  328. free(z);
  329. }
  330. }
  /*
  ** Return the 6-bit value of the base-64 digit c, or -1 if c is not a
  ** base-64 digit.  The range tests assume an ASCII-compatible character
  ** set.
  */
  static int base64Value(int c){
    if( c>='A' && c<='Z' ) return c - 'A';
    if( c>='a' && c<='z' ) return c - 'a' + 26;
    if( c>='0' && c<='9' ) return c - '0' + 52;
    if( c=='+' ) return 62;
    if( c=='/' ) return 63;
    return -1;
  }
  /*
  ** This function treats its input as a base-64 string and returns the
  ** decoded value of that string.  Characters of input that are not
  ** valid base-64 characters (such as spaces and newlines) are ignored.
  ** Decoding stops at the first "=" padding character.
  **
  ** Space to hold the decoded string is obtained from malloc().  A NUL
  ** byte is appended after the decoded data.  Returns 0 if the allocation
  ** fails.
  **
  ** The number of bytes decoded is returned in *pnByte, unless pnByte is
  ** a null pointer.
  */
  char *decode64(const char *z64, int *pnByte){
    char *zData;
    int n64;
    int i, j;
    int aDigit[4];              /* Digits collected for the current group */
    int nDigit = 0;             /* Number of entries used in aDigit[] */

    n64 = (int)strlen(z64);
    /* No more than n64 digits are decoded, so this is always big enough. */
    zData = malloc( (n64*3)/4 + 4 );
    if( zData==0 ){
      if( pnByte ) *pnByte = 0;
      return 0;
    }
    for(i=j=0; i<n64 && z64[i]!='='; i++){
      int v = base64Value(z64[i]);
      if( v<0 ) continue;       /* Skip anything that is not a digit */
      aDigit[nDigit++] = v;
      if( nDigit==4 ){
        /* Four 6-bit digits make three bytes. */
        zData[j++] = (char)((aDigit[0]<<2) | (aDigit[1]>>4));
        zData[j++] = (char)(((aDigit[1]<<4) & 0xf0) | (aDigit[2]>>2));
        zData[j++] = (char)(((aDigit[2]<<6) & 0xc0) | aDigit[3]);
        nDigit = 0;
      }
    }

    /* A final group of three digits carries two bytes and a final group of
    ** two digits carries one byte.  A single leftover digit carries no
    ** complete byte and is dropped. */
    if( nDigit==3 ){
      zData[j++] = (char)((aDigit[0]<<2) | (aDigit[1]>>4));
      zData[j++] = (char)(((aDigit[1]<<4) & 0xf0) | (aDigit[2]>>2));
    }else if( nDigit==2 ){
      zData[j++] = (char)((aDigit[0]<<2) | (aDigit[1]>>4));
    }
    zData[j] = 0;
    if( pnByte ) *pnByte = j;
    return zData;
  }
  379. /*
  380. ** COMMAND: test-decode64
  381. ** Usage: %fossil test-decode64 STRING
  382. */
  383. void test_decode64_cmd(void){
  384. char *z;
  385. int i, n;
  386. for(i=2; i<g.argc; i++){
  387. z = decode64(g.argv[i], &n);
  388. printf("%d: %s\n", n, z);
  389. free(z);
  390. }
  391. }
  /*
  ** The base-16 encoding uses the following characters:
  **
  **         0123456789abcdef
  **
  */
  /*
  ** The array used for encoding.  Entry K is the digit for the 4-bit
  ** value K.
  */
  static const char zEncode[] = "0123456789abcdef";
  /*
  ** Encode the N bytes at pIn as 2*N lower-case base-16 digits written to
  ** zOut, followed by a NUL terminator.  The high four bits of each byte
  ** are written first.  This routine always returns zero.
  */
  int encode16(const unsigned char *pIn, unsigned char *zOut, int N){
    int k;
    for(k=0; k<N; k++, zOut+=2){
      zOut[0] = zEncode[pIn[k]>>4];
      zOut[1] = zEncode[pIn[k]&0xf];
    }
    *zOut = 0;
    return 0;
  }
  /*
  ** Lookup table giving the value of each character when it is read as a
  ** base-16 digit.  Characters that may not appear have the value 64.
  ** Upper and lower case letters give the same result.  Besides the usual
  ** hex digits, the letters "I" and "L" have the value 1 and the letter
  ** "O" has the value 0.
  */
  static const char zDecode[] = {
    /* 0x00 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x10 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x20 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 64, 64, 64, 64, 64, 64,
    /* 0x40 */ 64, 10, 11, 12, 13, 14, 15, 64, 64,  1, 64, 64,  1, 64, 64,  0,
    /* 0x50 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x60 */ 64, 10, 11, 12, 13, 14, 15, 64, 64,  1, 64, 64,  1, 64, 64,  0,
    /* 0x70 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x80 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0x90 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xa0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xb0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xc0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xd0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xe0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    /* 0xf0 */ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  };
  /*
  ** Decode the N base-16 digits at zIn into N/2 bytes stored at pOut.  The
  ** first digit of each pair supplies the high four bits of the byte.  The
  ** output buffer must have room for at least N/2 bytes.
  **
  ** Return zero on success.  Return 1 if N is odd or as soon as a
  ** character that is not an allowed digit is found; bytes decoded before
  ** that point have already been stored in pOut.
  */
  int decode16(const unsigned char *zIn, unsigned char *pOut, int N){
    int k;
    if( N & 1 ) return 1;
    for(k=0; k<N; k+=2){
      int hi, lo;
      unsigned char ch;

      ch = zIn[k];
      if( ch>=0x80 ) return 1;
      hi = zDecode[ch];
      if( hi==64 ) return 1;

      ch = zIn[k+1];
      if( ch>=0x80 ) return 1;
      lo = zDecode[ch];
      if( lo==64 ) return 1;

      pOut[k/2] = (hi<<4) + lo;
    }
    return 0;
  }
  456. /*
  457. ** Return true if the input string contains only valid base-16 digits.
  458. ** If any invalid characters appear in the string, return false.
  459. */
  460. int validate16(const char *zIn, int nIn){
  461. int i;
  462. for(i=0; i<nIn; i++, zIn++){
  463. if( zDecode[zIn[0]&0xff]>63 ){
  464. return zIn[0]==0;
  465. }
  466. }
  467. return 1;
  468. }
  469. /*
  470. ** The input string is a base16 value. Convert it into its canonical
  471. ** form. This means that digits are all lower case and that conversions
  472. ** like "l"->"1" and "O"->"0" occur.
  473. */
  474. void canonical16(char *z, int n){
  475. while( *z && n-- ){
  476. *z = zEncode[zDecode[(*z)&0x7f]&0x1f];
  477. z++;
  478. }
  479. }