/src/tutorial/test url/parser.c.BASE.10308.c

https://github.com/guocongwudi/9315ass2 · C · 308 lines · 210 code · 68 blank · 30 comment · 106 complexity · b364e2e1a7a7511e3cb240ef55233c96 MD5 · raw file

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  4. typedef struct parsed_url {
  5. char *scheme;
  6. char *host;
  7. char *port;
  8. char *path;
  9. char *params;
  10. } Url;
  11. char *str_n_dup(char *, int);
  12. Url *parseURL(char *);
  13. Url *makeParsedURL();
  14. Url *freeParsedURL(Url *);
  15. char *str_n_dup(char *str, int n) {
  16. char *new = malloc(n + 1);
  17. if (new == NULL
  18. )
  19. return NULL;
  20. strncpy(new, str, n);
  21. new[n] = '\0';
  22. return new;
  23. }
  24. Url *parseURL(char *url) {
  25. char *c, *d;
  26. Url *purl = NULL;
  27. // trim trailing newline
  28. c = url;
  29. while (*c != '\0' && *c != '\n')
  30. c++;
  31. if (*c == '\n')
  32. *c = '\0';
  33. // create ParsedURL object
  34. if ((purl = makeParsedURL()) == NULL
  35. )
  36. return NULL;
  37. // start parse
  38. c = d = url;
  39. // find scheme component
  40. while (*d != '\0' && *d != ':')
  41. d++;
  42. // didn't find scheme
  43. if (*d == '\0')
  44. return freeParsedURL(purl);
  45. // copy scheme
  46. purl->scheme = str_n_dup(c, d - c);
  47. // must be "http" or "https"
  48. if (strcmp(purl->scheme, "http") != 0 && strcmp(purl->scheme, "https") != 0)
  49. return freeParsedURL(purl);
  50. // copy host
  51. if (*(d + 1) != '/' && *(d + 2) != '/')
  52. return freeParsedURL(purl);
  53. c = d = d + 3; // skip over '//'
  54. //skip other/
  55. while (*d == '/')
  56. d++;
  57. c = d;
  58. while (*d != '\0' && *d != ':' && *d != '/')
  59. d++;
  60. if (*d == '\0')
  61. return freeParsedURL(purl);
  62. purl->host = str_n_dup(c, d - c);
  63. // must contain at least one dot
  64. if (strchr(purl->host, '.') == NULL
  65. )
  66. return freeParsedURL(purl);
  67. // copy port, if any
  68. if (*d == ':') {
  69. c = d = d + 1; // skip over ':'
  70. while (*d != '\0' && *d != '/')
  71. d++;
  72. purl->port = str_n_dup(c, d - c);
  73. }
  74. // else {
  75. // if (strcmp(purl->scheme, "http") == 0)
  76. // purl->port = "80";
  77. //
  78. // else
  79. // purl->port = "403";
  80. //
  81. // }
  82. // **************merge*****************//
  83. // default port
  84. if (purl->port == NULL) {
  85. if (strcmp(purl->scheme, "http") == 0) {
  86. purl->port = malloc(3);
  87. strcpy(purl->port, "80");
  88. purl->port[2] = '\0';
  89. } else {
  90. purl->port = malloc(4);
  91. strcpy(purl->port, "443");
  92. purl->port[3] = '\0';
  93. }
  94. }
  95. // if url end with / and final component is not path, treat it as invalid
  96. if (*d == '/' && *d + 1 == '\0')
  97. return freeParsedURL(purl);
  98. // ****************merge************//
  99. // copy path, if any
  100. if (*d != '\0') {
  101. c = d = d + 1; // skip over '/'
  102. //skip other/
  103. while (*d == '/')
  104. d++;
  105. c = d;
  106. if (*d != '\0') {
  107. while (*d != '\0' && *d != '?')
  108. d++;
  109. purl->path = str_n_dup(c, d - c);
  110. }
  111. }
  112. // ********************merge**********************//
  113. // default path
  114. if(purl->path == NULL) {
  115. purl->path = malloc(11);
  116. strcpy(purl->path, "index.html");
  117. purl->path[10] = '\0';
  118. }
  119. // ****************merge**********************//
  120. // copy params, if any
  121. if (*d != '\0') {
  122. c = d = d + 1; // skip over '?'
  123. if (*d != '\0') {
  124. purl->params = strdup(c);
  125. }
  126. }
  127. int i;
  128. for(i = 0; i < strlen(purl->params); i++) {
  129. (purl->params)[i] = tolower((purl->params)[i]);
  130. }
  131. for(i = 0; i < strlen(purl->host); i++) {
  132. (purl->host)[i] = tolower((purl->host)[i]);
  133. }
  134. for(i = 0; i < strlen(purl->path); i++) {
  135. (purl->path)[i] = tolower((purl->path)[i]);
  136. }
  137. for(i = 0; i < strlen(purl->port); i++) {
  138. (purl->port)[i] = tolower((purl->port)[i]);
  139. }
  140. for (i = 0; i < strlen(purl->scheme); i++) {
  141. (purl->scheme)[i] = tolower((purl->scheme)[i]);
  142. }
  143. return purl;
  144. }
  145. Url *makeParsedURL() {
  146. Url *purl;
  147. if ((purl = malloc(sizeof(Url))) == NULL
  148. )
  149. return NULL;
  150. purl->scheme = NULL;
  151. purl->host = NULL;
  152. purl->port = NULL;
  153. purl->path = NULL;
  154. purl->params = NULL;
  155. return purl;
  156. }
  157. Url *freeParsedURL(Url *purl) {
  158. if (purl == NULL
  159. )
  160. return NULL;
  161. if (purl->scheme != NULL
  162. )
  163. free(purl->scheme);
  164. if (purl->host != NULL
  165. )
  166. free(purl->host);
  167. if (purl->port != NULL
  168. )
  169. free(purl->port);
  170. if (purl->path != NULL
  171. )
  172. free(purl->path);
  173. if (purl->params != NULL
  174. )
  175. free(purl->params);
  176. free(purl);
  177. return NULL;
  178. }
  179. void main(void) {
  180. Url *url;
  181. Url *a;
  182. Url *b;
  183. char* line;
  184. char* line1;
  185. char* line2;
  186. line = "http://www.AAAA.com/song/play?ids=/song/playlist/id/7335983/type/3";
  187. line1 = "https://www.AAAA.com/song/play:80?ids=/song/playlist/id/7335983/type/3";
  188. line2 = "http://www.AAAA.com/song/play:80?ids=/song/playlist/id/7335983/type/4";
  189. url = parseURL(line);
  190. a = parseURL(line1);
  191. b = parseURL(line2);
  192. if (url == NULL
  193. )
  194. printf("this url is invalid");
  195. char *result="";
  196. int url_len=0;
  197. url_len = strlen(url->host) +strlen(url->params) +strlen(url->path)+strlen(url->port)+strlen(url->scheme);
  198. result = (char *) malloc(url_len+5);
  199. result = strcat( result, url->scheme);
  200. result = strcat( result, "://");
  201. result = strcat( result, url->host);
  202. result = strcat( result, ":");
  203. result = strcat( result, url->port );
  204. result = strcat( result, "/" );
  205. if(url->path!=NULL)
  206. {
  207. result = strcat( result, url->path );
  208. }
  209. if(url->params!=NULL)
  210. {
  211. result = strcat( result, url->params );
  212. }
  213. printf("len = %d ,%s",url_len,result);
  214. //test the equal
  215. int isEqual = 1; // 1 means equal
  216. if (
  217. strcmp(a->host,b->host) == 0 &&
  218. strcmp(a->path,b->path) == 0 &&
  219. strcmp(a->params,b->params) == 0 &&
  220. isEqual)
  221. isEqual = 1;
  222. else
  223. isEqual = 0;
  224. if (isEqual)
  225. {
  226. if (
  227. (strcmp(a->scheme,b->scheme) == 0 && strcmp(a->port,b->port) == 0)
  228. ||
  229. (
  230. (strcmp(a->scheme,"http") == 0 || strcmp(b->scheme,"https") == 0) &&
  231. (strcmp(b->scheme,"http") == 0 || strcmp(a->scheme,"https") == 0) &&
  232. strcmp(a->port,b->port) == 0
  233. )
  234. )
  235. isEqual = 1;
  236. }
  237. else
  238. isEqual = 0;
  239. printf("\n%d\n",isEqual);
  240. }