/util.c

http://github.com/fizx/parsley · C · 237 lines · 210 code · 26 blank · 1 comment · 51 complexity · 3cae780bc31d4c682a87913cbd970f05 MD5 · raw file

  1. #include "util.h"
  /* Becomes true once registerEXSLT() has performed its one-time registrations. */
  static bool parsley_exslt_registered = false;

  /* Size in bytes of the stack buffer used when reading a FILE in chunks. */
  #define BUF 128

  /**
   * Open a file, mapping the special name "-" onto a standard stream.
   *
   * @param name  Path to open, or "-" to select stdin/stdout.
   * @param mode  fopen() mode string.
   * @return      An open stream; this function never returns NULL.
   *
   * When name is "-", a mode beginning with 'w' or 'a' selects stdout and
   * any other mode selects stdin, so "wb" or "a" no longer fall through to
   * stdin. If the file cannot be opened, a diagnostic is written to stderr
   * and the process exits with status 1.
   */
  FILE* parsley_fopen(char* name, char* mode) {
    FILE *stream;

    if(strcmp("-", name) == 0) {
      /* Decide by the leading mode character so "w", "wb", "w+", "a" all write. */
      const bool writing = (mode[0] == 'w' || mode[0] == 'a');
      stream = writing ? stdout : stdin;
    } else {
      stream = fopen(name, mode);
    }

    if(stream == NULL) {
      /* Snapshot errno so the number and the message always agree. */
      const int err = errno;
      fprintf(stderr, "Cannot open file %s, error %d, %s\n", name, err, strerror(err));
      exit(1);
    }
    return stream;
  }
  /* I/O mode last stored by parsley_io_set_mode(); 0 until it is first called. */
  static int parsley_io_mode = 0;

  /* Heap-allocated "User-Agent: ..." header line, or NULL when none is set. */
  static char *parsley_user_agent_header = NULL;

  /**
   * Return the I/O mode most recently stored by parsley_io_set_mode().
   *
   * @return The current mode value (0 if it has never been set).
   */
  int
  parsley_io_get_mode(void) {
    return parsley_io_mode;
  }
  27. static xsltStylesheetPtr span_wrap_sheet = NULL;
  28. xmlDocPtr
  29. parsley_apply_span_wrap(xmlDocPtr doc) {
  30. if(span_wrap_sheet == NULL) {
  31. char * sheet = "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:sg=\"http://selectorgadget.com/\"> \
  32. <xsl:template match=\"text()[(following-sibling::* or preceding-sibling::*) and normalize-space(.) != '']\"> \
  33. <sg_wrap><xsl:value-of select=\".\" /></sg_wrap> \
  34. </xsl:template> \
  35. <xsl:template match=\"@*|node()\"> \
  36. <xsl:copy> \
  37. <xsl:apply-templates select=\"@*|node()\"/> \
  38. </xsl:copy> \
  39. </xsl:template> \
  40. </xsl:stylesheet>";
  41. xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
  42. xmlDocPtr xml = xmlCtxtReadMemory(ctxt, sheet, strlen(sheet), NULL, NULL, 0);
  43. span_wrap_sheet = xsltParseStylesheetDoc(xml);
  44. }
  45. xsltTransformContextPtr ctxt = xsltNewTransformContext(span_wrap_sheet, doc);
  46. xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
  47. xmlDocPtr out = xsltApplyStylesheetUser(span_wrap_sheet, doc, NULL, NULL, NULL, ctxt);
  48. xsltFreeTransformContext(ctxt);
  49. return out;
  50. }
  51. void
  52. _parsley_set_user_agent(char * agent) {
  53. if(parsley_user_agent_header != NULL) free(parsley_user_agent_header);
  54. if(agent == NULL) {
  55. parsley_user_agent_header = NULL;
  56. } else {
  57. asprintf(&parsley_user_agent_header, "User-Agent: %s\n", agent);
  58. }
  59. }
  60. static void *
  61. xmlUserAgentIOHTTPOpen(const char * file_name) {
  62. return (void *)(xmlNanoHTTPMethod(file_name, NULL, NULL, NULL, parsley_user_agent_header, 0));
  63. }
  64. void
  65. parsley_io_set_mode(int mode) {
  66. if(mode == parsley_io_mode) return;
  67. parsley_io_mode = mode;
  68. xmlCleanupInputCallbacks();
  69. if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_LOCAL) {
  70. xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
  71. xmlFileRead, xmlFileClose);
  72. #ifdef HAVE_ZLIB_H
  73. xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen,
  74. xmlGzfileRead, xmlGzfileClose);
  75. #endif /* HAVE_ZLIB_H */
  76. }
  77. if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_NET) {
  78. #ifdef LIBXML_HTTP_ENABLED
  79. xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlUserAgentIOHTTPOpen,
  80. xmlIOHTTPRead, xmlIOHTTPClose);
  81. #endif /* LIBXML_HTTP_ENABLED */
  82. #ifdef LIBXML_FTP_ENABLED
  83. xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen,
  84. xmlIOFTPRead, xmlIOFTPClose);
  85. #endif /* LIBXML_FTP_ENABLED */
  86. }
  87. }
  88. void
  89. printbuf_file_read(FILE *f, struct printbuf *buf) {
  90. char chars[BUF];
  91. while(fgets(chars, BUF, f) != NULL){
  92. sprintbuf(buf, "%s", chars);
  93. }
  94. }
  95. void registerEXSLT() {
  96. if(!parsley_exslt_registered) {
  97. exsltRegisterAll();
  98. parsley_register_all();
  99. init_xpath_alias();
  100. exslt_org_regular_expressions_init();
  101. parsley_exslt_registered = true;
  102. }
  103. }
  104. int parsley_key_flags(char* key) {
  105. char* ptr = key;
  106. char* last_alnum = key;
  107. char* last_paren = key;
  108. while(*ptr++ != '\0'){
  109. if(isalnum(*ptr)) {
  110. last_alnum = ptr;
  111. } else if (*ptr == ')') {
  112. last_paren = ptr;
  113. }
  114. }
  115. ptr = (last_alnum > last_paren ? last_alnum : last_paren);
  116. int flags = 0;
  117. while(*ptr++ != '\0'){
  118. switch(*ptr){
  119. case '?':
  120. flags |= PARSLEY_OPTIONAL;
  121. break;
  122. case '!':
  123. flags |= PARSLEY_BANG;
  124. break;
  125. }
  126. }
  127. return flags;
  128. }
  /**
   * Return the tag portion of a key as a newly allocated string.
   *
   * @param key  NUL-terminated key string.
   * @return     A malloc()ed copy of the tag that the caller must free(),
   *             or NULL if allocation fails.
   *
   * The tag is the first character of the key (kept as-is, whatever it is)
   * followed by the longest run of alphanumerics, '_' and '-'. An empty key
   * yields an empty string.
   */
  char* parsley_key_tag(char* key) {
    size_t len = 0;

    if(key[0] != '\0') {
      /* The first character is always part of the tag. */
      len = 1;
      while(key[len] != '\0') {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char c = (unsigned char) key[len];
        if(!isalnum(c) && c != '_' && c != '-') {
          break;
        }
        len++;
      }
    }

    /* Allocate only what the tag needs rather than duplicating the whole key. */
    char *tag = malloc(len + 1);
    if(tag == NULL) {
      return NULL;
    }
    memcpy(tag, key, len);
    tag[len] = '\0';
    return tag;
  }
  140. pxpathPtr parsley_key_filter(char* key) {
  141. char *expr = strdup(key);
  142. char *ptr = expr;
  143. char *orig = expr;
  144. char *last_paren;
  145. int offset = 0;
  146. bool has_expr = false;
  147. while(*ptr++ != '\0'){
  148. if(!has_expr) offset++;
  149. if(*ptr == '(') has_expr = true;
  150. if(*ptr == ')') last_paren = ptr;
  151. }
  152. if(!has_expr) return NULL;
  153. *last_paren = 0; // clip ")"
  154. expr += offset + 1; // clip "("
  155. pxpathPtr out = strlen(expr) == 0 ? NULL : myparse(expr);
  156. free(orig);
  157. // free(expr);
  158. return out;
  159. }
  160. static xmlNodePtr
  161. _xmlLastElementChild(xmlNodePtr node) {
  162. xmlNodePtr child = node->children;
  163. xmlNodePtr elem = NULL;
  164. while(child != NULL) {
  165. if(child->type == XML_ELEMENT_NODE) elem = child;
  166. child = child->next;
  167. }
  168. return elem;
  169. }
  170. xmlNodePtr new_stylesheet_skeleton(char *incl) {
  171. struct printbuf *buf = printbuf_new();
  172. sprintbuf(buf, "%s", "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
  173. sprintbuf(buf, "%s", " xmlns:lib=\"http://parselets.com/stdlib\"");
  174. sprintbuf(buf, "%s", " xmlns:parsley=\"http://parselets.com/json\"");
  175. sprintbuf(buf, "%s", " xmlns:str=\"http://exslt.org/strings\"");
  176. sprintbuf(buf, "%s", " xmlns:set=\"http://exslt.org/sets\"");
  177. sprintbuf(buf, "%s", " xmlns:math=\"http://exslt.org/math\"");
  178. sprintbuf(buf, "%s", " xmlns:func=\"http://exslt.org/functions\"");
  179. sprintbuf(buf, "%s", " xmlns:user=\"http://parselets.com/usre\"");
  180. sprintbuf(buf, "%s", " xmlns:dyn=\"http://exslt.org/dynamic\"");
  181. sprintbuf(buf, "%s", " xmlns:date=\"http://exslt.org/dates-and-times\"");
  182. sprintbuf(buf, "%s", " xmlns:exsl=\"http://exslt.org/common\"");
  183. sprintbuf(buf, "%s", " xmlns:saxon=\"http://icl.com/saxon\"");
  184. sprintbuf(buf, "%s", " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
  185. sprintbuf(buf, "%s", " xmlns:regex=\"http://exslt.org/regular-expressions\"");
  186. sprintbuf(buf, "%s", " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
  187. sprintbuf(buf, "%s", ">\n");
  188. sprintbuf(buf, "%s", "<xsl:variable name=\"nbsp\">&#160;</xsl:variable>\n");
  189. sprintbuf(buf, "%s", "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
  190. sprintbuf(buf, "%s", "<xsl:strip-space elements=\"*\"/>\n");
  191. sprintbuf(buf, "%s", "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
  192. sprintbuf(buf, "%s", "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
  193. sprintbuf(buf, "%s", "<func:result select=\"$out\" /></func:function>");
  194. sprintbuf(buf, "%s", "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
  195. sprintbuf(buf, "%s", "<xsl:template match=\"script|style\" mode=\"innertext\"/>");
  196. sprintbuf(buf, "%s", "<xsl:template match=\"br|address|blockquote|center|dir|div|form|h1|h2|h3|h4|h5|h6|hr|menu|noframes|noscript|p|pre|li|td|th|p\" mode=\"innertext\"><xsl:apply-templates mode=\"innertext\" /><xsl:text>\n</xsl:text></xsl:template>");
  197. sprintbuf(buf, "%s\n", incl);
  198. sprintbuf(buf, "%s\n", "<xsl:template match=\"/\">\n");
  199. sprintbuf(buf, "%s\n", "<parsley:root />\n");
  200. sprintbuf(buf, "%s\n", "</xsl:template>\n");
  201. sprintbuf(buf, "%s\n", "</xsl:stylesheet>\n");
  202. xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
  203. xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://parselets.com/compiled", NULL, 3);
  204. xmlFreeParserCtxt(ctxt);
  205. printbuf_free(buf);
  206. xmlNodePtr node = xmlDocGetRootElement(doc);
  207. while(_xmlLastElementChild(node) != NULL) {
  208. node = _xmlLastElementChild(node);
  209. }
  210. return node;
  211. }