PageRenderTime 27ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/Open-ILS/src/apachemods/mod_xmlent.c

https://github.com/atz/OpenILS-Evergreen
C | 429 lines | 303 code | 71 blank | 55 comment | 43 complexity | 9ecb2782c186de78cfe940351c4d02b5 MD5 | raw file
  1. #include "httpd.h"
  2. /* vim:noet:ts=4
  3. */
  4. #include "http_config.h"
  5. #include "http_core.h"
  6. #include "http_protocol.h"
  7. #include "http_request.h"
  8. //#include "apr_compat.h"
  9. #include "apr_strings.h"
  10. #include "apr_reslist.h"
  11. #include "http_log.h"
  12. #include "util_filter.h"
  13. #include "opensrf/utils.h"
  14. #include <sys/types.h>
  15. #include <unistd.h>
  16. #include <expat.h>
/* Name of this module as a string */
#define MODULE_NAME "xmlent_module"
/* Define the config defaults here */
/* Directive names registered in xmlEntCommands, plus the content type
 * used when no XMLEntContentType directive is given */
#define MODXMLENT_CONFIG_STRIP_COMMENTS "XMLEntStripComments"
#define MODXMLENT_CONFIG_CONTENT_TYPE "XMLEntContentType"
#define MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT "text/html"
#define MODXMLENT_CONFIG_STRIP_PI "XMLEntStripPI"
#define MODXMLENT_CONFIG_DOCTYPE "XMLEntDoctype"
#define MODXMLENT_CONFIG_STRIP_DOCTYPE "XMLEntStripDoctype"
#define MODXMLENT_CONFIG_ESCAPE_SCRIPT "XMLEntEscapeScript"
/* Forward declaration; the module record is defined at the bottom of this file */
module AP_MODULE_DECLARE_DATA xmlent_module;
/* File-global flag consulted by charHandler to decide whether character data
 * may be written without HTML-escaping */
int xmlEntInScript = 0; /* are we in the middle of a <script> tag */
/* our context */
/* Per-filter state, allocated by xmlEntHandler and stored in the filter's ctx pointer */
typedef struct {
	apr_bucket_brigade* brigade; /* the bucket brigade we buffer our data into */
	XML_Parser parser; /* our XML parser */
} xmlEntContext;
  33. /* our config data */
  34. typedef struct {
  35. int stripComments; /* should we strip comments on the way out? */
  36. int stripPI; /* should we strip processing instructions on the way out? */
  37. int stripDoctype;
  38. int escapeScript; /* if true, we html-escape anything text inside a <script> tag */
  39. char* contentType; /* the content type used to server pages */
  40. char* doctype; /* the doctype header to send before any other data */
  41. } xmlEntConfig;
  42. /* check to see if this is an empty XHTML element */
  43. static int isEmptyElement(const char *element) {
  44. /* derived from "grep EMPTY xhtml1-transitional.dtd" */
  45. static char *emptyTags[] = {
  46. "base",
  47. "meta",
  48. "link",
  49. "hr",
  50. "br",
  51. "basefont",
  52. "param",
  53. "img",
  54. "area",
  55. "input",
  56. "isindex",
  57. "col",
  58. 0
  59. };
  60. int i, isEmpty;
  61. const char *p;
  62. i = 0;
  63. isEmpty = 0;
  64. p = *(emptyTags);
  65. while (!isEmpty && p != 0) {
  66. isEmpty = !strcmp((const char*)element, (const char*)p);
  67. p = *(emptyTags + ++i);
  68. }
  69. return isEmpty;
  70. }
  71. /* get the content type from the config */
  72. static const char* xmlEntSetContentType(cmd_parms *params, void *cfg, const char *arg) {
  73. xmlEntConfig* config = (xmlEntConfig*) cfg;
  74. config->contentType = (char*) arg;
  75. return NULL;
  76. }
  77. /* get the strip PI flag from the config */
  78. static const char* xmlEntSetStripPI(cmd_parms *params, void *cfg, const char *arg) {
  79. xmlEntConfig* config = (xmlEntConfig*) cfg;
  80. char* a = (char*) arg;
  81. config->stripPI = (a && !strcasecmp(a, "yes")) ? 1 : 0;
  82. return NULL;
  83. }
  84. /* Get the strip comments flag from the config */
  85. static const char* xmlEntSetStripComments(cmd_parms *params, void *cfg, const char *arg) {
  86. xmlEntConfig* config = (xmlEntConfig*) cfg;
  87. char* a = (char*) arg;
  88. config->stripComments = (a && !strcasecmp(a, "yes")) ? 1 : 0;
  89. return NULL;
  90. }
  91. static const char* xmlEntSetEscapeScript(cmd_parms *params, void *cfg, const char *arg) {
  92. xmlEntConfig* config = (xmlEntConfig*) cfg;
  93. char* a = (char*) arg;
  94. config->escapeScript = (a && !strcasecmp(a, "yes")) ? 1 : 0;
  95. return NULL;
  96. }
  97. static const char* xmlEntSetStripDoctype(cmd_parms *params, void *cfg, const char *arg) {
  98. xmlEntConfig* config = (xmlEntConfig*) cfg;
  99. char* a = (char*) arg;
  100. config->stripDoctype = (a && !strcasecmp(a, "yes")) ? 1 : 0;
  101. return NULL;
  102. }
  103. /* Get the user defined doctype from the config */
  104. static const char* xmlEntSetDoctype(cmd_parms *params, void *cfg, const char *arg) {
  105. xmlEntConfig* config = (xmlEntConfig*) cfg;
  106. config->doctype = (char*) arg;
  107. return NULL;
  108. }
  109. /* Tell apache how to set our config variables */
  110. static const command_rec xmlEntCommands[] = {
  111. AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_COMMENTS,
  112. xmlEntSetStripComments, NULL, ACCESS_CONF, "XMLENT Strip Comments"),
  113. AP_INIT_TAKE1( MODXMLENT_CONFIG_CONTENT_TYPE,
  114. xmlEntSetContentType, NULL, ACCESS_CONF, "XMLENT Content Type"),
  115. AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_PI,
  116. xmlEntSetStripPI, NULL, ACCESS_CONF, "XMLENT Strip XML Processing Instructions"),
  117. AP_INIT_TAKE1( MODXMLENT_CONFIG_DOCTYPE,
  118. xmlEntSetDoctype, NULL, ACCESS_CONF, "XMLENT Doctype Declaration"),
  119. AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_DOCTYPE,
  120. xmlEntSetStripDoctype, NULL, ACCESS_CONF, "XMLENT Strip Doctype Declaration"),
  121. AP_INIT_TAKE1( MODXMLENT_CONFIG_ESCAPE_SCRIPT,
  122. xmlEntSetEscapeScript, NULL, ACCESS_CONF, "XMLENT Escape data in script tags"),
  123. {NULL}
  124. };
  125. /* Creates a new config object */
  126. static void* xmlEntCreateDirConfig( apr_pool_t* p, char* dir ) {
  127. xmlEntConfig* config =
  128. (xmlEntConfig*) apr_palloc( p, sizeof(xmlEntConfig) );
  129. config->stripComments = 0;
  130. config->stripPI = 0;
  131. config->stripDoctype = 0;
  132. config->escapeScript = 1;
  133. config->contentType = MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT;
  134. config->doctype = NULL;
  135. return (void*) config;
  136. }
  137. /* keep for a while in case we ever need it */
  138. /*
  139. #define XMLENT_INHERIT(p, c, f) ((c->f) ? c->f : p->f);
  140. static void* xmlEntMergeDirConfig(apr_pool_t *p, void *base, void *overrides) {
  141. xmlEntConfig* parent = base;
  142. xmlEntConfig* child = overrides;
  143. xmlEntConfig* newConf = (xmlEntConfig*) apr_pcalloc(p, sizeof(xmlEntConfig));
  144. newConf->contentType = XMLENT_INHERIT(parent, child, contentType);
  145. newConf->stripComments = XMLENT_INHERIT(parent, child, stripComments);
  146. return newConf;
  147. }
  148. */
/* We need a global parser object because sub-requests, with different
 * filter contexts, are parsing part of the same document.
 * This means that this filter will only work in forked (non-threaded) environments.
 * XXX Figure out how to share pointers/data across filters
 *
 * xmlEntHandler creates the parser when this is NULL and resets it to NULL
 * after freeing it. */
XML_Parser parser = NULL;
  154. /* utility function which passes data to the next filter */
  155. static void _fwrite( ap_filter_t* filter, char* data, ... ) {
  156. if(!(filter && data)) return;
  157. xmlEntContext* ctx = (xmlEntContext*) filter->ctx;
  158. VA_LIST_TO_STRING(data);
  159. ap_fwrite( filter->next, ctx->brigade, VA_BUF, strlen(VA_BUF));
  160. }
  161. /** XXX move me to opensrf/utils.h */
  162. #define OSRF_UTILS_REPLACE_CHAR(str, o, n)\
  163. do {\
  164. int i = 0;\
  165. while(str[i] != '\0') {\
  166. if(str[i] == o)\
  167. str[i] = n;\
  168. i++;\
  169. }\
  170. } while(0)
  171. /* cycles through the attributes attached to an element */
  172. static void printAttr( ap_filter_t* filter, const char** atts ) {
  173. if(!atts) return;
  174. int i;
  175. for( i = 0; atts[i] && atts[i+1]; i++ ) {
  176. const char* name = atts[i];
  177. const char* value = atts[i+1];
  178. char* escaped = ap_escape_html(filter->r->pool, value);
  179. /* we make a big assumption here that if the string contains a ',
  180. * then the original attribute was wrapped in "s - so recreate that */
  181. if( strchr( escaped, '\'' ) ) {
  182. OSRF_UTILS_REPLACE_CHAR(escaped,'"','\'');
  183. _fwrite( filter, " %s=\"%s\"", name, escaped );
  184. } else {
  185. OSRF_UTILS_REPLACE_CHAR(escaped,'\'','"');
  186. _fwrite( filter, " %s='%s'", name, escaped );
  187. }
  188. i++;
  189. }
  190. }
  191. /* Starts an XML element */
  192. static void XMLCALL startElement(void *userData, const char *name, const char **atts) {
  193. ap_filter_t* filter = (ap_filter_t*) userData;
  194. xmlEntConfig* config = ap_get_module_config(
  195. filter->r->per_dir_config, &xmlent_module );
  196. _fwrite(filter, "<%s", name );
  197. printAttr( filter, atts );
  198. if (!strncmp(config->contentType, MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT, 9)
  199. && isEmptyElement(name)) {
  200. _fwrite(filter, " />", name );
  201. } else {
  202. _fwrite(filter, ">", name );
  203. }
  204. if(!strcmp(name, "script"))
  205. xmlEntInScript = 1;
  206. }
  207. /* Handles the character data */
  208. static void XMLCALL charHandler( void* userData, const XML_Char* s, int len ) {
  209. ap_filter_t* filter = (ap_filter_t*) userData;
  210. char data[len+1];
  211. memset( data, '\0', sizeof(data) );
  212. memcpy( data, s, len );
  213. xmlEntConfig* config = ap_get_module_config(
  214. filter->r->per_dir_config, &xmlent_module );
  215. if( xmlEntInScript && ! config->escapeScript ) {
  216. _fwrite( filter, "%s", data );
  217. } else {
  218. char* escaped = ap_escape_html(filter->r->pool, data);
  219. _fwrite( filter, "%s", escaped );
  220. }
  221. }
  222. static void XMLCALL handlePI( void* userData, const XML_Char* target, const XML_Char* data) {
  223. ap_filter_t* filter = (ap_filter_t*) userData;
  224. _fwrite(filter, "<?%s %s?>", target, data);
  225. }
  226. static void XMLCALL handleComment( void* userData, const XML_Char* comment ) {
  227. ap_filter_t* filter = (ap_filter_t*) userData;
  228. _fwrite(filter, "<!-- %s -->", comment);
  229. }
  230. /* Ends an XML element */
  231. static void XMLCALL endElement(void *userData, const char *name) {
  232. ap_filter_t* filter = (ap_filter_t*) userData;
  233. xmlEntConfig* config = ap_get_module_config(
  234. filter->r->per_dir_config, &xmlent_module );
  235. if (!strncmp(config->contentType, MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT, 9)
  236. && isEmptyElement(name)) {
  237. return;
  238. }
  239. _fwrite( filter, "</%s>", name );
  240. if(!strcmp(name, "script"))
  241. xmlEntInScript = 1;
  242. }
  243. static void XMLCALL doctypeHandler( void* userData,
  244. const char* name, const char* sysid, const char* pubid, int hasinternal ) {
  245. ap_filter_t* filter = (ap_filter_t*) userData;
  246. char* s = (sysid) ? (char*) sysid : "";
  247. char* p = (pubid) ? (char*) pubid : "";
  248. _fwrite( filter, "<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", name, p, s );
  249. }
  250. /* The handler. Create a new parser and/or filter context where appropriate
  251. * and parse the chunks of data received from the brigade
  252. */
  253. static int xmlEntHandler( ap_filter_t *f, apr_bucket_brigade *brigade ) {
  254. xmlEntContext* ctx = f->ctx;
  255. apr_bucket* currentBucket = NULL;
  256. apr_pool_t* pool = f->r->pool;
  257. const char* data;
  258. apr_size_t len;
  259. /* load the per-dir/location config */
  260. xmlEntConfig* config = ap_get_module_config(
  261. f->r->per_dir_config, &xmlent_module );
  262. ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
  263. 0, f->r, "XMLENT Config:\nContent Type = %s, "
  264. "Strip PI = %s, Strip Comments = %s, Doctype = %s",
  265. config->contentType,
  266. (config->stripPI) ? "yes" : "no",
  267. (config->stripComments) ? "yes" : "no",
  268. config->doctype);
  269. /* set the content type based on the config */
  270. ap_set_content_type(f->r, config->contentType);
  271. /* create the XML parser */
  272. int firstrun = 0;
  273. if( parser == NULL ) {
  274. firstrun = 1;
  275. parser = XML_ParserCreate("UTF-8");
  276. XML_SetUserData(parser, f);
  277. XML_SetElementHandler(parser, startElement, endElement);
  278. XML_SetCharacterDataHandler(parser, charHandler);
  279. if(!config->stripDoctype)
  280. XML_SetStartDoctypeDeclHandler( parser, doctypeHandler );
  281. if(!config->stripPI)
  282. XML_SetProcessingInstructionHandler(parser, handlePI);
  283. if(!config->stripComments)
  284. XML_SetCommentHandler(parser, handleComment);
  285. }
  286. /* create the filter context */
  287. if( ctx == NULL ) {
  288. f->ctx = ctx = apr_pcalloc( pool, sizeof(*ctx));
  289. ctx->brigade = apr_brigade_create( pool, f->c->bucket_alloc );
  290. ctx->parser = parser;
  291. }
  292. if(firstrun) { /* we haven't started writing the data to the stream yet */
  293. /* go ahead and write the doctype out if we have one defined */
  294. if(config->doctype) {
  295. ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
  296. 0, f->r, "XMLENT DOCTYPE => %s", config->doctype);
  297. _fwrite(f, "%s\n", config->doctype);
  298. }
  299. }
  300. /* cycle through the buckets in the brigade */
  301. while (!APR_BRIGADE_EMPTY(brigade)) {
  302. /* grab the next bucket */
  303. currentBucket = APR_BRIGADE_FIRST(brigade);
  304. /* clean up when we're done */
  305. if (APR_BUCKET_IS_EOS(currentBucket) || APR_BUCKET_IS_FLUSH(currentBucket)) {
  306. APR_BUCKET_REMOVE(currentBucket);
  307. APR_BRIGADE_INSERT_TAIL(ctx->brigade, currentBucket);
  308. ap_pass_brigade(f->next, ctx->brigade);
  309. XML_ParserFree(parser);
  310. parser = NULL;
  311. return APR_SUCCESS;
  312. }
  313. /* read the incoming data */
  314. int s = apr_bucket_read(currentBucket, &data, &len, APR_NONBLOCK_READ);
  315. if( s != APR_SUCCESS ) {
  316. ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
  317. "XMLENT error reading data from filter with status %d", s);
  318. return s;
  319. }
  320. if (len > 0) {
  321. ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
  322. 0, f->r, "XMLENT read %d bytes", (int)len);
  323. /* push data into the XML push parser */
  324. if ( XML_Parse(ctx->parser, data, len, 0) == XML_STATUS_ERROR ) {
  325. char tmp[len+1];
  326. memcpy(tmp, data, len);
  327. tmp[len] = '\0';
  328. /* log and die on XML errors */
  329. ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
  330. "XMLENT XML Parse Error: %s at line %d: parsing %s: data %s",
  331. XML_ErrorString(XML_GetErrorCode(ctx->parser)),
  332. (int) XML_GetCurrentLineNumber(ctx->parser), f->r->filename, tmp);
  333. XML_ParserFree(parser);
  334. parser = NULL;
  335. return HTTP_INTERNAL_SERVER_ERROR;
  336. }
  337. }
  338. /* so a subrequest doesn't re-read this bucket */
  339. apr_bucket_delete(currentBucket);
  340. }
  341. apr_brigade_destroy(brigade);
  342. return APR_SUCCESS;
  343. }
  344. /* Register the filter function as a filter for modifying the HTTP body (content) */
  345. static void xmlEntRegisterHook(apr_pool_t *pool) {
  346. ap_register_output_filter("XMLENT", xmlEntHandler, NULL, AP_FTYPE_CONTENT_SET);
  347. }
  348. /* Define the module data */
  349. module AP_MODULE_DECLARE_DATA xmlent_module = {
  350. STANDARD20_MODULE_STUFF,
  351. xmlEntCreateDirConfig, /* dir config creater */
  352. NULL, /* dir merger --- default is to override */
  353. NULL, /* server config */
  354. NULL, /* merge server config */
  355. xmlEntCommands, /* command apr_table_t */
  356. xmlEntRegisterHook /* register hook */
  357. };