PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/src/libopensrf/osrf_prefork.c

https://gitlab.com/evergreen-bjwebb/opensrf-debian
C | 1223 lines | 713 code | 196 blank | 314 comment | 135 complexity | 4b8fac82f31f9206b75c17f5fa74e130 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
  1. /**
  2. @file osrf_prefork.c
  3. @brief Spawn and manage a collection of child processes to service requests.
  4. Spawn a collection of child processes, replacing them as needed. Forward requests to them
  5. and let the children do the work.
  6. Each child processes some maximum number of requests before it terminates itself. When a
  7. child dies, either deliberately or otherwise, we can spawn another one to replace it,
  8. keeping the number of children within a predefined range.
  9. Use a doubly-linked circular list to keep track of the children to whom we have forwarded
  10. a request, and who are still working on them. Use a separate linear linked list to keep
  11. track of children that are currently idle. Move them back and forth as needed.
  12. For each child, set up two pipes:
  13. - One for the parent to send requests to the child.
  14. - One for the child to notify the parent that it is available for another request.
  15. The message sent to the child represents an XML stanza as received from Jabber.
  16. When the child finishes processing the request, it writes the string "available" back
  17. to the parent. Then the parent knows that it can send that child another request.
  18. */
  19. #include <errno.h>
  20. #include <signal.h>
  21. #include <sys/types.h>
  22. #include <sys/time.h>
  23. #include <unistd.h>
  24. #include <stdlib.h>
  25. #include <stdio.h>
  26. #include <string.h>
  27. #include <sys/select.h>
  28. #include <sys/wait.h>
  29. #include "opensrf/utils.h"
  30. #include "opensrf/log.h"
  31. #include "opensrf/transport_client.h"
  32. #include "opensrf/osrf_stack.h"
  33. #include "opensrf/osrf_settings.h"
  34. #include "opensrf/osrf_application.h"
  35. #define READ_BUFSIZE 1024
  36. #define ABS_MAX_CHILDREN 256
  37. typedef struct {
  38. int max_requests; /**< How many requests a child processes before terminating. */
  39. int min_children; /**< Minimum number of children to maintain. */
  40. int max_children; /**< Maximum number of children to maintain. */
  41. int fd; /**< Unused. */
  42. int data_to_child; /**< Unused. */
  43. int data_to_parent; /**< Unused. */
  44. int current_num_children; /**< How many children are currently on the list. */
  45. int keepalive; /**< Keepalive time for stateful sessions. */
  46. char* appname; /**< Name of the application. */
  47. /** Points to a circular linked list of children. */
  48. struct prefork_child_struct* first_child;
  49. /** List of of child processes that aren't doing anything at the moment and are
  50. therefore available to service a new request. */
  51. struct prefork_child_struct* idle_list;
  52. /** List of allocated but unused prefork_children, available for reuse. Each one is just
  53. raw memory, apart from the "next" pointer used to stitch them together. In particular,
  54. there is no child process for them, and the file descriptors are not open. */
  55. struct prefork_child_struct* free_list;
  56. transport_client* connection; /**< Connection to Jabber. */
  57. } prefork_simple;
  /**
      @brief Bookkeeping for a single child process, including both ends of its two pipes.
  */
  struct prefork_child_struct {
      pid_t pid;                  /**< Process ID of the child. */
      int read_data_fd;           /**< Read end of the data pipe; the child reads requests here. */
      int write_data_fd;          /**< Write end of the data pipe; the parent writes requests here. */
      int read_status_fd;         /**< Read end of the status pipe; the parent reads it to see
                                       whether the child is available. */
      int write_status_fd;        /**< Write end of the status pipe; the child uses it to notify
                                       the parent when it's available again. */
      int max_requests;           /**< Number of requests a child can process before terminating. */
      const char* appname;        /**< Name of the application. */
      int keepalive;              /**< Keepalive time for stateful sessions. */
      struct prefork_child_struct* next;   /**< Linkage pointer for linked list. */
      struct prefork_child_struct* prev;   /**< Linkage pointer for linked list. */
  };

  typedef struct prefork_child_struct prefork_child;
  71. /** Boolean. Set to true by a signal handler when it traps SIGCHLD. */
  72. static volatile sig_atomic_t child_dead;
  73. static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
  74. int max_requests, int min_children, int max_children );
  75. static prefork_child* launch_child( prefork_simple* forker );
  76. static void prefork_launch_children( prefork_simple* forker );
  77. static void prefork_run( prefork_simple* forker );
  78. static void add_prefork_child( prefork_simple* forker, prefork_child* child );
  79. static void del_prefork_child( prefork_simple* forker, pid_t pid );
  80. static int check_children( prefork_simple* forker, int forever );
  81. static int prefork_child_process_request( prefork_child*, char* data );
  82. static int prefork_child_init_hook( prefork_child* );
  83. static prefork_child* prefork_child_init( prefork_simple* forker,
  84. int read_data_fd, int write_data_fd,
  85. int read_status_fd, int write_status_fd );
  86. /* listens on the 'data_to_child' fd and wait for incoming data */
  87. static void prefork_child_wait( prefork_child* child );
  88. static void prefork_clear( prefork_simple* );
  89. static void prefork_child_free( prefork_simple* forker, prefork_child* );
  90. static void osrf_prefork_register_routers( const char* appname );
  91. static void osrf_prefork_child_exit( prefork_child* );
  92. static void sigchld_handler( int sig );
  93. /**
  94. @brief Spawn and manage a collection of drone processes for servicing requests.
  95. @param appname Name of the application.
  96. @return 0 if successful, or -1 if error.
  97. */
  98. int osrf_prefork_run( const char* appname ) {
  99. if( !appname ) {
  100. osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run requires an appname to run!");
  101. return -1;
  102. }
  103. set_proc_title( "OpenSRF Listener [%s]", appname );
  104. int maxr = 1000;
  105. int maxc = 10;
  106. int minc = 3;
  107. int kalive = 5;
  108. // Get configuration settings
  109. osrfLogInfo( OSRF_LOG_MARK, "Loading config in osrf_forker for app %s", appname );
  110. char* max_req = osrf_settings_host_value( "/apps/%s/unix_config/max_requests", appname );
  111. char* min_children = osrf_settings_host_value( "/apps/%s/unix_config/min_children", appname );
  112. char* max_children = osrf_settings_host_value( "/apps/%s/unix_config/max_children", appname );
  113. char* keepalive = osrf_settings_host_value( "/apps/%s/keepalive", appname );
  114. if( !keepalive )
  115. osrfLogWarning( OSRF_LOG_MARK, "Keepalive is not defined, assuming %d", kalive );
  116. else
  117. kalive = atoi( keepalive );
  118. if( !max_req )
  119. osrfLogWarning( OSRF_LOG_MARK, "Max requests not defined, assuming %d", maxr );
  120. else
  121. maxr = atoi( max_req );
  122. if( !min_children )
  123. osrfLogWarning( OSRF_LOG_MARK, "Min children not defined, assuming %d", minc );
  124. else
  125. minc = atoi( min_children );
  126. if( !max_children )
  127. osrfLogWarning( OSRF_LOG_MARK, "Max children not defined, assuming %d", maxc );
  128. else
  129. maxc = atoi( max_children );
  130. free( keepalive );
  131. free( max_req );
  132. free( min_children );
  133. free( max_children );
  134. /* --------------------------------------------------- */
  135. char* resc = va_list_to_string( "%s_listener", appname );
  136. // Make sure that we haven't already booted
  137. if( !osrfSystemBootstrapClientResc( NULL, NULL, resc )) {
  138. osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()" );
  139. free( resc );
  140. return -1;
  141. }
  142. free( resc );
  143. prefork_simple forker;
  144. if( prefork_simple_init( &forker, osrfSystemGetTransportClient(), maxr, minc, maxc )) {
  145. osrfLogError( OSRF_LOG_MARK,
  146. "osrf_prefork_run() failed to create prefork_simple object" );
  147. return -1;
  148. }
  149. // Finish initializing the prefork_simple.
  150. forker.appname = strdup( appname );
  151. forker.keepalive = kalive;
  152. // Spawn the children; put them in the idle list.
  153. prefork_launch_children( &forker );
  154. // Tell the router that you're open for business.
  155. osrf_prefork_register_routers( appname );
  156. // Sit back and let the requests roll in
  157. osrfLogInfo( OSRF_LOG_MARK, "Launching osrf_forker for app %s", appname );
  158. prefork_run( &forker );
  159. osrfLogWarning( OSRF_LOG_MARK, "prefork_run() returned - how??" );
  160. prefork_clear( &forker );
  161. return 0;
  162. }
  163. /**
  164. @brief Register the application with a specified router.
  165. @param appname Name of the application.
  166. @param routerName Name of the router.
  167. @param routerDomain Domain of the router.
  168. Tell the router that you're open for business so that it can route requests to you.
  169. Called only by the parent process.
  170. */
  171. static void osrf_prefork_send_router_registration(
  172. const char* appname, const char* routerName, const char* routerDomain ) {
  173. // Get a pointer to the global transport_client
  174. transport_client* client = osrfSystemGetTransportClient();
  175. // Construct the Jabber address of the router
  176. char* jid = va_list_to_string( "%s@%s/router", routerName, routerDomain );
  177. osrfLogInfo( OSRF_LOG_MARK, "%s registering with router %s", appname, jid );
  178. // Create the registration message, and send it
  179. transport_message* msg = message_init( "registering", NULL, NULL, jid, NULL );
  180. message_set_router_info( msg, NULL, NULL, appname, "register", 0 );
  181. client_send_message( client, msg );
  182. // Clean up
  183. message_free( msg );
  184. free( jid );
  185. }
  186. /**
  187. @brief Register with a router, or not, according to some config settings.
  188. @param appname Name of the application
  189. @param RouterChunk A representation of part of the config file.
  190. Parse a "complex" router configuration chunk.
  191. Examine the services listed for a given router (normally in opensrf_core.xml). If
  192. there is an entry for this service, or if there are @em no services listed, then
  193. register with this router. Otherwise don't.
  194. Called only by the parent process.
  195. */
  196. static void osrf_prefork_parse_router_chunk( const char* appname, const jsonObject* routerChunk ) {
  197. const char* routerName = jsonObjectGetString( jsonObjectGetKeyConst( routerChunk, "name" ));
  198. const char* domain = jsonObjectGetString( jsonObjectGetKeyConst( routerChunk, "domain" ));
  199. const jsonObject* services = jsonObjectGetKeyConst( routerChunk, "services" );
  200. osrfLogDebug( OSRF_LOG_MARK, "found router config with domain %s and name %s",
  201. routerName, domain );
  202. if( services && services->type == JSON_HASH ) {
  203. osrfLogDebug( OSRF_LOG_MARK, "investigating router information..." );
  204. const jsonObject* service_obj = jsonObjectGetKeyConst( services, "service" );
  205. if( !service_obj )
  206. ; // do nothing (shouldn't happen)
  207. else if( JSON_ARRAY == service_obj->type ) {
  208. // There are multiple services listed. Register with this router
  209. // if and only if this service is on the list.
  210. int j;
  211. for( j = 0; j < service_obj->size; j++ ) {
  212. const char* service = jsonObjectGetString( jsonObjectGetIndex( service_obj, j ));
  213. if( service && !strcmp( appname, service ))
  214. osrf_prefork_send_router_registration( appname, routerName, domain );
  215. }
  216. }
  217. else if( JSON_STRING == service_obj->type ) {
  218. // There's only one service listed. Register with this router
  219. // if and only if this service is the one listed.
  220. if( !strcmp( appname, jsonObjectGetString( service_obj )) )
  221. osrf_prefork_send_router_registration( appname, routerName, domain );
  222. }
  223. } else {
  224. // This router is not restricted to any set of services,
  225. // so go ahead and register with it.
  226. osrf_prefork_send_router_registration( appname, routerName, domain );
  227. }
  228. }
  229. /**
  230. @brief Register the application with one or more routers, according to the configuration.
  231. @param appname Name of the application.
  232. Called only by the parent process.
  233. */
  234. static void osrf_prefork_register_routers( const char* appname ) {
  235. jsonObject* routerInfo = osrfConfigGetValueObject( NULL, "/routers/router" );
  236. int i;
  237. for( i = 0; i < routerInfo->size; i++ ) {
  238. const jsonObject* routerChunk = jsonObjectGetIndex( routerInfo, i );
  239. if( routerChunk->type == JSON_STRING ) {
  240. /* this accomodates simple router configs */
  241. char* routerName = osrfConfigGetValue( NULL, "/router_name" );
  242. char* domain = osrfConfigGetValue( NULL, "/routers/router" );
  243. osrfLogDebug( OSRF_LOG_MARK, "found simple router settings with router name %s",
  244. routerName );
  245. osrf_prefork_send_router_registration( appname, routerName, domain );
  246. free( routerName );
  247. free( domain );
  248. } else {
  249. osrf_prefork_parse_router_chunk( appname, routerChunk );
  250. }
  251. }
  252. jsonObjectFree( routerInfo );
  253. }
  254. /**
  255. @brief Initialize a child process.
  256. @param child Pointer to the prefork_child representing the new child process.
  257. @return Zero if successful, or -1 if not.
  258. Called only by child processes. Actions:
  259. - Connect to one or more cache servers
  260. - Reconfigure logger, if necessary
  261. - Discard parent's Jabber connection and open a new one
  262. - Dynamically call an application-specific initialization routine
  263. - Change the command line as reported by ps
  264. */
  265. static int prefork_child_init_hook( prefork_child* child ) {
  266. if( !child ) return -1;
  267. osrfLogDebug( OSRF_LOG_MARK, "Child init hook for child %d", child->pid );
  268. // Connect to cache server(s).
  269. osrfSystemInitCache();
  270. char* resc = va_list_to_string( "%s_drone", child->appname );
  271. // If we're a source-client, tell the logger now that we're a new process.
  272. char* isclient = osrfConfigGetValue( NULL, "/client" );
  273. if( isclient && !strcasecmp( isclient,"true" ))
  274. osrfLogSetIsClient( 1 );
  275. free( isclient );
  276. // Remove traces of our parent's socket connection so we can have our own.
  277. osrfSystemIgnoreTransportClient();
  278. // Connect to Jabber
  279. if( !osrfSystemBootstrapClientResc( NULL, NULL, resc )) {
  280. osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()" );
  281. free( resc );
  282. return -1;
  283. }
  284. free( resc );
  285. // Dynamically call the application-specific initialization function
  286. // from a previously loaded shared library.
  287. if( ! osrfAppRunChildInit( child->appname )) {
  288. osrfLogDebug( OSRF_LOG_MARK, "Prefork child_init succeeded\n" );
  289. } else {
  290. osrfLogError( OSRF_LOG_MARK, "Prefork child_init failed\n" );
  291. return -1;
  292. }
  293. // Change the command line as reported by ps
  294. set_proc_title( "OpenSRF Drone [%s]", child->appname );
  295. return 0;
  296. }
  297. /**
  298. @brief Respond to a client request forwarded by the parent.
  299. @param child Pointer to the state of the child process.
  300. @param data Pointer to the raw XMPP message received from the parent.
  301. @return 0 on success; non-zero means that the child process should clean itself up
  302. and terminate immediately, presumably due to a fatal error condition.
  303. Called only by a child process.
  304. */
  305. static int prefork_child_process_request( prefork_child* child, char* data ) {
  306. if( !child ) return 0;
  307. transport_client* client = osrfSystemGetTransportClient();
  308. // Make sure that we're still connected to Jabber; reconnect if necessary.
  309. if( !client_connected( client )) {
  310. osrfSystemIgnoreTransportClient();
  311. osrfLogWarning( OSRF_LOG_MARK, "Reconnecting child to opensrf after disconnect..." );
  312. if( !osrf_system_bootstrap_client( NULL, NULL )) {
  313. osrfLogError( OSRF_LOG_MARK,
  314. "Unable to bootstrap client in prefork_child_process_request()" );
  315. sleep( 1 );
  316. osrf_prefork_child_exit( child );
  317. }
  318. }
  319. // Construct the message from the xml.
  320. transport_message* msg = new_message_from_xml( data );
  321. // Respond to the transport message. This is where method calls are buried.
  322. osrfAppSession* session = osrf_stack_transport_handler( msg, child->appname );
  323. if( !session )
  324. return 0;
  325. int rc = session->panic;
  326. if( rc ) {
  327. osrfLogWarning( OSRF_LOG_MARK,
  328. "Drone for session %s terminating immediately", session->session_id );
  329. osrfAppSessionFree( session );
  330. return rc;
  331. }
  332. if( session->stateless && session->state != OSRF_SESSION_CONNECTED ) {
  333. // We're no longer connected to the client, which presumably means that
  334. // we're done with this request. Bail out.
  335. osrfAppSessionFree( session );
  336. return rc;
  337. }
  338. // If we get this far, then the client has opened an application connection so that it
  339. // can send multiple requests directly to the same server drone, bypassing the router
  340. // and the listener. For example, it may need to do a database transaction, requiring
  341. // multiple method calls within the same database session.
  342. // Hence we go into a loop, responding to successive requests from the same client, until
  343. // either the client disconnects or an error occurs.
  344. osrfLogDebug( OSRF_LOG_MARK, "Entering keepalive loop for session %s", session->session_id );
  345. int keepalive = child->keepalive;
  346. int retval;
  347. int recvd;
  348. time_t start;
  349. time_t end;
  350. while( 1 ) {
  351. // Respond to any input messages. This is where the method calls are buried.
  352. osrfLogDebug( OSRF_LOG_MARK,
  353. "osrf_prefork calling queue_wait [%d] in keepalive loop", keepalive );
  354. start = time( NULL );
  355. retval = osrf_app_session_queue_wait( session, keepalive, &recvd );
  356. end = time( NULL );
  357. osrfLogDebug( OSRF_LOG_MARK, "Data received == %d", recvd );
  358. // Now we check a number of possible reasons to exit the loop.
  359. // If the method call decided to terminate immediately,
  360. // note that for future reference.
  361. if( session->panic )
  362. rc = 1;
  363. // If an error occurred when we tried to service the request, exit the loop.
  364. if( retval ) {
  365. osrfLogError( OSRF_LOG_MARK, "queue-wait returned non-success %d", retval );
  366. break;
  367. }
  368. // If the client disconnected, exit the loop.
  369. if( session->state != OSRF_SESSION_CONNECTED )
  370. break;
  371. // If we timed out while waiting for a request, exit the loop.
  372. if( !recvd && (end - start) >= keepalive ) {
  373. osrfLogInfo( OSRF_LOG_MARK,
  374. "No request was received in %d seconds, exiting stateful session", keepalive );
  375. osrfAppSessionStatus(
  376. session,
  377. OSRF_STATUS_TIMEOUT,
  378. "osrfConnectStatus",
  379. 0, "Disconnected on timeout" );
  380. break;
  381. }
  382. // If the child process has decided to terminate immediately, exit the loop.
  383. if( rc )
  384. break;
  385. }
  386. osrfLogDebug( OSRF_LOG_MARK, "Exiting keepalive loop for session %s", session->session_id );
  387. osrfAppSessionFree( session );
  388. return rc;
  389. }
  390. /**
  391. @brief Partially initialize a prefork_simple provided by the caller.
  392. @param prefork Pointer to a a raw prefork_simple to be initialized.
  393. @param client Pointer to a transport_client (connection to Jabber).
  394. @param max_requests The maximum number of requests that a child process may service
  395. before terminating.
  396. @param min_children Minimum number of child processes to maintain.
  397. @param max_children Maximum number of child processes to maintain.
  398. @return 0 if successful, or 1 if not (due to invalid parameters).
  399. */
  400. static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
  401. int max_requests, int min_children, int max_children ) {
  402. if( min_children > max_children ) {
  403. osrfLogError( OSRF_LOG_MARK, "min_children (%d) is greater "
  404. "than max_children (%d)", min_children, max_children );
  405. return 1;
  406. }
  407. if( max_children > ABS_MAX_CHILDREN ) {
  408. osrfLogError( OSRF_LOG_MARK, "max_children (%d) is greater than ABS_MAX_CHILDREN (%d)",
  409. max_children, ABS_MAX_CHILDREN );
  410. return 1;
  411. }
  412. osrfLogInfo( OSRF_LOG_MARK, "Prefork launching child with max_request=%d,"
  413. "min_children=%d, max_children=%d", max_requests, min_children, max_children );
  414. /* flesh out the struct */
  415. prefork->max_requests = max_requests;
  416. prefork->min_children = min_children;
  417. prefork->max_children = max_children;
  418. prefork->fd = 0;
  419. prefork->data_to_child = 0;
  420. prefork->data_to_parent = 0;
  421. prefork->current_num_children = 0;
  422. prefork->keepalive = 0;
  423. prefork->appname = NULL;
  424. prefork->first_child = NULL;
  425. prefork->idle_list = NULL;
  426. prefork->free_list = NULL;
  427. prefork->connection = client;
  428. return 0;
  429. }
  430. /**
  431. @brief Spawn a new child process and put it in the idle list.
  432. @param forker Pointer to the prefork_simple that will own the process.
  433. @return Pointer to the new prefork_child, or not at all.
  434. Spawn a new child process. Create a prefork_child for it and put it in the idle list.
  435. After forking, the parent returns a pointer to the new prefork_child. The child
  436. services its quota of requests and then terminates without returning.
  437. */
  438. static prefork_child* launch_child( prefork_simple* forker ) {
  439. pid_t pid;
  440. int data_fd[2];
  441. int status_fd[2];
  442. // Set up the data and status pipes
  443. if( pipe( data_fd ) < 0 ) { /* build the data pipe*/
  444. osrfLogError( OSRF_LOG_MARK, "Pipe making error" );
  445. return NULL;
  446. }
  447. if( pipe( status_fd ) < 0 ) {/* build the status pipe */
  448. osrfLogError( OSRF_LOG_MARK, "Pipe making error" );
  449. close( data_fd[1] );
  450. close( data_fd[0] );
  451. return NULL;
  452. }
  453. osrfLogInternal( OSRF_LOG_MARK, "Pipes: %d %d %d %d",
  454. data_fd[0], data_fd[1], status_fd[0], status_fd[1] );
  455. // Create and initialize a prefork_child for the new process
  456. prefork_child* child = prefork_child_init( forker, data_fd[0],
  457. data_fd[1], status_fd[0], status_fd[1] );
  458. if( (pid=fork()) < 0 ) {
  459. osrfLogError( OSRF_LOG_MARK, "Forking Error" );
  460. prefork_child_free( forker, child );
  461. return NULL;
  462. }
  463. // Add the new child to the head of the idle list
  464. child->next = forker->idle_list;
  465. forker->idle_list = child;
  466. if( pid > 0 ) { /* parent */
  467. signal( SIGCHLD, sigchld_handler );
  468. ( forker->current_num_children )++;
  469. child->pid = pid;
  470. osrfLogDebug( OSRF_LOG_MARK, "Parent launched %d", pid );
  471. /* *no* child pipe FD's can be closed or the parent will re-use fd's that
  472. the children are currently using */
  473. return child;
  474. }
  475. else { /* child */
  476. osrfLogInternal( OSRF_LOG_MARK,
  477. "I am new child with read_data_fd = %d and write_status_fd = %d",
  478. child->read_data_fd, child->write_status_fd );
  479. child->pid = getpid();
  480. close( child->write_data_fd );
  481. close( child->read_status_fd );
  482. /* do the initing */
  483. if( prefork_child_init_hook( child ) == -1 ) {
  484. osrfLogError( OSRF_LOG_MARK,
  485. "Forker child going away because we could not connect to OpenSRF..." );
  486. osrf_prefork_child_exit( child );
  487. }
  488. prefork_child_wait( child ); // Should exit without returning
  489. osrf_prefork_child_exit( child ); // Just to be sure
  490. return NULL; // Unreachable, but it keeps the compiler happy
  491. }
  492. }
  493. /**
  494. @brief Terminate a child process.
  495. @param child Pointer to the prefork_child representing the child process (not used).
  496. Called only by child processes. Dynamically call an application-specific shutdown
  497. function from a previously loaded shared library; then exit.
  498. */
  499. static void osrf_prefork_child_exit( prefork_child* child ) {
  500. osrfAppRunExitCode();
  501. exit( 0 );
  502. }
  503. /**
  504. @brief Launch all the child processes, putting them in the idle list.
  505. @param forker Pointer to the prefork_simple that will own the children.
  506. Called only by the parent process (in order to become a parent).
  507. */
  508. static void prefork_launch_children( prefork_simple* forker ) {
  509. if( !forker ) return;
  510. int c = 0;
  511. while( c++ < forker->min_children )
  512. launch_child( forker );
  513. }
  514. /**
  515. @brief Signal handler for SIGCHLD: note that a child process has terminated.
  516. @param sig The value of the trapped signal; always SIGCHLD.
  517. Set a boolean to be checked later.
  518. */
  519. static void sigchld_handler( int sig ) {
  520. signal( SIGCHLD, sigchld_handler );
  521. child_dead = 1;
  522. }
  523. /**
  524. @brief Replenish the collection of child processes, after one has terminated.
  525. @param forker Pointer to the prefork_simple that manages the child processes.
  526. The parent calls this function when it notices (via a signal handler) that
  527. a child process has died.
  528. Wait on the dead children so that they won't be zombies. Spawn new ones as needed
  529. to maintain at least a minimum number.
  530. */
  531. void reap_children( prefork_simple* forker ) {
  532. pid_t child_pid;
  533. // Reset our boolean so that we can detect any further terminations.
  534. child_dead = 0;
  535. // Bury the children so that they won't be zombies. WNOHANG means that waitpid() returns
  536. // immediately if there are no waitable children, instead of waiting for more to die.
  537. // Ignore the return code of the child. We don't do an autopsy.
  538. while( (child_pid = waitpid( -1, NULL, WNOHANG )) > 0 ) {
  539. --forker->current_num_children;
  540. del_prefork_child( forker, child_pid );
  541. }
  542. // Spawn more children as needed.
  543. while( forker->current_num_children < forker->min_children )
  544. launch_child( forker );
  545. }
  546. /**
  547. @brief Read transport_messages and dispatch them to child processes for servicing.
  548. @param forker Pointer to the prefork_simple that manages the child processes.
  549. This is the main loop of the parent process, and once entered, does not exit.
  550. For each usable transport_message received: look for an idle child to service it. If
  551. no idle children are available, either spawn a new one or, if we've already spawned the
  552. maximum number of children, wait for one to become available. Once a child is available
  553. by whatever means, write an XML version of the input message, to a pipe designated for
  554. use by that child.
  555. */
  556. static void prefork_run( prefork_simple* forker ) {
  557. if( NULL == forker->idle_list )
  558. return; // No available children, and we haven't even started yet
  559. transport_message* cur_msg = NULL;
  560. while( 1 ) {
  561. if( forker->first_child == NULL && forker->idle_list == NULL ) {/* no more children */
  562. osrfLogWarning( OSRF_LOG_MARK, "No more children..." );
  563. return;
  564. }
  565. // Wait indefinitely for an input message
  566. osrfLogDebug( OSRF_LOG_MARK, "Forker going into wait for data..." );
  567. cur_msg = client_recv( forker->connection, -1 );
  568. if( cur_msg == NULL )
  569. continue; // Error? Interrupted by a signal? Try again...
  570. message_prepare_xml( cur_msg );
  571. const char* msg_data = cur_msg->msg_xml;
  572. if( ! msg_data || ! *msg_data ) {
  573. osrfLogWarning( OSRF_LOG_MARK, "Received % message from %s, thread %",
  574. (msg_data ? "empty" : "NULL"), cur_msg->sender, cur_msg->thread );
  575. message_free( cur_msg );
  576. continue; // Message not usable; go on to the next one.
  577. }
  578. int honored = 0; /* will be set to true when we service the request */
  579. int no_recheck = 0;
  580. while( ! honored ) {
  581. if( !no_recheck ) {
  582. if(check_children( forker, 0 ) < 0) {
  583. continue; // check failed, try again
  584. }
  585. }
  586. no_recheck = 0;
  587. osrfLogDebug( OSRF_LOG_MARK, "Server received inbound data" );
  588. prefork_child* cur_child = NULL;
  589. // Look for an available child in the idle list. Since the idle list operates
  590. // as a stack, the child we get is the one that was most recently active, or
  591. // most recently spawned. That means it's the one most likely still to be in
  592. // physical memory, and the one least likely to have to be swapped in.
  593. while( forker->idle_list ) {
  594. osrfLogDebug( OSRF_LOG_MARK, "Looking for idle child" );
  595. // Grab the prefork_child at the head of the idle list
  596. cur_child = forker->idle_list;
  597. forker->idle_list = cur_child->next;
  598. cur_child->next = NULL;
  599. osrfLogInternal( OSRF_LOG_MARK,
  600. "Searching for available child. cur_child->pid = %d", cur_child->pid );
  601. osrfLogInternal( OSRF_LOG_MARK, "Current num children %d",
  602. forker->current_num_children );
  603. osrfLogDebug( OSRF_LOG_MARK, "forker sending data to %d", cur_child->pid );
  604. osrfLogInternal( OSRF_LOG_MARK, "Writing to child fd %d",
  605. cur_child->write_data_fd );
  606. int written = write( cur_child->write_data_fd, msg_data, strlen( msg_data ) + 1 );
  607. if( written < 0 ) {
  608. // This child appears to be dead or unusable. Discard it.
  609. osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d: %s",
  610. errno, strerror( errno ));
  611. kill( cur_child->pid, SIGKILL );
  612. del_prefork_child( forker, cur_child->pid );
  613. continue;
  614. }
  615. add_prefork_child( forker, cur_child ); // Add it to active list
  616. honored = 1;
  617. break;
  618. }
  619. /* if none available, add a new child if we can */
  620. if( ! honored ) {
  621. osrfLogDebug( OSRF_LOG_MARK, "Not enough children, attempting to add..." );
  622. if( forker->current_num_children < forker->max_children ) {
  623. osrfLogDebug( OSRF_LOG_MARK, "Launching new child with current_num = %d",
  624. forker->current_num_children );
  625. launch_child( forker ); // Put a new child into the idle list
  626. if( forker->idle_list ) {
  627. // Take the new child from the idle list
  628. prefork_child* new_child = forker->idle_list;
  629. forker->idle_list = new_child->next;
  630. new_child->next = NULL;
  631. osrfLogDebug( OSRF_LOG_MARK, "Writing to new child fd %d : pid %d",
  632. new_child->write_data_fd, new_child->pid );
  633. int written = write(
  634. new_child->write_data_fd, msg_data, strlen( msg_data ) + 1 );
  635. if( written < 0 ) {
  636. // This child appears to be dead or unusable. Discard it.
  637. osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d: %s",
  638. errno, strerror( errno ));
  639. kill( cur_child->pid, SIGKILL );
  640. del_prefork_child( forker, cur_child->pid );
  641. } else {
  642. add_prefork_child( forker, new_child );
  643. honored = 1;
  644. }
  645. }
  646. } else {
  647. osrfLogWarning( OSRF_LOG_MARK, "Could not launch a new child as %d children "
  648. "were already running; consider increasing max_children for this "
  649. "application higher than %d in the OpenSRF configuration if this "
  650. "message occurs frequently",
  651. forker->current_num_children, forker->max_children );
  652. }
  653. }
  654. if( !honored ) {
  655. osrfLogWarning( OSRF_LOG_MARK, "No children available, waiting..." );
  656. if( check_children( forker, 1 ) >= 0 ) {
  657. // Tell the loop not to call check_children again, since we just successfully called it
  658. no_recheck = 1;
  659. }
  660. }
  661. if( child_dead )
  662. reap_children( forker );
  663. } // end while( ! honored )
  664. message_free( cur_msg );
  665. } /* end top level listen loop */
  666. }
  667. /**
  668. @brief See if any children have become available.
  669. @param forker Pointer to the prefork_simple that owns the children.
  670. @param forever Boolean: true if we should wait indefinitely.
  671. @return 0 or greater if successful, -1 on select error/interrupt
  672. Call select() for all the children in the active list. Read each active file
  673. descriptor and move the corresponding child to the idle list.
  674. If @a forever is true, wait indefinitely for input. Otherwise return immediately if
  675. there are no active file descriptors.
  676. */
  677. static int check_children( prefork_simple* forker, int forever ) {
  678. if( child_dead )
  679. reap_children( forker );
  680. if( NULL == forker->first_child ) {
  681. // If forever is true, then we're here because we've run out of idle
  682. // processes, so there should be some active ones around.
  683. // If forever is false, then the children may all be idle, and that's okay.
  684. if( forever )
  685. osrfLogError( OSRF_LOG_MARK, "No active child processes to check" );
  686. return 0;
  687. }
  688. int select_ret;
  689. fd_set read_set;
  690. FD_ZERO( &read_set );
  691. int max_fd = 0;
  692. int n;
  693. // Prepare to select() on pipes from all the active children
  694. prefork_child* cur_child = forker->first_child;
  695. do {
  696. if( cur_child->read_status_fd > max_fd )
  697. max_fd = cur_child->read_status_fd;
  698. FD_SET( cur_child->read_status_fd, &read_set );
  699. cur_child = cur_child->next;
  700. } while( cur_child != forker->first_child );
  701. FD_CLR( 0, &read_set ); /* just to be sure */
  702. if( forever ) {
  703. osrfLogWarning( OSRF_LOG_MARK,
  704. "We have no children available - waiting for one to show up..." );
  705. if( (select_ret=select( max_fd + 1, &read_set, NULL, NULL, NULL )) == -1 ) {
  706. osrfLogWarning( OSRF_LOG_MARK, "Select returned error %d on check_children: %s",
  707. errno, strerror( errno ));
  708. }
  709. osrfLogInfo( OSRF_LOG_MARK,
  710. "select() completed after waiting on children to become available" );
  711. } else {
  712. struct timeval tv;
  713. tv.tv_sec = 0;
  714. tv.tv_usec = 0;
  715. if( (select_ret=select( max_fd + 1, &read_set, NULL, NULL, &tv )) == -1 ) {
  716. osrfLogWarning( OSRF_LOG_MARK, "Select returned error %d on check_children: %s",
  717. errno, strerror( errno ));
  718. }
  719. }
  720. if( select_ret <= 0 ) // we're done here
  721. return select_ret;
  722. // Check each child in the active list.
  723. // If it has responded, move it to the idle list.
  724. cur_child = forker->first_child;
  725. prefork_child* next_child = NULL;
  726. int num_handled = 0;
  727. do {
  728. next_child = cur_child->next;
  729. if( FD_ISSET( cur_child->read_status_fd, &read_set )) {
  730. osrfLogDebug( OSRF_LOG_MARK,
  731. "Server received status from a child %d", cur_child->pid );
  732. num_handled++;
  733. /* now suck off the data */
  734. char buf[64];
  735. if( (n=read( cur_child->read_status_fd, buf, sizeof( buf ) - 1 )) < 0 ) {
  736. osrfLogWarning( OSRF_LOG_MARK,
  737. "Read error after select in child status read with errno %d: %s",
  738. errno, strerror( errno ));
  739. }
  740. else {
  741. buf[n] = '\0';
  742. osrfLogDebug( OSRF_LOG_MARK, "Read %d bytes from status buffer: %s", n, buf );
  743. }
  744. // Remove the child from the active list
  745. if( forker->first_child == cur_child ) {
  746. if( cur_child->next == cur_child )
  747. forker->first_child = NULL; // only child in the active list
  748. else
  749. forker->first_child = cur_child->next;
  750. }
  751. cur_child->next->prev = cur_child->prev;
  752. cur_child->prev->next = cur_child->next;
  753. // Add it to the idle list
  754. cur_child->prev = NULL;
  755. cur_child->next = forker->idle_list;
  756. forker->idle_list = cur_child;
  757. }
  758. cur_child = next_child;
  759. } while( forker->first_child && forker->first_child != next_child );
  760. return select_ret;
  761. }
  762. /**
  763. @brief Service up a set maximum number of requests; then shut down.
  764. @param child Pointer to the prefork_child representing the child process.
  765. Called only by child process.
  766. Enter a loop, for up to max_requests iterations. On each iteration:
  767. - Wait indefinitely for a request from the parent.
  768. - Service the request.
  769. - Increment a counter. If the limit hasn't been reached, notify the parent that you
  770. are available for another request.
  771. After exiting the loop, shut down and terminate the process.
  772. */
  773. static void prefork_child_wait( prefork_child* child ) {
  774. int i,n;
  775. growing_buffer* gbuf = buffer_init( READ_BUFSIZE );
  776. char buf[READ_BUFSIZE];
  777. for( i = 0; i < child->max_requests; i++ ) {
  778. n = -1;
  779. int gotdata = 0; // boolean; set to true if we get data
  780. clr_fl( child->read_data_fd, O_NONBLOCK );
  781. // Read a request from the parent, via a pipe, into a growing_buffer.
  782. while( (n = read( child->read_data_fd, buf, READ_BUFSIZE-1 )) > 0 ) {
  783. buf[n] = '\0';
  784. osrfLogDebug( OSRF_LOG_MARK, "Prefork child read %d bytes of data", n );
  785. if( !gotdata ) {
  786. set_fl( child->read_data_fd, O_NONBLOCK );
  787. gotdata = 1;
  788. }
  789. buffer_add_n( gbuf, buf, n );
  790. }
  791. if( errno == EAGAIN )
  792. n = 0;
  793. if( errno == EPIPE ) {
  794. osrfLogDebug( OSRF_LOG_MARK, "C child attempted read on broken pipe, exiting..." );
  795. break;
  796. }
  797. int terminate_now = 0; // Boolean
  798. if( n < 0 ) {
  799. osrfLogWarning( OSRF_LOG_MARK,
  800. "Prefork child read returned error with errno %d", errno );
  801. break;
  802. } else if( gotdata ) {
  803. // Process the request
  804. osrfLogDebug( OSRF_LOG_MARK, "Prefork child got a request.. processing.." );
  805. terminate_now = prefork_child_process_request( child, gbuf->buf );
  806. buffer_reset( gbuf );
  807. }
  808. if( terminate_now ) {
  809. // We're terminating prematurely -- presumably due to a fatal error condition.
  810. osrfLogWarning( OSRF_LOG_MARK, "Prefork child terminating abruptly" );
  811. break;
  812. }
  813. if( i < child->max_requests - 1 ) {
  814. // Report back to the parent for another request.
  815. size_t msg_len = 9;
  816. ssize_t len = write(
  817. child->write_status_fd, "available" /*less than 64 bytes*/, msg_len );
  818. if( len != msg_len ) {
  819. osrfLogError( OSRF_LOG_MARK,
  820. "Drone terminating: unable to notify listener of availability: %s",
  821. strerror( errno ));
  822. buffer_free( gbuf );
  823. osrf_prefork_child_exit( child );
  824. }
  825. }
  826. }
  827. buffer_free( gbuf );
  828. osrfLogDebug( OSRF_LOG_MARK, "Child with max-requests=%d, num-served=%d exiting...[%ld]",
  829. child->max_requests, i, (long) getpid());
  830. osrf_prefork_child_exit( child );
  831. }
  832. /**
  833. @brief Add a prefork_child to the end of the active list.
  834. @param forker Pointer to the prefork_simple that owns the list.
  835. @param child Pointer to the prefork_child to be added.
  836. */
  837. static void add_prefork_child( prefork_simple* forker, prefork_child* child ) {
  838. if( forker->first_child == NULL ) {
  839. // Simplest case: list is initially empty.
  840. forker->first_child = child;
  841. child->next = child;
  842. child->prev = child;
  843. } else {
  844. // Find the last node in the circular list.
  845. prefork_child* last_child = forker->first_child->prev;
  846. // Insert the new child between the last and first children.
  847. last_child->next = child;
  848. child->prev = last_child;
  849. child->next = forker->first_child;
  850. forker->first_child->prev = child;
  851. }
  852. }
  853. /**
  854. @brief Delete and destroy a dead child from our list.
  855. @param forker Pointer to the prefork_simple that owns the dead child.
  856. @param pid Process ID of the dead child.
  857. Look for the dead child first in the list of active children. If you don't find it
  858. there, look in the list of idle children. If you find it, remove it from whichever
  859. list it's on, and destroy it.
  860. */
  861. static void del_prefork_child( prefork_simple* forker, pid_t pid ) {
  862. osrfLogDebug( OSRF_LOG_MARK, "Deleting Child: %d", pid );
  863. prefork_child* cur_child = NULL;
  864. // Look first in the active list
  865. if( forker->first_child ) {
  866. cur_child = forker->first_child; /* current pointer */
  867. while( cur_child->pid != pid && cur_child->next != forker->first_child )
  868. cur_child = cur_child->next;
  869. if( cur_child->pid == pid ) {
  870. // We found the right node. Remove it from the list.
  871. if( cur_child->next == cur_child )
  872. forker->first_child = NULL; // only child in the list
  873. else {
  874. if( forker->first_child == cur_child )
  875. forker->first_child = cur_child->next; // Reseat forker->first_child
  876. // Stitch the adjacent nodes together
  877. cur_child->prev->next = cur_child->next;
  878. cur_child->next->prev = cur_child->prev;
  879. }
  880. } else
  881. cur_child = NULL; // Didn't find it in the active list
  882. }
  883. if( ! cur_child ) {
  884. // Maybe it's in the idle list. This can happen if, for example,
  885. // a child is killed by a signal while it's between requests.
  886. prefork_child* prev = NULL;
  887. cur_child = forker->idle_list;
  888. while( cur_child && cur_child->pid != pid ) {
  889. prev = cur_child;
  890. cur_child = cur_child->next;
  891. }
  892. if( cur_child ) {
  893. // Detach from the list
  894. if( prev )
  895. prev->next = cur_child->next;
  896. else
  897. forker->idle_list = cur_child->next;
  898. } // else we can't find it
  899. }
  900. // If we found the node, destroy it.
  901. if( cur_child )
  902. prefork_child_free( forker, cur_child );
  903. }
  904. /**
  905. @brief Create and initialize a prefork_child.
  906. @param forker Pointer to the prefork_simple that will own the prefork_child.
  907. @param read_data_fd Used by child to read request from parent.
  908. @param write_data_fd Used by parent to write request to child.
  909. @param read_status_fd Used by parent to read status from child.
  910. @param write_status_fd Used by child to write status to parent.
  911. @return Pointer to the newly created prefork_child.
  912. The calling code is responsible for freeing the prefork_child by calling
  913. prefork_child_free().
  914. */
  915. static prefork_child* prefork_child_init( prefork_simple* forker,
  916. int read_data_fd, int write_data_fd,
  917. int read_status_fd, int write_status_fd ) {
  918. // Allocate a prefork_child -- from the free list if possible, or from
  919. // the heap if necessary. The free list is a non-circular, singly-linked list.
  920. prefork_child* child;
  921. if( forker->free_list ) {
  922. child = forker->free_list;
  923. forker->free_list = child->next;
  924. } else
  925. child = safe_malloc( sizeof( prefork_child ));
  926. child->pid = 0;
  927. child->read_data_fd = read_data_fd;
  928. child->write_data_fd = write_data_fd;
  929. child->read_status_fd = read_status_fd;
  930. child->write_status_fd = write_status_fd;
  931. child->max_requests = forker->max_requests;
  932. child->appname = forker->appname; // We don't make a separate copy
  933. child->keepalive = forker->keepalive;
  934. child->next = NULL;
  935. child->prev = NULL;
  936. return child;
  937. }
  938. /**
  939. @brief Terminate all child processes and clear out a prefork_simple.
  940. @param prefork Pointer to the prefork_simple to be cleared out.
  941. We do not deallocate the prefork_simple itself, just its contents.
  942. */
  943. static void prefork_clear( prefork_simple* prefork ) {
  944. // Kill all the active children, and move their prefork_child nodes to the free list.
  945. while( prefork->first_child ) {
  946. kill( prefork->first_child->pid, SIGKILL );
  947. del_prefork_child( prefork, prefork->first_child->pid );
  948. }
  949. // Kill all the idle prefork children, close their file
  950. // descriptors, and move them to the free list.
  951. prefork_child* child = prefork->idle_list;
  952. prefork->idle_list = NULL;
  953. while( child ) {
  954. prefork_child* temp = child->next;
  955. kill( child->pid, SIGKILL );
  956. prefork_child_free( prefork, child );
  957. child = temp;
  958. }
  959. //prefork->current_num_children = 0;
  960. // Physically free the free list of prefork_children.
  961. child = prefork->free_list;
  962. prefork->free_list = NULL;
  963. while( child ) {
  964. prefork_child* temp = child->next;
  965. free( child );
  966. child = temp;
  967. }
  968. // Close the Jabber connection
  969. client_free( prefork->connection );
  970. prefork->connection = NULL;
  971. // After giving the child processes a second to terminate, wait on them so that they
  972. // don't become zombies. We don't wait indefinitely, so it's possible that some
  973. // children will survive a bit longer.
  974. sleep( 1 );
  975. while( (waitpid( -1, NULL, WNOHANG )) > 0 ) {
  976. --prefork->current_num_children;
  977. }
  978. free( prefork->appname );
  979. prefork->appname = NULL;
  980. }
  981. /**
  982. @brief Destroy and deallocate a prefork_child.
  983. @param forker Pointer to the prefork_simple that owns the prefork_child.
  984. @param child Pointer to the prefork_child to be destroyed.
  985. */
  986. static void prefork_child_free( prefork_simple* forker, prefork_child* child ) {
  987. close( child->read_data_fd );
  988. close( child->write_data_fd );
  989. close( child->read_status_fd );
  990. close( child->write_status_fd );
  991. // Stick the prefork_child in a free list for potential reuse. This is a
  992. // non-circular, singly linked list.
  993. child->prev = NULL;
  994. child->next = forker->free_list;
  995. forker->free_list = child;
  996. }