PageRenderTime 56ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/src/9vx/a/sysproc.c

https://bitbucket.org/rminnich/vx32/
C | 1139 lines | 820 code | 134 blank | 185 comment | 170 complexity | a9e667cec4e768e6a22873c037c8b9d6 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1
  1. #define WANT_M
  2. #include "u.h"
  3. #include "tos.h"
  4. #include "lib.h"
  5. #include "mem.h"
  6. #include "dat.h"
  7. #include "fns.h"
  8. #include "error.h"
  9. #include "a.out.h"
  10. int shargs(char*, int, char**);
  11. extern void checkpages(void);
  12. extern void checkpagerefs(void);
  13. long
  14. sysr1(uint32 *x)
  15. {
  16. vx32sysr1();
  17. return 0;
  18. }
  19. long
  20. sysrfork(uint32 *arg)
  21. {
  22. Proc *p;
  23. int n, i;
  24. Fgrp *ofg;
  25. Pgrp *opg;
  26. Rgrp *org;
  27. Egrp *oeg;
  28. ulong pid, flag;
  29. Mach *wm;
  30. flag = arg[0];
  31. /* Check flags before we commit */
  32. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  33. error(Ebadarg);
  34. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  35. error(Ebadarg);
  36. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  37. error(Ebadarg);
  38. if((flag&RFPROC) == 0) {
  39. if(flag & (RFMEM|RFNOWAIT))
  40. error(Ebadarg);
  41. if(flag & (RFFDG|RFCFDG)) {
  42. ofg = up->fgrp;
  43. if(flag & RFFDG)
  44. up->fgrp = dupfgrp(ofg);
  45. else
  46. up->fgrp = dupfgrp(nil);
  47. closefgrp(ofg);
  48. }
  49. if(flag & (RFNAMEG|RFCNAMEG)) {
  50. opg = up->pgrp;
  51. up->pgrp = newpgrp();
  52. if(flag & RFNAMEG)
  53. pgrpcpy(up->pgrp, opg);
  54. /* inherit noattach */
  55. up->pgrp->noattach = opg->noattach;
  56. closepgrp(opg);
  57. }
  58. if(flag & RFNOMNT)
  59. up->pgrp->noattach = 1;
  60. if(flag & RFREND) {
  61. org = up->rgrp;
  62. up->rgrp = newrgrp();
  63. closergrp(org);
  64. }
  65. if(flag & (RFENVG|RFCENVG)) {
  66. oeg = up->egrp;
  67. up->egrp = smalloc(sizeof(Egrp));
  68. up->egrp->ref.ref = 1;
  69. if(flag & RFENVG)
  70. envcpy(up->egrp, oeg);
  71. closeegrp(oeg);
  72. }
  73. if(flag & RFNOTEG)
  74. up->noteid = incref(&noteidalloc);
  75. return 0;
  76. }
  77. p = newproc();
  78. p->fpsave = up->fpsave;
  79. p->scallnr = up->scallnr;
  80. p->s = up->s;
  81. p->nerrlab = 0;
  82. p->slash = up->slash;
  83. p->dot = up->dot;
  84. incref(&p->dot->ref);
  85. memmove(p->note, up->note, sizeof(p->note));
  86. p->privatemem = up->privatemem;
  87. p->noswap = up->noswap;
  88. p->nnote = up->nnote;
  89. p->notified = 0;
  90. p->lastnote = up->lastnote;
  91. p->notify = up->notify;
  92. p->ureg = up->ureg;
  93. p->dbgreg = 0;
  94. /* Make a new set of memory segments */
  95. n = flag & RFMEM;
  96. qlock(&p->seglock);
  97. if(waserror()){
  98. qunlock(&p->seglock);
  99. nexterror();
  100. }
  101. for(i = 0; i < NSEG; i++)
  102. if(up->seg[i])
  103. p->seg[i] = dupseg(up->seg, i, n);
  104. qunlock(&p->seglock);
  105. poperror();
  106. /* File descriptors */
  107. if(flag & (RFFDG|RFCFDG)) {
  108. if(flag & RFFDG)
  109. p->fgrp = dupfgrp(up->fgrp);
  110. else
  111. p->fgrp = dupfgrp(nil);
  112. }
  113. else {
  114. p->fgrp = up->fgrp;
  115. incref(&p->fgrp->ref);
  116. }
  117. /* Process groups */
  118. if(flag & (RFNAMEG|RFCNAMEG)) {
  119. p->pgrp = newpgrp();
  120. if(flag & RFNAMEG)
  121. pgrpcpy(p->pgrp, up->pgrp);
  122. /* inherit noattach */
  123. p->pgrp->noattach = up->pgrp->noattach;
  124. }
  125. else {
  126. p->pgrp = up->pgrp;
  127. incref(&p->pgrp->ref);
  128. }
  129. if(flag & RFNOMNT)
  130. up->pgrp->noattach = 1;
  131. if(flag & RFREND)
  132. p->rgrp = newrgrp();
  133. else {
  134. incref(&up->rgrp->ref);
  135. p->rgrp = up->rgrp;
  136. }
  137. /* Environment group */
  138. if(flag & (RFENVG|RFCENVG)) {
  139. p->egrp = smalloc(sizeof(Egrp));
  140. p->egrp->ref.ref = 1;
  141. if(flag & RFENVG)
  142. envcpy(p->egrp, up->egrp);
  143. }
  144. else {
  145. p->egrp = up->egrp;
  146. incref(&p->egrp->ref);
  147. }
  148. p->hang = up->hang;
  149. p->procmode = up->procmode;
  150. /* Craft a return frame which will cause the child to pop out of
  151. * the scheduler in user mode with the return register zero
  152. */
  153. forkchild(p, up->dbgreg);
  154. p->parent = up;
  155. p->parentpid = up->pid;
  156. if(flag&RFNOWAIT)
  157. p->parentpid = 0;
  158. else {
  159. lock(&up->exl);
  160. up->nchild++;
  161. unlock(&up->exl);
  162. }
  163. if((flag&RFNOTEG) == 0)
  164. p->noteid = up->noteid;
  165. p->fpstate = up->fpstate;
  166. pid = p->pid;
  167. memset(p->time, 0, sizeof(p->time));
  168. p->time[TReal] = msec();
  169. kstrdup(&p->text, up->text);
  170. kstrdup(&p->user, up->user);
  171. /*
  172. * since the bss/data segments are now shareable,
  173. * any mmu info about this process is now stale
  174. * (i.e. has bad properties) and has to be discarded.
  175. */
  176. flushmmu();
  177. p->basepri = up->basepri;
  178. p->priority = up->basepri;
  179. p->fixedpri = up->fixedpri;
  180. p->mp = up->mp;
  181. wm = up->wired;
  182. if(wm)
  183. procwired(p, wm->machno);
  184. ready(p);
  185. sched();
  186. return pid;
  187. }
  188. static uint32
  189. l2be(uint32 l)
  190. {
  191. uchar *cp;
  192. cp = (uchar*)&l;
  193. return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
  194. }
  195. static char Echanged[] = "exec arguments changed underfoot";
  196. long
  197. sysexec(uint32 *arg)
  198. {
  199. char *volatile elem, *volatile file, *ufile;
  200. Chan *volatile tc;
  201. /*
  202. * Open the file, remembering the final element and the full name.
  203. */
  204. file = nil;
  205. elem = nil;
  206. tc = nil;
  207. if(waserror()){
  208. if(file)
  209. free(file);
  210. if(elem)
  211. free(elem);
  212. if(tc)
  213. cclose(tc);
  214. nexterror();
  215. }
  216. ufile = uvalidaddr(arg[0], 1, 0);
  217. file = validnamedup(ufile, 1);
  218. tc = namec(file, Aopen, OEXEC, 0);
  219. kstrdup((char**)&elem, up->genbuf);
  220. /*
  221. * Read the header. If it's a #!, fill in progarg[] with info and repeat.
  222. */
  223. int i, n, nprogarg;
  224. char *progarg[sizeof(Exec)/2+1];
  225. char *prog, *p;
  226. char line[sizeof(Exec)+1];
  227. Exec exec;
  228. nprogarg = 0;
  229. n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
  230. if(n < 2)
  231. error(Ebadexec);
  232. p = (char*)&exec;
  233. if(p[0] == '#' && p[1] == '!'){
  234. memmove(line, p, n);
  235. nprogarg = shargs(line, n, progarg);
  236. if(nprogarg == 0)
  237. error(Ebadexec);
  238. /* The original file becomes an extra arg after #! line */
  239. progarg[nprogarg++] = file;
  240. /*
  241. * Take the #! $0 as a file to open, and replace
  242. * $0 with the original path's name.
  243. */
  244. prog = progarg[0];
  245. progarg[0] = elem;
  246. cclose(tc);
  247. tc = nil; /* in case namec errors out */
  248. tc = namec(prog, Aopen, OEXEC, 0);
  249. n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
  250. if(n < 2)
  251. error(Ebadexec);
  252. }
  253. /*
  254. * #! has had its chance, now we need a real binary
  255. */
  256. uint32 magic, entry, text, etext, data, edata, bss, ebss;
  257. magic = l2be(exec.magic);
  258. if(n != sizeof(Exec) || l2be(exec.magic) != AOUT_MAGIC)
  259. error(Ebadexec);
  260. entry = l2be(exec.entry);
  261. text = l2be(exec.text);
  262. data = l2be(exec.data);
  263. bss = l2be(exec.bss);
  264. etext = ROUND(UTZERO+sizeof(Exec)+text, BY2PG);
  265. edata = ROUND(etext + data, BY2PG);
  266. ebss = ROUND(etext + data + bss, BY2PG);
  267. //iprint("entry %#lux text %#lux data %#lux bss %#lux\n", entry, text, data, bss);
  268. //iprint("etext %#lux edata %#lux ebss %#lux\n", etext, edata, ebss);
  269. if(entry < UTZERO+sizeof(Exec) || entry >= UTZERO+sizeof(Exec)+text)
  270. error(Ebadexec);
  271. /* many overflow possibilities */
  272. if(text >= USTKTOP || data >= USTKTOP || bss >= USTKTOP
  273. || etext >= USTKTOP || edata >= USTKTOP || ebss >= USTKTOP
  274. || etext >= USTKTOP || edata < etext || ebss < edata)
  275. error(Ebadexec);
  276. /*
  277. * Copy argv into new stack segment temporarily mapped elsewhere.
  278. * Be careful: multithreaded program could be changing argv during this.
  279. * Pass 1: count number of arguments, string bytes.
  280. */
  281. int nargv, strbytes;
  282. uint32 argp, ssize, spage;
  283. strbytes = 0;
  284. for(i=0; i<nprogarg; i++)
  285. strbytes += strlen(progarg[i]) + 1;
  286. argp = arg[1];
  287. for(nargv=0;; nargv++, argp += BY2WD){
  288. uint32 a;
  289. char *str;
  290. a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
  291. if(a == 0)
  292. break;
  293. str = uvalidaddr(a, 1, 0);
  294. n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
  295. if(nprogarg > 0 && nargv == 0)
  296. continue; /* going to skip argv[0] on #! */
  297. strbytes += n;
  298. }
  299. if(nargv == 0)
  300. error("exec missing argv");
  301. /*
  302. * Skip over argv[0] if using #!. Waited until now so that
  303. * string would still be checked for validity during loop.
  304. */
  305. if(nprogarg > 0){
  306. nargv--;
  307. arg[1] += BY2WD;
  308. }
  309. ssize = BY2WD*((nprogarg+nargv)+1) + ROUND(strbytes, BY2WD) + sizeof(Tos);
  310. /*
  311. * 8-byte align SP for those (e.g. sparc) that need it.
  312. * execregs() will subtract another 4 bytes for argc.
  313. */
  314. if((ssize+4) & 7)
  315. ssize += 4;
  316. spage = (ssize+(BY2PG-1)) >> PGSHIFT;
  317. /*
  318. * Pass 2: build the stack segment, being careful not to assume
  319. * that the counts from pass 1 are still valid.
  320. */
  321. if(spage > TSTKSIZ)
  322. error(Enovmem);
  323. qlock(&up->seglock);
  324. if(waserror()){
  325. if(up->seg[ESEG]){
  326. putseg(up->seg[ESEG]);
  327. up->seg[ESEG] = nil;
  328. }
  329. qunlock(&up->seglock);
  330. nexterror();
  331. }
  332. up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
  333. flushmmu(); // Needed for Plan 9 VX XXX really?
  334. /*
  335. * Top-of-stack structure.
  336. */
  337. uchar *uzero;
  338. uzero = up->pmmu.uzero;
  339. Tos *tos;
  340. uint32 utos;
  341. utos = USTKTOP - sizeof(Tos);
  342. tos = (Tos*)(uzero + utos + TSTKTOP - USTKTOP);
  343. tos->cyclefreq = m->cyclefreq;
  344. cycles((uvlong*)&tos->pcycles);
  345. tos->pcycles = -tos->pcycles;
  346. tos->kcycles = tos->pcycles;
  347. tos->clock = 0;
  348. /*
  349. * Argument pointers and strings, together.
  350. */
  351. char *bp, *ep;
  352. uint32 *targp;
  353. uint32 ustrp, uargp;
  354. ustrp = utos - ROUND(strbytes, BY2WD);
  355. uargp = ustrp - BY2WD*((nprogarg+nargv)+1);
  356. bp = (char*)(uzero + ustrp + TSTKTOP - USTKTOP);
  357. ep = bp + strbytes;
  358. p = bp;
  359. targp = (uint32*)(uzero + uargp + TSTKTOP - USTKTOP);
  360. /* #! args are trusted */
  361. for(i=0; i<nprogarg; i++){
  362. n = strlen(progarg[i]) + 1;
  363. if(n > ep - p)
  364. error(Echanged);
  365. memmove(p, progarg[i], n);
  366. p += n;
  367. *targp++ = ustrp;
  368. ustrp += n;
  369. }
  370. /* the rest are not */
  371. argp = arg[1];
  372. for(i=0; i<nargv; i++){
  373. uint32 a;
  374. char *str;
  375. a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
  376. argp += BY2WD;
  377. str = uvalidaddr(a, 1, 0);
  378. n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
  379. if(n > ep - p)
  380. error(Echanged);
  381. memmove(p, str, n);
  382. p += n;
  383. *targp++ = ustrp;
  384. ustrp += n;
  385. }
  386. if(*(uint32*)uvalidaddr(argp, BY2WD, 0) != 0)
  387. error(Echanged);
  388. *targp = 0;
  389. /*
  390. * But wait, there's more: prepare an arg copy for up->args
  391. * using the copy we just made in the temporary segment.
  392. */
  393. char *args;
  394. int nargs;
  395. n = p - bp; /* includes NUL on last arg, so must be > 0 */
  396. if(n <= 0) /* nprogarg+nargv > 0; checked above */
  397. error(Egreg);
  398. if(n > 128)
  399. n = 128;
  400. args = smalloc(n);
  401. if(waserror()){
  402. free(args);
  403. nexterror();
  404. }
  405. memmove(args, bp, n);
  406. /* find beginning of UTF character boundary to place final NUL */
  407. while(n > 0 && (args[n-1]&0xC0) == 0x80)
  408. n--;
  409. args[n-1] = '\0';
  410. nargs = n;
  411. /*
  412. * Now we're ready to commit.
  413. */
  414. free(up->text);
  415. up->text = elem;
  416. free(up->args);
  417. up->args = args;
  418. up->nargs = n;
  419. elem = nil;
  420. poperror(); /* args */
  421. /*
  422. * Free old memory. Special segments maintained across exec.
  423. */
  424. Segment *s;
  425. for(i = SSEG; i <= BSEG; i++) {
  426. putseg(up->seg[i]);
  427. up->seg[i] = nil; /* in case of error */
  428. }
  429. for(i = BSEG+1; i< NSEG; i++) {
  430. s = up->seg[i];
  431. if(s && (s->type&SG_CEXEC)) {
  432. putseg(s);
  433. up->seg[i] = nil;
  434. }
  435. }
  436. /*
  437. * Close on exec
  438. */
  439. Fgrp *f;
  440. f = up->fgrp;
  441. for(i=0; i<=f->maxfd; i++)
  442. fdclose(i, CCEXEC);
  443. /* Text. Shared. Attaches to cache image if possible */
  444. /* attachimage returns a locked cache image */
  445. Image *img;
  446. Segment *ts;
  447. img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (etext-UTZERO)>>PGSHIFT);
  448. ts = img->s;
  449. up->seg[TSEG] = ts;
  450. ts->flushme = 1;
  451. ts->fstart = 0;
  452. ts->flen = sizeof(Exec)+text;
  453. unlock(&img->ref.lk);
  454. /* Data. Shared. */
  455. s = newseg(SG_DATA, etext, (edata-etext)>>PGSHIFT);
  456. up->seg[DSEG] = s;
  457. /* Attached by hand */
  458. incref(&img->ref);
  459. s->image = img;
  460. s->fstart = ts->fstart+ts->flen;
  461. s->flen = data;
  462. /* BSS. Zero fill on demand */
  463. up->seg[BSEG] = newseg(SG_BSS, edata, (ebss-edata)>>PGSHIFT);
  464. /*
  465. * Move the stack
  466. */
  467. s = up->seg[ESEG];
  468. up->seg[ESEG] = 0;
  469. up->seg[SSEG] = s;
  470. qunlock(&up->seglock);
  471. poperror(); /* seglock */
  472. s->base = USTKTOP-USTKSIZE;
  473. s->top = USTKTOP;
  474. relocateseg(s, USTKTOP-TSTKTOP);
  475. /*
  476. * '/' processes are higher priority (hack to make /ip more responsive).
  477. */
  478. if(devtab[tc->type]->dc == L'/')
  479. up->basepri = PriRoot;
  480. up->priority = up->basepri;
  481. poperror(); /* tc, elem, file */
  482. cclose(tc);
  483. free(file);
  484. // elem is now up->text
  485. /*
  486. * At this point, the mmu contains info about the old address
  487. * space and needs to be flushed
  488. */
  489. flushmmu();
  490. qlock(&up->debug);
  491. up->nnote = 0;
  492. up->notify = 0;
  493. up->notified = 0;
  494. up->privatemem = 0;
  495. procsetup(up);
  496. qunlock(&up->debug);
  497. if(up->hang)
  498. up->procctl = Proc_stopme;
  499. return execregs(entry, USTKTOP - uargp, nprogarg+nargv);
  500. }
  501. int
  502. shargs(char *s, int n, char **ap)
  503. {
  504. int i;
  505. s += 2;
  506. n -= 2; /* skip #! */
  507. for(i=0; s[i]!='\n'; i++)
  508. if(i == n-1)
  509. return 0;
  510. s[i] = 0;
  511. *ap = 0;
  512. i = 0;
  513. for(;;) {
  514. while(*s==' ' || *s=='\t')
  515. s++;
  516. if(*s == 0)
  517. break;
  518. i++;
  519. *ap++ = s;
  520. *ap = 0;
  521. while(*s && *s!=' ' && *s!='\t')
  522. s++;
  523. if(*s == 0)
  524. break;
  525. else
  526. *s++ = 0;
  527. }
  528. return i;
  529. }
  530. int
  531. return0(void *v)
  532. {
  533. return 0;
  534. }
  535. long
  536. syssleep(uint32 *arg)
  537. {
  538. int n;
  539. n = arg[0];
  540. if(n <= 0) {
  541. yield();
  542. return 0;
  543. }
  544. if(n < TK2MS(1))
  545. n = TK2MS(1);
  546. tsleep(&up->sleep, return0, 0, n);
  547. return 0;
  548. }
  549. long
  550. sysalarm(uint32 *arg)
  551. {
  552. return procalarm(arg[0]);
  553. }
  554. long
  555. sysexits(uint32 *arg)
  556. {
  557. char *status;
  558. char *inval = "invalid exit string";
  559. char buf[ERRMAX];
  560. if(arg[0]){
  561. if(waserror())
  562. status = inval;
  563. else{
  564. status = uvalidaddr(arg[0], 1, 0);
  565. if(vmemchr(status, 0, ERRMAX) == 0){
  566. memmove(buf, status, ERRMAX);
  567. buf[ERRMAX-1] = 0;
  568. status = buf;
  569. }
  570. poperror();
  571. }
  572. }else
  573. status = nil;
  574. pexit(status, 1);
  575. return 0; /* not reached */
  576. }
  577. long
  578. sys_wait(uint32 *arg)
  579. {
  580. int pid;
  581. Waitmsg w;
  582. OWaitmsg *ow;
  583. if(arg[0] == 0)
  584. return pwait(nil);
  585. ow = uvalidaddr(arg[0], sizeof(OWaitmsg), 1);
  586. evenaddr(arg[0]);
  587. pid = pwait(&w);
  588. if(pid >= 0){
  589. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  590. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  591. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  592. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  593. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  594. ow->msg[sizeof(ow->msg)-1] = '\0';
  595. }
  596. return pid;
  597. }
  598. long
  599. sysawait(uint32 *arg)
  600. {
  601. int i;
  602. int pid;
  603. Waitmsg w;
  604. uint32 n;
  605. char *buf;
  606. n = arg[1];
  607. buf = uvalidaddr(arg[0], n, 1);
  608. pid = pwait(&w);
  609. if(pid < 0)
  610. return -1;
  611. i = snprint(buf, n, "%d %lud %lud %lud %q",
  612. w.pid,
  613. w.time[TUser], w.time[TSys], w.time[TReal],
  614. w.msg);
  615. return i;
  616. }
  617. void
  618. werrstr(char *fmt, ...)
  619. {
  620. va_list va;
  621. if(up == nil)
  622. return;
  623. va_start(va, fmt);
  624. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  625. va_end(va);
  626. }
  627. static long
  628. generrstr(uint32 addr, uint nbuf)
  629. {
  630. char tmp[ERRMAX];
  631. char *buf;
  632. if(nbuf == 0)
  633. error(Ebadarg);
  634. buf = uvalidaddr(addr, nbuf, 1);
  635. if(nbuf > sizeof tmp)
  636. nbuf = sizeof tmp;
  637. memmove(tmp, buf, nbuf);
  638. /* make sure it's NUL-terminated */
  639. tmp[nbuf-1] = '\0';
  640. memmove(buf, up->syserrstr, nbuf);
  641. buf[nbuf-1] = '\0';
  642. memmove(up->syserrstr, tmp, nbuf);
  643. return 0;
  644. }
  645. long
  646. syserrstr(uint32 *arg)
  647. {
  648. return generrstr(arg[0], arg[1]);
  649. }
  650. /* compatibility for old binaries */
  651. long
  652. sys_errstr(uint32 *arg)
  653. {
  654. return generrstr(arg[0], 64);
  655. }
  656. long
  657. sysnotify(uint32 *arg)
  658. {
  659. if(arg[0] != 0)
  660. uvalidaddr(arg[0], 1, 0);
  661. up->notify = arg[0]; /* checked again when used */
  662. return 0;
  663. }
  664. long
  665. sysnoted(uint32 *arg)
  666. {
  667. if(arg[0]!=NRSTR && !up->notified)
  668. error(Egreg);
  669. return 0;
  670. }
  671. long
  672. syssegbrk(uint32 *arg)
  673. {
  674. int i;
  675. uint32 addr;
  676. Segment *s;
  677. addr = arg[0];
  678. for(i = 0; i < NSEG; i++) {
  679. s = up->seg[i];
  680. if(s == 0 || addr < s->base || addr >= s->top)
  681. continue;
  682. switch(s->type&SG_TYPE) {
  683. case SG_TEXT:
  684. case SG_DATA:
  685. case SG_STACK:
  686. error(Ebadarg);
  687. default:
  688. return ibrk(arg[1], i);
  689. }
  690. }
  691. error(Ebadarg);
  692. return 0; /* not reached */
  693. }
  694. long
  695. syssegattach(uint32 *arg)
  696. {
  697. return segattach(up, arg[0], uvalidaddr(arg[1], 1, 0), arg[2], arg[3]);
  698. }
  699. long
  700. syssegdetach(uint32 *arg)
  701. {
  702. int i;
  703. uint32 addr;
  704. Segment *s;
  705. qlock(&up->seglock);
  706. if(waserror()){
  707. qunlock(&up->seglock);
  708. nexterror();
  709. }
  710. s = 0;
  711. addr = arg[0];
  712. for(i = 0; i < NSEG; i++)
  713. if((s = up->seg[i])) {
  714. qlock(&s->lk);
  715. if((addr >= s->base && addr < s->top) ||
  716. (s->top == s->base && addr == s->base))
  717. goto found;
  718. qunlock(&s->lk);
  719. }
  720. error(Ebadarg);
  721. found:
  722. /*
  723. * Check we are not detaching the initial stack segment.
  724. */
  725. if(s == up->seg[SSEG]){
  726. qunlock(&s->lk);
  727. error(Ebadarg);
  728. }
  729. up->seg[i] = 0;
  730. qunlock(&s->lk);
  731. putseg(s);
  732. qunlock(&up->seglock);
  733. poperror();
  734. /* Ensure we flush any entries from the lost segment */
  735. flushmmu();
  736. return 0;
  737. }
  738. long
  739. syssegfree(uint32 *arg)
  740. {
  741. Segment *s;
  742. uint32 from, to;
  743. from = arg[0];
  744. s = seg(up, from, 1);
  745. if(s == nil)
  746. error(Ebadarg);
  747. to = (from + arg[1]) & ~(BY2PG-1);
  748. from = PGROUND(from);
  749. if(to > s->top) {
  750. qunlock(&s->lk);
  751. error(Ebadarg);
  752. }
  753. mfreeseg(s, from, (to - from) / BY2PG);
  754. qunlock(&s->lk);
  755. flushmmu();
  756. return 0;
  757. }
  758. /* For binary compatibility */
  759. long
  760. sysbrk_(uint32 *arg)
  761. {
  762. return ibrk(arg[0], BSEG);
  763. }
  764. long
  765. sysrendezvous(uint32 *arg)
  766. {
  767. uintptr tag, val;
  768. Proc *p, **l;
  769. tag = arg[0];
  770. l = &REND(up->rgrp, tag);
  771. up->rendval = ~(uintptr)0;
  772. lock(&up->rgrp->ref.lk);
  773. for(p = *l; p; p = p->rendhash) {
  774. if(p->rendtag == tag) {
  775. *l = p->rendhash;
  776. val = p->rendval;
  777. p->rendval = arg[1];
  778. while(p->mach != 0)
  779. ;
  780. ready(p);
  781. unlock(&up->rgrp->ref.lk);
  782. return val;
  783. }
  784. l = &p->rendhash;
  785. }
  786. /* Going to sleep here */
  787. up->rendtag = tag;
  788. up->rendval = arg[1];
  789. up->rendhash = *l;
  790. *l = up;
  791. up->state = Rendezvous;
  792. unlock(&up->rgrp->ref.lk);
  793. sched();
  794. return up->rendval;
  795. }
  796. /*
  797. * The implementation of semaphores is complicated by needing
  798. * to avoid rescheduling in syssemrelease, so that it is safe
  799. * to call from real-time processes. This means syssemrelease
  800. * cannot acquire any qlocks, only spin locks.
  801. *
  802. * Semacquire and semrelease must both manipulate the semaphore
  803. * wait list. Lock-free linked lists only exist in theory, not
  804. * in practice, so the wait list is protected by a spin lock.
  805. *
  806. * The semaphore value *addr is stored in user memory, so it
  807. * cannot be read or written while holding spin locks.
  808. *
  809. * Thus, we can access the list only when holding the lock, and
  810. * we can access the semaphore only when not holding the lock.
  811. * This makes things interesting. Note that sleep's condition function
  812. * is called while holding two locks - r and up->rlock - so it cannot
  813. * access the semaphore value either.
  814. *
  815. * An acquirer announces its intention to try for the semaphore
  816. * by putting a Sema structure onto the wait list and then
  817. * setting Sema.waiting. After one last check of semaphore,
  818. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  819. * must wake up n acquirers who have Sema.waiting set. It does
  820. * this by clearing Sema.waiting and then calling wakeup.
  821. *
  822. * There are three interesting races here.
  823. * The first is that in this particular sleep/wakeup usage, a single
  824. * wakeup can rouse a process from two consecutive sleeps!
  825. * The ordering is:
  826. *
  827. * (a) set Sema.waiting = 1
  828. * (a) call sleep
  829. * (b) set Sema.waiting = 0
  830. * (a) check Sema.waiting inside sleep, return w/o sleeping
  831. * (a) try for semaphore, fail
  832. * (a) set Sema.waiting = 1
  833. * (a) call sleep
  834. * (b) call wakeup(a)
  835. * (a) wake up again
  836. *
  837. * This is okay - semacquire will just go around the loop
  838. * again. It does mean that at the top of the for(;;) loop in
  839. * semacquire, phore.waiting might already be set to 1.
  840. *
  841. * The second is that a releaser might wake an acquirer who is
  842. * interrupted before he can acquire the lock. Since
  843. * release(n) issues only n wakeup calls -- only n can be used
  844. * anyway -- if the interrupted process is not going to use his
  845. * wakeup call he must pass it on to another acquirer.
  846. *
  847. * The third race is similar to the second but more subtle. An
  848. * acquirer sets waiting=1 and then does a final canacquire()
  849. * before going to sleep. The opposite order would result in
  850. * missing wakeups that happen between canacquire and
  851. * waiting=1. (In fact, the whole point of Sema.waiting is to
  852. * avoid missing wakeups between canacquire() and sleep().) But
  853. * there can be spurious wakeups between a successful
  854. * canacquire() and the following semdequeue(). This wakeup is
  855. * not useful to the acquirer, since he has already acquired
  856. * the semaphore. Like in the previous case, though, the
  857. * acquirer must pass the wakeup call along.
  858. *
  859. * This is all rather subtle. The code below has been verified
  860. * with the spin model /sys/src/9/port/semaphore.p. The
  861. * original code anticipated the second race but not the first
  862. * or third, which were caught only with spin. The first race
  863. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  864. * It was lucky that my abstract model of sleep/wakeup still managed
  865. * to preserve that behavior.
  866. *
  867. * I remain slightly concerned about memory coherence
  868. * outside of locks. The spin model does not take
  869. * queued processor writes into account so we have to
  870. * think hard. The only variables accessed outside locks
  871. * are the semaphore value itself and the boolean flag
  872. * Sema.waiting. The value is only accessed with cmpswap,
  873. * whose job description includes doing the right thing as
  874. * far as memory coherence across processors. That leaves
  875. * Sema.waiting. To handle it, we call coherence() before each
  876. * read and after each write. - rsc
  877. */
  878. /* Add semaphore p with addr a to list in seg. */
  879. static void
  880. semqueue(Segment *s, long *a, Sema *p)
  881. {
  882. memset(p, 0, sizeof *p);
  883. p->addr = a;
  884. lock(&s->sema.rendez.lk); /* uses s->sema.Rendez.Lock, but no one else is */
  885. p->next = &s->sema;
  886. p->prev = s->sema.prev;
  887. p->next->prev = p;
  888. p->prev->next = p;
  889. unlock(&s->sema.rendez.lk);
  890. }
  891. /* Remove semaphore p from list in seg. */
  892. static void
  893. semdequeue(Segment *s, Sema *p)
  894. {
  895. lock(&s->sema.rendez.lk);
  896. p->next->prev = p->prev;
  897. p->prev->next = p->next;
  898. unlock(&s->sema.rendez.lk);
  899. }
  900. /* Wake up n waiters with addr a on list in seg. */
  901. static void
  902. semwakeup(Segment *s, long *a, long n)
  903. {
  904. Sema *p;
  905. lock(&s->sema.rendez.lk);
  906. for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
  907. if(p->addr == a && p->waiting){
  908. p->waiting = 0;
  909. coherence();
  910. wakeup(&p->rendez);
  911. n--;
  912. }
  913. }
  914. unlock(&s->sema.rendez.lk);
  915. }
  916. /* Add delta to semaphore and wake up waiters as appropriate. */
  917. static long
  918. semrelease(Segment *s, long *addr, long delta)
  919. {
  920. long value;
  921. do
  922. value = *addr;
  923. while(!cmpswap(addr, value, value+delta));
  924. semwakeup(s, addr, delta);
  925. return value+delta;
  926. }
  927. /* Try to acquire semaphore using compare-and-swap */
  928. static int
  929. canacquire(long *addr)
  930. {
  931. long value;
  932. while((value=*addr) > 0)
  933. if(cmpswap(addr, value, value-1))
  934. return 1;
  935. return 0;
  936. }
  937. /* Should we wake up? */
  938. static int
  939. semawoke(void *p)
  940. {
  941. coherence();
  942. return !((Sema*)p)->waiting;
  943. }
  944. /* Acquire semaphore (subtract 1). */
  945. static int
  946. semacquire(Segment *s, long *addr, int block)
  947. {
  948. int acquired;
  949. Sema phore;
  950. if(canacquire(addr))
  951. return 1;
  952. if(!block)
  953. return 0;
  954. acquired = 0;
  955. semqueue(s, addr, &phore);
  956. for(;;){
  957. phore.waiting = 1;
  958. coherence();
  959. if(canacquire(addr)){
  960. acquired = 1;
  961. break;
  962. }
  963. if(waserror())
  964. break;
  965. sleep(&phore.rendez, semawoke, &phore);
  966. poperror();
  967. }
  968. semdequeue(s, &phore);
  969. coherence(); /* not strictly necessary due to lock in semdequeue */
  970. if(!phore.waiting)
  971. semwakeup(s, addr, 1);
  972. if(!acquired)
  973. nexterror();
  974. return 1;
  975. }
  976. long
  977. syssemacquire(uint32 *arg)
  978. {
  979. int block;
  980. long *addr;
  981. Segment *s;
  982. addr = uvalidaddr(arg[0], sizeof(long), 1);
  983. evenaddr(arg[0]);
  984. block = arg[1];
  985. if((s = seg(up, arg[0], 0)) == nil)
  986. error(Ebadarg);
  987. if(*addr < 0)
  988. error(Ebadarg);
  989. return semacquire(s, addr, block);
  990. }
  991. long
  992. syssemrelease(uint32 *arg)
  993. {
  994. long *addr, delta;
  995. Segment *s;
  996. addr = uvalidaddr(arg[0], sizeof(long), 1);
  997. evenaddr(arg[0]);
  998. delta = arg[1];
  999. if((s = seg(up, arg[0], 0)) == nil)
  1000. error(Ebadarg);
  1001. if(delta < 0 || *addr < 0)
  1002. error(Ebadarg);
  1003. return semrelease(s, addr, arg[1]);
  1004. }