PageRenderTime 133ms CodeModel.GetById 9ms app.highlight 85ms RepoModel.GetById 2ms app.codeStats 1ms

/fs/jfs/jfs_xtree.c

https://bitbucket.org/cyanogenmod/android_kernel_asus_tf300t
C | 3905 lines | 2020 code | 510 blank | 1375 comment | 374 complexity | 4c5575c4be9470acfa2d51e22e5d234c MD5 | raw file
Possible License(s): LGPL-2.0, AGPL-1.0, GPL-2.0

Large files are truncated, but you can click here to view the full file

   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *
   4 *   This program is free software;  you can redistribute it and/or modify
   5 *   it under the terms of the GNU General Public License as published by
   6 *   the Free Software Foundation; either version 2 of the License, or
   7 *   (at your option) any later version.
   8 *
   9 *   This program is distributed in the hope that it will be useful,
  10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  12 *   the GNU General Public License for more details.
  13 *
  14 *   You should have received a copy of the GNU General Public License
  15 *   along with this program;  if not, write to the Free Software
  16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18/*
  19 *	jfs_xtree.c: extent allocation descriptor B+-tree manager
  20 */
  21
  22#include <linux/fs.h>
  23#include <linux/module.h>
  24#include <linux/quotaops.h>
  25#include <linux/seq_file.h>
  26#include "jfs_incore.h"
  27#include "jfs_filsys.h"
  28#include "jfs_metapage.h"
  29#include "jfs_dmap.h"
  30#include "jfs_dinode.h"
  31#include "jfs_superblock.h"
  32#include "jfs_debug.h"
  33
   34/*
   35 * xtree local flag
   36 */
/* xtSearch() flag: caller intends to insert, so count pages that must split */
   37#define XT_INSERT	0x00000001
   38
   39/*
   40 *	xtree key/entry comparison: extent offset
   41 *
   42 * return:
   43 *	-1: k < start of extent
   44 *	 0: start_of_extent <= k <= end_of_extent
   45 *	 1: k > end_of_extent
   46 */
/* n.b. K is evaluated twice; pass a side-effect-free expression */
   47#define XT_CMP(CMP, K, X, OFFSET64)\
   48{\
   49	OFFSET64 = offsetXAD(X);\
   50	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
   51		((K) < OFFSET64) ? -1 : 0;\
   52}
   53
   54/* write a xad entry */
   55#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
   56{\
   57	(XAD)->flag = (FLAG);\
   58	XADoffset((XAD), (OFF));\
   59	XADlength((XAD), (LEN));\
   60	XADaddress((XAD), (ADDR));\
   61}
   62
/* map a pinned metapage to its xtpage_t (the root page lives in the inode) */
   63#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
   64
   65/* get page buffer for specified block address */
   66/* ToDo: Replace this ugly macro with a function */
/* on successful read, also sanity-check the page header; a corrupt page is
 * unpinned and reported as -EIO (BN == 0 denotes the in-inode root page,
 * which has a smaller slot limit, XTROOTMAXSLOT)
 */
   67#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
   68{\
   69	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
   70	if (!(RC))\
   71	{\
   72		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
   73		    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
   74		    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
   75		{\
   76			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
   77			BT_PUTPAGE(MP);\
   78			MP = NULL;\
   79			RC = -EIO;\
   80		}\
   81	}\
   82}
   83
   84/* for consistency */
   85#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
   86
/* unpack (bn, page, index) from the top stack frame left pinned by xtSearch() */
   87#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
   88	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
   89/* xtree entry parameter descriptor */
/*
 * Bundles everything the split routines (xtSplitUp/xtSplitPage/xtSplitRoot)
 * need to insert one xad entry while splitting a full page.
 */
   90struct xtsplit {
   91	struct metapage *mp;	/* pinned page to be split */
   92	s16 index;		/* index at which to insert the new entry */
   93	u8 flag;		/* xad flag for the new entry */
   94	s64 off;		/* extent offset of the new entry */
   95	s64 addr;		/* extent address of the new entry */
   96	int len;		/* extent length of the new entry */
   97	struct pxdlist *pxdlist;	/* pre-allocated blocks for new index pages */
   98};
  99
 100
  101/*
  102 *	statistics
  103 */
  104#ifdef CONFIG_JFS_STATISTICS
/* counters bumped via INCREMENT() along the search/split paths */
  105static struct {
  106	uint search;		/* total xtSearch() invocations */
  107	uint fastSearch;	/* searches resolved by the sequential heuristic */
  108	uint split;		/* page splits performed */
  109} xtStat;
  110#endif
 111
 112
 113/*
 114 * forward references
 115 */
 116static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
 117		    struct btstack * btstack, int flag);
 118
 119static int xtSplitUp(tid_t tid,
 120		     struct inode *ip,
 121		     struct xtsplit * split, struct btstack * btstack);
 122
 123static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
 124		       struct metapage ** rmpp, s64 * rbnp);
 125
 126static int xtSplitRoot(tid_t tid, struct inode *ip,
 127		       struct xtsplit * split, struct metapage ** rmpp);
 128
 129#ifdef _STILL_TO_PORT
 130static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 131		      xtpage_t * fp, struct btstack * btstack);
 132
 133static int xtSearchNode(struct inode *ip,
 134			xad_t * xad,
 135			int *cmpp, struct btstack * btstack, int flag);
 136
 137static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 138#endif				/*  _STILL_TO_PORT */
 139
  140/*
  141 *	xtLookup()
  142 *
  143 * function: map a single page into a physical extent;
 *
 * parameters:
 *	ip	- file object;
 *	lstart	- start of the logical extent (filesystem-block units);
 *	llen	- requested length of the logical extent;
 *	pflag	- returned xad flag (set only when an xad covers lstart);
 *	paddr	- returned physical address (left 0 for a hole);
 *	plen	- returned covered length, min(covered, llen);
 *	no_check - nonzero skips the beyond-eof short-circuit;
 *
 * return: 0 for success (a hole yields *paddr == 0); errno on error.
  144 */
  145int xtLookup(struct inode *ip, s64 lstart,
  146	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
  147{
  148	int rc = 0;
  149	struct btstack btstack;
  150	int cmp;
  151	s64 bn;
  152	struct metapage *mp;
  153	xtpage_t *p;
  154	int index;
  155	xad_t *xad;
  156	s64 next, size, xoff, xend;
  157	int xlen;
  158	s64 xaddr;
  159
  160	*paddr = 0;
  161	*plen = llen;
  162
  163	if (!no_check) {
  164		/* is lookup offset beyond eof ? */
  165		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  166		    JFS_SBI(ip->i_sb)->l2bsize;
  167		if (lstart >= size)
  168			return 0;
  169	}
  170
  171	/*
  172	 * search for the xad entry covering the logical extent
  173	 */
  174//search:
  175	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
  176		jfs_err("xtLookup: xtSearch returned %d", rc);
  177		return rc;
  178	}
  179
  180	/*
  181	 *	compute the physical extent covering logical extent
  182	 *
  183	 * N.B. search may have failed (e.g., hole in sparse file),
  184	 * and returned the index of the next entry.
  185	 */
  186	/* retrieve search result */
  187	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  188
  189	/* is xad found covering start of logical extent ?
  190	 * lstart is a page start address,
  191	 * i.e., lstart cannot start in a hole;
  192	 */
  193	if (cmp) {
		/* search miss: lstart lies in a hole; *paddr stays 0 and,
		 * when a following extent exists, *plen is clamped to the
		 * hole size so the caller can step over it in one call
		 */
  194		if (next)
  195			*plen = min(next - lstart, llen);
  196		goto out;
  197	}
  198
  199	/*
  200	 * lxd covered by xad
  201	 */
  202	xad = &p->xad[index];
  203	xoff = offsetXAD(xad);
  204	xlen = lengthXAD(xad);
  205	xend = xoff + xlen;
  206	xaddr = addressXAD(xad);
  207
  208	/* initialize new pxd */
  209	*pflag = xad->flag;
  210	*paddr = xaddr + (lstart - xoff);
  211	/* a page must be fully covered by an xad */
  212	*plen = min(xend - lstart, llen);
  213
  214      out:
	/* unpin the leaf page left pinned by xtSearch() */
  215	XT_PUTPAGE(mp);
  216
  217	return rc;
  218}
 219
  220/*
  221 *	xtSearch()
  222 *
  223 * function:	search for the xad entry covering specified offset.
  224 *
  225 * parameters:
  226 *	ip	- file object;
  227 *	xoff	- extent offset;
  228 *	nextp	- address of next extent (if any) for search miss
  229 *	cmpp	- comparison result:
 *		  (0 = hit; nonzero = miss, index is the insertion point)
  230 *	btstack - traverse stack;
  231 *	flag	- search process flag (XT_INSERT);
  232 *
  233 * returns:
  234 *	btstack contains (bn, index) of search path traversed to the entry.
  235 *	*cmpp is set to result of comparison with the entry returned.
  236 *	the page containing the entry is pinned at exit.
  237 */
  238static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
  239		    int *cmpp, struct btstack * btstack, int flag)
  240{
  241	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
  242	int rc = 0;
  243	int cmp = 1;		/* init for empty page */
  244	s64 bn;			/* block number */
  245	struct metapage *mp;	/* page buffer */
  246	xtpage_t *p;		/* page */
  247	xad_t *xad;
  248	int base, index, lim, btindex;
  249	struct btframe *btsp;
  250	int nsplit = 0;		/* number of pages to split */
  251	s64 t64;
  252	s64 next = 0;
  253
  254	INCREMENT(xtStat.search);
  255
  256	BT_CLR(btstack);
  257
  258	btstack->nsplit = 0;
  259
  260	/*
  261	 *	search down tree from root:
  262	 *
  263	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
  264	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
  265	 *
  266	 * if entry with search key K is not found
  267	 * internal page search find the entry with largest key Ki
  268	 * less than K which point to the child page to search;
  269	 * leaf page search find the entry with smallest key Kj
  270	 * greater than K so that the returned index is the position of
  271	 * the entry to be shifted right for insertion of new entry.
  272	 * for empty tree, search key is greater than any key of the tree.
  273	 *
  274	 * by convention, root bn = 0.
  275	 */
  276	for (bn = 0;;) {
  277		/* get/pin the page to search */
  278		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  279		if (rc)
  280			return rc;
  281
  282		/* try sequential access heuristics with the previous
  283		 * access entry in target leaf page:
  284		 * once search narrowed down into the target leaf,
  285		 * key must either match an entry in the leaf or
  286		 * key entry does not exist in the tree;
  287		 */
  288//fastSearch:
  289		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
  290		    (p->header.flag & BT_LEAF) &&
  291		    (index = jfs_ip->btindex) <
  292		    le16_to_cpu(p->header.nextindex)) {
  293			xad = &p->xad[index];
  294			t64 = offsetXAD(xad);
  295			if (xoff < t64 + lengthXAD(xad)) {
  296				if (xoff >= t64) {
  297					*cmpp = 0;
  298					goto out;
  299				}
  300
  301				/* stop sequential access heuristics */
  302				goto binarySearch;
  303			} else {	/* (t64 + lengthXAD(xad)) <= xoff */
  304
  305				/* try next sequential entry */
  306				index++;
  307				if (index <
  308				    le16_to_cpu(p->header.nextindex)) {
  309					xad++;
  310					t64 = offsetXAD(xad);
  311					if (xoff < t64 + lengthXAD(xad)) {
  312						if (xoff >= t64) {
  313							*cmpp = 0;
  314							goto out;
  315						}
  316
  317						/* miss: key falls between
  318						 * previous and this entry
  319						 */
  320						*cmpp = 1;
  321						next = t64;
  322						goto out;
  323					}
  324
  325					/* (xoff >= t64 + lengthXAD(xad));
  326					 * matching entry may be further out:
  327					 * stop heuristic search
  328					 */
  329					/* stop sequential access heuristics */
  330					goto binarySearch;
  331				}
  332
  333				/* (index == p->header.nextindex);
  334				 * miss: key entry does not exist in
  335				 * the target leaf/tree
  336				 */
  337				*cmpp = 1;
  338				goto out;
  339			}
  340
  341			/*
  342			 * if hit, return index of the entry found, and
  343			 * if miss, where new entry with search key is
  344			 * to be inserted;
  345			 */
			/* fast-path exit: save result, keep leaf pinned */
  346		      out:
  347			/* compute number of pages to split */
  348			if (flag & XT_INSERT) {
  349				if (p->header.nextindex ==	/* little-endian */
  350				    p->header.maxentry)
  351					nsplit++;
  352				else
  353					nsplit = 0;
  354				btstack->nsplit = nsplit;
  355			}
  356
  357			/* save search result */
  358			btsp = btstack->top;
  359			btsp->bn = bn;
  360			btsp->index = index;
  361			btsp->mp = mp;
  362
  363			/* update sequential access heuristics */
  364			jfs_ip->btindex = index;
  365
  366			if (nextp)
  367				*nextp = next;
  368
  369			INCREMENT(xtStat.fastSearch);
  370			return 0;
  371		}
  372
  373		/* well, ... full search now */
  374	      binarySearch:
  375		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
  376
  377		/*
  378		 * binary search with search key K on the current page
  379		 */
  380		for (base = XTENTRYSTART; lim; lim >>= 1) {
  381			index = base + (lim >> 1);
  382
  383			XT_CMP(cmp, xoff, &p->xad[index], t64);
  384			if (cmp == 0) {
  385				/*
  386				 *	search hit
  387				 */
  388				/* search hit - leaf page:
  389				 * return the entry found
  390				 */
  391				if (p->header.flag & BT_LEAF) {
  392					*cmpp = cmp;
  393
  394					/* compute number of pages to split */
  395					if (flag & XT_INSERT) {
  396						if (p->header.nextindex ==
  397						    p->header.maxentry)
  398							nsplit++;
  399						else
  400							nsplit = 0;
  401						btstack->nsplit = nsplit;
  402					}
  403
  404					/* save search result */
  405					btsp = btstack->top;
  406					btsp->bn = bn;
  407					btsp->index = index;
  408					btsp->mp = mp;
  409
  410					/* init sequential access heuristics */
  411					btindex = jfs_ip->btindex;
  412					if (index == btindex ||
  413					    index == btindex + 1)
  414						jfs_ip->btorder = BT_SEQUENTIAL;
  415					else
  416						jfs_ip->btorder = BT_RANDOM;
  417					jfs_ip->btindex = index;
  418
  419					return 0;
  420				}
  421				/* search hit - internal page:
  422				 * descend/search its child page
  423				 */
  424				if (index < le16_to_cpu(p->header.nextindex)-1)
  425					next = offsetXAD(&p->xad[index + 1]);
  426				goto next;
  427			}
  428
			/* cmp > 0: key lies right of index, search the right
			 * half; cmp < 0 falls through, keeping base and
			 * halving lim (left half)
			 */
  429			if (cmp > 0) {
  430				base = index + 1;
  431				--lim;
  432			}
  433		}
  434
  435		/*
  436		 *	search miss
  437		 *
  438		 * base is the smallest index with key (Kj) greater than
  439		 * search key (K) and may be zero or maxentry index.
  440		 */
  441		if (base < le16_to_cpu(p->header.nextindex))
  442			next = offsetXAD(&p->xad[base]);
  443		/*
  444		 * search miss - leaf page:
  445		 *
  446		 * return location of entry (base) where new entry with
  447		 * search key K is to be inserted.
  448		 */
  449		if (p->header.flag & BT_LEAF) {
  450			*cmpp = cmp;
  451
  452			/* compute number of pages to split */
  453			if (flag & XT_INSERT) {
  454				if (p->header.nextindex ==
  455				    p->header.maxentry)
  456					nsplit++;
  457				else
  458					nsplit = 0;
  459				btstack->nsplit = nsplit;
  460			}
  461
  462			/* save search result */
  463			btsp = btstack->top;
  464			btsp->bn = bn;
  465			btsp->index = base;
  466			btsp->mp = mp;
  467
  468			/* init sequential access heuristics */
  469			btindex = jfs_ip->btindex;
  470			if (base == btindex || base == btindex + 1)
  471				jfs_ip->btorder = BT_SEQUENTIAL;
  472			else
  473				jfs_ip->btorder = BT_RANDOM;
  474			jfs_ip->btindex = base;
  475
  476			if (nextp)
  477				*nextp = next;
  478
  479			return 0;
  480		}
  481
  482		/*
  483		 * search miss - non-leaf page:
  484		 *
  485		 * if base is non-zero, decrement base by one to get the parent
  486		 * entry of the child page to search.
  487		 */
  488		index = base ? base - 1 : base;
  489
  490		/*
  491		 * go down to child page
  492		 */
  493	      next:
  494		/* update number of pages to split */
  495		if (p->header.nextindex == p->header.maxentry)
  496			nsplit++;
  497		else
  498			nsplit = 0;
  499
  500		/* push (bn, index) of the parent page/entry */
  501		if (BT_STACK_FULL(btstack)) {
  502			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
  503			XT_PUTPAGE(mp);
  504			return -EIO;
  505		}
  506		BT_PUSH(btstack, bn, index);
  507
  508		/* get the child page block number */
  509		bn = addressXAD(&p->xad[index]);
  510
  511		/* unpin the parent page */
  512		XT_PUTPAGE(mp);
  513	}
  514}
 515
  516/*
  517 *	xtInsert()
  518 *
  519 * function:
 *	insert a new xad entry [xoff, xoff+xlen) into the xtree,
 *	allocating the data extent first when the caller did not;
  520 *
  521 * parameter:
  522 *	tid	- transaction id;
  523 *	ip	- file object;
  524 *	xflag	- extent flag (XAD_NOTRECORDED):
  525 *	xoff	- extent offset;
  526 *	xlen	- extent length;
  527 *	xaddrp	- extent address pointer (in/out):
  528 *		if (*xaddrp)
  529 *			caller allocated data extent at *xaddrp;
  530 *		else
  531 *			allocate data extent and return its xaddr;
  532 *	flag	-
  533 *
  534 * return:
 *	0 on success with *xaddrp set to the extent address;
 *	-EEXIST if the range overlaps an existing extent;
 *	other errno from search/allocation/split;
  535 */
  536int xtInsert(tid_t tid,		/* transaction id */
  537	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
  538	     int flag)
  539{
  540	int rc = 0;
  541	s64 xaddr, hint;
  542	struct metapage *mp;	/* meta-page buffer */
  543	xtpage_t *p;		/* base B+-tree index page */
  544	s64 bn;
  545	int index, nextindex;
  546	struct btstack btstack;	/* traverse stack */
  547	struct xtsplit split;	/* split information */
  548	xad_t *xad;
  549	int cmp;
  550	s64 next;
  551	struct tlock *tlck;
  552	struct xtlock *xtlck;
  553
  554	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
  555
  556	/*
  557	 *	search for the entry location at which to insert:
  558	 *
  559	 * xtFastSearch() and xtSearch() both returns (leaf page
  560	 * pinned, index at which to insert).
  561	 * n.b. xtSearch() may return index of maxentry of
  562	 * the full page.
  563	 */
  564	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
  565		return rc;
  566
  567	/* retrieve search result */
  568	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  569
  570	/* This test must follow XT_GETSEARCH since mp must be valid if
  571	 * we branch to out: */
	/* overlap: the offset is already mapped (cmp == 0), or the new
	 * extent would run into the next recorded extent
	 */
  572	if ((cmp == 0) || (next && (xlen > next - xoff))) {
  573		rc = -EEXIST;
  574		goto out;
  575	}
  576
  577	/*
  578	 * allocate data extent requested
  579	 *
  580	 * allocation hint: last xad
  581	 */
  582	if ((xaddr = *xaddrp) == 0) {
  583		if (index > XTENTRYSTART) {
  584			xad = &p->xad[index - 1];
  585			hint = addressXAD(xad) + lengthXAD(xad) - 1;
  586		} else
  587			hint = 0;
  588		if ((rc = dquot_alloc_block(ip, xlen)))
  589			goto out;
  590		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
  591			dquot_free_block(ip, xlen);
  592			goto out;
  593		}
  594	}
  595
  596	/*
  597	 *	insert entry for new extent
  598	 */
  599	xflag |= XAD_NEW;
  600
  601	/*
  602	 *	if the leaf page is full, split the page and
  603	 *	propagate up the router entry for the new page from split
  604	 *
  605	 * The xtSplitUp() will insert the entry and unpin the leaf page.
  606	 */
  607	nextindex = le16_to_cpu(p->header.nextindex);
  608	if (nextindex == le16_to_cpu(p->header.maxentry)) {
  609		split.mp = mp;
  610		split.index = index;
  611		split.flag = xflag;
  612		split.off = xoff;
  613		split.len = xlen;
  614		split.addr = xaddr;
  615		split.pxdlist = NULL;
  616		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
  617			/* undo data extent allocation */
  618			if (*xaddrp == 0) {
  619				dbFree(ip, xaddr, (s64) xlen);
  620				dquot_free_block(ip, xlen);
  621			}
  622			return rc;
  623		}
  624
  625		*xaddrp = xaddr;
  626		return 0;
  627	}
  628
  629	/*
  630	 *	insert the new entry into the leaf page
  631	 */
  632	/*
  633	 * acquire a transaction lock on the leaf page;
  634	 *
  635	 * action: xad insertion/extension;
  636	 */
  637	BT_MARK_DIRTY(mp, ip);
  638
  639	/* if insert into middle, shift right remaining entries. */
  640	if (index < nextindex)
  641		memmove(&p->xad[index + 1], &p->xad[index],
  642			(nextindex - index) * sizeof(xad_t));
  643
  644	/* insert the new entry: mark the entry NEW */
  645	xad = &p->xad[index];
  646	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  647
  648	/* advance next available entry index */
  649	le16_add_cpu(&p->header.nextindex, 1);
  650
  651	/* Don't log it if there are no links to the file */
  652	if (!test_cflag(COMMIT_Nolink, ip)) {
  653		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  654		xtlck = (struct xtlock *) & tlck->lock;
  655		xtlck->lwm.offset =
  656		    (xtlck->lwm.offset) ? min(index,
  657					      (int)xtlck->lwm.offset) : index;
  658		xtlck->lwm.length =
  659		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
  660	}
  661
  662	*xaddrp = xaddr;
  663
  664      out:
  665	/* unpin the leaf page */
  666	XT_PUTPAGE(mp);
  667
  668	return rc;
  669}
 670
 671
  672/*
  673 *	xtSplitUp()
  674 *
  675 * function:
  676 *	split full pages as propagating insertion up the tree
  677 *
  678 * parameter:
  679 *	tid	- transaction id;
  680 *	ip	- file object;
  681 *	split	- entry parameter descriptor;
  682 *	btstack - traverse stack from xtSearch()
  683 *
  684 * return:
 *	0 on success (leaf page from split->mp has been unpinned);
 *	errno on failure, with all pinned pages released;
  685 */
  686static int
  687xtSplitUp(tid_t tid,
  688	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
  689{
  690	int rc = 0;
  691	struct metapage *smp;
  692	xtpage_t *sp;		/* split page */
  693	struct metapage *rmp;
  694	s64 rbn;		/* new right page block number */
  695	struct metapage *rcmp;
  696	xtpage_t *rcp;		/* right child page */
  697	s64 rcbn;		/* right child page block number */
  698	int skip;		/* index of entry of insertion */
  699	int nextindex;		/* next available entry index of p */
  700	struct btframe *parent;	/* parent page entry on traverse stack */
  701	xad_t *xad;
  702	s64 xaddr;
  703	int xlen;
  704	int nsplit;		/* number of pages split */
  705	struct pxdlist pxdlist;
  706	pxd_t *pxd;
  707	struct tlock *tlck;
  708	struct xtlock *xtlck;
  709
  710	smp = split->mp;
  711	sp = XT_PAGE(ip, smp);
  712
  713	/* is inode xtree root extension/inline EA area free ? */
	/* cheap alternative to a split: grow the in-inode root into the
	 * unused inline EA area instead of allocating a new page
	 */
  714	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
  715	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
  716	    (JFS_IP(ip)->mode2 & INLINEEA)) {
  717		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
  718		JFS_IP(ip)->mode2 &= ~INLINEEA;
  719
  720		BT_MARK_DIRTY(smp, ip);
  721		/*
  722		 * acquire a transaction lock on the leaf page;
  723		 *
  724		 * action: xad insertion/extension;
  725		 */
  726
  727		/* if insert into middle, shift right remaining entries. */
  728		skip = split->index;
  729		nextindex = le16_to_cpu(sp->header.nextindex);
  730		if (skip < nextindex)
  731			memmove(&sp->xad[skip + 1], &sp->xad[skip],
  732				(nextindex - skip) * sizeof(xad_t));
  733
  734		/* insert the new entry: mark the entry NEW */
  735		xad = &sp->xad[skip];
  736		XT_PUTENTRY(xad, split->flag, split->off, split->len,
  737			    split->addr);
  738
  739		/* advance next available entry index */
  740		le16_add_cpu(&sp->header.nextindex, 1);
  741
  742		/* Don't log it if there are no links to the file */
  743		if (!test_cflag(COMMIT_Nolink, ip)) {
  744			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
  745			xtlck = (struct xtlock *) & tlck->lock;
  746			xtlck->lwm.offset = (xtlck->lwm.offset) ?
  747			    min(skip, (int)xtlck->lwm.offset) : skip;
  748			xtlck->lwm.length =
  749			    le16_to_cpu(sp->header.nextindex) -
  750			    xtlck->lwm.offset;
  751		}
  752
  753		return 0;
  754	}
  755
  756	/*
  757	 * allocate new index blocks to cover index page split(s)
  758	 *
  759	 * allocation hint: ?
  760	 */
  761	if (split->pxdlist == NULL) {
  762		nsplit = btstack->nsplit;
  763		split->pxdlist = &pxdlist;
  764		pxdlist.maxnpxd = pxdlist.npxd = 0;
  765		pxd = &pxdlist.pxd[0];
  766		xlen = JFS_SBI(ip->i_sb)->nbperpage;
  767		for (; nsplit > 0; nsplit--, pxd++) {
  768			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
  769			    == 0) {
  770				PXDaddress(pxd, xaddr);
  771				PXDlength(pxd, xlen);
  772
  773				pxdlist.maxnpxd++;
  774
  775				continue;
  776			}
  777
  778			/* undo allocation */
  779
  780			XT_PUTPAGE(smp);
  781			return rc;
  782		}
  783	}
  784
  785	/*
  786	 * Split leaf page <sp> into <sp> and a new right page <rp>.
  787	 *
  788	 * The split routines insert the new entry into the leaf page,
  789	 * and acquire txLock as appropriate.
  790	 * return <rp> pinned and its block number <rpbn>.
  791	 */
  792	rc = (sp->header.flag & BT_ROOT) ?
  793	    xtSplitRoot(tid, ip, split, &rmp) :
  794	    xtSplitPage(tid, ip, split, &rmp, &rbn);
  795
  796	XT_PUTPAGE(smp);
  797
  798	if (rc)
		/* n.b. the split routine's error code is collapsed to -EIO */
  799		return -EIO;
  800	/*
  801	 * propagate up the router entry for the leaf page just split
  802	 *
  803	 * insert a router entry for the new page into the parent page,
  804	 * propagate the insert/split up the tree by walking back the stack
  805	 * of (bn of parent page, index of child page entry in parent page)
  806	 * that were traversed during the search for the page that split.
  807	 *
  808	 * the propagation of insert/split up the tree stops if the root
  809	 * splits or the page inserted into doesn't have to split to hold
  810	 * the new entry.
  811	 *
  812	 * the parent entry for the split page remains the same, and
  813	 * a new entry is inserted at its right with the first key and
  814	 * block number of the new right page.
  815	 *
  816	 * There are a maximum of 3 pages pinned at any time:
  817	 * right child, left parent and right parent (when the parent splits)
  818	 * to keep the child page pinned while working on the parent.
  819	 * make sure that all pins are released at exit.
  820	 */
  821	while ((parent = BT_POP(btstack)) != NULL) {
  822		/* parent page specified by stack frame <parent> */
  823
  824		/* keep current child pages <rcp> pinned */
  825		rcmp = rmp;
  826		rcbn = rbn;
  827		rcp = XT_PAGE(ip, rcmp);
  828
  829		/*
  830		 * insert router entry in parent for new right child page <rp>
  831		 */
  832		/* get/pin the parent page <sp> */
  833		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
  834		if (rc) {
  835			XT_PUTPAGE(rcmp);
  836			return rc;
  837		}
  838
  839		/*
  840		 * The new key entry goes ONE AFTER the index of parent entry,
  841		 * because the split was to the right.
  842		 */
  843		skip = parent->index + 1;
  844
  845		/*
  846		 * split or shift right remaining entries of the parent page
  847		 */
  848		nextindex = le16_to_cpu(sp->header.nextindex);
  849		/*
  850		 * parent page is full - split the parent page
  851		 */
  852		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
  853			/* init for parent page split */
  854			split->mp = smp;
  855			split->index = skip;	/* index at insert */
  856			split->flag = XAD_NEW;
  857			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
  858			split->len = JFS_SBI(ip->i_sb)->nbperpage;
  859			split->addr = rcbn;
  860
  861			/* unpin previous right child page */
  862			XT_PUTPAGE(rcmp);
  863
  864			/* The split routines insert the new entry,
  865			 * and acquire txLock as appropriate.
  866			 * return <rp> pinned and its block number <rpbn>.
  867			 */
  868			rc = (sp->header.flag & BT_ROOT) ?
  869			    xtSplitRoot(tid, ip, split, &rmp) :
  870			    xtSplitPage(tid, ip, split, &rmp, &rbn);
  871			if (rc) {
  872				XT_PUTPAGE(smp);
  873				return rc;
  874			}
  875
  876			XT_PUTPAGE(smp);
  877			/* keep new child page <rp> pinned */
  878		}
  879		/*
  880		 * parent page is not full - insert in parent page
  881		 */
  882		else {
  883			/*
  884			 * insert router entry in parent for the right child
  885			 * page from the first entry of the right child page:
  886			 */
  887			/*
  888			 * acquire a transaction lock on the parent page;
  889			 *
  890			 * action: router xad insertion;
  891			 */
  892			BT_MARK_DIRTY(smp, ip);
  893
  894			/*
  895			 * if insert into middle, shift right remaining entries
  896			 */
  897			if (skip < nextindex)
  898				memmove(&sp->xad[skip + 1], &sp->xad[skip],
  899					(nextindex -
  900					 skip) << L2XTSLOTSIZE);
  901
  902			/* insert the router entry */
  903			xad = &sp->xad[skip];
  904			XT_PUTENTRY(xad, XAD_NEW,
  905				    offsetXAD(&rcp->xad[XTENTRYSTART]),
  906				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
  907
  908			/* advance next available entry index. */
  909			le16_add_cpu(&sp->header.nextindex, 1);
  910
  911			/* Don't log it if there are no links to the file */
  912			if (!test_cflag(COMMIT_Nolink, ip)) {
  913				tlck = txLock(tid, ip, smp,
  914					      tlckXTREE | tlckGROW);
  915				xtlck = (struct xtlock *) & tlck->lock;
  916				xtlck->lwm.offset = (xtlck->lwm.offset) ?
  917				    min(skip, (int)xtlck->lwm.offset) : skip;
  918				xtlck->lwm.length =
  919				    le16_to_cpu(sp->header.nextindex) -
  920				    xtlck->lwm.offset;
  921			}
  922
  923			/* unpin parent page */
  924			XT_PUTPAGE(smp);
  925
  926			/* exit propagate up */
  927			break;
  928		}
  929	}
  930
  931	/* unpin current right page */
  932	XT_PUTPAGE(rmp);
  933
  934	return 0;
  935}
 936
 937
 938/*
 939 *	xtSplitPage()
 940 *
 941 * function:
 942 *	split a full non-root page into
 943 *	original/split/left page and new right page
 944 *	i.e., the original/split page remains as left page.
 945 *
 946 * parameter:
 947 *	int		tid,
 948 *	struct inode	*ip,
 949 *	struct xtsplit	*split,
 950 *	struct metapage	**rmpp,
 951 *	u64		*rbnp,
 952 *
 953 * return:
 954 *	Pointer to page in which to insert or NULL on error.
 955 */
 956static int
 957xtSplitPage(tid_t tid, struct inode *ip,
 958	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
 959{
 960	int rc = 0;
 961	struct metapage *smp;
 962	xtpage_t *sp;
 963	struct metapage *rmp;
 964	xtpage_t *rp;		/* new right page allocated */
 965	s64 rbn;		/* new right page block number */
 966	struct metapage *mp;
 967	xtpage_t *p;
 968	s64 nextbn;
 969	int skip, maxentry, middle, righthalf, n;
 970	xad_t *xad;
 971	struct pxdlist *pxdlist;
 972	pxd_t *pxd;
 973	struct tlock *tlck;
 974	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
 975	int quota_allocation = 0;
 976
 977	smp = split->mp;
 978	sp = XT_PAGE(ip, smp);
 979
 980	INCREMENT(xtStat.split);
 981
 982	pxdlist = split->pxdlist;
 983	pxd = &pxdlist->pxd[pxdlist->npxd];
 984	pxdlist->npxd++;
 985	rbn = addressPXD(pxd);
 986
 987	/* Allocate blocks to quota. */
 988	rc = dquot_alloc_block(ip, lengthPXD(pxd));
 989	if (rc)
 990		goto clean_up;
 991
 992	quota_allocation += lengthPXD(pxd);
 993
 994	/*
 995	 * allocate the new right page for the split
 996	 */
 997	rmp = get_metapage(ip, rbn, PSIZE, 1);
 998	if (rmp == NULL) {
 999		rc = -EIO;
1000		goto clean_up;
1001	}
1002
1003	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1004
1005	BT_MARK_DIRTY(rmp, ip);
1006	/*
1007	 * action: new page;
1008	 */
1009
1010	rp = (xtpage_t *) rmp->data;
1011	rp->header.self = *pxd;
1012	rp->header.flag = sp->header.flag & BT_TYPE;
1013	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
1014	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1015
1016	BT_MARK_DIRTY(smp, ip);
1017	/* Don't log it if there are no links to the file */
1018	if (!test_cflag(COMMIT_Nolink, ip)) {
1019		/*
1020		 * acquire a transaction lock on the new right page;
1021		 */
1022		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1023		rxtlck = (struct xtlock *) & tlck->lock;
1024		rxtlck->lwm.offset = XTENTRYSTART;
1025		/*
1026		 * acquire a transaction lock on the split page
1027		 */
1028		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1029		sxtlck = (struct xtlock *) & tlck->lock;
1030	}
1031
1032	/*
1033	 * initialize/update sibling pointers of <sp> and <rp>
1034	 */
1035	nextbn = le64_to_cpu(sp->header.next);
1036	rp->header.next = cpu_to_le64(nextbn);
1037	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1038	sp->header.next = cpu_to_le64(rbn);
1039
1040	skip = split->index;
1041
1042	/*
1043	 *	sequential append at tail (after last entry of last page)
1044	 *
1045	 * if splitting the last page on a level because of appending
1046	 * a entry to it (skip is maxentry), it's likely that the access is
1047	 * sequential. adding an empty page on the side of the level is less
1048	 * work and can push the fill factor much higher than normal.
1049	 * if we're wrong it's no big deal -  we will do the split the right
1050	 * way next time.
1051	 * (it may look like it's equally easy to do a similar hack for
1052	 * reverse sorted data, that is, split the tree left, but it's not.
1053	 * Be my guest.)
1054	 */
1055	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1056		/*
1057		 * acquire a transaction lock on the new/right page;
1058		 *
1059		 * action: xad insertion;
1060		 */
1061		/* insert entry at the first entry of the new right page */
1062		xad = &rp->xad[XTENTRYSTART];
1063		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1064			    split->addr);
1065
1066		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1067
1068		if (!test_cflag(COMMIT_Nolink, ip)) {
1069			/* rxtlck->lwm.offset = XTENTRYSTART; */
1070			rxtlck->lwm.length = 1;
1071		}
1072
1073		*rmpp = rmp;
1074		*rbnp = rbn;
1075
1076		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1077		return 0;
1078	}
1079
1080	/*
1081	 *	non-sequential insert (at possibly middle page)
1082	 */
1083
1084	/*
1085	 * update previous pointer of old next/right page of <sp>
1086	 */
1087	if (nextbn != 0) {
1088		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1089		if (rc) {
1090			XT_PUTPAGE(rmp);
1091			goto clean_up;
1092		}
1093
1094		BT_MARK_DIRTY(mp, ip);
1095		/*
1096		 * acquire a transaction lock on the next page;
1097		 *
1098		 * action:sibling pointer update;
1099		 */
1100		if (!test_cflag(COMMIT_Nolink, ip))
1101			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1102
1103		p->header.prev = cpu_to_le64(rbn);
1104
1105		/* sibling page may have been updated previously, or
1106		 * it may be updated later;
1107		 */
1108
1109		XT_PUTPAGE(mp);
1110	}
1111
1112	/*
1113	 * split the data between the split and new/right pages
1114	 */
1115	maxentry = le16_to_cpu(sp->header.maxentry);
1116	middle = maxentry >> 1;
1117	righthalf = maxentry - middle;
1118
1119	/*
1120	 * skip index in old split/left page - insert into left page:
1121	 */
1122	if (skip <= middle) {
1123		/* move right half of split page to the new right page */
1124		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1125			righthalf << L2XTSLOTSIZE);
1126
1127		/* shift right tail of left half to make room for new entry */
1128		if (skip < middle)
1129			memmove(&sp->xad[skip + 1], &sp->xad[skip],
1130				(middle - skip) << L2XTSLOTSIZE);
1131
1132		/* insert new entry */
1133		xad = &sp->xad[skip];
1134		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1135			    split->addr);
1136
1137		/* update page header */
1138		sp->header.nextindex = cpu_to_le16(middle + 1);
1139		if (!test_cflag(COMMIT_Nolink, ip)) {
1140			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1141			    min(skip, (int)sxtlck->lwm.offset) : skip;
1142		}
1143
1144		rp->header.nextindex =
1145		    cpu_to_le16(XTENTRYSTART + righthalf);
1146	}
1147	/*
1148	 * skip index in new right page - insert into right page:
1149	 */
1150	else {
1151		/* move left head of right half to right page */
1152		n = skip - middle;
1153		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1154			n << L2XTSLOTSIZE);
1155
1156		/* insert new entry */
1157		n += XTENTRYSTART;
1158		xad = &rp->xad[n];
1159		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1160			    split->addr);
1161
1162		/* move right tail of right half to right page */
1163		if (skip < maxentry)
1164			memmove(&rp->xad[n + 1], &sp->xad[skip],
1165				(maxentry - skip) << L2XTSLOTSIZE);
1166
1167		/* update page header */
1168		sp->header.nextindex = cpu_to_le16(middle);
1169		if (!test_cflag(COMMIT_Nolink, ip)) {
1170			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1171			    min(middle, (int)sxtlck->lwm.offset) : middle;
1172		}
1173
1174		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1175						   righthalf + 1);
1176	}
1177
1178	if (!test_cflag(COMMIT_Nolink, ip)) {
1179		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1180		    sxtlck->lwm.offset;
1181
1182		/* rxtlck->lwm.offset = XTENTRYSTART; */
1183		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1184		    XTENTRYSTART;
1185	}
1186
1187	*rmpp = rmp;
1188	*rbnp = rbn;
1189
1190	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1191	return rc;
1192
1193      clean_up:
1194
1195	/* Rollback quota allocation. */
1196	if (quota_allocation)
1197		dquot_free_block(ip, quota_allocation);
1198
1199	return (rc);
1200}
1201
1202
1203/*
1204 *	xtSplitRoot()
1205 *
1206 * function:
1207 *	split the full root page into original/root/split page and new
1208 *	right page
1209 *	i.e., root remains fixed in tree anchor (inode) and the root is
1210 *	copied to a single new right child page since root page <<
1211 *	non-root page, and the split root page contains a single entry
1212 *	for the new right child page.
1213 *
1214 * parameter:
1215 *	int		tid,
1216 *	struct inode	*ip,
1217 *	struct xtsplit	*split,
1218 *	struct metapage	**rmpp)
1219 *
1220 * return:
1221 *	Pointer to page in which to insert or NULL on error.
1222 */
1223static int
1224xtSplitRoot(tid_t tid,
1225	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1226{
1227	xtpage_t *sp;
1228	struct metapage *rmp;
1229	xtpage_t *rp;
1230	s64 rbn;
1231	int skip, nextindex;
1232	xad_t *xad;
1233	pxd_t *pxd;
1234	struct pxdlist *pxdlist;
1235	struct tlock *tlck;
1236	struct xtlock *xtlck;
1237	int rc;
1238
1239	sp = &JFS_IP(ip)->i_xtroot;
1240
1241	INCREMENT(xtStat.split);
1242
1243	/*
1244	 *	allocate a single (right) child page
1245	 */
1246	pxdlist = split->pxdlist;
1247	pxd = &pxdlist->pxd[pxdlist->npxd];
1248	pxdlist->npxd++;
1249	rbn = addressPXD(pxd);
1250	rmp = get_metapage(ip, rbn, PSIZE, 1);
1251	if (rmp == NULL)
1252		return -EIO;
1253
1254	/* Allocate blocks to quota. */
1255	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1256	if (rc) {
1257		release_metapage(rmp);
1258		return rc;
1259	}
1260
1261	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1262
1263	/*
1264	 * acquire a transaction lock on the new right page;
1265	 *
1266	 * action: new page;
1267	 */
1268	BT_MARK_DIRTY(rmp, ip);
1269
1270	rp = (xtpage_t *) rmp->data;
1271	rp->header.flag =
1272	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1273	rp->header.self = *pxd;
1274	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1275	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1276
1277	/* initialize sibling pointers */
1278	rp->header.next = 0;
1279	rp->header.prev = 0;
1280
1281	/*
1282	 * copy the in-line root page into new right page extent
1283	 */
1284	nextindex = le16_to_cpu(sp->header.maxentry);
1285	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1286		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1287
1288	/*
1289	 * insert the new entry into the new right/child page
1290	 * (skip index in the new right page will not change)
1291	 */
1292	skip = split->index;
1293	/* if insert into middle, shift right remaining entries */
1294	if (skip != nextindex)
1295		memmove(&rp->xad[skip + 1], &rp->xad[skip],
1296			(nextindex - skip) * sizeof(xad_t));
1297
1298	xad = &rp->xad[skip];
1299	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1300
1301	/* update page header */
1302	rp->header.nextindex = cpu_to_le16(nextindex + 1);
1303
1304	if (!test_cflag(COMMIT_Nolink, ip)) {
1305		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1306		xtlck = (struct xtlock *) & tlck->lock;
1307		xtlck->lwm.offset = XTENTRYSTART;
1308		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1309		    XTENTRYSTART;
1310	}
1311
1312	/*
1313	 *	reset the root
1314	 *
1315	 * init root with the single entry for the new right page
1316	 * set the 1st entry offset to 0, which force the left-most key
1317	 * at any level of the tree to be less than any search key.
1318	 */
1319	/*
1320	 * acquire a transaction lock on the root page (in-memory inode);
1321	 *
1322	 * action: root split;
1323	 */
1324	BT_MARK_DIRTY(split->mp, ip);
1325
1326	xad = &sp->xad[XTENTRYSTART];
1327	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1328
1329	/* update page header of root */
1330	sp->header.flag &= ~BT_LEAF;
1331	sp->header.flag |= BT_INTERNAL;
1332
1333	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1334
1335	if (!test_cflag(COMMIT_Nolink, ip)) {
1336		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1337		xtlck = (struct xtlock *) & tlck->lock;
1338		xtlck->lwm.offset = XTENTRYSTART;
1339		xtlck->lwm.length = 1;
1340	}
1341
1342	*rmpp = rmp;
1343
1344	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1345	return 0;
1346}
1347
1348
1349/*
1350 *	xtExtend()
1351 *
1352 * function: extend in-place;
1353 *
1354 * note: existing extent may or may not have been committed.
1355 * caller is responsible for pager buffer cache update, and
1356 * working block allocation map update;
1357 * update pmap: alloc whole extended extent;
1358 */
1359int xtExtend(tid_t tid,		/* transaction id */
1360	     struct inode *ip, s64 xoff,	/* delta extent offset */
1361	     s32 xlen,		/* delta extent length */
1362	     int flag)
1363{
1364	int rc = 0;
1365	int cmp;
1366	struct metapage *mp;	/* meta-page buffer */
1367	xtpage_t *p;		/* base B+-tree index page */
1368	s64 bn;
1369	int index, nextindex, len;
1370	struct btstack btstack;	/* traverse stack */
1371	struct xtsplit split;	/* split information */
1372	xad_t *xad;
1373	s64 xaddr;
1374	struct tlock *tlck;
1375	struct xtlock *xtlck = NULL;
1376
1377	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1378
1379	/* there must exist extent to be extended */
1380	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1381		return rc;
1382
1383	/* retrieve search result */
1384	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1385
1386	if (cmp != 0) {
1387		XT_PUTPAGE(mp);
1388		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1389		return -EIO;
1390	}
1391
1392	/* extension must be contiguous */
1393	xad = &p->xad[index];
1394	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1395		XT_PUTPAGE(mp);
1396		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1397		return -EIO;
1398	}
1399
1400	/*
1401	 * acquire a transaction lock on the leaf page;
1402	 *
1403	 * action: xad insertion/extension;
1404	 */
1405	BT_MARK_DIRTY(mp, ip);
1406	if (!test_cflag(COMMIT_Nolink, ip)) {
1407		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1408		xtlck = (struct xtlock *) & tlck->lock;
1409	}
1410
1411	/* extend will overflow extent ? */
1412	xlen = lengthXAD(xad) + xlen;
1413	if ((len = xlen - MAXXLEN) <= 0)
1414		goto extendOld;
1415
1416	/*
1417	 *	extent overflow: insert entry for new extent
1418	 */
1419//insertNew:
1420	xoff = offsetXAD(xad) + MAXXLEN;
1421	xaddr = addressXAD(xad) + MAXXLEN;
1422	nextindex = le16_to_cpu(p->header.nextindex);
1423
1424	/*
1425	 *	if the leaf page is full, insert the new entry and
1426	 *	propagate up the router entry for the new page from split
1427	 *
1428	 * The xtSplitUp() will insert the entry and unpin the leaf page.
1429	 */
1430	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1431		/* xtSpliUp() unpins leaf pages */
1432		split.mp = mp;
1433		split.index = index + 1;
1434		split.flag = XAD_NEW;
1435		split.off = xoff;	/* split offset */
1436		split.len = len;
1437		split.addr = xaddr;
1438		split.pxdlist = NULL;
1439		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1440			return rc;
1441
1442		/* get back old page */
1443		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1444		if (rc)
1445			return rc;
1446		/*
1447		 * if leaf root has been split, original root has been
1448		 * copied to new child page, i.e., original entry now
1449		 * resides on the new child page;
1450		 */
1451		if (p->header.flag & BT_INTERNAL) {
1452			ASSERT(p->header.nextindex ==
1453			       cpu_to_le16(XTENTRYSTART + 1));
1454			xad = &p->xad[XTENTRYSTART];
1455			bn = addressXAD(xad);
1456			XT_PUTPAGE(mp);
1457
1458			/* get new child page */
1459			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1460			if (rc)
1461				return rc;
1462
1463			BT_MARK_DIRTY(mp, ip);
1464			if (!test_cflag(COMMIT_Nolink, ip)) {
1465				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1466				xtlck = (struct xtlock *) & tlck->lock;
1467			}
1468		}
1469	}
1470	/*
1471	 *	insert the new entry into the leaf page
1472	 */
1473	else {
1474		/* insert the new entry: mark the entry NEW */
1475		xad = &p->xad[index + 1];
1476		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1477
1478		/* advance next available entry index */
1479		le16_add_cpu(&p->header.nextindex, 1);
1480	}
1481
1482	/* get back old entry */
1483	xad = &p->xad[index];
1484	xlen = MAXXLEN;
1485
1486	/*
1487	 * extend old extent
1488	 */
1489      extendOld:
1490	XADlength(xad, xlen);
1491	if (!(xad->flag & XAD_NEW))
1492		xad->flag |= XAD_EXTENDED;
1493
1494	if (!test_cflag(COMMIT_Nolink, ip)) {
1495		xtlck->lwm.offset =
1496		    (xtlck->lwm.offset) ? min(index,
1497					      (int)xtlck->lwm.offset) : index;
1498		xtlck->lwm.length =
1499		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1500	}
1501
1502	/* unpin the leaf page */
1503	XT_PUTPAGE(mp);
1504
1505	return rc;
1506}
1507
1508#ifdef _NOTYET
1509/*
1510 *	xtTailgate()
1511 *
1512 * function: split existing 'tail' extent
1513 *	(split offset >= start offset of tail extent), and
1514 *	relocate and extend the split tail half;
1515 *
1516 * note: existing extent may or may not have been committed.
1517 * caller is responsible for pager buffer cache update, and
1518 * working block allocation map update;
1519 * update pmap: free old split tail extent, alloc new extent;
1520 */
1521int xtTailgate(tid_t tid,		/* transaction id */
1522	       struct inode *ip, s64 xoff,	/* split/new extent offset */
1523	       s32 xlen,	/* new extent length */
1524	       s64 xaddr,	/* new extent address */
1525	       int flag)
1526{
1527	int rc = 0;
1528	int cmp;
1529	struct metapage *mp;	/* meta-page buffer */
1530	xtpage_t *p;		/* base B+-tree index page */
1531	s64 bn;
1532	int index, nextindex, llen, rlen;
1533	struct btstack btstack;	/* traverse stack */
1534	struct xtsplit split;	/* split information */
1535	xad_t *xad;
1536	struct tlock *tlck;
1537	struct xtlock *xtlck = 0;
1538	struct tlock *mtlck;
1539	struct maplock *pxdlock;
1540
1541/*
1542printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1543	(ulong)xoff, xlen, (ulong)xaddr);
1544*/
1545
1546	/* there must exist extent to be tailgated */
1547	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1548		return rc;
1549
1550	/* retrieve search result */
1551	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1552
1553	if (cmp != 0) {
1554		XT_PUTPAGE(mp);
1555		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
1556		return -EIO;
1557	}
1558
1559	/* entry found must be last entry */
1560	nextindex = le16_to_cpu(p->header.nextindex);
1561	if (index != nextindex - 1) {
1562		XT_PUTPAGE(mp);
1563		jfs_error(ip->i_sb,
1564			  "xtTailgate: the entry found is not the last entry");
1565		return -EIO;
1566	}
1567
1568	BT_MARK_DIRTY(mp, ip);
1569	/*
1570	 * acquire tlock of the leaf page containing original entry
1571	 */
1572	if (!test_cflag(COMMIT_Nolink, ip)) {
1573		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1574		xtlck = (struct xtlock *) & tlck->lock;
1575	}
1576
1577	/* completely replace extent ? */
1578	xad = &p->xad[index];
1579/*
1580printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1581	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1582*/
1583	if ((llen = xoff - offsetXAD(xad)) == 0)
1584		goto updateOld;
1585
1586	/*
1587	 *	partially replace extent: insert entry for new extent
1588	 */
1589//insertNew:
1590	/*
1591	 *	if the leaf page is full, insert the new entry and
1592	 *	propagate up the router entry for the new page from split
1593	 *
1594	 * The xtSplitUp() will insert the entry and unpin the leaf page.
1595	 */
1596	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1597		/* xtSpliUp() unpins leaf pages */
1598		split.mp = mp;
1599		split.index = index + 1;
1600		split.flag = XAD_NEW;
1601		split.off = xoff;	/* split offset */
1602		split.len = xlen;
1603		split.addr = xaddr;
1604		split.pxdlist = NULL;
1605		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1606			return rc;
1607
1608		/* get back old page */
1609		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1610		if (rc)
1611			return rc;
1612		/*
1613		 * if leaf root has been split, original root has been
1614		 * copied to new child page, i.e., original entry now
1615		 * resides on the new child page;
1616		 */
1617		if (p->header.flag & BT_INTERNAL) {
1618			ASSERT(p->header.nextindex ==
1619			       cpu_to_le16(XTENTRYSTART + 1));
1620			xad = &p->xad[XTENTRYSTART];
1621			bn = addressXAD(xad);
1622			XT_PUTPAGE(mp);
1623
1624			/* get new child page */
1625			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1626			if (rc)
1627				return rc;
1628
1629			BT_MARK_DIRTY(mp, ip);
1630			if (!test_cflag(COMMIT_Nolink, ip)) {
1631				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1632				xtlck = (struct xtlock *) & tlck->lock;
1633			}
1634		}
1635	}
1636	/*
1637	 *	insert the new entry into the leaf page
1638	 */
1639	else {
1640		/* insert the new entry: mark the entry NEW */
1641		xad = &p->xad[index + 1];
1642		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1643
1644		/* advance next available entry index */
1645		le16_add_cpu(&p->header.nextindex, 1);
1646	}
1647
1648	/* get back old XAD */
1649	xad = &p->xad[index];
1650
1651	/*
1652	 * truncate/relocate old extent at split offset
1653	 */
1654      updateOld:
1655	/* update dmap for old/committed/truncated extent */
1656	rlen = lengthXAD(xad) - llen;
1657	if (!(xad->flag & XAD_NEW)) {
1658		/* free from PWMAP at commit */
1659		if (!test_cflag(COMMIT_Nolink, ip)) {
1660			mtlck = txMaplock(tid, ip, tlckMAP);
1661			pxdlock = (struct maplock *) & mtlck->lock;
1662			pxdlock->flag = mlckFREEPXD;
1663			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1664			PXDlength(&pxdlock->pxd, rlen);
1665			pxdlock->index = 1;
1666		}
1667	} else
1668		/* free from WMAP */
1669		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1670
1671	if (llen)
1672		/* truncate */
1673		XADlength(xad, llen);
1674	else
1675		/* replace */
1676		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1677
1678	if (!test_cflag(COMMIT_Nolink, ip)) {
1679		xtlck->lwm.offset = (xtlck->lwm.offset) ?
1680		    min(index, (int)xtlck->lwm.offset) : index;
1681		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1682		    xtlck->lwm.offset;
1683	}
1684
1685	/* unpin the leaf page */
1686	XT_PUTPAGE(mp);
1687
1688	return rc;
1689}
1690#endif /* _NOTYET */
1691
1692/*
1693 *	xtUpdate()
1694 *
1695 * function: update XAD;
1696 *
1697 *	update extent for allocated_but_not_recorded or
1698 *	compressed extent;
1699 *
1700 * parameter:
1701 *	nxad	- new XAD;
1702 *		logical extent of the specified XAD must be completely
1703 *		contained by an existing XAD;
1704 */
1705int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1706{				/* new XAD */
1707	int rc = 0;
1708	int cmp;
1709	struct metapage *mp;	/* meta-page buffer */
1710	xtpage_t *p;		/* base B+-tree index page */
1711	s64 bn;
1712	int index0, index, newindex, nextindex;
1713	struct btstack btstack;	/* traverse stack */
1714	struct xtsplit split;	/* split information */
1715	xad_t *xad, *lxad, *rxad;
1716	int xflag;
1717	s64 nxoff, xoff;
1718	int nxlen, xlen, lxlen, rxlen;
1719	s64 nxaddr, xaddr;
1720	struct tlock *tlck;
1721	struct xtlock *xtlck = NULL;
1722	int newpage = 0;
1723
1724	/* there must exist extent to be tailgated */
1725	nxoff = offsetXAD(nxad);
1726	nxlen = lengthXAD(nxad);
1727	nxaddr = addressXAD(nxad);
1728
1729	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1730		return rc;
1731
1732	/* retrieve search result */
1733	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1734
1735	if (cmp != 0) {
1736		XT_PUTPAGE(mp);
1737		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
1738		return -EIO;
1739	}
1740
1741	BT_MARK_DIRTY(mp, ip);
1742	/*
1743	 * acquire tlock of the leaf page containing original entry
1744	 */
1745	if (!test_cflag(COMMIT_Nolink, ip)) {
1746		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1747		xtlck = (struct xtlock *) & tlck->lock;
1748	}
1749
1750	xad = &p->xad[index0];
1751	xflag = xad->flag;
1752	xoff = offsetXAD(xad);
1753	xlen = lengthXAD(xad);
1754	xaddr = addressXAD(xad);
1755
1756	/* nXAD must be completely contained within XAD */
1757	if ((xoff > nxoff) ||
1758	    (nxoff + nxlen > xoff + xlen)) {
1759		XT_PUTPAGE(mp);
1760		jfs_error(ip->i_sb,
1761			  "xtUpdate: nXAD in not completely contained within XAD");
1762		return -EIO;
1763	}
1764
1765	index = index0;
1766	newindex = index + 1;
1767	nextindex = le16_to_cpu(p->header.nextindex);
1768
1769#ifdef  _JFS_WIP_NOCOALESCE
1770	if (xoff < nxoff)
1771		goto updateRight;
1772
1773	/*
1774	 * replace XAD with nXAD
1775	 */
1776      replace:			/* (nxoff == xoff) */
1777	if (nxlen == xlen) {
1778		/* replace XAD with nXAD:recorded */
1779		*xad = *nxad;
1780		xad->flag = xflag & ~XAD_NOTRECORDED;
1781
1782		goto out;
1783	} else			/* (nxlen < xlen) */
1784		goto updateLeft;
1785#endif				/* _JFS_WIP_NOCOALESCE */
1786
1787/* #ifdef _JFS_WIP_COALESCE */
1788	if (xoff < nxoff)
1789		goto coalesceRight;
1790
1791	/*
1792	 * coalesce with left XAD
1793	 */
1794//coalesceLeft: /* (xoff == nxoff) */
1795	/* is XAD first entry of page ? */
1796	if (index == XTENTRYSTART)
1797		goto replace;
1798
1799	/* is nXAD logically and physically contiguous with lXAD ? */
1800	lxad = &p->xad[index - 1];
1801	lxlen = lengthXAD(lxad);
1802	if (!(lxad->flag & XAD_NOTRECORDED) &&
1803	    (nxoff == offsetXAD(lxad) + lxlen) &&
1804	    (nxaddr == addressXAD(lxad) + lxlen) &&
1805	    (lxlen + nxlen < MAXXLEN)) {
1806		/* extend right lXAD */
1807		index0 = index - 1;
1808		XADlength(lxad, lxlen + nxlen);
1809
1810		/* If we just merged two extents together, need to make sure the
1811		 * right extent gets logged.  If the left one is marked XAD_NEW,
1812		 * then we know it will be logged.  Otherwise, mark as
1813		 * XAD_EXTENDED
1814		 */
1815		if (!(lxad->flag & XAD_NEW))
1816			lxad->flag |= XAD_EXTENDED;
1817
1818		if (xlen > nxlen) {
1819			/* truncate XAD */
1820			XADoffset(xad, xoff + nxlen);
1821			XADlength(xad, xlen - nxlen);
1822			XADaddress(xad, xaddr + nxlen);
1823			goto out;
1824		} else {	/* (xlen == nxlen) */
1825
1826			/* remove XAD */
1827			if (index < nextindex - 1)
1828				memmove(&p->xad[index], &p->xad[index + 1],
1829					(nextindex - index -
1830					 1) << L2XTSLOTSIZE);
1831
1832			p->header.nextindex =
1833			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1834					1);
1835
1836			index = index0;
1837			newindex = index + 1;
1838			nextindex = le16_to_cpu(p->header.nextindex);
1839			xoff = nxoff = offsetXAD(lxad);
1840			xlen = nxlen = lxlen + nxlen;
1841			xaddr = nxaddr = addressXAD(lxad);
1842			goto coalesceRight;
1843		}
1844	}
1845
1846	/*
1847	 * replace XAD with nXAD
1848	 */
1849      replace:			/* (nxoff == xoff) */
1850	if (nxlen == xlen) {
1851		/* replace XAD with nXAD:recorded */
1852		*xad = *nxad;
1853		xad->flag = xflag & ~XAD_NOTRECORDED;
1854
1855		goto coalesceRight;
1856	} else			/* (nxlen < xlen) */
1857		goto updateLeft;
1858
1859	/*
1860	 * coalesce with right XAD
1861	 */
1862      coalesceRight:		/* (xoff <= nxoff) */
1863	/* is XAD last entry of page ? */
1864	if (newindex == nextindex) {
1865		if (xoff == nxoff)
1866			goto out;
1867		goto updateRight;
1868	}
1869
1870	/* is nXAD logically and physically contiguous with rXAD ? */
1871	rxad = &p->xad[index + 1];
1872	rxlen = lengthXAD(rxad);
1873	if (!(rxad->flag & XAD_NOTRECORDED) &&
1874	    (nxoff + nxlen == offsetXAD(rxad)) &&
1875	    (nxaddr + nxlen == addressXAD(rxad)) &&
1876	    (rxlen + nxlen < MAXXLEN)) {
1877		/* extend left rXAD */
1878		XADoffset(rxad, nxoff);
1879		XADlength(rxad, rxlen + nxlen);
1880		XADaddress(rxad, nxaddr);
1881
1882		/* If we just merged two extents together, need to make sure
1883		 * the left extent gets logged.  If the right one is marked
1884		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1885		 * XAD_EXTENDED
1886		 */
1887		if (!(rxad->flag & XAD_NEW))
1888			rxad->flag |= XAD_EXTENDED;
1889
1890		if (xlen > nxlen)
1891			/* truncate XAD */
1892			XADlength(xad, xlen - nxlen);
1893		else {		/* (xlen == nxlen) */
1894
1895			/* remove XAD */
1896			memmove(&p->xad[index], &p->xad[index + 1],
1897				(nextindex - index - 1) << L2XTSLOTSIZE);
1898
1899			p->header.nextindex =
1900			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1901					1);
1902		}
1903
1904		goto out;
1905	} else if (xoff == nxoff)
1906		goto out;
1907
1908	if (xoff >= nxoff) {
1909		XT_PUTPAGE(mp);
1910		jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
1911		return -EIO;
1912	}
1913/* #endif _JFS_WIP_COALESCE */
1914
1915	/*
1916	 * split XAD into (lXAD, nXAD):
1917	 *
1918	 *          |---nXAD--->
1919	 * --|----------XAD----------|--
1920	 *   |-lXAD-|
1921	 */
1922      updateRight:		/* (xoff < nxoff) */
1923	/* truncate old XAD as lXAD:not_recorded */
1924	xad = &p->xad[index];
1925	XADlength(xad, nxoff - xoff);
1926
1927	/* insert nXAD:recorded */
1928	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1929
1930		/* xtSpliUp() unpins leaf pages */
1931		split.mp = mp;
1932		split.index = newindex;
1933		split.flag = xflag & ~XAD_NOTRECORDED;
1934		split.off = nxoff;
1935		split.len = nxlen;
1936		split.addr = nxaddr;
1937		split.pxdlist = NULL;
1938		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1939			return rc;
1940
1941		/* get back old page */
1942		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1943		if (rc)
1944			return rc;
1945		/*
1946		 * if leaf root has been split, original root has been
1947		 * copied to new child page, i.e., original entry now
1948		 * resides on the new child page;
1949		 */
1950		if (p->header.flag & BT_INTERNAL) {
1951			ASSERT(p->header.nextindex ==
1952			       cpu_to_le16(XTENTRYSTART + 1));
1953			xad = &p->xad[XTENTRYSTART];
1954			bn = addressXAD(xad);
1955			XT_PUTPAGE(mp);
1956
1957			/* get new child page */
1958			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1959			if (rc)
1960				return rc;
1961
1962			BT_MARK_DIRTY(mp, ip);
1963			if (!test_cflag(COMMIT_Nolink, ip)) {
1964				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1965				xtlck = (struct xtlock *) & tlck->lock;
1966			}
1967		} else {
1968			/* is nXAD on new page ? */
1969			if (newindex >
1970			    (le16_to_cpu(p->header.maxentry) >> 1)) {
1971				newindex =
1972				    newindex -
1973				    le16_to_cpu(p->header.nextindex) +
1974				    XTENTRYSTART;
1975				newpage = 1;
1976			}
1977		}
1978	} else {
1979		/* if insert into middle, shift right remaining entries */
1980		if (newindex < nextindex)
1981			memmove(&p->xad[newindex + 1], &p->xad[newindex],
1982				(nextindex - newindex) << L2XTSLOTSIZE);
1983
1984		/* insert the entry */
1985		xad = &p->xad[newindex];
1986		*xad = *nxad;
1987		xad->flag = xflag & ~XAD_NOTRECORDED;
1988
1989		/* advance next available entry index. */
1990		p->header.nextindex =
1991		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1992	}
1993
1994	/*
1995	 * does nXAD force 3-way split ?
1996	 *
1997	 *          |---nXAD--->|
1998	 * --|----------XAD-------------|--
1999	 *   |-lXAD-|           |-rXAD -|
2000	 */
2001	if (nxoff + nxlen == xoff + xlen)
2002		goto out;
2003
2004	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2005	if (newpage) {
2006		/* close out old page */
2007		if (!test_cflag(COMMIT_Nolink, ip)) {
2008			xtlck->lwm.offset = (xtlck->lwm.offset) ?
2009			    min(index0, (int)xtlck->lwm.offset) : index0;
2010			xtlck->lwm.length =
2011			    le16_to_cpu(p->header.nextindex) -
2012			    xtlck->lwm.offset;
2013		}
2014
2015		bn = le64_to_cpu(p->header.next);
2016		XT_PUTPAGE(mp);
2017
2018		/* get new right page */
2019		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2020		if (rc)
2021			return rc;
2022
2023		BT_MARK_DIRTY(mp, ip);
2024		if (!test_cflag(COMMIT_Nolink, ip)) {
2025			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2026			xtlck = (struct xtlock *) & tlck->lock;
2027		}
2028
2029		index0 = index = newindex;
2030	} else
2031

Large files files are truncated, but you can click here to view the full file