PageRenderTime 118ms CodeModel.GetById 3ms app.highlight 94ms RepoModel.GetById 1ms app.codeStats 1ms

/fs/xfs/xfs_da_btree.c

https://bitbucket.org/evzijst/gittest
C | 2648 lines | 1963 code | 145 blank | 540 comment | 519 complexity | 667f661528e7ce76311daf68cab11033 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of version 2 of the GNU General Public License as
   6 * published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it would be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11 *
  12 * Further, this software is distributed without any warranty that it is
  13 * free of the rightful claim of any third person regarding infringement
  14 * or the like.  Any license provided herein, whether implied or
  15 * otherwise, applies only to this software file.  Patent licenses, if
  16 * any, provided herein do not apply to combinations of this program with
  17 * other software, or any other product whatsoever.
  18 *
  19 * You should have received a copy of the GNU General Public License along
  20 * with this program; if not, write the Free Software Foundation, Inc., 59
  21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
  22 *
  23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
  24 * Mountain View, CA  94043, or:
  25 *
  26 * http://www.sgi.com
  27 *
  28 * For further information regarding this notice, see:
  29 *
  30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  31 */
  32
  33#include "xfs.h"
  34
  35#include "xfs_macros.h"
  36#include "xfs_types.h"
  37#include "xfs_inum.h"
  38#include "xfs_log.h"
  39#include "xfs_trans.h"
  40#include "xfs_sb.h"
  41#include "xfs_ag.h"
  42#include "xfs_dir.h"
  43#include "xfs_dir2.h"
  44#include "xfs_dmapi.h"
  45#include "xfs_mount.h"
  46#include "xfs_alloc_btree.h"
  47#include "xfs_bmap_btree.h"
  48#include "xfs_ialloc_btree.h"
  49#include "xfs_alloc.h"
  50#include "xfs_btree.h"
  51#include "xfs_attr_sf.h"
  52#include "xfs_dir_sf.h"
  53#include "xfs_dir2_sf.h"
  54#include "xfs_dinode.h"
  55#include "xfs_inode_item.h"
  56#include "xfs_inode.h"
  57#include "xfs_bmap.h"
  58#include "xfs_da_btree.h"
  59#include "xfs_attr.h"
  60#include "xfs_attr_leaf.h"
  61#include "xfs_dir_leaf.h"
  62#include "xfs_dir2_data.h"
  63#include "xfs_dir2_leaf.h"
  64#include "xfs_dir2_block.h"
  65#include "xfs_dir2_node.h"
  66#include "xfs_error.h"
  67#include "xfs_bit.h"
  68
  69/*
  70 * xfs_da_btree.c
  71 *
  72 * Routines to implement directories as Btrees of hashed names.
  73 */
  74
  75/*========================================================================
  76 * Function prototypes for the kernel.
  77 *========================================================================*/
  78
  79/*
  80 * Routines used for growing the Btree.
  81 */
  82STATIC int xfs_da_root_split(xfs_da_state_t *state,
  83					    xfs_da_state_blk_t *existing_root,
  84					    xfs_da_state_blk_t *new_child);
  85STATIC int xfs_da_node_split(xfs_da_state_t *state,
  86					    xfs_da_state_blk_t *existing_blk,
  87					    xfs_da_state_blk_t *split_blk,
  88					    xfs_da_state_blk_t *blk_to_add,
  89					    int treelevel,
  90					    int *result);
  91STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
  92					 xfs_da_state_blk_t *node_blk_1,
  93					 xfs_da_state_blk_t *node_blk_2);
  94STATIC void xfs_da_node_add(xfs_da_state_t *state,
  95				   xfs_da_state_blk_t *old_node_blk,
  96				   xfs_da_state_blk_t *new_node_blk);
  97
  98/*
  99 * Routines used for shrinking the Btree.
 100 */
 101STATIC int xfs_da_root_join(xfs_da_state_t *state,
 102					   xfs_da_state_blk_t *root_blk);
 103STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
 104STATIC void xfs_da_node_remove(xfs_da_state_t *state,
 105					      xfs_da_state_blk_t *drop_blk);
 106STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
 107					 xfs_da_state_blk_t *src_node_blk,
 108					 xfs_da_state_blk_t *dst_node_blk);
 109
 110/*
 111 * Utility routines.
 112 */
 113STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 114STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
 115STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
 116
 117
 118/*========================================================================
 119 * Routines used for growing the Btree.
 120 *========================================================================*/
 121
 122/*
 123 * Create the initial contents of an intermediate node.
 124 */
 125int
 126xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 127				 xfs_dabuf_t **bpp, int whichfork)
 128{
 129	xfs_da_intnode_t *node;
 130	xfs_dabuf_t *bp;
 131	int error;
 132	xfs_trans_t *tp;
 133
 134	tp = args->trans;
 135	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
 136	if (error)
 137		return(error);
 138	ASSERT(bp != NULL);
 139	node = bp->data;
 140	node->hdr.info.forw = 0;
 141	node->hdr.info.back = 0;
 142	INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
 143	node->hdr.info.pad = 0;
 144	node->hdr.count = 0;
 145	INT_SET(node->hdr.level, ARCH_CONVERT, level);
 146
 147	xfs_da_log_buf(tp, bp,
 148		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 149
 150	*bpp = bp;
 151	return(0);
 152}
 153
/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 *
 * Returns 0 or an errno value.  Several error paths are annotated "GROT":
 * on those paths the in-core btree may be left inconsistent.
 */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int max, action, error, i;

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));

	/*
	 * addblk is the block (if any) that still needs an entry inserted
	 * into its parent; non-NULL keeps the upward walk going.
	 */
	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
#ifndef __KERNEL__
			return(ENOTTY);
#else
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 * extrablk records the second new block so the
			 * parent gets two entries added (see node_split).
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
#endif
		case XFS_DIR_LEAF_MAGIC:
			ASSERT(XFS_DIR_IS_V1(state->mp));
			error = xfs_dir_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: dir is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_dir_leaf_split(state, oldblk,
							   &state->extrablk);
				if (error)
					return(error);	/* GROT: dir incon. */
				addblk = newblk;
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_dir_leaf_split(state, newblk,
							   &state->extrablk);
				if (error)
					return(error);	/* GROT: dir incon. */
				addblk = newblk;
			}
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			ASSERT(XFS_DIR_IS_V2(state->mp));
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * The child's entry has now been inserted (or will be
			 * by node_split below), so its buffer reference can
			 * be dropped before checking the error.
			 */
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			xfs_da_buf_done(addblk->bp);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
		/*
		 * If we won't need this block again, it's getting dropped
		 * from the active path by the loop control, so we need
		 * to mark it done now.
		 */
		if (i > 0 || !addblk)
			xfs_da_buf_done(oldblk->bp);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node.  The loop above decremented path.active on
	 * every level, so falling out with addblk still set means the split
	 * propagated all the way to the top.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		xfs_da_buf_done(oldblk->bp);
		xfs_da_buf_done(addblk->bp);
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	/* Forward sibling: point its back link at the relocated block. */
	node = oldblk->bp->data;
	if (node->hdr.info.forw) {
		if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	/* Backward sibling: point its forward link at the relocated block. */
	node = oldblk->bp->data;
	if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
		if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	xfs_da_buf_done(oldblk->bp);
	xfs_da_buf_done(addblk->bp);
	addblk->bp = NULL;
	return(0);
}
 341
/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in process.
 *
 * On return, blk1 refers to the relocated copy of the old root and the
 * root block holds a fresh one-level-higher node with two entries.
 */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	oldroot = blk1->bp->data;
	/*
	 * Only the bytes actually in use are copied: up to the end of the
	 * entry array for a node, or the end of the ents array for a V2
	 * directory leaf (the only other magic the root can carry here).
	 */
	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
		size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
			     (char *)oldroot);
	} else {
		ASSERT(XFS_DIR_IS_V2(mp));
		ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_da_log_buf(tp, bp, 0, size - 1);
	xfs_da_buf_done(blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.  Note that `node' still points at the
	 * relocated copy of the old root at this point, so hdr.level below
	 * is the old root's level; the new root sits one level above it.
	 */
	error = xfs_da_node_create(args,
		args->whichfork == XFS_DATA_FORK &&
		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
		INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->data;
	INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
	INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
	INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
	INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
	INT_SET(node->hdr.count, ARCH_CONVERT, 2);

#ifdef DEBUG
	/* V2 directory leaves must land inside the leaf address range. */
	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));
	xfs_da_buf_done(bp);

	return(0);
}
 428
/*
 * Split the node, rebalance, then add the new entry.
 *
 * *result is set to 1 if a new node block was actually created (so the
 * caller must continue propagating upward), 0 if the entries fit.
 */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	node = oldblk->bp->data;
	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);

	/*
	 * With V2 the extra block is data or freespace.
	 * newcount is how many entries we must insert here: the child that
	 * split, plus (V1 only) the extra block from a double split.
	 */
	useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->data;
	if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
		/* Insertion point still lands in the old block. */
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		/*
		 * Rebalance pushed the insertion point into the new block
		 * (newblk->index was set up by xfs_da_node_rebalance).
		 */
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}
 513
/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 * (Only the local node1/node2 pointers are swapped; blk1/blk2 keep
	 * their original buffers, which matters for the hashval code below.)
	 */
	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	/* count > 0: move entries node1 -> node2; count < 0: node2 -> node1 */
	count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));

	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
		memcpy(btree_d, btree_s, tmp);
		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
		xfs_da_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_da_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
	blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);

	/*
	 * Adjust the expected index for insertion.
	 * If the index moved past blk1's entries, redirect the pending
	 * insertion into blk2 and deliberately invalidate blk1->index
	 * (count + 1 can never be a legal insertion point).
	 */
	if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
		blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
		blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;	/* make it invalid */
	}
}
 626
/*
 * Add a new entry to an intermediate node.
 *
 * Inserts newblk's (hashval, blkno) pair at oldblk->index in oldblk's
 * node, shifting later entries up, and logs the modified ranges.
 */
STATIC void
xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
			       xfs_da_state_blk_t *newblk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_mount_t *mp;

	node = oldblk->bp->data;
	mp = state->mp;
	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
		       newblk->blkno < mp->m_dirfreeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 * tmp stays 0 when appending at the end (nothing to shift), and
	 * is also reused below to size the log range.
	 */
	tmp = 0;
	btree = &node->btree[ oldblk->index ];
	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
		memmove(btree + 1, btree, tmp);
	}
	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
	/* Log the new entry plus everything that was shifted. */
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
	INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
}
 670
 671/*========================================================================
 672 * Routines used for shrinking the Btree.
 673 *========================================================================*/
 674
/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 *
 * Mirror image of xfs_da_split(): walks from the leaf back up toward the
 * root, merging or unlinking blocks that got too small.  Returns 0 or an
 * errno value.
 */
int
xfs_da_join(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *drop_blk, *save_blk;
	int action, error;

	action = 0;
	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
#ifndef __KERNEL__
			error = ENOTTY;
#else
			error = xfs_attr_leaf_toosmall(state, &action);
#endif
			if (error)
				return(error);
			if (action == 0)
				return(0);
#ifdef __KERNEL__
			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
#endif
			break;
		case XFS_DIR_LEAF_MAGIC:
			ASSERT(XFS_DIR_IS_V1(state->mp));
			error = xfs_dir_leaf_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return(0);
			xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			ASSERT(XFS_DIR_IS_V2(state->mp));
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da_node_remove(state, drop_blk);
			xfs_da_fixhashpath(state, &state->path);
			error = xfs_da_node_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return 0;
			xfs_da_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		/*
		 * The merged contents now live in save_blk: fix its hash
		 * path, unlink the emptied block from the sibling chain,
		 * and give its disk space back to the inode.
		 */
		xfs_da_fixhashpath(state, &state->altpath);
		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return(error);
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return(error);
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da_node_remove(state, drop_blk);
	xfs_da_fixhashpath(state, &state->path);
	error = xfs_da_root_join(state, &state->path.blk[0]);
	return(error);
}
 773
/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 *
 * No-op (returns 0) if the root still has more than one entry.
 */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;
	/* REFERENCED */
	xfs_da_blkinfo_t *blkinfo;	/* used only by the DEBUG asserts below */
	xfs_da_args_t *args;
	xfs_dablk_t child;
	xfs_dabuf_t *bp;
	int error;

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->data;
	ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	/* A root never has siblings. */
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	blkinfo = bp->data;
	/*
	 * Sanity-check the child's magic: a leaf if the root was level 1,
	 * otherwise another intermediate node.
	 */
	if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
		       INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
	} else {
		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
	/* Whole-block copy: the child becomes the root in place. */
	memcpy(root_blk->bp->data, bp->data, state->blocksize);
	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}
 828
/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 *
 * (The 0/1/2 result above is delivered through *action; the function's
 * return value is 0 or an errno.)
 */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->data;
	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	node = (xfs_da_intnode_t *)info;
	count = INT_GET(node->hdr.count, ARCH_CONVERT);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (aribtrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 * (info->forw is a raw on-disk value, used only as a
		 * zero/nonzero direction flag here.)
		 */
		forward = info->forw;
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (INT_GET(info->forw, ARCH_CONVERT)
				< INT_GET(info->back, ARCH_CONVERT));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = INT_GET(info->forw, ARCH_CONVERT);
		else
			blkno = INT_GET(info->back, ARCH_CONVERT);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		/*
		 * count >= 0 after the subtractions below means this
		 * block's entries plus the sibling's fit within 75% of
		 * a node's capacity (25% slack left over).
		 */
		node = (xfs_da_intnode_t *)info;
		count  = state->node_ents;
		count -= state->node_ents >> 2;
		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
		node = bp->data;
		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
		xfs_da_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		/* Neither sibling can absorb us. */
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}
 956
 957/*
 958 * Walk back up the tree adjusting hash values as necessary,
 959 * when we stop making changes, return.
 960 */
 961void
 962xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 963{
 964	xfs_da_state_blk_t *blk;
 965	xfs_da_intnode_t *node;
 966	xfs_da_node_entry_t *btree;
 967	xfs_dahash_t lasthash=0;
 968	int level, count;
 969
 970	level = path->active-1;
 971	blk = &path->blk[ level ];
 972	switch (blk->magic) {
 973#ifdef __KERNEL__
 974	case XFS_ATTR_LEAF_MAGIC:
 975		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
 976		if (count == 0)
 977			return;
 978		break;
 979#endif
 980	case XFS_DIR_LEAF_MAGIC:
 981		ASSERT(XFS_DIR_IS_V1(state->mp));
 982		lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
 983		if (count == 0)
 984			return;
 985		break;
 986	case XFS_DIR2_LEAFN_MAGIC:
 987		ASSERT(XFS_DIR_IS_V2(state->mp));
 988		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
 989		if (count == 0)
 990			return;
 991		break;
 992	case XFS_DA_NODE_MAGIC:
 993		lasthash = xfs_da_node_lasthash(blk->bp, &count);
 994		if (count == 0)
 995			return;
 996		break;
 997	}
 998	for (blk--, level--; level >= 0; blk--, level--) {
 999		node = blk->bp->data;
1000		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1001		btree = &node->btree[ blk->index ];
1002		if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
1003			break;
1004		blk->hashval = lasthash;
1005		INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
1006		xfs_da_log_buf(state->args->trans, blk->bp,
1007				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1008
1009		lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1010	}
1011}
1012
1013/*
1014 * Remove an entry from an intermediate node.
1015 */
1016STATIC void
1017xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
1018{
1019	xfs_da_intnode_t *node;
1020	xfs_da_node_entry_t *btree;
1021	int tmp;
1022
1023	node = drop_blk->bp->data;
1024	ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
1025	ASSERT(drop_blk->index >= 0);
1026
1027	/*
1028	 * Copy over the offending entry, or just zero it out.
1029	 */
1030	btree = &node->btree[drop_blk->index];
1031	if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1032		tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
1033		tmp *= (uint)sizeof(xfs_da_node_entry_t);
1034		memmove(btree, btree + 1, tmp);
1035		xfs_da_log_buf(state->args->trans, drop_blk->bp,
1036		    XFS_DA_LOGRANGE(node, btree, tmp));
1037		btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
1038	}
1039	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1040	xfs_da_log_buf(state->args->trans, drop_blk->bp,
1041	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1042	INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
1043	xfs_da_log_buf(state->args->trans, drop_blk->bp,
1044	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1045
1046	/*
1047	 * Copy the last hash value from the block to propagate upwards.
1048	 */
1049	btree--;
1050	drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
1051}
1052
1053/*
1054 * Unbalance the btree elements between two intermediate nodes,
1055 * move all Btree elements from one node into another.
1056 */
1057STATIC void
1058xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1059				     xfs_da_state_blk_t *save_blk)
1060{
1061	xfs_da_intnode_t *drop_node, *save_node;
1062	xfs_da_node_entry_t *btree;
1063	int tmp;
1064	xfs_trans_t *tp;
1065
1066	drop_node = drop_blk->bp->data;
1067	save_node = save_blk->bp->data;
1068	ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1069	ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1070	tp = state->args->trans;
1071
1072	/*
1073	 * If the dying block has lower hashvals, then move all the
1074	 * elements in the remaining block up to make a hole.
1075	 */
1076	if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1077	    (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1078	     INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
1079	{
1080		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
1081		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1082		memmove(btree, &save_node->btree[0], tmp);
1083		btree = &save_node->btree[0];
1084		xfs_da_log_buf(tp, save_blk->bp,
1085			XFS_DA_LOGRANGE(save_node, btree,
1086				(INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
1087				sizeof(xfs_da_node_entry_t)));
1088	} else {
1089		btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
1090		xfs_da_log_buf(tp, save_blk->bp,
1091			XFS_DA_LOGRANGE(save_node, btree,
1092				INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
1093				sizeof(xfs_da_node_entry_t)));
1094	}
1095
1096	/*
1097	 * Move all the B-tree elements from drop_blk to save_blk.
1098	 */
1099	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1100	memcpy(btree, &drop_node->btree[0], tmp);
1101	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
1102
1103	xfs_da_log_buf(tp, save_blk->bp,
1104		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1105			sizeof(save_node->hdr)));
1106
1107	/*
1108	 * Save the last hashval in the remaining block for upward propagation.
1109	 */
1110	save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1111}
1112
1113/*========================================================================
1114 * Routines used for finding things in the Btree.
1115 *========================================================================*/
1116
1117/*
1118 * Walk down the Btree looking for a particular filename, filling
1119 * in the state structure as we go.
1120 *
1121 * We will set the state structure to point to each of the elements
1122 * in each of the nodes where either the hashval is or should be.
1123 *
1124 * We support duplicate hashval's so for each entry in the current
1125 * node that could contain the desired hashval, descend.  This is a
1126 * pruned depth-first tree search.
1127 */
1128int							/* error */
1129xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1130{
1131	xfs_da_state_blk_t *blk;
1132	xfs_da_blkinfo_t *curr;
1133	xfs_da_intnode_t *node;
1134	xfs_da_node_entry_t *btree;
1135	xfs_dablk_t blkno;
1136	int probe, span, max, error, retval;
1137	xfs_dahash_t hashval;
1138	xfs_da_args_t *args;
1139
1140	args = state->args;
1141
1142	/*
1143	 * Descend thru the B-tree searching each level for the right
1144	 * node to use, until the right hashval is found.
1145	 */
1146	if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1147		blkno = state->mp->m_dirleafblk;
1148	else
1149		blkno = 0;
1150	for (blk = &state->path.blk[0], state->path.active = 1;
1151			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1152			 blk++, state->path.active++) {
1153		/*
1154		 * Read the next node down in the tree.
1155		 */
1156		blk->blkno = blkno;
1157		error = xfs_da_read_buf(args->trans, args->dp, blkno,
1158					-1, &blk->bp, args->whichfork);
1159		if (error) {
1160			blk->blkno = 0;
1161			state->path.active--;
1162			return(error);
1163		}
1164		curr = blk->bp->data;
1165		ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1166		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1167		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1168
1169		/*
1170		 * Search an intermediate node for a match.
1171		 */
1172		blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
1173		if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1174			node = blk->bp->data;
1175			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1176
1177			/*
1178			 * Binary search.  (note: small blocks will skip loop)
1179			 */
1180			max = INT_GET(node->hdr.count, ARCH_CONVERT);
1181			probe = span = max / 2;
1182			hashval = args->hashval;
1183			for (btree = &node->btree[probe]; span > 4;
1184				   btree = &node->btree[probe]) {
1185				span /= 2;
1186				if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
1187					probe += span;
1188				else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
1189					probe -= span;
1190				else
1191					break;
1192			}
1193			ASSERT((probe >= 0) && (probe < max));
1194			ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
1195
1196			/*
1197			 * Since we may have duplicate hashval's, find the first
1198			 * matching hashval in the node.
1199			 */
1200			while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
1201				btree--;
1202				probe--;
1203			}
1204			while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
1205				btree++;
1206				probe++;
1207			}
1208
1209			/*
1210			 * Pick the right block to descend on.
1211			 */
1212			if (probe == max) {
1213				blk->index = max-1;
1214				blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
1215			} else {
1216				blk->index = probe;
1217				blkno = INT_GET(btree->before, ARCH_CONVERT);
1218			}
1219		}
1220#ifdef __KERNEL__
1221		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
1222			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1223			break;
1224		}
1225#endif
1226		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1227			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1228			break;
1229		}
1230		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1231			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1232			break;
1233		}
1234	}
1235
1236	/*
1237	 * A leaf block that ends in the hashval that we are interested in
1238	 * (final hashval == search hashval) means that the next block may
1239	 * contain more entries with the same hashval, shift upward to the
1240	 * next leaf and keep searching.
1241	 */
1242	for (;;) {
1243		if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1244			ASSERT(XFS_DIR_IS_V1(state->mp));
1245			retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1246								  &blk->index);
1247		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1248			ASSERT(XFS_DIR_IS_V2(state->mp));
1249			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1250							&blk->index, state);
1251		}
1252#ifdef __KERNEL__
1253		else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1254			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1255			blk->index = args->index;
1256			args->blkno = blk->blkno;
1257		}
1258#endif
1259		if (((retval == ENOENT) || (retval == ENOATTR)) &&
1260		    (blk->hashval == args->hashval)) {
1261			error = xfs_da_path_shift(state, &state->path, 1, 1,
1262							 &retval);
1263			if (error)
1264				return(error);
1265			if (retval == 0) {
1266				continue;
1267			}
1268#ifdef __KERNEL__
1269			else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1270				/* path_shift() gives ENOENT */
1271				retval = XFS_ERROR(ENOATTR);
1272			}
1273#endif
1274		}
1275		break;
1276	}
1277	*result = retval;
1278	return(0);
1279}
1280
1281/*========================================================================
1282 * Utility routines.
1283 *========================================================================*/
1284
1285/*
1286 * Link a new block into a doubly linked list of blocks (of whatever type).
1287 */
1288int							/* error */
1289xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1290			       xfs_da_state_blk_t *new_blk)
1291{
1292	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1293	xfs_da_args_t *args;
1294	int before=0, error;
1295	xfs_dabuf_t *bp;
1296
1297	/*
1298	 * Set up environment.
1299	 */
1300	args = state->args;
1301	ASSERT(args != NULL);
1302	old_info = old_blk->bp->data;
1303	new_info = new_blk->bp->data;
1304	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1305	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1306	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1307	ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
1308	ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
1309	ASSERT(old_blk->magic == new_blk->magic);
1310
1311	switch (old_blk->magic) {
1312#ifdef __KERNEL__
1313	case XFS_ATTR_LEAF_MAGIC:
1314		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1315		break;
1316#endif
1317	case XFS_DIR_LEAF_MAGIC:
1318		ASSERT(XFS_DIR_IS_V1(state->mp));
1319		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1320		break;
1321	case XFS_DIR2_LEAFN_MAGIC:
1322		ASSERT(XFS_DIR_IS_V2(state->mp));
1323		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1324		break;
1325	case XFS_DA_NODE_MAGIC:
1326		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1327		break;
1328	}
1329
1330	/*
1331	 * Link blocks in appropriate order.
1332	 */
1333	if (before) {
1334		/*
1335		 * Link new block in before existing block.
1336		 */
1337		INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
1338		new_info->back = old_info->back; /* INT_: direct copy */
1339		if (INT_GET(old_info->back, ARCH_CONVERT)) {
1340			error = xfs_da_read_buf(args->trans, args->dp,
1341						INT_GET(old_info->back,
1342							ARCH_CONVERT), -1, &bp,
1343						args->whichfork);
1344			if (error)
1345				return(error);
1346			ASSERT(bp != NULL);
1347			tmp_info = bp->data;
1348			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
1349			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
1350			INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
1351			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1352			xfs_da_buf_done(bp);
1353		}
1354		INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
1355	} else {
1356		/*
1357		 * Link new block in after existing block.
1358		 */
1359		new_info->forw = old_info->forw; /* INT_: direct copy */
1360		INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
1361		if (INT_GET(old_info->forw, ARCH_CONVERT)) {
1362			error = xfs_da_read_buf(args->trans, args->dp,
1363						INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
1364						args->whichfork);
1365			if (error)
1366				return(error);
1367			ASSERT(bp != NULL);
1368			tmp_info = bp->data;
1369			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1370				    == INT_GET(old_info->magic, ARCH_CONVERT));
1371			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1372				    == old_blk->blkno);
1373			INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
1374			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1375			xfs_da_buf_done(bp);
1376		}
1377		INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
1378	}
1379
1380	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1381	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1382	return(0);
1383}
1384
1385/*
1386 * Compare two intermediate nodes for "order".
1387 */
1388STATIC int
1389xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1390{
1391	xfs_da_intnode_t *node1, *node2;
1392
1393	node1 = node1_bp->data;
1394	node2 = node2_bp->data;
1395	ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
1396	       (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
1397	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
1398	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
1399	      INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1400	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1401	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1402		return(1);
1403	}
1404	return(0);
1405}
1406
1407/*
1408 * Pick up the last hashvalue from an intermediate node.
1409 */
1410STATIC uint
1411xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1412{
1413	xfs_da_intnode_t *node;
1414
1415	node = bp->data;
1416	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1417	if (count)
1418		*count = INT_GET(node->hdr.count, ARCH_CONVERT);
1419	if (!node->hdr.count)
1420		return(0);
1421	return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1422}
1423
1424/*
1425 * Unlink a block from a doubly linked list of blocks.
1426 */
1427int							/* error */
1428xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1429				 xfs_da_state_blk_t *save_blk)
1430{
1431	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1432	xfs_da_args_t *args;
1433	xfs_dabuf_t *bp;
1434	int error;
1435
1436	/*
1437	 * Set up environment.
1438	 */
1439	args = state->args;
1440	ASSERT(args != NULL);
1441	save_info = save_blk->bp->data;
1442	drop_info = drop_blk->bp->data;
1443	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1444	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1445	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1446	ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
1447	ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
1448	ASSERT(save_blk->magic == drop_blk->magic);
1449	ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
1450	       (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
1451	ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
1452	       (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
1453
1454	/*
1455	 * Unlink the leaf block from the doubly linked chain of leaves.
1456	 */
1457	if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
1458		save_info->back = drop_info->back; /* INT_: direct copy */
1459		if (INT_GET(drop_info->back, ARCH_CONVERT)) {
1460			error = xfs_da_read_buf(args->trans, args->dp,
1461						INT_GET(drop_info->back,
1462							ARCH_CONVERT), -1, &bp,
1463						args->whichfork);
1464			if (error)
1465				return(error);
1466			ASSERT(bp != NULL);
1467			tmp_info = bp->data;
1468			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
1469			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
1470			INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
1471			xfs_da_log_buf(args->trans, bp, 0,
1472						    sizeof(*tmp_info) - 1);
1473			xfs_da_buf_done(bp);
1474		}
1475	} else {
1476		save_info->forw = drop_info->forw; /* INT_: direct copy */
1477		if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
1478			error = xfs_da_read_buf(args->trans, args->dp,
1479						INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
1480						args->whichfork);
1481			if (error)
1482				return(error);
1483			ASSERT(bp != NULL);
1484			tmp_info = bp->data;
1485			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1486				    == INT_GET(save_info->magic, ARCH_CONVERT));
1487			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1488				    == drop_blk->blkno);
1489			INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
1490			xfs_da_log_buf(args->trans, bp, 0,
1491						    sizeof(*tmp_info) - 1);
1492			xfs_da_buf_done(bp);
1493		}
1494	}
1495
1496	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1497	return(0);
1498}
1499
1500/*
1501 * Move a path "forward" or "!forward" one block at the current level.
1502 *
1503 * This routine will adjust a "path" to point to the next block
1504 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1505 * Btree, including updating pointers to the intermediate nodes between
1506 * the new bottom and the root.
1507 */
1508int							/* error */
1509xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1510				 int forward, int release, int *result)
1511{
1512	xfs_da_state_blk_t *blk;
1513	xfs_da_blkinfo_t *info;
1514	xfs_da_intnode_t *node;
1515	xfs_da_args_t *args;
1516	xfs_dablk_t blkno=0;
1517	int level, error;
1518
1519	/*
1520	 * Roll up the Btree looking for the first block where our
1521	 * current index is not at the edge of the block.  Note that
1522	 * we skip the bottom layer because we want the sibling block.
1523	 */
1524	args = state->args;
1525	ASSERT(args != NULL);
1526	ASSERT(path != NULL);
1527	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1528	level = (path->active-1) - 1;	/* skip bottom layer in path */
1529	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1530		ASSERT(blk->bp != NULL);
1531		node = blk->bp->data;
1532		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1533		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1534			blk->index++;
1535			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1536			break;
1537		} else if (!forward && (blk->index > 0)) {
1538			blk->index--;
1539			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1540			break;
1541		}
1542	}
1543	if (level < 0) {
1544		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
1545		ASSERT(args->oknoent);
1546		return(0);
1547	}
1548
1549	/*
1550	 * Roll down the edge of the subtree until we reach the
1551	 * same depth we were at originally.
1552	 */
1553	for (blk++, level++; level < path->active; blk++, level++) {
1554		/*
1555		 * Release the old block.
1556		 * (if it's dirty, trans won't actually let go)
1557		 */
1558		if (release)
1559			xfs_da_brelse(args->trans, blk->bp);
1560
1561		/*
1562		 * Read the next child block.
1563		 */
1564		blk->blkno = blkno;
1565		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1566						     &blk->bp, args->whichfork);
1567		if (error)
1568			return(error);
1569		ASSERT(blk->bp != NULL);
1570		info = blk->bp->data;
1571		ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1572		       INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1573		       INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1574		blk->magic = INT_GET(info->magic, ARCH_CONVERT);
1575		if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1576			node = (xfs_da_intnode_t *)info;
1577			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1578			if (forward)
1579				blk->index = 0;
1580			else
1581				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
1582			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1583		} else {
1584			ASSERT(level == path->active-1);
1585			blk->index = 0;
1586			switch(blk->magic) {
1587#ifdef __KERNEL__
1588			case XFS_ATTR_LEAF_MAGIC:
1589				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1590								      NULL);
1591				break;
1592#endif
1593			case XFS_DIR_LEAF_MAGIC:
1594				ASSERT(XFS_DIR_IS_V1(state->mp));
1595				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1596								     NULL);
1597				break;
1598			case XFS_DIR2_LEAFN_MAGIC:
1599				ASSERT(XFS_DIR_IS_V2(state->mp));
1600				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1601								       NULL);
1602				break;
1603			default:
1604				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1605				       blk->magic ==
1606				       XFS_DIRX_LEAF_MAGIC(state->mp));
1607				break;
1608			}
1609		}
1610	}
1611	*result = 0;
1612	return(0);
1613}
1614
1615
1616/*========================================================================
1617 * Utility routines.
1618 *========================================================================*/
1619
1620/*
1621 * Implement a simple hash on a character string.
1622 * Rotate the hash value by 7 bits, then XOR each character in.
1623 * This is implemented with some source-level loop unrolling.
1624 */
1625xfs_dahash_t
1626xfs_da_hashname(uchar_t *name, int namelen)
1627{
1628	xfs_dahash_t hash;
1629
1630#ifdef SLOWVERSION
1631	/*
1632	 * This is the old one-byte-at-a-time version.
1633	 */
1634	for (hash = 0; namelen > 0; namelen--)
1635		hash = *name++ ^ rol32(hash, 7);
1636
1637	return(hash);
1638#else
1639	/*
1640	 * Do four characters at a time as long as we can.
1641	 */
1642	for (hash = 0; namelen >= 4; namelen -= 4, 

Large files files are truncated, but you can click here to view the full file