PageRenderTime 102ms CodeModel.GetById 2ms app.highlight 86ms RepoModel.GetById 1ms app.codeStats 0ms

/fs/xfs/xfs_vnodeops.c

https://bitbucket.org/DutchDanny/bindroid-xtc-onex
C | 2394 lines | 1541 code | 287 blank | 566 comment | 333 complexity | e157a16cdbc83f260967e2297329a154 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18
  19#include "xfs.h"
  20#include "xfs_fs.h"
  21#include "xfs_types.h"
  22#include "xfs_bit.h"
  23#include "xfs_log.h"
  24#include "xfs_inum.h"
  25#include "xfs_trans.h"
  26#include "xfs_sb.h"
  27#include "xfs_ag.h"
  28#include "xfs_dir2.h"
  29#include "xfs_mount.h"
  30#include "xfs_da_btree.h"
  31#include "xfs_bmap_btree.h"
  32#include "xfs_ialloc_btree.h"
  33#include "xfs_dinode.h"
  34#include "xfs_inode.h"
  35#include "xfs_inode_item.h"
  36#include "xfs_itable.h"
  37#include "xfs_ialloc.h"
  38#include "xfs_alloc.h"
  39#include "xfs_bmap.h"
  40#include "xfs_acl.h"
  41#include "xfs_attr.h"
  42#include "xfs_rw.h"
  43#include "xfs_error.h"
  44#include "xfs_quota.h"
  45#include "xfs_utils.h"
  46#include "xfs_rtalloc.h"
  47#include "xfs_trans_space.h"
  48#include "xfs_log_priv.h"
  49#include "xfs_filestream.h"
  50#include "xfs_vnodeops.h"
  51#include "xfs_trace.h"
  52
  53/*
  54 * The maximum pathlen is 1024 bytes. Since the minimum file system
  55 * blocksize is 512 bytes, we can get a max of 2 extents back from
  56 * bmapi.
  57 */
  58#define SYMLINK_MAPS 2
  59
  60STATIC int
  61xfs_readlink_bmap(
  62	xfs_inode_t	*ip,
  63	char		*link)
  64{
  65	xfs_mount_t	*mp = ip->i_mount;
  66	int		pathlen = ip->i_d.di_size;
  67	int             nmaps = SYMLINK_MAPS;
  68	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
  69	xfs_daddr_t	d;
  70	int		byte_cnt;
  71	int		n;
  72	xfs_buf_t	*bp;
  73	int		error = 0;
  74
  75	error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
  76			mval, &nmaps, NULL);
  77	if (error)
  78		goto out;
  79
  80	for (n = 0; n < nmaps; n++) {
  81		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
  82		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
  83
  84		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
  85				  XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
  86		if (!bp)
  87			return XFS_ERROR(ENOMEM);
  88		error = bp->b_error;
  89		if (error) {
  90			xfs_ioerror_alert("xfs_readlink",
  91				  ip->i_mount, bp, XFS_BUF_ADDR(bp));
  92			xfs_buf_relse(bp);
  93			goto out;
  94		}
  95		if (pathlen < byte_cnt)
  96			byte_cnt = pathlen;
  97		pathlen -= byte_cnt;
  98
  99		memcpy(link, bp->b_addr, byte_cnt);
 100		xfs_buf_relse(bp);
 101	}
 102
 103	link[ip->i_d.di_size] = '\0';
 104	error = 0;
 105
 106 out:
 107	return error;
 108}
 109
 110int
 111xfs_readlink(
 112	xfs_inode_t     *ip,
 113	char		*link)
 114{
 115	xfs_mount_t	*mp = ip->i_mount;
 116	xfs_fsize_t	pathlen;
 117	int		error = 0;
 118
 119	trace_xfs_readlink(ip);
 120
 121	if (XFS_FORCED_SHUTDOWN(mp))
 122		return XFS_ERROR(EIO);
 123
 124	xfs_ilock(ip, XFS_ILOCK_SHARED);
 125
 126	pathlen = ip->i_d.di_size;
 127	if (!pathlen)
 128		goto out;
 129
 130	if (pathlen < 0 || pathlen > MAXPATHLEN) {
 131		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
 132			 __func__, (unsigned long long) ip->i_ino,
 133			 (long long) pathlen);
 134		ASSERT(0);
 135		return XFS_ERROR(EFSCORRUPTED);
 136	}
 137
 138
 139	if (ip->i_df.if_flags & XFS_IFINLINE) {
 140		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
 141		link[pathlen] = '\0';
 142	} else {
 143		error = xfs_readlink_bmap(ip, link);
 144	}
 145
 146 out:
 147	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 148	return error;
 149}
 150
 151/*
 152 * Flags for xfs_free_eofblocks
 153 */
 154#define XFS_FREE_EOF_TRYLOCK	(1<<0)
 155
 156/*
 157 * This is called by xfs_inactive to free any blocks beyond eof
 158 * when the link count isn't zero and by xfs_dm_punch_hole() when
 159 * punching a hole to EOF.
 160 */
STATIC int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	int		flags)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	/*
	 * Probe the range between EOF and the maximum file offset for an
	 * allocated extent.  A read-only mapping lookup suffices, so only
	 * the shared ilock is needed here.
	 */
	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
			  NULL, 0, &imap, &nimaps, NULL);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/* Anything but a hole (or outstanding delalloc blocks) means work. */
	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		/*
		 * XFS_FREE_EOF_TRYLOCK callers can't block on the iolock;
		 * a failed trylock means we simply skip the truncation.
		 */
		if (flags & XFS_FREE_EOF_TRYLOCK) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				xfs_trans_cancel(tp, 0);
				return 0;
			}
		} else {
			xfs_ilock(ip, XFS_IOLOCK_EXCL);
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		/*
		 * "Truncate" to the current size: frees everything mapped
		 * beyond ip->i_size without changing the file size itself.
		 */
		error = xfs_itruncate_data(&tp, ip, ip->i_size);
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
		}
		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
	}
	return error;
}
 248
/*
 * Free a symlink that has blocks associated with it.
 *
 * On entry *tpp is an allocated but not yet reserved transaction.  On
 * success *tpp is replaced with a fresh transaction carrying an
 * itruncate reservation, and the inode is returned locked (iolock and
 * ilock exclusive) but NOT joined to the new transaction.  On failure
 * the transaction is cancelled, the inode is unlocked and *tpp is NULL.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	/* Inline symlinks are handled by xfs_inactive_symlink_local(). */
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	xfs_bmap_init(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list)))
		goto error0;
	/*
	 * Invalidate the block(s).
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(tp->t_ticket);

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}
 406
/*
 * Free a symlink whose target is stored inline in the inode's data fork.
 *
 * Reserves an itruncate transaction on *tpp, locks the inode and frees
 * the inline fork memory.  On error the transaction is cancelled and
 * *tpp set to NULL; on success the inode is left locked (ilock and
 * iolock exclusive) for the caller, which still owns *tpp.
 */
STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}
 443
/*
 * Tear down the attribute fork of an inode being inactivated.
 *
 * xfs_attr_inactive() cannot run inside the caller's transaction, so the
 * transaction in *tpp is committed first.  Afterwards a fresh ifree
 * transaction is started and returned via *tpp with the inode relocked
 * and joined.  On any error the inode is unlocked and *tpp is NULL.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto error_unlock;

	error = xfs_attr_inactive(ip);
	if (error)
		goto error_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error)
		goto error_cancel;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;

error_cancel:
	ASSERT(XFS_FORCED_SHUTDOWN(mp));
	xfs_trans_cancel(tp, 0);
error_unlock:
	*tpp = NULL;
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}
 491
/*
 * Called when a reference to a regular file is released (see the
 * last-close comments below).  Trims speculative preallocation beyond
 * EOF and kicks off early writeout of recently truncated files.
 * Read-only mounts return immediately; unlinked inodes (nlink == 0)
 * return early and are dealt with in xfs_inactive().
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file. Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated) {
			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
				xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
		}
	}

	if (ip->i_d.di_nlink == 0)
		return 0;

	if ((S_ISREG(ip->i_d.di_mode) &&
	     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
	       ip->i_delayed_blks > 0)) &&
	     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
	    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {

		/*
		 * If we can't get the iolock just skip truncating the blocks
		 * past EOF because we could deadlock with the mmap_sem
		 * otherwise.  We'll get another chance to drop them once the
		 * last reference to the inode is dropped, so we'll never leak
		 * blocks permanently.
		 *
		 * Further, check if the inode is being opened, written and
		 * closed frequently and we have delayed allocation blocks
		 * outstanding (e.g. streaming writes from the NFS server),
		 * truncating the blocks past EOF will cause fragmentation to
		 * occur.
		 *
		 * In this case don't do the truncation, either, but we have to
		 * be careful how we detect this case. Blocks beyond EOF show
		 * up as i_delayed_blks even when the inode is clean, so we
		 * need to truncate them away first before checking for a dirty
		 * release. Hence on the first dirty close we will still remove
		 * the speculative allocation, but after that we will leave it
		 * in place.
		 */
		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
			return 0;

		error = xfs_free_eofblocks(mp, ip,
					   XFS_FREE_EOF_TRYLOCK);
		if (error)
			return error;

		/* delalloc blocks after truncation means it really is dirty */
		if (ip->i_delayed_blks)
			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
	}
	return 0;
}
 581
/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 *
 * Always returns VN_INACTIVE_CACHE; errors encountered along the way
 * abort the teardown but are not otherwise propagated to the caller.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    S_ISREG(ip->i_d.di_mode));

	mp = ip->i_mount;

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	/*
	 * The inode is still linked: the only work is trimming any
	 * speculative preallocation beyond EOF (same condition as the
	 * check in xfs_release()).
	 */
	if (ip->i_d.di_nlink != 0) {
		if ((S_ISREG(ip->i_d.di_mode) &&
                     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
                       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, 0);
			if (error)
				return VN_INACTIVE_CACHE;
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		/* Wait for pending I/O before truncating the data fork. */
		xfs_ioend_wait(ip);

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		error = xfs_itruncate_data(&tp, ip, 0);
		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if (S_ISLNK(ip->i_d.di_mode)) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		xfs_trans_ijoin(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	xfs_bmap_init(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			xfs_notice(mp, "%s: xfs_ifree returned error %d",
				__func__, error);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is nothing we can
		 * do except to try to keep going. Make sure it's not a silent
		 * error.
		 */
		error = xfs_bmap_finish(&tp,  &free_list, &committed);
		if (error)
			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
				__func__, error);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				__func__, error);
	}

	/*
	 * Release the dquots held by inode, if any.
	 */
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}
 777
 778/*
 779 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
 780 * is allowed, otherwise it has to be an exact match. If a CI match is found,
 781 * ci_name->name will point to a the actual name (caller must free) or
 782 * will be set to NULL if an exact match is found.
 783 */
 784int
 785xfs_lookup(
 786	xfs_inode_t		*dp,
 787	struct xfs_name		*name,
 788	xfs_inode_t		**ipp,
 789	struct xfs_name		*ci_name)
 790{
 791	xfs_ino_t		inum;
 792	int			error;
 793	uint			lock_mode;
 794
 795	trace_xfs_lookup(dp, name);
 796
 797	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 798		return XFS_ERROR(EIO);
 799
 800	lock_mode = xfs_ilock_map_shared(dp);
 801	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
 802	xfs_iunlock_map_shared(dp, lock_mode);
 803
 804	if (error)
 805		goto out;
 806
 807	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 808	if (error)
 809		goto out_free_name;
 810
 811	return 0;
 812
 813out_free_name:
 814	if (ci_name)
 815		kmem_free(ci_name->name);
 816out:
 817	*ipp = NULL;
 818	return error;
 819}
 820
/*
 * Create a new file or directory "name" in directory dp.
 *
 * mode selects the file type and permissions (S_ISDIR(mode) requests a
 * directory), rdev carries the device number for special files (it is
 * ignored for directories).  On success the new, referenced inode is
 * returned in *ipp.
 */
int
xfs_create(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	mode_t			mode,
	xfs_dev_t		rdev,
	xfs_inode_t		**ipp)
{
	int			is_dir = S_ISDIR(mode);
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_inode	*ip = NULL;
	struct xfs_trans	*tp = NULL;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	uint			cancel_flags;
	int			committed;
	prid_t			prid;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	uint			resblks;
	uint			log_res;
	uint			log_count;

	trace_xfs_create(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* New inode inherits the parent's project ID when requested. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		return error;

	if (is_dir) {
		rdev = 0;
		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
		log_res = XFS_MKDIR_LOG_RES(mp);
		log_count = XFS_MKDIR_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
	} else {
		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
		log_res = XFS_CREATE_LOG_RES(mp);
		log_count = XFS_CREATE_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	}

	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, log_res, 0,
			XFS_TRANS_PERM_LOG_RES, log_count);
	if (error == ENOSPC) {
		/* flush outstanding delalloc blocks and retry */
		xfs_flush_inodes(dp);
		error = xfs_trans_reserve(tp, resblks, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error == ENOSPC) {
		/* No space at all so try a "no-allocation" reservation */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error) {
		/* No reservation was made, so cancel must not release one. */
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	/*
	 * Check for directory link count overflow.
	 */
	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto out_trans_cancel;
	}

	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto out_trans_cancel;

	error = xfs_dir_canenter(tp, dp, name, resblks);
	if (error)
		goto out_trans_cancel;

	/*
	 * A newly created regular or special file just has one directory
	 * entry pointing to them, but a directory also the "." entry
	 * pointing to itself.
	 */
	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
			       prid, resblks > 0, &ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto out_trans_cancel;
		goto out_trans_abort;
	}

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto out_trans_abort;
	}
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	if (is_dir) {
		error = xfs_dir_init(tp, ip, dp);
		if (error)
			goto out_bmap_cancel;

		error = xfs_bumplink(tp, dp);
		if (error)
			goto out_bmap_cancel;
	}

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto out_release_inode;

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
 out_trans_abort:
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	if (ip)
		IRELE(ip);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}
1022
#ifdef DEBUG
/*
 * Debug-only counters tracking retry/backoff behaviour of
 * xfs_lock_inodes() (see the increments in that function).
 */
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif
1030
1031/*
1032 * Bump the subclass so xfs_lock_inodes() acquires each lock with
1033 * a different value
1034 */
1035static inline int
1036xfs_lock_inumorder(int lock_mode, int subclass)
1037{
1038	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1039		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
1040	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
1041		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
1042
1043	return lock_mode;
1044}
1045
1046/*
1047 * The following routine will lock n inodes in exclusive mode.
1048 * We assume the caller calls us with the inodes in i_ino order.
1049 *
1050 * We need to detect deadlock where an inode that we lock
1051 * is in the AIL and we start waiting for another inode that is locked
1052 * by a thread in a long running transaction (such as truncate). This can
1053 * result in deadlock since the long running trans might need to wait
1054 * for the inode we just locked in order to push the tail and free space
1055 * in the log.
 *
 * To avoid that, once any already-held inode is found in the AIL we
 * switch to trylock mode: if a trylock fails we drop every lock held
 * so far (unlocking pushes the AIL tail) and restart the scan from the
 * first inode.
1056 */
1057void
1058xfs_lock_inodes(
1059	xfs_inode_t	**ips,
1060	int		inodes,
1061	uint		lock_mode)
1062{
1063	int		attempts = 0, i, j, try_lock;
1064	xfs_log_item_t	*lp;
1065
1066	ASSERT(ips && (inodes >= 2)); /* we need at least two */
1067
1068	try_lock = 0;
1069	i = 0;
1070
1071again:
	/* Restart point after dropping all locks on a trylock failure. */
1072	for (; i < inodes; i++) {
1073		ASSERT(ips[i]);
1074
1075		if (i && (ips[i] == ips[i-1]))	/* Already locked */
1076			continue;
1077
1078		/*
1079		 * If try_lock is not set yet, make sure all locked inodes
1080		 * are not in the AIL.
1081		 * If any are, set try_lock to be used later.
1082		 */
1083
1084		if (!try_lock) {
1085			for (j = (i - 1); j >= 0 && !try_lock; j--) {
1086				lp = (xfs_log_item_t *)ips[j]->i_itemp;
1087				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1088					try_lock++;
1089				}
1090			}
1091		}
1092
1093		/*
1094		 * If any of the previous locks we have locked is in the AIL,
1095		 * we must TRY to get the second and subsequent locks. If
1096		 * we can't get any, we must release all we have
1097		 * and try again.
1098		 */
1099
1100		if (try_lock) {
1101			/* try_lock must be 0 if i is 0. */
1102			/*
1103			 * try_lock means we have an inode locked
1104			 * that is in the AIL.
1105			 */
1106			ASSERT(i != 0);
1107			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
1108				attempts++;
1109
1110				/*
1111				 * Unlock all previous guys and try again.
1112				 * xfs_iunlock will try to push the tail
1113				 * if the inode is in the AIL.
1114				 */
1115
1116				for(j = i - 1; j >= 0; j--) {
1117
1118					/*
1119					 * Check to see if we've already
1120					 * unlocked this one.
1121					 * Not the first one going back,
1122					 * and the inode ptr is the same.
1123					 */
1124					if ((j != (i - 1)) && ips[j] ==
1125								ips[j+1])
1126						continue;
1127
1128					xfs_iunlock(ips[j], lock_mode);
1129				}
1130
1131				if ((attempts % 5) == 0) {
1132					delay(1); /* Don't just spin the CPU */
1133#ifdef DEBUG
1134					xfs_lock_delays++;
1135#endif
1136				}
1137				i = 0;
1138				try_lock = 0;
1139				goto again;
1140			}
1141		} else {
1142			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
1143		}
1144	}
1145
1146#ifdef DEBUG
1147	if (attempts) {
1148		if (attempts < 5) xfs_small_retries++;
1149		else if (attempts < 100) xfs_middle_retries++;
1150		else xfs_lots_retries++;
1151	} else {
1152		xfs_locked_n++;
1153	}
1154#endif
1155}
1156
1157/*
1158 * xfs_lock_two_inodes() can only be used to lock one type of lock
1159 * at a time - the iolock or the ilock, but not both at once. If
1160 * we lock both at once, lockdep will report false positives saying
1161 * we have violated locking orders.
1162 */
1163void
1164xfs_lock_two_inodes(
1165	xfs_inode_t		*ip0,
1166	xfs_inode_t		*ip1,
1167	uint			lock_mode)
1168{
1169	xfs_inode_t		*temp;
1170	int			attempts = 0;
1171	xfs_log_item_t		*lp;
1172
1173	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1174		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
1175	ASSERT(ip0->i_ino != ip1->i_ino);
1176
	/*
	 * Always lock in ascending inode number order so that concurrent
	 * callers locking the same pair cannot deadlock ABBA-style.
	 */
1177	if (ip0->i_ino > ip1->i_ino) {
1178		temp = ip0;
1179		ip0 = ip1;
1180		ip1 = temp;
1181	}
1182
1183 again:
1184	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
1185
1186	/*
1187	 * If the first lock we have locked is in the AIL, we must TRY to get
1188	 * the second lock. If we can't get it, we must release the first one
1189	 * and try again.
1190	 */
1191	lp = (xfs_log_item_t *)ip0->i_itemp;
1192	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1193		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			/* Unlocking ip0 pushes the AIL tail; retry from scratch. */
1194			xfs_iunlock(ip0, lock_mode);
1195			if ((++attempts % 5) == 0)
1196				delay(1); /* Don't just spin the CPU */
1197			goto again;
1198		}
1199	} else {
1200		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
1201	}
1202}
1203
/*
 * Remove the directory entry @name in @dp that refers to inode @ip and
 * drop the associated link count(s).  For a directory this also checks
 * that it is empty and drops both the "." link and the parent's link
 * from "..".  Returns 0 or a positive XFS errno.
 */
1204int
1205xfs_remove(
1206	xfs_inode_t             *dp,
1207	struct xfs_name		*name,
1208	xfs_inode_t		*ip)
1209{
1210	xfs_mount_t		*mp = dp->i_mount;
1211	xfs_trans_t             *tp = NULL;
1212	int			is_dir = S_ISDIR(ip->i_d.di_mode);
1213	int                     error = 0;
1214	xfs_bmap_free_t         free_list;
1215	xfs_fsblock_t           first_block;
1216	int			cancel_flags;
1217	int			committed;
1218	int			link_zero;
1219	uint			resblks;
1220	uint			log_count;
1221
1222	trace_xfs_remove(dp, name);
1223
1224	if (XFS_FORCED_SHUTDOWN(mp))
1225		return XFS_ERROR(EIO);
1226
	/* Attach dquots to both inodes before starting the transaction. */
1227	error = xfs_qm_dqattach(dp, 0);
1228	if (error)
1229		goto std_return;
1230
1231	error = xfs_qm_dqattach(ip, 0);
1232	if (error)
1233		goto std_return;
1234
1235	if (is_dir) {
1236		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1237		log_count = XFS_DEFAULT_LOG_COUNT;
1238	} else {
1239		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1240		log_count = XFS_REMOVE_LOG_COUNT;
1241	}
1242	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1243
1244	/*
1245	 * We try to get the real space reservation first,
1246	 * allowing for directory btree deletion(s) implying
1247	 * possible bmap insert(s).  If we can't get the space
1248	 * reservation then we use 0 instead, and avoid the bmap
1249	 * btree insert(s) in the directory code by, if the bmap
1250	 * insert tries to happen, instead trimming the LAST
1251	 * block from the directory.
1252	 */
1253	resblks = XFS_REMOVE_SPACE_RES(mp);
1254	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
1255				  XFS_TRANS_PERM_LOG_RES, log_count);
1256	if (error == ENOSPC) {
1257		resblks = 0;
1258		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
1259					  XFS_TRANS_PERM_LOG_RES, log_count);
1260	}
1261	if (error) {
1262		ASSERT(error != ENOSPC);
1263		cancel_flags = 0;
1264		goto out_trans_cancel;
1265	}
1266
1267	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1268
	/* The transaction now owns both ilocks and unlocks them on commit/cancel. */
1269	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1270	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1271
1272	/*
1273	 * If we're removing a directory perform some additional validation.
1274	 */
1275	if (is_dir) {
1276		ASSERT(ip->i_d.di_nlink >= 2);
1277		if (ip->i_d.di_nlink != 2) {
1278			error = XFS_ERROR(ENOTEMPTY);
1279			goto out_trans_cancel;
1280		}
1281		if (!xfs_dir_isempty(ip)) {
1282			error = XFS_ERROR(ENOTEMPTY);
1283			goto out_trans_cancel;
1284		}
1285	}
1286
1287	xfs_bmap_init(&free_list, &first_block);
1288	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
1289					&first_block, &free_list, resblks);
1290	if (error) {
1291		ASSERT(error != ENOENT);
1292		goto out_bmap_cancel;
1293	}
1294	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1295
1296	if (is_dir) {
1297		/*
1298		 * Drop the link from ip's "..".
1299		 */
1300		error = xfs_droplink(tp, dp);
1301		if (error)
1302			goto out_bmap_cancel;
1303
1304		/*
1305		 * Drop the "." link from ip to self.
1306		 */
1307		error = xfs_droplink(tp, ip);
1308		if (error)
1309			goto out_bmap_cancel;
1310	} else {
1311		/*
1312		 * When removing a non-directory we need to log the parent
1313		 * inode here.  For a directory this is done implicitly
1314		 * by the xfs_droplink call for the ".." entry.
1315		 */
1316		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1317	}
1318
1319	/*
1320	 * Drop the link from dp to ip.
1321	 */
1322	error = xfs_droplink(tp, ip);
1323	if (error)
1324		goto out_bmap_cancel;
1325
1326	/*
1327	 * Determine if this is the last link while
1328	 * we are in the transaction.
1329	 */
1330	link_zero = (ip->i_d.di_nlink == 0);
1331
1332	/*
1333	 * If this is a synchronous mount, make sure that the
1334	 * remove transaction goes to disk before returning to
1335	 * the user.
1336	 */
1337	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1338		xfs_trans_set_sync(tp);
1339
1340	error = xfs_bmap_finish(&tp, &free_list, &committed);
1341	if (error)
1342		goto out_bmap_cancel;
1343
1344	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1345	if (error)
1346		goto std_return;
1347
1348	/*
1349	 * If we are using filestreams, kill the stream association.
1350	 * If the file is still open it may get a new one but that
1351	 * will get killed on last close in xfs_close() so we don't
1352	 * have to worry about that.
1353	 */
1354	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1355		xfs_filestream_deassociate(ip);
1356
1357	return 0;
1358
1359 out_bmap_cancel:
1360	xfs_bmap_cancel(&free_list);
1361	cancel_flags |= XFS_TRANS_ABORT;
1362 out_trans_cancel:
1363	xfs_trans_cancel(tp, cancel_flags);
1364 std_return:
1365	return error;
1366}
1367
/*
 * Create a new hard link @target_name in directory @tdp referring to the
 * existing non-directory inode @sip.  Returns 0 or a positive XFS errno.
 */
1368int
1369xfs_link(
1370	xfs_inode_t		*tdp,
1371	xfs_inode_t		*sip,
1372	struct xfs_name		*target_name)
1373{
1374	xfs_mount_t		*mp = tdp->i_mount;
1375	xfs_trans_t		*tp;
1376	int			error;
1377	xfs_bmap_free_t         free_list;
1378	xfs_fsblock_t           first_block;
1379	int			cancel_flags;
1380	int			committed;
1381	int			resblks;
1382
1383	trace_xfs_link(tdp, target_name);
1384
1385	ASSERT(!S_ISDIR(sip->i_d.di_mode));
1386
1387	if (XFS_FORCED_SHUTDOWN(mp))
1388		return XFS_ERROR(EIO);
1389
	/* Attach dquots to both inodes before starting the transaction. */
1390	error = xfs_qm_dqattach(sip, 0);
1391	if (error)
1392		goto std_return;
1393
1394	error = xfs_qm_dqattach(tdp, 0);
1395	if (error)
1396		goto std_return;
1397
1398	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
1399	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/* Reserve space for the new name; retry without blocks on ENOSPC. */
1400	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1401	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
1402			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1403	if (error == ENOSPC) {
1404		resblks = 0;
1405		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
1406				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1407	}
1408	if (error) {
1409		cancel_flags = 0;
1410		goto error_return;
1411	}
1412
1413	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1414
1415	xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
1416	xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
1417
1418	/*
1419	 * If the source has too many links, we can't make any more to it.
1420	 */
1421	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
1422		error = XFS_ERROR(EMLINK);
1423		goto error_return;
1424	}
1425
1426	/*
1427	 * If we are using project inheritance, we only allow hard link
1428	 * creation in our tree when the project IDs are the same; else
1429	 * the tree quota mechanism could be circumvented.
1430	 */
1431	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1432		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1433		error = XFS_ERROR(EXDEV);
1434		goto error_return;
1435	}
1436
1437	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
1438	if (error)
1439		goto error_return;
1440
1441	xfs_bmap_init(&free_list, &first_block);
1442
1443	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1444					&first_block, &free_list, resblks);
1445	if (error)
1446		goto abort_return;
1447	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1448	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1449
	/* Bump the source inode's link count now that the entry exists. */
1450	error = xfs_bumplink(tp, sip);
1451	if (error)
1452		goto abort_return;
1453
1454	/*
1455	 * If this is a synchronous mount, make sure that the
1456	 * link transaction goes to disk before returning to
1457	 * the user.
1458	 */
1459	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1460		xfs_trans_set_sync(tp);
1461	}
1462
1463	error = xfs_bmap_finish (&tp, &free_list, &committed);
1464	if (error) {
1465		xfs_bmap_cancel(&free_list);
1466		goto abort_return;
1467	}
1468
1469	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1470
1471 abort_return:
1472	cancel_flags |= XFS_TRANS_ABORT;
1473 error_return:
1474	xfs_trans_cancel(tp, cancel_flags);
1475 std_return:
1476	return error;
1477}
1478
/*
 * Create the symbolic link @link_name in directory @dp whose target text
 * is @target_path.  A target that fits in the inode's data fork is stored
 * inline (local format); longer targets are written to allocated remote
 * blocks.  On success the new inode is returned in *ipp with a reference
 * held.  Returns 0 or a positive XFS errno.
 */
1479int
1480xfs_symlink(
1481	xfs_inode_t		*dp,
1482	struct xfs_name		*link_name,
1483	const char		*target_path,
1484	mode_t			mode,
1485	xfs_inode_t		**ipp)
1486{
1487	xfs_mount_t		*mp = dp->i_mount;
1488	xfs_trans_t		*tp;
1489	xfs_inode_t		*ip;
1490	int			error;
1491	int			pathlen;
1492	xfs_bmap_free_t		free_list;
1493	xfs_fsblock_t		first_block;
1494	boolean_t		unlock_dp_on_error = B_FALSE;
1495	uint			cancel_flags;
1496	int			committed;
1497	xfs_fileoff_t		first_fsb;
1498	xfs_filblks_t		fs_blocks;
1499	int			nmaps;
1500	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
1501	xfs_daddr_t		d;
1502	const char		*cur_chunk;
1503	int			byte_cnt;
1504	int			n;
1505	xfs_buf_t		*bp;
1506	prid_t			prid;
1507	struct xfs_dquot	*udqp, *gdqp;
1508	uint			resblks;
1509
1510	*ipp = NULL;
1511	error = 0;
1512	ip = NULL;
1513	tp = NULL;
1514
1515	trace_xfs_symlink(dp, link_name);
1516
1517	if (XFS_FORCED_SHUTDOWN(mp))
1518		return XFS_ERROR(EIO);
1519
1520	/*
1521	 * Check component lengths of the target path name.
1522	 */
1523	pathlen = strlen(target_path);
1524	if (pathlen >= MAXPATHLEN)      /* total string too long */
1525		return XFS_ERROR(ENAMETOOLONG);
1526
1527	udqp = gdqp = NULL;
1528	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1529		prid = xfs_get_projid(dp);
1530	else
1531		prid = XFS_PROJID_DEFAULT;
1532
1533	/*
1534	 * Make sure that we have allocated dquot(s) on disk.
1535	 */
1536	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1537			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1538	if (error)
1539		goto std_return;
1540
1541	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
1542	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1543	/*
1544	 * The symlink will fit into the inode data fork?
1545	 * There can't be any attributes so we get the whole variable part.
1546	 */
1547	if (pathlen <= XFS_LITINO(mp))
1548		fs_blocks = 0;
1549	else
1550		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
1551	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
1552	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
1553			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	/* An inline symlink needs no data blocks; retry reservation without them. */
1554	if (error == ENOSPC && fs_blocks == 0) {
1555		resblks = 0;
1556		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
1557				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1558	}
1559	if (error) {
1560		cancel_flags = 0;
1561		goto error_return;
1562	}
1563
1564	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1565	unlock_dp_on_error = B_TRUE;
1566
1567	/*
1568	 * Check whether the directory allows new symlinks or not.
1569	 */
1570	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
1571		error = XFS_ERROR(EPERM);
1572		goto error_return;
1573	}
1574
1575	/*
1576	 * Reserve disk quota : blocks and inode.
1577	 */
1578	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
1579	if (error)
1580		goto error_return;
1581
1582	/*
1583	 * Check for ability to enter directory entry, if no space reserved.
1584	 */
1585	error = xfs_dir_canenter(tp, dp, link_name, resblks);
1586	if (error)
1587		goto error_return;
1588	/*
1589	 * Initialize the bmap freelist prior to calling either
1590	 * bmapi or the directory create code.
1591	 */
1592	xfs_bmap_init(&free_list, &first_block);
1593
1594	/*
1595	 * Allocate an inode for the symlink.
1596	 */
1597	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
1598			       prid, resblks > 0, &ip, NULL);
1599	if (error) {
1600		if (error == ENOSPC)
1601			goto error_return;
1602		goto error1;
1603	}
1604
1605	/*
1606	 * An error after we've joined dp to the transaction will result in the
1607	 * transaction cancel unlocking dp so don't do it explicitly in the
1608	 * error path.
1609	 */
1610	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1611	unlock_dp_on_error = B_FALSE;
1612
1613	/*
1614	 * Also attach the dquot(s) to it, if applicable.
1615	 */
1616	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1617
1618	if (resblks)
1619		resblks -= XFS_IALLOC_SPACE_RES(mp);
1620	/*
1621	 * If the symlink will fit into the inode, write it inline.
1622	 */
1623	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
1624		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
1625		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
1626		ip->i_d.di_size = pathlen;
1627
1628		/*
1629		 * The inode was initially created in extent format.
1630		 */
1631		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
1632		ip->i_df.if_flags |= XFS_IFINLINE;
1633
1634		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
1635		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
1636
1637	} else {
		/* Remote symlink: allocate blocks and copy the target in chunks. */
1638		first_fsb = 0;
1639		nmaps = SYMLINK_MAPS;
1640
1641		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
1642				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
1643				  &first_block, resblks, mval, &nmaps,
1644				  &free_list);
1645		if (error)
1646			goto error2;
1647
1648		if (resblks)
1649			resblks -= fs_blocks;
1650		ip->i_d.di_size = pathlen;
1651		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1652
1653		cur_chunk = target_path;
1654		for (n = 0; n < nmaps; n++) {
1655			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
1656			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
1657			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
1658					       BTOBB(byte_cnt), 0);
1659			ASSERT(!xfs_buf_geterror(bp));
1660			if (pathlen < byte_cnt) {
1661				byte_cnt = pathlen;
1662			}
1663			pathlen -= byte_cnt;
1664
1665			memcpy(bp->b_addr, cur_chunk, byte_cnt);
1666			cur_chunk += byte_cnt;
1667
1668			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
1669		}
1670	}
1671
1672	/*
1673	 * Create the directory entry for the symlink.
1674	 */
1675	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
1676					&first_block, &free_list, resblks);
1677	if (error)
1678		goto error2;
1679	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1680	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1681
1682	/*
1683	 * If this is a synchronous mount, make sure that the
1684	 * symlink transaction goes to disk before returning to
1685	 * the user.
1686	 */
1687	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1688		xfs_trans_set_sync(tp);
1689	}
1690
1691	error = xfs_bmap_finish(&tp, &free_list, &committed);
1692	if (error) {
1693		goto error2;
1694	}
1695	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1696	xfs_qm_dqrele(udqp);
1697	xfs_qm_dqrele(gdqp);
1698
1699	*ipp = ip;
1700	return 0;
1701
	/*
	 * error2: a new inode reference is held; drop it, then fall through
	 * to cancel the bmap free list and abort the transaction.
	 */
1702 error2:
1703	IRELE(ip);
1704 error1:
1705	xfs_bmap_cancel(&free_list);
1706	cancel_flags |= XFS_TRANS_ABORT;
1707 error_return:
1708	xfs_trans_cancel(tp, cancel_flags);
1709	xfs_qm_dqrele(udqp);
1710	xfs_qm_dqrele(gdqp);
1711
1712	if (unlock_dp_on_error)
1713		xfs_iunlock(dp, XFS_ILOCK_EXCL);
1714 std_return:
1715	return error;
1716}
1717
1718int
1719xfs_set_dmattrs(
1720	xfs_inode_t     *ip,
1721	u_int		evmask,
1722	u_int16_t	state)
1723{
1724	xfs_mount_t	*mp = ip->i_mount;
1725	xfs_trans_t	*tp;
1726	int		error;
1727
1728	if (!capable(CAP_SYS_ADMIN))
1729		return XFS_ERROR(EPERM);
1730
1731	if (XFS_FORCED_SHUTDOWN(mp))
1732		return XFS_ERROR(EIO);
1733
1734	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
1735	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
1736	if (error) {
1737		xfs_trans_cancel(tp, 0);
1738		return error;
1739	}
1740	xfs_ilock(ip, XFS_ILOCK_EXCL);
1741	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1742
1743	ip->i_d.di_dmevmask = evmask;
1744	ip->i_d.di_dmstate  = state;
1745
1746	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1747	error = xfs_trans_commit(tp, 0);
1748
1749	return error;
1750}
1751
1752/*
1753 * xfs_alloc_file_space()
1754 *      This routine allocates disk space for the given file.
1755 *
1756 *	If alloc_type == 0, this request is for an ALLOCSP type
1757 *	request which will change the file size.  In this case, no
1758 *	DMAPI event will be generated by the call.  A TRUNCATE event
1759 *	will be generated later by xfs_setattr.
1760 *
1761 *	If alloc_type != 0, this request is for a RESVSP type
1762 *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
1763 *	lower block boundary byte address is less than the file's
1764 *	length.
1765 *
1766 * RETURNS:
1767 *       0 on success
1768 *      errno on error
1769 *
1770 */
1771STATIC int
1772xfs_alloc_file_space(
1773	xfs_inode_t		*ip,
1774	xfs_off_t		offset,
1775	xfs_off_t		len,
1776	int			alloc_type,
1777	int			attr_flags)
1778{
1779	xfs_mount_t		*mp = ip->i_mount;
1780	xfs_off_t		count;
1781	xfs_filblks_t		allocated_fsb;
1782	xfs_filblks_t		allocatesize_fsb;
1783	xfs_extlen_t		extsz, temp;
1784	xfs_fileoff_t		startoffset_fsb;
1785	xfs_fsblock_t		firstfsb;
1786	int			nimaps;
1787	int			bmapi_flag;
1788	int			quota_flag;
1789	int			rt;
1790	xfs_trans_t		*tp;
1791	xfs_bmbt_irec_t		imaps[1], *imapp;
1792	xfs_bmap_free_t		free_list;
1793	uint			qblocks, resblks, resrtextents;
1794	int			committed;
1795	int			error;
1796
1797	trace_xfs_alloc_file_space(ip);
1798
1799	if (XFS_FORCED_SHUTDOWN(mp))
1800		return XFS_ERROR(EIO);
1801
1802	error = xfs_qm_dqattach(ip, 0);
1803	if (error)
1804		return error;
1805
1806	if (len <= 0)
1807		return XFS_ERROR(EINVAL);
1808
1809	rt = XFS_IS_REALTIME_INODE(ip);
1810	extsz = xfs_get_extsz_hint(ip);
1811
1812	count = len;
1813	imapp = &imaps[0];
1814	nimaps = 1;
1815	bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
1816	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
1817	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
1818
1819	/*
1820	 * Allocate file space until done or until there is an error.
	 * Each pass runs its own transaction and maps one extent
	 * (nimaps == 1), then advances past what was allocated.
1821	 */
1822	while (allocatesize_fsb && !error) {
1823		xfs_fileoff_t	s, e;
1824
1825		/*
1826		 * Determine space reservations for data/realtime.
		 * With an extent size hint, round [s, e) out to hint
		 * alignment so the reservation covers the whole extent.
1827		 */
1828		if (unlikely(extsz)) {
1829			s = startoffset_fsb;
1830			do_div(s, extsz);
1831			s *= extsz;
1832			e = startoffset_fsb + allocatesize_fsb;
1833			if ((temp = do_mod(startoffset_fsb, extsz)))
1834				e += temp;
1835			if ((temp = do_mod(e, extsz)))
1836				e += extsz - temp;
1837		} else {
1838			s = 0;
1839			e = allocatesize_fsb;
1840		}
1841
1842		/*
1843		 * The transaction reservation is limited to a 32-bit block
1844		 * count, hence we need to limit the number of blocks we are
1845		 * trying to reserve to avoid an overflow. We can't allocate
1846		 * more than @nimaps extents, and an extent is limited on disk
1847		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
1848		 */
1849		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
1850		if (unlikely(rt)) {
1851			resrtextents = qblocks = resblks;
1852			resrtextents /= mp->m_sb.sb_rextsize;
1853			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1854			quota_flag = XFS_QMOPT_RES_RTBLKS;
1855		} else {
1856			resrtextents = 0;
1857			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
1858			quota_flag = XFS_QMOPT_RES_REGBLKS;
1859		}
1860
1861		/*
1862		 * Allocate and setup the transaction.
1863		 */
1864		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1865		error = xfs_trans_reserve(tp, resblks,
1866					  XFS_WRITE_LOG_RES(mp), resrtextents,
1867					  XFS_TRANS_PERM_LOG_RES,
1868					  XFS_WRITE_LOG_COUNT);
1869		/*
1870		 * Check for running out of space
1871		 */
1872		if (error) {
1873			/*
1874			 * Free the transaction structure.
1875			 */
1876			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1877			xfs_trans_cancel(tp, 0);
1878			break;
1879		}
1880		xfs_ilock(ip, XFS_ILOCK_EXCL);
1881		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
1882						      0, quota_flag);
1883		if (error)
1884			goto error1;
1885
1886		xfs_trans_ijoin(tp, ip);
1887
1888		/*
1889		 * Issue the xfs_bmapi() call to allocate the blocks
1890		 */
1891		xfs_bmap_init(&free_list, &firstfsb);
1892		error = xfs_bmapi(tp, ip, startoffset_fsb,
1893				  allocatesize_fsb, bmapi_flag,
1894				  &firstfsb, 0, imapp, &nimaps,
1895				  &free_list);
1896		if (error) {
1897			goto error0;
1898		}
1899
1900		/*
1901		 * Complete the transaction
1902		 */
1903		error = xfs_bmap_finish(&tp, &free_list, &committed);
1904		if (error) {
1905			goto error0;
1906		}
1907
1908		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1909		xfs_iunlock(ip, XFS_ILOCK_EXCL);
1910		if (error) {
1911			break;
1912		}
1913
1914		allocated_fsb = imapp->br_blockcount;
1915
		/* No mapping returned means no space was available. */
1916		if (nimaps == 0) {
1917			error = XFS_ERROR(ENOSPC);
1918			break;
1919		}
1920
1921		startoffset_fsb += allocated_fsb;
1922		allocatesize_fsb -= allocated_fsb;
1923	}
1924
1925	return error;
1926
1927error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1928	xfs_bmap_cancel(&free_list);
1929	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
1930
1931error1:	/* Just cancel transaction */
1932	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1933	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1934	return error;
1935}
1936
1937/*
1938 * Zero file bytes between startoff and endoff inclusive.
1939 * The iolock is held exclusive and no blocks are buffered.
1940 *
1941 * This function is used by xfs_free_file_space() to zero
1942 * partial blocks when the range to free is not block aligned.
1943 * When unreserving space with boundaries that are not block
1944 * aligned we round up the start and round down the end
1945 * boundaries and then use this function to zero the parts of
1946 * the blocks that got dropped during the rounding.
1947 */
1948STATIC int
1949xfs_zero_remaining_bytes(
1950	xfs_inode_t		*ip,
1951	xfs_off_t		startoff,
1952	xfs_off_t		endoff)
1953{
1954	xfs_bmbt_irec_t		imap;
1955	xfs_fileoff_t		offset_fsb;
1956	xfs_off_t		lastoffset;
1957	xfs_off_t		offset;
1958	xfs_buf_t		*bp;
1959	xfs_mount_t		*mp = ip->i_mount;
1960	int			nimap;
1961	int			error = 0;
1962
1963	/*
1964	 * Avoid doing I/O beyond eof - it's not necessary
1965	 * since nothing can read beyond eof.  The space will
1966	 * be zeroed when the file is extended anyway.
1967	 */
1968	if (startoff >= ip->i_size)
1969		return 0;
1970
1971	if (endoff > ip->i_size)
1972		endoff = ip->i_size;
1973
1974	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
1975					mp->m_rtdev_targp : mp->m_ddev_targp,
1976				mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
1977	if (!bp)
1978		return XFS_ERROR(ENOMEM);
1979
1980	xfs_buf_unlock(bp);
1981
1982	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
1983		offset_fsb = XFS_B_TO_FSBT(mp, offset);
1984		nimap = 1;
1985		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
1986			NULL, 0, &imap, &nimap, NULL);
1987		if (error || nimap < 1)
1988			break;
1989		ASSERT(imap.br_blockcount >= 1);
1990		ASSERT(imap.br_startoff == offset_fsb);
1991		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
1992		if (lastoffset > endoff)
1993			lastoffset = endoff;
1994		if (imap.br_startblock == HOLESTARTBLOCK)
1995			continue;
1996		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1997		if (imap.br_state == XFS_EXT_UNWRITTEN)
1998			continue;
1999		XFS_BUF_UNDONE(bp);
2000		XFS_BUF_UNWRITE(bp);
2001		XFS_BUF_READ(bp);
2002		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
2003		xfsbdstrat(mp, bp);
2004		error = xfs_buf_iowait(bp);
2005		if (error) {
2006			xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
2007					  mp, bp, XFS_BUF_ADDR(bp));
2008			break;
2009		}
2010		memset

Large files files are truncated, but you can click here to view the full file