/fs/xfs/xfs_vnodeops.c
C | 2394 lines | 1541 code | 287 blank | 566 comment | 333 complexity | e157a16cdbc83f260967e2297329a154 MD5 | raw file
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir2.h" 29#include "xfs_mount.h" 30#include "xfs_da_btree.h" 31#include "xfs_bmap_btree.h" 32#include "xfs_ialloc_btree.h" 33#include "xfs_dinode.h" 34#include "xfs_inode.h" 35#include "xfs_inode_item.h" 36#include "xfs_itable.h" 37#include "xfs_ialloc.h" 38#include "xfs_alloc.h" 39#include "xfs_bmap.h" 40#include "xfs_acl.h" 41#include "xfs_attr.h" 42#include "xfs_rw.h" 43#include "xfs_error.h" 44#include "xfs_quota.h" 45#include "xfs_utils.h" 46#include "xfs_rtalloc.h" 47#include "xfs_trans_space.h" 48#include "xfs_log_priv.h" 49#include "xfs_filestream.h" 50#include "xfs_vnodeops.h" 51#include "xfs_trace.h" 52 53/* 54 * The maximum pathlen is 1024 bytes. Since the minimum file system 55 * blocksize is 512 bytes, we can get a max of 2 extents back from 56 * bmapi. 
57 */ 58#define SYMLINK_MAPS 2 59 60STATIC int 61xfs_readlink_bmap( 62 xfs_inode_t *ip, 63 char *link) 64{ 65 xfs_mount_t *mp = ip->i_mount; 66 int pathlen = ip->i_d.di_size; 67 int nmaps = SYMLINK_MAPS; 68 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 69 xfs_daddr_t d; 70 int byte_cnt; 71 int n; 72 xfs_buf_t *bp; 73 int error = 0; 74 75 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 76 mval, &nmaps, NULL); 77 if (error) 78 goto out; 79 80 for (n = 0; n < nmaps; n++) { 81 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 82 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 83 84 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 85 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); 86 if (!bp) 87 return XFS_ERROR(ENOMEM); 88 error = bp->b_error; 89 if (error) { 90 xfs_ioerror_alert("xfs_readlink", 91 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 92 xfs_buf_relse(bp); 93 goto out; 94 } 95 if (pathlen < byte_cnt) 96 byte_cnt = pathlen; 97 pathlen -= byte_cnt; 98 99 memcpy(link, bp->b_addr, byte_cnt); 100 xfs_buf_relse(bp); 101 } 102 103 link[ip->i_d.di_size] = '\0'; 104 error = 0; 105 106 out: 107 return error; 108} 109 110int 111xfs_readlink( 112 xfs_inode_t *ip, 113 char *link) 114{ 115 xfs_mount_t *mp = ip->i_mount; 116 xfs_fsize_t pathlen; 117 int error = 0; 118 119 trace_xfs_readlink(ip); 120 121 if (XFS_FORCED_SHUTDOWN(mp)) 122 return XFS_ERROR(EIO); 123 124 xfs_ilock(ip, XFS_ILOCK_SHARED); 125 126 pathlen = ip->i_d.di_size; 127 if (!pathlen) 128 goto out; 129 130 if (pathlen < 0 || pathlen > MAXPATHLEN) { 131 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", 132 __func__, (unsigned long long) ip->i_ino, 133 (long long) pathlen); 134 ASSERT(0); 135 return XFS_ERROR(EFSCORRUPTED); 136 } 137 138 139 if (ip->i_df.if_flags & XFS_IFINLINE) { 140 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 141 link[pathlen] = '\0'; 142 } else { 143 error = xfs_readlink_bmap(ip, link); 144 } 145 146 out: 147 xfs_iunlock(ip, XFS_ILOCK_SHARED); 148 return error; 149} 150 
/*
 * Flags for xfs_free_eofblocks
 */
#define XFS_FREE_EOF_TRYLOCK	(1<<0)

/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 *
 * With XFS_FREE_EOF_TRYLOCK set, silently returns 0 if the iolock
 * cannot be taken without blocking (used on the last-close path,
 * where blocking could deadlock against mmap_sem).
 */
STATIC int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	int		flags)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	/* Probe the region past EOF; one mapping is enough to decide. */
	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
			  NULL, 0, &imap, &nimaps, NULL);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		if (flags & XFS_FREE_EOF_TRYLOCK) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				/* best-effort path: not an error */
				xfs_trans_cancel(tp, 0);
				return 0;
			}
		} else {
			xfs_ilock(ip, XFS_IOLOCK_EXCL);
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		/* "Truncate" to the current size: frees only post-EOF blocks. */
		error = xfs_itruncate_data(&tp, ip, ip->i_size);
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
		}
		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
	}
	return error;
}

/*
 * Free a symlink that has blocks associated with it.
 *
 * On entry *tpp is an allocated, unreserved transaction.  On success,
 * returns with the inode locked (IOLOCK_EXCL|ILOCK_EXCL) and *tpp
 * replaced by a fresh transaction carrying an itruncate reservation,
 * NOT yet joined to the inode.  On failure *tpp is NULL, the
 * transaction cancelled, and the inode unlocked.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	xfs_bmap_init(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list)))
		goto error0;
	/*
	 * Invalidate the block(s).  No new transactions needed.
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(tp->t_ticket);

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}

/*
 * Free an inline (local-format) symlink that is being inactivated.
 *
 * Reserves an itruncate reservation on *tpp and returns with the inode
 * locked (ILOCK_EXCL|IOLOCK_EXCL).  On failure *tpp is cancelled and
 * set to NULL.
 */
STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}

/*
 * Tear down the attribute fork during inactivation.
 *
 * Commits the caller's transaction (xfs_attr_inactive runs its own
 * transactions), removes the attr fork, and hands back a fresh
 * transaction with an ifree reservation in *tpp, inode ILOCK_EXCL held.
 * On error *tpp is NULL, the inode is fully unlocked.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto error_unlock;

	error = xfs_attr_inactive(ip);
	if (error)
		goto error_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error)
		goto error_cancel;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;

error_cancel:
	ASSERT(XFS_FORCED_SHUTDOWN(mp));
	xfs_trans_cancel(tp, 0);
error_unlock:
	*tpp = NULL;
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}

/*
 * Called on the last close of a file descriptor for this inode.
 * Opportunistically flushes dirty truncated data and trims speculative
 * preallocation beyond EOF.  Best-effort: most failures return 0.
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file. Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated) {
			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
				xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
		}
	}

	/* Unlinked files are fully torn down in xfs_inactive instead. */
	if (ip->i_d.di_nlink == 0)
		return 0;

	if ((S_ISREG(ip->i_d.di_mode) &&
	     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
	       ip->i_delayed_blks > 0)) &&
	     (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
	    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {

		/*
		 * If we can't get the iolock just skip truncating the blocks
		 * past EOF because we could deadlock with the mmap_sem
		 * otherwise.  We'll get another chance to drop them once the
		 * last reference to the inode is dropped, so we'll never leak
		 * blocks permanently.
		 *
		 * Further, check if the inode is being opened, written and
		 * closed frequently and we have delayed allocation blocks
		 * outstanding (e.g. streaming writes from the NFS server),
		 * truncating the blocks past EOF will cause fragmentation to
		 * occur.
		 *
		 * In this case don't do the truncation, either, but we have to
		 * be careful how we detect this case. Blocks beyond EOF show
		 * up as i_delayed_blks even when the inode is clean, so we
		 * need to truncate them away first before checking for a dirty
		 * release. Hence on the first dirty close we will still remove
		 * the speculative allocation, but after that we will leave it
		 * in place.
		 */
		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
			return 0;

		error = xfs_free_eofblocks(mp, ip,
					   XFS_FREE_EOF_TRYLOCK);
		if (error)
			return error;

		/* delalloc blocks after truncation means it really is dirty */
		if (ip->i_delayed_blks)
			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
	}
	return 0;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.
 If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 *
 * Always returns VN_INACTIVE_CACHE; errors are reported (or trigger a
 * filesystem shutdown) but not propagated, since the VFS cannot do
 * anything useful with them at this point.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    S_ISREG(ip->i_d.di_mode));

	mp = ip->i_mount;

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Still linked: only trim speculative post-EOF blocks,
		 * same criteria as the last-close path in xfs_release().
		 */
		if ((S_ISREG(ip->i_d.di_mode) &&
		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, 0);
			if (error)
				return VN_INACTIVE_CACHE;
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		/* Wait for in-flight I/O before tearing down the data fork. */
		xfs_ioend_wait(ip);

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		error = xfs_itruncate_data(&tp, ip, 0);
		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if (S_ISLNK(ip->i_d.di_mode)) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		/* Helpers return with the inode locked but not joined. */
		xfs_trans_ijoin(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		if (error)
			return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	xfs_bmap_init(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			xfs_notice(mp, "%s: xfs_ifree returned error %d",
				__func__, error);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is nothing we can
		 * do except to try to keep going. Make sure it's not a silent
		 * error.
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error)
			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
				__func__, error);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				__func__, error);
	}

	/*
	 * Release the dquots held by inode, if any.
	 */
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}

/*
 * Looks up an inode from "name". If ci_name is not NULL, then a CI match
 * is allowed, otherwise it has to be an exact match. If a CI match is found,
 * ci_name->name will point to the actual name (caller must free) or
 * will be set to NULL if an exact match is found.
 */
int
xfs_lookup(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		**ipp,
	struct xfs_name		*ci_name)
{
	xfs_ino_t		inum;
	int			error;
	uint			lock_mode;

	trace_xfs_lookup(dp, name);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
	xfs_iunlock_map_shared(dp, lock_mode);

	if (error)
		goto out;

	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
	if (error)
		goto out_free_name;

	return 0;

out_free_name:
	/* xfs_dir_lookup allocated the CI name; don't leak it on error. */
	if (ci_name)
		kmem_free(ci_name->name);
out:
	*ipp = NULL;
	return error;
}

/*
 * Create a new file, directory, or special file @name in directory @dp.
 * On success returns the new, referenced inode in *ipp.  Handles the
 * ENOSPC fallback chain: full reservation -> flush delalloc and retry
 * -> "no-allocation" (resblks = 0) reservation.
 */
int
xfs_create(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	mode_t			mode,
	xfs_dev_t		rdev,
	xfs_inode_t		**ipp)
{
	int			is_dir = S_ISDIR(mode);
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_inode	*ip = NULL;
	struct xfs_trans	*tp = NULL;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	uint			cancel_flags;
	int			committed;
	prid_t			prid;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	uint			resblks;
	uint			log_res;
	uint			log_count;

	trace_xfs_create(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		return error;

	if (is_dir) {
		rdev = 0;
		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
		log_res = XFS_MKDIR_LOG_RES(mp);
		log_count = XFS_MKDIR_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
	} else {
		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
		log_res = XFS_CREATE_LOG_RES(mp);
		log_count = XFS_CREATE_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	}

	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, log_res, 0,
			XFS_TRANS_PERM_LOG_RES, log_count);
	if (error == ENOSPC) {
		/* flush outstanding delalloc blocks and retry */
		xfs_flush_inodes(dp);
		error = xfs_trans_reserve(tp, resblks, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error == ENOSPC) {
		/* No space at all so try a "no-allocation" reservation */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error) {
		/* no reservation taken: cancel must not release log res */
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	/*
	 * Check for directory link count overflow.
	 */
	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto out_trans_cancel;
	}

	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto out_trans_cancel;

	error = xfs_dir_canenter(tp, dp, name, resblks);
	if (error)
		goto out_trans_cancel;

	/*
	 * A newly created regular or special file just has one directory
	 * entry pointing to them, but a directory also the "." entry
	 * pointing to itself.
	 */
	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
			       prid, resblks > 0, &ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto out_trans_cancel;
		goto out_trans_abort;
	}

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto out_trans_abort;
	}
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	if (is_dir) {
		error = xfs_dir_init(tp, ip, dp);
		if (error)
			goto out_bmap_cancel;

		/* new subdirectory's ".." adds a link to the parent */
		error = xfs_bumplink(tp, dp);
		if (error)
			goto out_bmap_cancel;
	}

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto out_release_inode;

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
 out_trans_abort:
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	if (ip)
		IRELE(ip);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}

/* Retry statistics for the multi-inode locking loops below. */
#ifdef DEBUG
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif

/*
 * Bump the subclass so xfs_lock_inodes() acquires each lock with
 * a different value
 */
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;

	return lock_mode;
}

/*
 * The following routine will lock n inodes in exclusive mode.
 * We assume the caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock
 * is in the AIL and we start waiting for another inode that is locked
 * by a thread in a long running transaction (such as truncate).
 This can
 * result in deadlock since the long running trans might need to wait
 * for the inode we just locked in order to push the tail and free space
 * in the log.
 */
void
xfs_lock_inodes(
	xfs_inode_t	**ips,
	int		inodes,
	uint		lock_mode)
{
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */

	try_lock = 0;
	i = 0;

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
		 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
					try_lock++;
				}
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
			ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				attempts++;

				/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				for(j = i - 1; j >= 0; j--) {

					/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
					 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
						continue;

					xfs_iunlock(ips[j], lock_mode);
				}

				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_lock_delays++;
#endif
				}
				i = 0;
				try_lock = 0;
				goto again;
			}
		} else {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
		}
	}

#ifdef DEBUG
	if (attempts) {
		if (attempts < 5) xfs_small_retries++;
		else if (attempts < 100) xfs_middle_retries++;
		else xfs_lots_retries++;
	} else {
		xfs_locked_n++;
	}
#endif
}

/*
 * xfs_lock_two_inodes() can only be used to lock one type of lock
 * at a time - the iolock or the ilock, but not both at once. If
 * we lock both at once, lockdep will report false positives saying
 * we have violated locking orders.
 *
 * Inodes are always locked in ascending i_ino order regardless of the
 * order the caller passes them in; @ip0/@ip1 may be swapped locally.
 */
void
xfs_lock_two_inodes(
	xfs_inode_t	*ip0,
	xfs_inode_t	*ip1,
	uint		lock_mode)
{
	xfs_inode_t	*temp;
	int		attempts = 0;
	xfs_log_item_t	*lp;

	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
	ASSERT(ip0->i_ino != ip1->i_ino);

	if (ip0->i_ino > ip1->i_ino) {
		temp = ip0;
		ip0 = ip1;
		ip1 = temp;
	}

 again:
	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));

	/*
	 * If the first lock we have locked is in the AIL, we must TRY to get
	 * the second lock. If we can't get it, we must release the first one
	 * and try again.
	 */
	lp = (xfs_log_item_t *)ip0->i_itemp;
	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			xfs_iunlock(ip0, lock_mode);
			if ((++attempts % 5) == 0)
				delay(1); /* Don't just spin the CPU */
			goto again;
		}
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
	}
}

/*
 * Remove directory entry @name (referring to inode @ip) from directory
 * @dp.  Handles both unlink and rmdir.
 * NOTE(review): this function continues beyond the visible chunk.
 */
int
xfs_remove(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		*ip)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t		*tp = NULL;
	int			is_dir = S_ISDIR(ip->i_d.di_mode);
	int			error = 0;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			link_zero;
	uint			resblks;
	uint			log_count;

	trace_xfs_remove(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = xfs_qm_dqattach(dp, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		goto std_return;

	if (is_dir) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
		log_count = XFS_DEFAULT_LOG_COUNT;
	} else {
		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
		log_count = XFS_REMOVE_LOG_COUNT;
	}
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
1252 */ 1253 resblks = XFS_REMOVE_SPACE_RES(mp); 1254 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 1255 XFS_TRANS_PERM_LOG_RES, log_count); 1256 if (error == ENOSPC) { 1257 resblks = 0; 1258 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 1259 XFS_TRANS_PERM_LOG_RES, log_count); 1260 } 1261 if (error) { 1262 ASSERT(error != ENOSPC); 1263 cancel_flags = 0; 1264 goto out_trans_cancel; 1265 } 1266 1267 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 1268 1269 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL); 1270 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); 1271 1272 /* 1273 * If we're removing a directory perform some additional validation. 1274 */ 1275 if (is_dir) { 1276 ASSERT(ip->i_d.di_nlink >= 2); 1277 if (ip->i_d.di_nlink != 2) { 1278 error = XFS_ERROR(ENOTEMPTY); 1279 goto out_trans_cancel; 1280 } 1281 if (!xfs_dir_isempty(ip)) { 1282 error = XFS_ERROR(ENOTEMPTY); 1283 goto out_trans_cancel; 1284 } 1285 } 1286 1287 xfs_bmap_init(&free_list, &first_block); 1288 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 1289 &first_block, &free_list, resblks); 1290 if (error) { 1291 ASSERT(error != ENOENT); 1292 goto out_bmap_cancel; 1293 } 1294 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1295 1296 if (is_dir) { 1297 /* 1298 * Drop the link from ip's "..". 1299 */ 1300 error = xfs_droplink(tp, dp); 1301 if (error) 1302 goto out_bmap_cancel; 1303 1304 /* 1305 * Drop the "." link from ip to self. 1306 */ 1307 error = xfs_droplink(tp, ip); 1308 if (error) 1309 goto out_bmap_cancel; 1310 } else { 1311 /* 1312 * When removing a non-directory we need to log the parent 1313 * inode here. For a directory this is done implicitly 1314 * by the xfs_droplink call for the ".." entry. 1315 */ 1316 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1317 } 1318 1319 /* 1320 * Drop the link from dp to ip. 
1321 */ 1322 error = xfs_droplink(tp, ip); 1323 if (error) 1324 goto out_bmap_cancel; 1325 1326 /* 1327 * Determine if this is the last link while 1328 * we are in the transaction. 1329 */ 1330 link_zero = (ip->i_d.di_nlink == 0); 1331 1332 /* 1333 * If this is a synchronous mount, make sure that the 1334 * remove transaction goes to disk before returning to 1335 * the user. 1336 */ 1337 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1338 xfs_trans_set_sync(tp); 1339 1340 error = xfs_bmap_finish(&tp, &free_list, &committed); 1341 if (error) 1342 goto out_bmap_cancel; 1343 1344 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1345 if (error) 1346 goto std_return; 1347 1348 /* 1349 * If we are using filestreams, kill the stream association. 1350 * If the file is still open it may get a new one but that 1351 * will get killed on last close in xfs_close() so we don't 1352 * have to worry about that. 1353 */ 1354 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1355 xfs_filestream_deassociate(ip); 1356 1357 return 0; 1358 1359 out_bmap_cancel: 1360 xfs_bmap_cancel(&free_list); 1361 cancel_flags |= XFS_TRANS_ABORT; 1362 out_trans_cancel: 1363 xfs_trans_cancel(tp, cancel_flags); 1364 std_return: 1365 return error; 1366} 1367 1368int 1369xfs_link( 1370 xfs_inode_t *tdp, 1371 xfs_inode_t *sip, 1372 struct xfs_name *target_name) 1373{ 1374 xfs_mount_t *mp = tdp->i_mount; 1375 xfs_trans_t *tp; 1376 int error; 1377 xfs_bmap_free_t free_list; 1378 xfs_fsblock_t first_block; 1379 int cancel_flags; 1380 int committed; 1381 int resblks; 1382 1383 trace_xfs_link(tdp, target_name); 1384 1385 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1386 1387 if (XFS_FORCED_SHUTDOWN(mp)) 1388 return XFS_ERROR(EIO); 1389 1390 error = xfs_qm_dqattach(sip, 0); 1391 if (error) 1392 goto std_return; 1393 1394 error = xfs_qm_dqattach(tdp, 0); 1395 if (error) 1396 goto std_return; 1397 1398 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 1399 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1400 
resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 1401 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 1402 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 1403 if (error == ENOSPC) { 1404 resblks = 0; 1405 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 1406 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 1407 } 1408 if (error) { 1409 cancel_flags = 0; 1410 goto error_return; 1411 } 1412 1413 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1414 1415 xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL); 1416 xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL); 1417 1418 /* 1419 * If the source has too many links, we can't make any more to it. 1420 */ 1421 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 1422 error = XFS_ERROR(EMLINK); 1423 goto error_return; 1424 } 1425 1426 /* 1427 * If we are using project inheritance, we only allow hard link 1428 * creation in our tree when the project IDs are the same; else 1429 * the tree quota mechanism could be circumvented. 1430 */ 1431 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1432 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { 1433 error = XFS_ERROR(EXDEV); 1434 goto error_return; 1435 } 1436 1437 error = xfs_dir_canenter(tp, tdp, target_name, resblks); 1438 if (error) 1439 goto error_return; 1440 1441 xfs_bmap_init(&free_list, &first_block); 1442 1443 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1444 &first_block, &free_list, resblks); 1445 if (error) 1446 goto abort_return; 1447 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1448 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 1449 1450 error = xfs_bumplink(tp, sip); 1451 if (error) 1452 goto abort_return; 1453 1454 /* 1455 * If this is a synchronous mount, make sure that the 1456 * link transaction goes to disk before returning to 1457 * the user. 
1458 */ 1459 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1460 xfs_trans_set_sync(tp); 1461 } 1462 1463 error = xfs_bmap_finish (&tp, &free_list, &committed); 1464 if (error) { 1465 xfs_bmap_cancel(&free_list); 1466 goto abort_return; 1467 } 1468 1469 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1470 1471 abort_return: 1472 cancel_flags |= XFS_TRANS_ABORT; 1473 error_return: 1474 xfs_trans_cancel(tp, cancel_flags); 1475 std_return: 1476 return error; 1477} 1478 1479int 1480xfs_symlink( 1481 xfs_inode_t *dp, 1482 struct xfs_name *link_name, 1483 const char *target_path, 1484 mode_t mode, 1485 xfs_inode_t **ipp) 1486{ 1487 xfs_mount_t *mp = dp->i_mount; 1488 xfs_trans_t *tp; 1489 xfs_inode_t *ip; 1490 int error; 1491 int pathlen; 1492 xfs_bmap_free_t free_list; 1493 xfs_fsblock_t first_block; 1494 boolean_t unlock_dp_on_error = B_FALSE; 1495 uint cancel_flags; 1496 int committed; 1497 xfs_fileoff_t first_fsb; 1498 xfs_filblks_t fs_blocks; 1499 int nmaps; 1500 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 1501 xfs_daddr_t d; 1502 const char *cur_chunk; 1503 int byte_cnt; 1504 int n; 1505 xfs_buf_t *bp; 1506 prid_t prid; 1507 struct xfs_dquot *udqp, *gdqp; 1508 uint resblks; 1509 1510 *ipp = NULL; 1511 error = 0; 1512 ip = NULL; 1513 tp = NULL; 1514 1515 trace_xfs_symlink(dp, link_name); 1516 1517 if (XFS_FORCED_SHUTDOWN(mp)) 1518 return XFS_ERROR(EIO); 1519 1520 /* 1521 * Check component lengths of the target path name. 1522 */ 1523 pathlen = strlen(target_path); 1524 if (pathlen >= MAXPATHLEN) /* total string too long */ 1525 return XFS_ERROR(ENAMETOOLONG); 1526 1527 udqp = gdqp = NULL; 1528 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1529 prid = xfs_get_projid(dp); 1530 else 1531 prid = XFS_PROJID_DEFAULT; 1532 1533 /* 1534 * Make sure that we have allocated dquot(s) on disk. 
1535 */ 1536 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 1537 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1538 if (error) 1539 goto std_return; 1540 1541 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 1542 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1543 /* 1544 * The symlink will fit into the inode data fork? 1545 * There can't be any attributes so we get the whole variable part. 1546 */ 1547 if (pathlen <= XFS_LITINO(mp)) 1548 fs_blocks = 0; 1549 else 1550 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 1551 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 1552 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 1553 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 1554 if (error == ENOSPC && fs_blocks == 0) { 1555 resblks = 0; 1556 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 1557 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 1558 } 1559 if (error) { 1560 cancel_flags = 0; 1561 goto error_return; 1562 } 1563 1564 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1565 unlock_dp_on_error = B_TRUE; 1566 1567 /* 1568 * Check whether the directory allows new symlinks or not. 1569 */ 1570 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 1571 error = XFS_ERROR(EPERM); 1572 goto error_return; 1573 } 1574 1575 /* 1576 * Reserve disk quota : blocks and inode. 1577 */ 1578 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); 1579 if (error) 1580 goto error_return; 1581 1582 /* 1583 * Check for ability to enter directory entry, if no space reserved. 1584 */ 1585 error = xfs_dir_canenter(tp, dp, link_name, resblks); 1586 if (error) 1587 goto error_return; 1588 /* 1589 * Initialize the bmap freelist prior to calling either 1590 * bmapi or the directory create code. 1591 */ 1592 xfs_bmap_init(&free_list, &first_block); 1593 1594 /* 1595 * Allocate an inode for the symlink. 
1596 */ 1597 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 1598 prid, resblks > 0, &ip, NULL); 1599 if (error) { 1600 if (error == ENOSPC) 1601 goto error_return; 1602 goto error1; 1603 } 1604 1605 /* 1606 * An error after we've joined dp to the transaction will result in the 1607 * transaction cancel unlocking dp so don't do it explicitly in the 1608 * error path. 1609 */ 1610 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL); 1611 unlock_dp_on_error = B_FALSE; 1612 1613 /* 1614 * Also attach the dquot(s) to it, if applicable. 1615 */ 1616 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 1617 1618 if (resblks) 1619 resblks -= XFS_IALLOC_SPACE_RES(mp); 1620 /* 1621 * If the symlink will fit into the inode, write it inline. 1622 */ 1623 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 1624 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 1625 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 1626 ip->i_d.di_size = pathlen; 1627 1628 /* 1629 * The inode was initially created in extent format. 
1630 */ 1631 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 1632 ip->i_df.if_flags |= XFS_IFINLINE; 1633 1634 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 1635 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 1636 1637 } else { 1638 first_fsb = 0; 1639 nmaps = SYMLINK_MAPS; 1640 1641 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 1642 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 1643 &first_block, resblks, mval, &nmaps, 1644 &free_list); 1645 if (error) 1646 goto error2; 1647 1648 if (resblks) 1649 resblks -= fs_blocks; 1650 ip->i_d.di_size = pathlen; 1651 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1652 1653 cur_chunk = target_path; 1654 for (n = 0; n < nmaps; n++) { 1655 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1656 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1657 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 1658 BTOBB(byte_cnt), 0); 1659 ASSERT(!xfs_buf_geterror(bp)); 1660 if (pathlen < byte_cnt) { 1661 byte_cnt = pathlen; 1662 } 1663 pathlen -= byte_cnt; 1664 1665 memcpy(bp->b_addr, cur_chunk, byte_cnt); 1666 cur_chunk += byte_cnt; 1667 1668 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 1669 } 1670 } 1671 1672 /* 1673 * Create the directory entry for the symlink. 1674 */ 1675 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 1676 &first_block, &free_list, resblks); 1677 if (error) 1678 goto error2; 1679 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1680 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1681 1682 /* 1683 * If this is a synchronous mount, make sure that the 1684 * symlink transaction goes to disk before returning to 1685 * the user. 
1686 */ 1687 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1688 xfs_trans_set_sync(tp); 1689 } 1690 1691 error = xfs_bmap_finish(&tp, &free_list, &committed); 1692 if (error) { 1693 goto error2; 1694 } 1695 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1696 xfs_qm_dqrele(udqp); 1697 xfs_qm_dqrele(gdqp); 1698 1699 *ipp = ip; 1700 return 0; 1701 1702 error2: 1703 IRELE(ip); 1704 error1: 1705 xfs_bmap_cancel(&free_list); 1706 cancel_flags |= XFS_TRANS_ABORT; 1707 error_return: 1708 xfs_trans_cancel(tp, cancel_flags); 1709 xfs_qm_dqrele(udqp); 1710 xfs_qm_dqrele(gdqp); 1711 1712 if (unlock_dp_on_error) 1713 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1714 std_return: 1715 return error; 1716} 1717 1718int 1719xfs_set_dmattrs( 1720 xfs_inode_t *ip, 1721 u_int evmask, 1722 u_int16_t state) 1723{ 1724 xfs_mount_t *mp = ip->i_mount; 1725 xfs_trans_t *tp; 1726 int error; 1727 1728 if (!capable(CAP_SYS_ADMIN)) 1729 return XFS_ERROR(EPERM); 1730 1731 if (XFS_FORCED_SHUTDOWN(mp)) 1732 return XFS_ERROR(EIO); 1733 1734 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 1735 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 1736 if (error) { 1737 xfs_trans_cancel(tp, 0); 1738 return error; 1739 } 1740 xfs_ilock(ip, XFS_ILOCK_EXCL); 1741 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); 1742 1743 ip->i_d.di_dmevmask = evmask; 1744 ip->i_d.di_dmstate = state; 1745 1746 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1747 error = xfs_trans_commit(tp, 0); 1748 1749 return error; 1750} 1751 1752/* 1753 * xfs_alloc_file_space() 1754 * This routine allocates disk space for the given file. 1755 * 1756 * If alloc_type == 0, this request is for an ALLOCSP type 1757 * request which will change the file size. In this case, no 1758 * DMAPI event will be generated by the call. A TRUNCATE event 1759 * will be generated later by xfs_setattr. 
1760 * 1761 * If alloc_type != 0, this request is for a RESVSP type 1762 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 1763 * lower block boundary byte address is less than the file's 1764 * length. 1765 * 1766 * RETURNS: 1767 * 0 on success 1768 * errno on error 1769 * 1770 */ 1771STATIC int 1772xfs_alloc_file_space( 1773 xfs_inode_t *ip, 1774 xfs_off_t offset, 1775 xfs_off_t len, 1776 int alloc_type, 1777 int attr_flags) 1778{ 1779 xfs_mount_t *mp = ip->i_mount; 1780 xfs_off_t count; 1781 xfs_filblks_t allocated_fsb; 1782 xfs_filblks_t allocatesize_fsb; 1783 xfs_extlen_t extsz, temp; 1784 xfs_fileoff_t startoffset_fsb; 1785 xfs_fsblock_t firstfsb; 1786 int nimaps; 1787 int bmapi_flag; 1788 int quota_flag; 1789 int rt; 1790 xfs_trans_t *tp; 1791 xfs_bmbt_irec_t imaps[1], *imapp; 1792 xfs_bmap_free_t free_list; 1793 uint qblocks, resblks, resrtextents; 1794 int committed; 1795 int error; 1796 1797 trace_xfs_alloc_file_space(ip); 1798 1799 if (XFS_FORCED_SHUTDOWN(mp)) 1800 return XFS_ERROR(EIO); 1801 1802 error = xfs_qm_dqattach(ip, 0); 1803 if (error) 1804 return error; 1805 1806 if (len <= 0) 1807 return XFS_ERROR(EINVAL); 1808 1809 rt = XFS_IS_REALTIME_INODE(ip); 1810 extsz = xfs_get_extsz_hint(ip); 1811 1812 count = len; 1813 imapp = &imaps[0]; 1814 nimaps = 1; 1815 bmapi_flag = XFS_BMAPI_WRITE | alloc_type; 1816 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 1817 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 1818 1819 /* 1820 * Allocate file space until done or until there is an error 1821 */ 1822 while (allocatesize_fsb && !error) { 1823 xfs_fileoff_t s, e; 1824 1825 /* 1826 * Determine space reservations for data/realtime. 
1827 */ 1828 if (unlikely(extsz)) { 1829 s = startoffset_fsb; 1830 do_div(s, extsz); 1831 s *= extsz; 1832 e = startoffset_fsb + allocatesize_fsb; 1833 if ((temp = do_mod(startoffset_fsb, extsz))) 1834 e += temp; 1835 if ((temp = do_mod(e, extsz))) 1836 e += extsz - temp; 1837 } else { 1838 s = 0; 1839 e = allocatesize_fsb; 1840 } 1841 1842 /* 1843 * The transaction reservation is limited to a 32-bit block 1844 * count, hence we need to limit the number of blocks we are 1845 * trying to reserve to avoid an overflow. We can't allocate 1846 * more than @nimaps extents, and an extent is limited on disk 1847 * to MAXEXTLEN (21 bits), so use that to enforce the limit. 1848 */ 1849 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); 1850 if (unlikely(rt)) { 1851 resrtextents = qblocks = resblks; 1852 resrtextents /= mp->m_sb.sb_rextsize; 1853 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1854 quota_flag = XFS_QMOPT_RES_RTBLKS; 1855 } else { 1856 resrtextents = 0; 1857 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); 1858 quota_flag = XFS_QMOPT_RES_REGBLKS; 1859 } 1860 1861 /* 1862 * Allocate and setup the transaction. 1863 */ 1864 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1865 error = xfs_trans_reserve(tp, resblks, 1866 XFS_WRITE_LOG_RES(mp), resrtextents, 1867 XFS_TRANS_PERM_LOG_RES, 1868 XFS_WRITE_LOG_COUNT); 1869 /* 1870 * Check for running out of space 1871 */ 1872 if (error) { 1873 /* 1874 * Free the transaction structure. 
1875 */ 1876 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1877 xfs_trans_cancel(tp, 0); 1878 break; 1879 } 1880 xfs_ilock(ip, XFS_ILOCK_EXCL); 1881 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 1882 0, quota_flag); 1883 if (error) 1884 goto error1; 1885 1886 xfs_trans_ijoin(tp, ip); 1887 1888 /* 1889 * Issue the xfs_bmapi() call to allocate the blocks 1890 */ 1891 xfs_bmap_init(&free_list, &firstfsb); 1892 error = xfs_bmapi(tp, ip, startoffset_fsb, 1893 allocatesize_fsb, bmapi_flag, 1894 &firstfsb, 0, imapp, &nimaps, 1895 &free_list); 1896 if (error) { 1897 goto error0; 1898 } 1899 1900 /* 1901 * Complete the transaction 1902 */ 1903 error = xfs_bmap_finish(&tp, &free_list, &committed); 1904 if (error) { 1905 goto error0; 1906 } 1907 1908 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1909 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1910 if (error) { 1911 break; 1912 } 1913 1914 allocated_fsb = imapp->br_blockcount; 1915 1916 if (nimaps == 0) { 1917 error = XFS_ERROR(ENOSPC); 1918 break; 1919 } 1920 1921 startoffset_fsb += allocated_fsb; 1922 allocatesize_fsb -= allocated_fsb; 1923 } 1924 1925 return error; 1926 1927error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1928 xfs_bmap_cancel(&free_list); 1929 xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); 1930 1931error1: /* Just cancel transaction */ 1932 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1933 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1934 return error; 1935} 1936 1937/* 1938 * Zero file bytes between startoff and endoff inclusive. 1939 * The iolock is held exclusive and no blocks are buffered. 1940 * 1941 * This function is used by xfs_free_file_space() to zero 1942 * partial blocks when the range to free is not block aligned. 
 * When unreserving space with boundaries that are not block
 * aligned we round up the start and round down the end
 * boundaries and then use this function to zero the parts of
 * the blocks that got dropped during the rounding.
 */
STATIC int
xfs_zero_remaining_bytes(
        xfs_inode_t             *ip,
        xfs_off_t               startoff,
        xfs_off_t               endoff)
{
        xfs_bmbt_irec_t         imap;
        xfs_fileoff_t           offset_fsb;
        xfs_off_t               lastoffset;
        xfs_off_t               offset;
        xfs_buf_t               *bp;
        xfs_mount_t             *mp = ip->i_mount;
        int                     nimap;
        int                     error = 0;

        /*
         * Avoid doing I/O beyond eof - it's not necessary
         * since nothing can read beyond eof.  The space will
         * be zeroed when the file is extended anyway.
         */
        if (startoff >= ip->i_size)
                return 0;

        if (endoff > ip->i_size)
                endoff = ip->i_size;

        /* One uncached block-size buffer is reused for every rmw cycle. */
        bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp,
                                  mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
        if (!bp)
                return XFS_ERROR(ENOMEM);

        xfs_buf_unlock(bp);

        for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
                offset_fsb = XFS_B_TO_FSBT(mp, offset);
                nimap = 1;
                error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
                        NULL, 0, &imap, &nimap, NULL);
                if (error || nimap < 1)
                        break;
                ASSERT(imap.br_blockcount >= 1);
                ASSERT(imap.br_startoff == offset_fsb);
                lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
                if (lastoffset > endoff)
                        lastoffset = endoff;
                /* Holes and unwritten extents already read back as zeroes. */
                if (imap.br_startblock == HOLESTARTBLOCK)
                        continue;
                ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
                if (imap.br_state == XFS_EXT_UNWRITTEN)
                        continue;
                /* Read the block in ... */
                XFS_BUF_UNDONE(bp);
                XFS_BUF_UNWRITE(bp);
                XFS_BUF_READ(bp);
                XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
                xfsbdstrat(mp, bp);
                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
                                          mp, bp, XFS_BUF_ADDR(bp));
                        break;
                }
                /* ... zero the in-range portion ... */
                memset(bp->b_addr +
                        (offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
                      0, lastoffset - offset + 1);
                /* ... and write it back out. */
                XFS_BUF_UNDONE(bp);
                XFS_BUF_UNREAD(bp);
                XFS_BUF_WRITE(bp);
                xfsbdstrat(mp, bp);
                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
                                          mp, bp, XFS_BUF_ADDR(bp));
                        break;
                }
        }
        xfs_buf_free(bp);
        return error;
}

/*
 * xfs_free_file_space()
 *      This routine frees disk space for the given file.
 *
 *      This routine is only called by xfs_change_file_space
 *      for an UNRESVSP type call.
 *
 * RETURNS:
 *       0 on success
 *       errno on error
 *
 */
STATIC int
xfs_free_file_space(
        xfs_inode_t             *ip,
        xfs_off_t               offset,
        xfs_off_t               len,
        int                     attr_flags)
{
        int                     committed;
        int                     done;
        xfs_fileoff_t           endoffset_fsb;
        int                     error;
        xfs_fsblock_t           firstfsb;
        xfs_bmap_free_t         free_list;
        xfs_bmbt_irec_t         imap;
        xfs_off_t               ioffset;
        xfs_extlen_t            mod=0;
        xfs_mount_t             *mp;
        int                     nimap;
        uint                    resblks;
        uint                    rounding;
        int                     rt;
        xfs_fileoff_t           startoffset_fsb;
        xfs_trans_t             *tp;
        int                     need_iolock = 1;

        mp = ip->i_mount;

        trace_xfs_free_file_space(ip);

        error = xfs_qm_dqattach(ip, 0);
        if (error)
                return error;

        error = 0;
        if (len <= 0)   /* if nothing being freed */
                return error;
        rt = XFS_IS_REALTIME_INODE(ip);
        /* Round start up and end down to whole filesystem blocks. */
        startoffset_fsb = XFS_B_TO_FSB(mp, offset);
        endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

        if (attr_flags & XFS_ATTR_NOLOCK)
                need_iolock = 0;
        if (need_iolock) {
                xfs_ilock(ip, XFS_IOLOCK_EXCL);
                /* wait for the completion of any pending DIOs */
                xfs_ioend_wait(ip);
        }

        rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
        ioffset = offset & ~(rounding - 1);

        /* Toss any cached pages over the range before freeing the blocks. */
        if (VN_CACHED(VFS_I(ip)) != 0) {
                error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
                if (error)
                        goto out_unlock_iolock;
        }

        /*
         * Need to zero the stuff we're not freeing, on disk.
         * If it's a realtime file & can't use unwritten extents then we
         * actually need to zero the extent edges.  Otherwise xfs_bunmapi
         * will take care of it for us.
         */
        if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
                nimap = 1;
                error = xfs_bmapi(NULL, ip, startoffset_fsb,
                        1, 0, NULL, 0, &imap, &nimap, NULL);
                if (error)
                        goto out_unlock_iolock;
                ASSERT(nimap == 0 || nimap == 1);
                if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                        xfs_daddr_t     block;

                        ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
                        block = imap.br_startblock;
                        mod = do_div(block, mp->m_sb.sb_rextsize);
                        /* Push the start up to a realtime extent boundary. */
                        if (mod)
                                startoffset_fsb += mp->m_sb.sb_rextsize - mod;
                }
                nimap = 1;
                error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
                        1, 0, NULL, 0, &imap, &nimap, NULL);
                if (error)
                        goto out_unlock_iolock;
                ASSERT(nimap == 0 || nimap == 1);
                if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                        ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
                        mod++;
                        /* Pull the end back to a realtime extent boundary. */
                        if (mod && (mod != mp->m_sb.sb_rextsize))
                                endoffset_fsb -= mod;
                }
        }
        if ((done = (endoffset_fsb <= startoffset_fsb)))
                /*
                 * One contiguous piece to clear
                 */
                error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
        else {
                /*
                 * Some full blocks, possibly two pieces to clear
                 */
                if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
                        error = xfs_zero_remaining_bytes(ip, offset,
                                XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
                if (!error &&
                    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
                        error = xfs_zero_remaining_bytes(ip,
                                XFS_FSB_TO_B(mp, endoffset_fsb),
                                offset + len - 1);
        }

        /*
         * free file space until done or until there is an error
         */
        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
        while (!error && !done) {

                /*
                 * allocate and setup the transaction. Allow this
                 * transaction to dip into the reserve blocks to ensure
                 * the freeing of the space succeeds at ENOSPC.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp,
                                          resblks,
                                          XFS_WRITE_LOG_RES(mp),
                                          0,
                                          XFS_TRANS_PERM_LOG_RES,
                                          XFS_WRITE_LOG_COUNT);

                /*
                 * check for running out of space
                 */
                if (error) {
                        /*
                         * Free the transaction structure.
                         */
                        ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
                        xfs_trans_cancel(tp, 0);
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = xfs_trans_reserve_quota(tp, mp,
                                ip->i_udquot, ip->i_gdquot,
                                resblks, 0, XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto error1;

                xfs_trans_ijoin(tp, ip);

                /*
                 * issue the bunmapi() call to free the blocks
                 */
                xfs_bmap_init(&free_list, &firstfsb);
                error = xfs_bunmapi(tp, ip, startoffset_fsb,
                                  endoffset_fsb - startoffset_fsb,
                                  0, 2, &firstfsb, &free_list, &done);
                if (error) {
                        goto error0;
                }

                /*
                 * complete the transaction
                 */
                error = xfs_bmap_finish(&tp, &free_list, &committed);
                if (error) {
                        goto error0;
                }

                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }

 out_unlock_iolock:
        if (need_iolock)
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return error;

 error0:
        xfs_bmap_cancel(&free_list);
 error1:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
                    XFS_ILOCK_EXCL);
        return error;
}

/*
 * xfs_change_file_space()
 *      This routine allocates or frees disk space for the given file.
 *      The user specified parameters are checked for alignment and size
 *      limitations.
 *
 * RETURNS:
 *       0 on success
 *       errno on error
 *
 */
int
xfs_change_file_space(
        xfs_inode_t     *ip,
        int             cmd,
        xfs_flock64_t   *bf,
        xfs_off_t       offset,
        int             attr_flags)
{
        xfs_mount_t     *mp = ip->i_mount;
        int             clrprealloc;
        int             error;
        xfs_fsize_t     fsize;
        int             setprealloc;
        xfs_off_t       startoffset;
        xfs_off_t       llen;
        xfs_trans_t     *tp;
        struct iattr    iattr;
        int             prealloc_type;

        if (!S_ISREG(ip->i_d.di_mode))
                return XFS_ERROR(EINVAL);

        /* Resolve l_whence to an absolute start offset. */
        switch (bf->l_whence) {
        case 0: /*SEEK_SET*/
                break;
        case 1: /*SEEK_CUR*/
                bf->l_start += offset;
                break;
        case 2: /*SEEK_END*/
                bf->l_start += ip->i_size;
                break;
        default:
                return XFS_ERROR(EINVAL);
        }

        llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;

        if (   (bf->l_start < 0)
            || (bf->l_start > XFS_MAXIOFFSET(mp))
            || (bf->l_start + llen < 0)
            || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
                return XFS_ERROR(EINVAL);

        bf->l_whence = 0;

        startoffset = bf->l_start;
        fsize = ip->i_size;

        /*
         * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
         * file space.
         * These calls do NOT zero the data space allocated to the file,
         * nor do they change the file size.
         *
         * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
         * space.
         * These calls cause the new file data to be zeroed and the file
         * size to be changed.
         */
        setprealloc = clrprealloc = 0;
        prealloc_type = XFS_BMAPI_PREALLOC;

        switch (cmd) {
        case XFS_IOC_ZERO_RANGE:
                prealloc_type |= XFS_BMAPI_CONVERT;
                xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
                /* FALLTHRU */
        case XFS_IOC_RESVSP:
        case XFS_IOC_RESVSP64:
                error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
                                                prealloc_type, attr_flags);
                if (error)
                        return error;
                setprealloc = 1;
                break;

        case XFS_IOC_UNRESVSP:
        case XFS_IOC_UNRESVSP64:
                if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
                                                attr_flags)))
                        return error;
                break;

        case XFS_IOC_ALLOCSP:
        case XFS_IOC_ALLOCSP64:
        case XFS_IOC_FREESP:
        case XFS_IOC_FREESP64:
                /* Growing the file: allocate the new range first. */
                if (startoffset > fsize) {
                        error = xfs_alloc_file_space(ip, fsize,
                                        startoffset - fsize, 0, attr_flags);
                        if (error)
                                break;
                }

                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = startoffset;

                error = xfs_setattr_size(ip, &iattr, attr_flags);

                if (error)
                        return error;

                clrprealloc = 1;
                break;

        default:
                ASSERT(0);
                return XFS_ERROR(EINVAL);
        }

        /*
         * update the inode timestamp, mode, and prealloc flag bits
         */
        tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);

        if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
                                      0, 0, 0))) {
                /* ASSERT(0); */
                xfs_trans_cancel(tp, 0);
                return error;
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);

        xfs_trans_ijoin(tp, ip);

        if ((attr_flags & XFS_ATTR_DMI) == 0) {
                /* Non-DMAPI callers: strip setuid/setgid like a write would. */
                ip->i_d.di_mode &= ~S_ISUID;

                /*
                 * Note that we don't have to worry about mandatory
                 * file locking being disabled here because we only
                 * clear the S_ISGID bit if the Group execute bit is
                 * on, but if it was on then mandatory locking wouldn't
                 * have been enabled.
                 */
                if (ip->i_d.di_mode & S_IXGRP)
                        ip->i_d.di_mode &= ~S_ISGID;

                xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
        if (setprealloc)
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
        else if (clrprealloc)
                ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        if (attr_flags & XFS_ATTR_SYNC)
                xfs_trans_set_sync(tp);

        error = xfs_trans_commit(tp, 0);

        xfs_iunlock(ip, XFS_ILOCK_EXCL);

        return error;
}