/kern_oII/fs/jfs/jfs_imap.c
http://omnia2droid.googlecode.com/ · C · 3188 lines · 1602 code · 377 blank · 1209 comment · 350 complexity · 702b0d3fa15acb1cf47ec717e9212f48 MD5 · raw file
Large files are truncated click here to view the full file
- /*
- * Copyright (C) International Business Machines Corp., 2000-2004
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
- /*
- * jfs_imap.c: inode allocation map manager
- *
- * Serialization:
- * Each AG has a simple lock which is used to control the serialization of
- * the AG level lists. This lock should be taken first whenever an AG
- * level list will be modified or accessed.
- *
- * Each IAG is locked by obtaining the buffer for the IAG page.
- *
- * There is also an inode lock for the inode map inode. A read lock needs to
- * be taken whenever an IAG is read from the map or the global level
- * information is read. A write lock needs to be taken whenever the global
- * level information is modified or an atomic operation needs to be used.
- *
- * If more than one IAG is read at one time, the read lock may not
- * be given up until all of the IAG's are read. Otherwise, a deadlock
- * may occur when trying to obtain the read lock while another thread
- * holding the read lock is waiting on the IAG already being held.
- *
- * The control page of the inode map is read into memory by diMount().
- * Thereafter it should only be modified in memory and then it will be
- * written out when the filesystem is unmounted by diUnmount().
- */
- #include <linux/fs.h>
- #include <linux/buffer_head.h>
- #include <linux/pagemap.h>
- #include <linux/quotaops.h>
- #include "jfs_incore.h"
- #include "jfs_inode.h"
- #include "jfs_filsys.h"
- #include "jfs_dinode.h"
- #include "jfs_dmap.h"
- #include "jfs_imap.h"
- #include "jfs_metapage.h"
- #include "jfs_superblock.h"
- #include "jfs_debug.h"
/*
 * imap locks
 *
 * Thin wrappers around the mutex API for the locks embedded in the
 * in-memory inode map.  Every macro argument is parenthesized so that
 * an arbitrary pointer or index expression (e.g. "maps + 1") expands
 * with the intended precedence.
 */

/* iag free list lock: taken around updates of the map's free iag list */
#define IAGFREE_LOCK_INIT(imap)		mutex_init(&(imap)->im_freelock)
#define IAGFREE_LOCK(imap)		mutex_lock(&(imap)->im_freelock)
#define IAGFREE_UNLOCK(imap)		mutex_unlock(&(imap)->im_freelock)

/* per ag iag list locks: one mutex per allocation group, indexed by ag */
#define AG_LOCK_INIT(imap, index)	mutex_init(&(imap)->im_aglock[(index)])
#define AG_LOCK(imap, agno)		mutex_lock(&(imap)->im_aglock[(agno)])
#define AG_UNLOCK(imap, agno)		mutex_unlock(&(imap)->im_aglock[(agno)])
- /*
- * forward references
- *
- * Prototypes for file-local (static) helpers, declared up front so that
- * the routines below can call them before their definitions appear.
- * NOTE(review): the definitions are presumably further down in this file;
- * parameter names are omitted here, see each definition for their meaning.
- */
- static int diAllocAG(struct inomap *, int, bool, struct inode *);
- static int diAllocAny(struct inomap *, int, bool, struct inode *);
- static int diAllocBit(struct inomap *, struct iag *, int);
- static int diAllocExt(struct inomap *, int, struct inode *);
- static int diAllocIno(struct inomap *, int, struct inode *);
- static int diFindFree(u32, int);
- static int diNewExt(struct inomap *, struct iag *, int);
- static int diNewIAG(struct inomap *, int *, int, struct metapage **);
- static void duplicateIXtree(struct super_block *, s64, int, s64 *);
- static int diIAGRead(struct inomap * imap, int, struct metapage **); /* callers pass an iag number and get back the metapage whose data is the iag */
- static int copy_from_dinode(struct dinode *, struct inode *); /* disk inode -> incore inode; non-zero return is treated as failure by callers */
- static void copy_to_dinode(struct dinode *, struct inode *); /* incore inode -> disk inode */
- /*
- * NAME: diMount()
- *
- * FUNCTION: initialize the incore inode map control structures for
- * a fileset or aggregate init time.
- *
- * the inode map's control structure (dinomap) is
- * brought in from disk and placed in virtual memory.
- *
- * PARAMETERS:
- * ipimap - pointer to inode map inode for the aggregate or fileset.
- *
- * RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient free virtual memory.
- * -EIO - i/o error.
- */
- int diMount(struct inode *ipimap)
- {
- struct inomap *imap;
- struct metapage *mp;
- int index;
- struct dinomap_disk *dinom_le;
- /*
- * allocate/initialize the in-memory inode map control structure
- */
- /* allocate the in-memory inode map control structure. */
- imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
- if (imap == NULL) {
- jfs_err("diMount: kmalloc returned NULL!");
- return -ENOMEM;
- }
- /* read the on-disk inode map control structure. */
- mp = read_metapage(ipimap,
- IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
- PSIZE, 0);
- if (mp == NULL) {
- kfree(imap);
- return -EIO;
- }
- /* copy the on-disk version to the in-memory version. */
- dinom_le = (struct dinomap_disk *) mp->data;
- imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
- imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
- atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
- atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
- imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
- imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
- for (index = 0; index < MAXAG; index++) {
- imap->im_agctl[index].inofree =
- le32_to_cpu(dinom_le->in_agctl[index].inofree);
- imap->im_agctl[index].extfree =
- le32_to_cpu(dinom_le->in_agctl[index].extfree);
- imap->im_agctl[index].numinos =
- le32_to_cpu(dinom_le->in_agctl[index].numinos);
- imap->im_agctl[index].numfree =
- le32_to_cpu(dinom_le->in_agctl[index].numfree);
- }
- /* release the buffer. */
- release_metapage(mp);
- /*
- * allocate/initialize inode allocation map locks
- */
- /* allocate and init iag free list lock */
- IAGFREE_LOCK_INIT(imap);
- /* allocate and init ag list locks */
- for (index = 0; index < MAXAG; index++) {
- AG_LOCK_INIT(imap, index);
- }
- /* bind the inode map inode and inode map control structure
- * to each other.
- */
- imap->im_ipimap = ipimap;
- JFS_IP(ipimap)->i_imap = imap;
- return (0);
- }
- /*
- * NAME: diUnmount()
- *
- * FUNCTION: write to disk the incore inode map control structures for
- * a fileset or aggregate at unmount time.
- *
- * PARAMETERS:
- * ipimap - pointer to inode map inode for the aggregate or fileset.
- *
- * RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient free virtual memory.
- * -EIO - i/o error.
- */
- int diUnmount(struct inode *ipimap, int mounterror)
- {
- struct inomap *imap = JFS_IP(ipimap)->i_imap;
- /*
- * update the on-disk inode map control structure
- */
- if (!(mounterror || isReadOnly(ipimap)))
- diSync(ipimap);
- /*
- * Invalidate the page cache buffers
- */
- truncate_inode_pages(ipimap->i_mapping, 0);
- /*
- * free in-memory control structure
- */
- kfree(imap);
- return (0);
- }
- /*
- * diSync()
- */
- int diSync(struct inode *ipimap)
- {
- struct dinomap_disk *dinom_le;
- struct inomap *imp = JFS_IP(ipimap)->i_imap;
- struct metapage *mp;
- int index;
- /*
- * write imap global conrol page
- */
- /* read the on-disk inode map control structure */
- mp = get_metapage(ipimap,
- IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
- PSIZE, 0);
- if (mp == NULL) {
- jfs_err("diSync: get_metapage failed!");
- return -EIO;
- }
- /* copy the in-memory version to the on-disk version */
- dinom_le = (struct dinomap_disk *) mp->data;
- dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
- dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
- dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
- dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
- dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
- dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
- for (index = 0; index < MAXAG; index++) {
- dinom_le->in_agctl[index].inofree =
- cpu_to_le32(imp->im_agctl[index].inofree);
- dinom_le->in_agctl[index].extfree =
- cpu_to_le32(imp->im_agctl[index].extfree);
- dinom_le->in_agctl[index].numinos =
- cpu_to_le32(imp->im_agctl[index].numinos);
- dinom_le->in_agctl[index].numfree =
- cpu_to_le32(imp->im_agctl[index].numfree);
- }
- /* write out the control structure */
- write_metapage(mp);
- /*
- * write out dirty pages of imap
- */
- filemap_write_and_wait(ipimap->i_mapping);
- diWriteSpecial(ipimap, 0);
- return (0);
- }
- /*
- * NAME: diRead()
- *
- * FUNCTION: initialize an incore inode from disk.
- *
- * on entry, the specifed incore inode should itself
- * specify the disk inode number corresponding to the
- * incore inode (i.e. i_number should be initialized).
- *
- * this routine handles incore inode initialization for
- * both "special" and "regular" inodes. special inodes
- * are those required early in the mount process and
- * require special handling since much of the file system
- * is not yet initialized. these "special" inodes are
- * identified by a NULL inode map inode pointer and are
- * actually initialized by a call to diReadSpecial().
- *
- * for regular inodes, the iag describing the disk inode
- * is read from disk to determine the inode extent address
- * for the disk inode. with the inode extent address in
- * hand, the page of the extent that contains the disk
- * inode is read and the disk inode is copied to the
- * incore inode.
- *
- * PARAMETERS:
- * ip - pointer to incore inode to be initialized from disk.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOMEM - insufficient memory
- *
- */
- int diRead(struct inode *ip)
- {
- struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
- int iagno, ino, extno, rc;
- struct inode *ipimap;
- struct dinode *dp;
- struct iag *iagp;
- struct metapage *mp;
- s64 blkno, agstart;
- struct inomap *imap;
- int block_offset;
- int inodes_left;
- unsigned long pageno;
- int rel_inode;
- jfs_info("diRead: ino = %ld", ip->i_ino);
- ipimap = sbi->ipimap;
- JFS_IP(ip)->ipimap = ipimap;
- /* determine the iag number for this inode (number) */
- iagno = INOTOIAG(ip->i_ino);
- /* read the iag */
- imap = JFS_IP(ipimap)->i_imap;
- IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
- rc = diIAGRead(imap, iagno, &mp);
- IREAD_UNLOCK(ipimap);
- if (rc) {
- jfs_err("diRead: diIAGRead returned %d", rc);
- return (rc);
- }
- iagp = (struct iag *) mp->data;
- /* determine inode extent that holds the disk inode */
- ino = ip->i_ino & (INOSPERIAG - 1);
- extno = ino >> L2INOSPEREXT;
- if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
- (addressPXD(&iagp->inoext[extno]) == 0)) {
- release_metapage(mp);
- return -ESTALE;
- }
- /* get disk block number of the page within the inode extent
- * that holds the disk inode.
- */
- blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
- /* get the ag for the iag */
- agstart = le64_to_cpu(iagp->agstart);
- release_metapage(mp);
- rel_inode = (ino & (INOSPERPAGE - 1));
- pageno = blkno >> sbi->l2nbperpage;
- if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
- /*
- * OS/2 didn't always align inode extents on page boundaries
- */
- inodes_left =
- (sbi->nbperpage - block_offset) << sbi->l2niperblk;
- if (rel_inode < inodes_left)
- rel_inode += block_offset << sbi->l2niperblk;
- else {
- pageno += 1;
- rel_inode -= inodes_left;
- }
- }
- /* read the page of disk inode */
- mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
- if (!mp) {
- jfs_err("diRead: read_metapage failed");
- return -EIO;
- }
- /* locate the disk inode requested */
- dp = (struct dinode *) mp->data;
- dp += rel_inode;
- if (ip->i_ino != le32_to_cpu(dp->di_number)) {
- jfs_error(ip->i_sb, "diRead: i_ino != di_number");
- rc = -EIO;
- } else if (le32_to_cpu(dp->di_nlink) == 0)
- rc = -ESTALE;
- else
- /* copy the disk inode to the in-memory inode */
- rc = copy_from_dinode(dp, ip);
- release_metapage(mp);
- /* set the ag for the inode */
- JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
- JFS_IP(ip)->active_ag = -1;
- return (rc);
- }
- /*
- * NAME: diReadSpecial()
- *
- * FUNCTION: initialize a 'special' inode from disk.
- *
- * this routines handles aggregate level inodes. The
- * inode cache cannot differentiate between the
- * aggregate inodes and the filesystem inodes, so we
- * handle these here. We don't actually use the aggregate
- * inode map, since these inodes are at a fixed location
- * and in some cases the aggregate inode map isn't initialized
- * yet.
- *
- * PARAMETERS:
- * sb - filesystem superblock
- * inum - aggregate inode number
- * secondary - 1 if secondary aggregate inode table
- *
- * RETURN VALUES:
- * new inode - success
- * NULL - i/o error.
- */
- struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
- {
- struct jfs_sb_info *sbi = JFS_SBI(sb);
- uint address;
- struct dinode *dp;
- struct inode *ip;
- struct metapage *mp;
- ip = new_inode(sb);
- if (ip == NULL) {
- jfs_err("diReadSpecial: new_inode returned NULL!");
- return ip;
- }
- if (secondary) {
- address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
- JFS_IP(ip)->ipimap = sbi->ipaimap2;
- } else {
- address = AITBL_OFF >> L2PSIZE;
- JFS_IP(ip)->ipimap = sbi->ipaimap;
- }
- ASSERT(inum < INOSPEREXT);
- ip->i_ino = inum;
- address += inum >> 3; /* 8 inodes per 4K page */
- /* read the page of fixed disk inode (AIT) in raw mode */
- mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
- if (mp == NULL) {
- ip->i_nlink = 1; /* Don't want iput() deleting it */
- iput(ip);
- return (NULL);
- }
- /* get the pointer to the disk inode of interest */
- dp = (struct dinode *) (mp->data);
- dp += inum % 8; /* 8 inodes per 4K page */
- /* copy on-disk inode to in-memory inode */
- if ((copy_from_dinode(dp, ip)) != 0) {
- /* handle bad return by returning NULL for ip */
- ip->i_nlink = 1; /* Don't want iput() deleting it */
- iput(ip);
- /* release the page */
- release_metapage(mp);
- return (NULL);
- }
- ip->i_mapping->a_ops = &jfs_metapage_aops;
- mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
- /* Allocations to metadata inodes should not affect quotas */
- ip->i_flags |= S_NOQUOTA;
- if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
- sbi->gengen = le32_to_cpu(dp->di_gengen);
- sbi->inostamp = le32_to_cpu(dp->di_inostamp);
- }
- /* release the page */
- release_metapage(mp);
- /*
- * __mark_inode_dirty expects inodes to be hashed. Since we don't
- * want special inodes in the fileset inode space, we make them
- * appear hashed, but do not put on any lists. hlist_del()
- * will work fine and require no locking.
- */
- ip->i_hash.pprev = &ip->i_hash.next;
- return (ip);
- }
- /*
- * NAME: diWriteSpecial()
- *
- * FUNCTION: Write the special inode to disk
- *
- * PARAMETERS:
- * ip - special inode
- * secondary - 1 if secondary aggregate inode table
- *
- * RETURN VALUES: none
- */
- void diWriteSpecial(struct inode *ip, int secondary)
- {
- struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
- uint address;
- struct dinode *dp;
- ino_t inum = ip->i_ino;
- struct metapage *mp;
- if (secondary)
- address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
- else
- address = AITBL_OFF >> L2PSIZE;
- ASSERT(inum < INOSPEREXT);
- address += inum >> 3; /* 8 inodes per 4K page */
- /* read the page of fixed disk inode (AIT) in raw mode */
- mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
- if (mp == NULL) {
- jfs_err("diWriteSpecial: failed to read aggregate inode "
- "extent!");
- return;
- }
- /* get the pointer to the disk inode of interest */
- dp = (struct dinode *) (mp->data);
- dp += inum % 8; /* 8 inodes per 4K page */
- /* copy on-disk inode to in-memory inode */
- copy_to_dinode(dp, ip);
- memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
- if (inum == FILESYSTEM_I)
- dp->di_gengen = cpu_to_le32(sbi->gengen);
- /* write the page */
- write_metapage(mp);
- }
- /*
- * NAME: diFreeSpecial()
- *
- * FUNCTION: Free allocated space for special inode
- */
- void diFreeSpecial(struct inode *ip)
- {
- if (ip == NULL) {
- jfs_err("diFreeSpecial called with NULL ip!");
- return;
- }
- filemap_write_and_wait(ip->i_mapping);
- truncate_inode_pages(ip->i_mapping, 0);
- iput(ip);
- }
- /*
- * NAME: diWrite()
- *
- * FUNCTION: write the on-disk inode portion of the in-memory inode
- * to its corresponding on-disk inode.
- *
- * on entry, the specified incore inode should itself
- * specify the disk inode number corresponding to the
- * incore inode (i.e. i_number should be initialized).
- *
- * the inode contains the inode extent address for the disk
- * inode. with the inode extent address in hand, the
- * page of the extent that contains the disk inode is
- * read and the disk inode portion of the incore inode
- * is copied to the disk inode.
- *
- * the page is not written synchronously here: the changed
- * regions are recorded as line-lock entries of the transaction
- * lock taken on the page, and the page is handed back with
- * write_metapage().
- *
- * PARAMETERS:
- * tid - transaction id
- * ip - pointer to incore inode to be written to the inode extent.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error: the inode's cached extent descriptor (ixpxd) is
- * invalid, or the page holding the disk inode could not be read.
- */
- int diWrite(tid_t tid, struct inode *ip)
- {
- struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
- struct jfs_inode_info *jfs_ip = JFS_IP(ip);
- int rc = 0;
- s32 ino;
- struct dinode *dp;
- s64 blkno;
- int block_offset;
- int inodes_left;
- struct metapage *mp;
- unsigned long pageno;
- int rel_inode;
- int dioffset;
- struct inode *ipimap;
- uint type;
- lid_t lid;
- struct tlock *ditlck, *tlck;
- struct linelock *dilinelock, *ilinelock;
- struct lv *lv;
- int n;
- ipimap = jfs_ip->ipimap;
- ino = ip->i_ino & (INOSPERIAG - 1); /* inode index within its iag */
- if (!addressPXD(&(jfs_ip->ixpxd)) ||
- (lengthPXD(&(jfs_ip->ixpxd)) !=
- JFS_IP(ipimap)->i_imap->im_nbperiext)) {
- jfs_error(ip->i_sb, "diWrite: ixpxd invalid");
- return -EIO;
- }
- /*
- * read the page of disk inode containing the specified inode:
- */
- /* compute the block address of the page */
- blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
- rel_inode = (ino & (INOSPERPAGE - 1)); /* inode index within the page */
- pageno = blkno >> sbi->l2nbperpage;
- if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
- /*
- * OS/2 didn't always align inode extents on page boundaries:
- * the inode is either further into this page or in the next one.
- */
- inodes_left =
- (sbi->nbperpage - block_offset) << sbi->l2niperblk;
- if (rel_inode < inodes_left)
- rel_inode += block_offset << sbi->l2niperblk;
- else {
- pageno += 1;
- rel_inode -= inodes_left;
- }
- }
- /* read the page of disk inode */
- retry: /* re-entered whenever txLock() below returns NULL */
- mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
- if (!mp)
- return -EIO;
- /* get the pointer to the disk inode */
- dp = (struct dinode *) mp->data;
- dp += rel_inode;
- dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; /* converted to line-lock slot units via L2INODESLOTSIZE below */
- /*
- * acquire transaction lock on the on-disk inode;
- * N.B. tlock is acquired on ipimap not ip;
- * a NULL return means the lock was not obtained, in which case
- * the page is read again and the lock retried.
- */
- if ((ditlck =
- txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
- goto retry;
- dilinelock = (struct linelock *) & ditlck->lock;
- /*
- * copy btree root from in-memory inode to on-disk inode
- *
- * (tlock is taken from inline B+-tree root in in-memory
- * inode when the B+-tree root is updated, which is pointed
- * by jfs_ip->blid as well as being on tx tlock list)
- *
- * further processing of btree root is based on the copy
- * in in-memory inode, where txLog() will log from, and,
- * for xtree root, txUpdateMap() will update map and reset
- * XAD_NEW bit;
- */
- if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
- /*
- * This is the special xtree inside the directory for storing
- * the directory table
- */
- xtpage_t *p, *xp;
- xad_t *xad;
- jfs_ip->xtlid = 0;
- tlck = lid_to_tlock(lid);
- assert(tlck->type & tlckXTREE);
- tlck->type |= tlckBTROOT;
- tlck->mp = mp;
- ilinelock = (struct linelock *) & tlck->lock;
- /*
- * copy xtree root from inode to dinode:
- * only the slot ranges recorded in the line lock are copied.
- */
- p = &jfs_ip->i_xtroot;
- xp = (xtpage_t *) &dp->di_dirtable;
- lv = ilinelock->lv;
- for (n = 0; n < ilinelock->index; n++, lv++) {
- memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
- lv->length << L2XTSLOTSIZE);
- }
- /* reset on-disk (metadata page) xtree XAD_NEW bit */
- xad = &xp->xad[XTENTRYSTART];
- for (n = XTENTRYSTART;
- n < le16_to_cpu(xp->header.nextindex); n++, xad++)
- if (xad->flag & (XAD_NEW | XAD_EXTENDED))
- xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
- }
- if ((lid = jfs_ip->blid) == 0) /* no btree root tlock: nothing to copy */
- goto inlineData;
- jfs_ip->blid = 0;
- tlck = lid_to_tlock(lid);
- type = tlck->type; /* sampled before tlckBTROOT is or'ed in */
- tlck->type |= tlckBTROOT;
- tlck->mp = mp;
- ilinelock = (struct linelock *) & tlck->lock;
- /*
- * regular file: 16 byte (XAD slot) granularity
- */
- if (type & tlckXTREE) {
- xtpage_t *p, *xp;
- xad_t *xad;
- /*
- * copy xtree root from inode to dinode:
- * only the slot ranges recorded in the line lock are copied.
- */
- p = &jfs_ip->i_xtroot;
- xp = &dp->di_xtroot;
- lv = ilinelock->lv;
- for (n = 0; n < ilinelock->index; n++, lv++) {
- memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
- lv->length << L2XTSLOTSIZE);
- }
- /* reset on-disk (metadata page) xtree XAD_NEW bit */
- xad = &xp->xad[XTENTRYSTART];
- for (n = XTENTRYSTART;
- n < le16_to_cpu(xp->header.nextindex); n++, xad++)
- if (xad->flag & (XAD_NEW | XAD_EXTENDED))
- xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
- }
- /*
- * directory: 32 byte (directory entry slot) granularity
- */
- else if (type & tlckDTREE) {
- dtpage_t *p, *xp;
- /*
- * copy dtree root from inode to dinode:
- * only the slot ranges recorded in the line lock are copied.
- */
- p = (dtpage_t *) &jfs_ip->i_dtroot;
- xp = (dtpage_t *) & dp->di_dtroot;
- lv = ilinelock->lv;
- for (n = 0; n < ilinelock->index; n++, lv++) {
- memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
- lv->length << L2DTSLOTSIZE);
- }
- } else {
- jfs_err("diWrite: UFO tlock");
- }
- inlineData:
- /*
- * copy inline symlink from in-memory inode to on-disk inode;
- * a two-slot line-lock entry starting 2 * 128 bytes into the
- * disk inode is appended to the inode tlock.
- */
- if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
- lv = & dilinelock->lv[dilinelock->index];
- lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
- lv->length = 2;
- memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
- dilinelock->index++;
- }
- /*
- * copy inline data from in-memory inode to on-disk inode:
- * 128 byte slot granularity
- * (done only when COMMIT_Inlineea is set; the flag is cleared here)
- */
- if (test_cflag(COMMIT_Inlineea, ip)) {
- lv = & dilinelock->lv[dilinelock->index];
- lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
- lv->length = 1;
- memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
- dilinelock->index++;
- clear_cflag(COMMIT_Inlineea, ip);
- }
- /*
- * lock/copy inode base: 128 byte slot granularity
- * (two slots when the directory table was dirtied, otherwise one)
- */
- lv = & dilinelock->lv[dilinelock->index];
- lv->offset = dioffset >> L2INODESLOTSIZE;
- copy_to_dinode(dp, ip);
- if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
- lv->length = 2;
- memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); /* NOTE(review): 96 is presumably the size of the inline directory table; confirm */
- } else
- lv->length = 1;
- dilinelock->index++;
- /* release the buffer holding the updated on-disk inode.
- * the buffer will be later written by commit processing.
- */
- write_metapage(mp);
- return (rc);
- }
- /*
- * NAME: diFree(ip)
- *
- * FUNCTION: free a specified inode from the inode working map
- * for a fileset or aggregate.
- *
- * if the inode to be freed represents the first (only)
- * free inode within the iag, the iag will be placed on
- * the ag free inode list.
- *
- * freeing the inode will cause the inode extent to be
- * freed if the inode is the only allocated inode within
- * the extent. in this case all the disk resource backing
- * up the inode extent will be freed. in addition, the iag
- * will be placed on the ag extent free list if the extent
- * is the first free extent in the iag. if freeing the
- * extent also means that no free inodes will exist for
- * the iag, the iag will also be removed from the ag free
- * inode list.
- *
- * the iag describing the inode will be freed if the extent
- * is to be freed and it is the only backed extent within
- * the iag. in this case, the iag will be removed from the
- * ag free extent list and ag free inode list and placed on
- * the inode map's free iag list.
- *
- * a careful update approach is used to provide consistency
- * in the face of updates to multiple buffers. under this
- * approach, all required buffers are obtained before making
- * any updates and are held until all updates are complete.
- *
- * PARAMETERS:
- * ip - inode to be freed.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- */
- int diFree(struct inode *ip)
- {
- int rc;
- ino_t inum = ip->i_ino;
- struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
- struct metapage *mp, *amp, *bmp, *cmp, *dmp;
- int iagno, ino, extno, bitno, sword, agno;
- int back, fwd;
- u32 bitmap, mask;
- struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
- struct inomap *imap = JFS_IP(ipimap)->i_imap;
- pxd_t freepxd;
- tid_t tid;
- struct inode *iplist[3];
- struct tlock *tlck;
- struct pxd_lock *pxdlock;
- /*
- * This is just to suppress compiler warnings. The same logic that
- * references these variables is used to initialize them.
- */
- aiagp = biagp = ciagp = diagp = NULL;
- /* get the iag number containing the inode.
- */
- iagno = INOTOIAG(inum);
- /* make sure that the iag is contained within
- * the map.
- */
- if (iagno >= imap->im_nextiag) {
- print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
- imap, 32, 0);
- jfs_error(ip->i_sb,
- "diFree: inum = %d, iagno = %d, nextiag = %d",
- (uint) inum, iagno, imap->im_nextiag);
- return -EIO;
- }
- /* get the allocation group for this ino.
- */
- agno = JFS_IP(ip)->agno;
- /* Lock the AG specific inode map information
- */
- AG_LOCK(imap, agno);
- /* Obtain read lock in imap inode. Don't release it until we have
- * read all of the IAG's that we are going to.
- */
- IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
- /* read the iag.
- */
- if ((rc = diIAGRead(imap, iagno, &mp))) {
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- return (rc);
- }
- iagp = (struct iag *) mp->data;
- /* get the inode number and extent number of the inode within
- * the iag and the inode number within the extent.
- */
- ino = inum & (INOSPERIAG - 1);
- extno = ino >> L2INOSPEREXT;
- bitno = ino & (INOSPEREXT - 1);
- mask = HIGHORDER >> bitno;
- if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
- jfs_error(ip->i_sb,
- "diFree: wmap shows inode already free");
- }
- if (!addressPXD(&iagp->inoext[extno])) {
- release_metapage(mp);
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- jfs_error(ip->i_sb, "diFree: invalid inoext");
- return -EIO;
- }
- /* compute the bitmap for the extent reflecting the freed inode.
- */
- bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
- if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
- release_metapage(mp);
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- jfs_error(ip->i_sb, "diFree: numfree > numinos");
- return -EIO;
- }
- /*
- * inode extent still has some inodes or below low water mark:
- * keep the inode extent;
- */
- if (bitmap ||
- imap->im_agctl[agno].numfree < 96 ||
- (imap->im_agctl[agno].numfree < 288 &&
- (((imap->im_agctl[agno].numfree * 100) /
- imap->im_agctl[agno].numinos) <= 25))) {
- /* if the iag currently has no free inodes (i.e.,
- * the inode being freed is the first free inode of iag),
- * insert the iag at head of the inode free list for the ag.
- */
- if (iagp->nfreeinos == 0) {
- /* check if there are any iags on the ag inode
- * free list. if so, read the first one so that
- * we can link the current iag onto the list at
- * the head.
- */
- if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
- /* read the iag that currently is the head
- * of the list.
- */
- if ((rc = diIAGRead(imap, fwd, &))) {
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- release_metapage(mp);
- return (rc);
- }
- aiagp = (struct iag *) amp->data;
- /* make current head point back to the iag.
- */
- aiagp->inofreeback = cpu_to_le32(iagno);
- write_metapage(amp);
- }
- /* iag points forward to current head and iag
- * becomes the new head of the list.
- */
- iagp->inofreefwd =
- cpu_to_le32(imap->im_agctl[agno].inofree);
- iagp->inofreeback = cpu_to_le32(-1);
- imap->im_agctl[agno].inofree = iagno;
- }
- IREAD_UNLOCK(ipimap);
- /* update the free inode summary map for the extent if
- * freeing the inode means the extent will now have free
- * inodes (i.e., the inode being freed is the first free
- * inode of extent),
- */
- if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
- sword = extno >> L2EXTSPERSUM;
- bitno = extno & (EXTSPERSUM - 1);
- iagp->inosmap[sword] &=
- cpu_to_le32(~(HIGHORDER >> bitno));
- }
- /* update the bitmap.
- */
- iagp->wmap[extno] = cpu_to_le32(bitmap);
- /* update the free inode counts at the iag, ag and
- * map level.
- */
- le32_add_cpu(&iagp->nfreeinos, 1);
- imap->im_agctl[agno].numfree += 1;
- atomic_inc(&imap->im_numfree);
- /* release the AG inode map lock
- */
- AG_UNLOCK(imap, agno);
- /* write the iag */
- write_metapage(mp);
- return (0);
- }
- /*
- * inode extent has become free and above low water mark:
- * free the inode extent;
- */
- /*
- * prepare to update iag list(s) (careful update step 1)
- */
- amp = bmp = cmp = dmp = NULL;
- fwd = back = -1;
- /* check if the iag currently has no free extents. if so,
- * it will be placed on the head of the ag extent free list.
- */
- if (iagp->nfreeexts == 0) {
- /* check if the ag extent free list has any iags.
- * if so, read the iag at the head of the list now.
- * this (head) iag will be updated later to reflect
- * the addition of the current iag at the head of
- * the list.
- */
- if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
- if ((rc = diIAGRead(imap, fwd, &)))
- goto error_out;
- aiagp = (struct iag *) amp->data;
- }
- } else {
- /* iag has free extents. check if the addition of a free
- * extent will cause all extents to be free within this
- * iag. if so, the iag will be removed from the ag extent
- * free list and placed on the inode map's free iag list.
- */
- if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
- /* in preparation for removing the iag from the
- * ag extent free list, read the iags preceeding
- * and following the iag on the ag extent free
- * list.
- */
- if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
- if ((rc = diIAGRead(imap, fwd, &)))
- goto error_out;
- aiagp = (struct iag *) amp->data;
- }
- if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
- if ((rc = diIAGRead(imap, back, &bmp)))
- goto error_out;
- biagp = (struct iag *) bmp->data;
- }
- }
- }
- /* remove the iag from the ag inode free list if freeing
- * this extent cause the iag to have no free inodes.
- */
- if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
- int inofreeback = le32_to_cpu(iagp->inofreeback);
- int inofreefwd = le32_to_cpu(iagp->inofreefwd);
- /* in preparation for removing the iag from the
- * ag inode free list, read the iags preceeding
- * and following the iag on the ag inode free
- * list. before reading these iags, we must make
- * sure that we already don't have them in hand
- * from up above, since re-reading an iag (buffer)
- * we are currently holding would cause a deadlock.
- */
- if (inofreefwd >= 0) {
- if (inofreefwd == fwd)
- ciagp = (struct iag *) amp->data;
- else if (inofreefwd == back)
- ciagp = (struct iag *) bmp->data;
- else {
- if ((rc =
- diIAGRead(imap, inofreefwd, &cmp)))
- goto error_out;
- ciagp = (struct iag *) cmp->data;
- }
- assert(ciagp != NULL);
- }
- if (inofreeback >= 0) {
- if (inofreeback == fwd)
- diagp = (struct iag *) amp->data;
- else if (inofreeback == back)
- diagp = (struct iag *) bmp->data;
- else {
- if ((rc =
- diIAGRead(imap, inofreeback, &dmp)))
- goto error_out;
- diagp = (struct iag *) dmp->data;
- }
- assert(diagp != NULL);
- }
- }
- IREAD_UNLOCK(ipimap);
- /*
- * invalidate any page of the inode extent freed from buffer cache;
- */
- freepxd = iagp->inoext[extno];
- invalidate_pxd_metapages(ip, freepxd);
- /*
- * update iag list(s) (careful update step 2)
- */
- /* add the iag to the ag extent free list if this is the
- * first free extent for the iag.
- */
- if (iagp->nfreeexts == 0) {
- if (fwd >= 0)
- aiagp->extfreeback = cpu_to_le32(iagno);
- iagp->extfreefwd =
- cpu_to_le32(imap->im_agctl[agno].extfree);
- iagp->extfreeback = cpu_to_le32(-1);
- imap->im_agctl[agno].extfree = iagno;
- } else {
- /* remove the iag from the ag extent list if all extents
- * are now free and place it on the inode map iag free list.
- */
- if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
- if (fwd >= 0)
- aiagp->extfreeback = iagp->extfreeback;
- if (back >= 0)
- biagp->extfreefwd = iagp->extfreefwd;
- else
- imap->im_agctl[agno].extfree =
- le32_to_cpu(iagp->extfreefwd);
- iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
- IAGFREE_LOCK(imap);
- iagp->iagfree = cpu_to_le32(imap->im_freeiag);
- imap->im_freeiag = iagno;
- IAGFREE_UNLOCK(imap);
- }
- }
- /* remove the iag from the ag inode free list if freeing
- * this extent causes the iag to have no free inodes.
- */
- if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
- if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
- ciagp->inofreeback = iagp->inofreeback;
- if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
- diagp->inofreefwd = iagp->inofreefwd;
- else
- imap->im_agctl[agno].inofree =
- le32_to_cpu(iagp->inofreefwd);
- iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
- }
- /* update the inode extent address and working map
- * to reflect the free extent.
- * the permanent map should have been updated already
- * for the inode being freed.
- */
- if (iagp->pmap[extno] != 0) {
- jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
- }
- iagp->wmap[extno] = 0;
- PXDlength(&iagp->inoext[extno], 0);
- PXDaddress(&iagp->inoext[extno], 0);
- /* update the free extent and free inode summary maps
- * to reflect the freed extent.
- * the inode summary map is marked to indicate no inodes
- * available for the freed extent.
- */
- sword = extno >> L2EXTSPERSUM;
- bitno = extno & (EXTSPERSUM - 1);
- mask = HIGHORDER >> bitno;
- iagp->inosmap[sword] |= cpu_to_le32(mask);
- iagp->extsmap[sword] &= cpu_to_le32(~mask);
- /* update the number of free inodes and number of free extents
- * for the iag.
- */
- le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
- le32_add_cpu(&iagp->nfreeexts, 1);
- /* update the number of free inodes and backed inodes
- * at the ag and inode map level.
- */
- imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
- imap->im_agctl[agno].numinos -= INOSPEREXT;
- atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
- atomic_sub(INOSPEREXT, &imap->im_numinos);
- if (amp)
- write_metapage(amp);
- if (bmp)
- write_metapage(bmp);
- if (cmp)
- write_metapage(cmp);
- if (dmp)
- write_metapage(dmp);
- /*
- * start transaction to update block allocation map
- * for the inode extent freed;
- *
- * N.B. AG_LOCK is released and iag will be released below, and
- * other thread may allocate inode from/reusing the ixad freed
- * BUT with new/different backing inode extent from the extent
- * to be freed by the transaction;
- */
- tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
- mutex_lock(&JFS_IP(ipimap)->commit_mutex);
- /* acquire tlock of the iag page of the freed ixad
- * to force the page NOHOMEOK (even though no data is
- * logged from the iag page) until NOREDOPAGE|FREEXTENT log
- * for the free of the extent is committed;
- * write FREEXTENT|NOREDOPAGE log record
- * N.B. linelock is overlaid as freed extent descriptor;
- */
- tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
- pxdlock = (struct pxd_lock *) & tlck->lock;
- pxdlock->flag = mlckFREEPXD;
- pxdlock->pxd = freepxd;
- pxdlock->index = 1;
- write_metapage(mp);
- iplist[0] = ipimap;
- /*
- * logredo needs the IAG number and IAG extent index in order
- * to ensure that the IMap is consistent. The least disruptive
- * way to pass these values through to the transaction manager
- * is in the iplist array.
- *
- * It's not pretty, but it works.
- */
- iplist[1] = (struct inode *) (size_t)iagno;
- iplist[2] = (struct inode *) (size_t)extno;
- rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
- txEnd(tid);
- mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
- /* unlock the AG inode map information */
- AG_UNLOCK(imap, agno);
- return (0);
- error_out:
- IREAD_UNLOCK(ipimap);
- if (amp)
- release_metapage(amp);
- if (bmp)
- release_metapage(bmp);
- if (cmp)
- release_metapage(cmp);
- if (dmp)
- release_metapage(dmp);
- AG_UNLOCK(imap, agno);
- release_metapage(mp);
- return (rc);
- }
- /*
- * There are several places in the diAlloc* routines where we initialize
- * the inode.
- */
- static inline void
- diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
- {
- struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
- struct jfs_inode_info *jfs_ip = JFS_IP(ip);
- ip->i_ino = (iagno << L2INOSPERIAG) + ino;
- jfs_ip->ixpxd = iagp->inoext[extno];
- jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
- jfs_ip->active_ag = -1;
- }
- /*
- * NAME: diAlloc(pip,dir,ip)
- *
- * FUNCTION: allocate a disk inode from the inode working map
- * for a fileset or aggregate.
- *
- * PARAMETERS:
- * pip - pointer to incore inode for the parent inode.
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to a new inode
- *
- * RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
- */
- int diAlloc(struct inode *pip, bool dir, struct inode *ip)
- {
- int rc, ino, iagno, addext, extno, bitno, sword;
- int nwords, rem, i, agno;
- u32 mask, inosmap, extsmap;
- struct inode *ipimap;
- struct metapage *mp;
- ino_t inum;
- struct iag *iagp;
- struct inomap *imap;
- /* get the pointers to the inode map inode and the
- * corresponding imap control structure.
- */
- ipimap = JFS_SBI(pip->i_sb)->ipimap;
- imap = JFS_IP(ipimap)->i_imap;
- JFS_IP(ip)->ipimap = ipimap;
- JFS_IP(ip)->fileset = FILESYSTEM_I;
- /* for a directory, the allocation policy is to start
- * at the ag level using the preferred ag.
- */
- if (dir) {
- agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
- AG_LOCK(imap, agno);
- goto tryag;
- }
- /* for files, the policy starts off by trying to allocate from
- * the same iag containing the parent disk inode:
- * try to allocate the new disk inode close to the parent disk
- * inode, using parent disk inode number + 1 as the allocation
- * hint. (we use a left-to-right policy to attempt to avoid
- * moving backward on the disk.) compute the hint within the
- * file system and the iag.
- */
- /* get the ag number of this iag */
- agno = JFS_IP(pip)->agno;
- if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
- /*
- * There is an open file actively growing. We want to
- * allocate new inodes from a different ag to avoid
- * fragmentation problems.
- */
- agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
- AG_LOCK(imap, agno);
- goto tryag;
- }
- inum = pip->i_ino + 1;
- ino = inum & (INOSPERIAG - 1);
- /* back off the hint if it is outside of the iag */
- if (ino == 0)
- inum = pip->i_ino;
- /* lock the AG inode map information */
- AG_LOCK(imap, agno);
- /* Get read lock on imap inode */
- IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
- /* get the iag number and read the iag */
- iagno = INOTOIAG(inum);
- if ((rc = diIAGRead(imap, iagno, &mp))) {
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- return (rc);
- }
- iagp = (struct iag *) mp->data;
- /* determine if new inode extent is allowed to be added to the iag.
- * new inode extent can be added to the iag if the ag
- * has less than 32 free disk inodes and the iag has free extents.
- */
- addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
- /*
- * try to allocate from the IAG
- */
- /* check if the inode may be allocated from the iag
- * (i.e. the inode has free inodes or new extent can be added).
- */
- if (iagp->nfreeinos || addext) {
- /* determine the extent number of the hint.
- */
- extno = ino >> L2INOSPEREXT;
- /* check if the extent containing the hint has backed
- * inodes. if so, try to allocate within this extent.
- */
- if (addressPXD(&iagp->inoext[extno])) {
- bitno = ino & (INOSPEREXT - 1);
- if ((bitno =
- diFindFree(le32_to_cpu(iagp->wmap[extno]),
- bitno))
- < INOSPEREXT) {
- ino = (extno << L2INOSPEREXT) + bitno;
- /* a free inode (bit) was found within this
- * extent, so allocate it.
- */
- rc = diAllocBit(imap, iagp, ino);
- IREAD_UNLOCK(ipimap);
- if (rc) {
- assert(rc == -EIO);
- } else {
- /* set the results of the allocation
- * and write the iag.
- */
- diInitInode(ip, iagno, ino, extno,
- iagp);
- mark_metapage_dirty(mp);
- }
- release_metapage(mp);
- /* free the AG lock and return.
- */
- AG_UNLOCK(imap, agno);
- return (rc);
- }
- if (!addext)
- extno =
- (extno ==
- EXTSPERIAG - 1) ? 0 : extno + 1;
- }
- /*
- * no free inodes within the extent containing the hint.
- *
- * try to allocate from the backed extents following
- * hint or, if appropriate (i.e. addext is true), allocate
- * an extent of free inodes at or following the extent
- * containing the hint.
- *
- * the free inode and free extent summary maps are used
- * here, so determine the starting summary map position
- * and the number of words we'll have to examine. again,
- * the approach is to allocate following the hint, so we
- * might have to initially ignore prior bits of the summary
- * map that represent extents prior to the extent containing
- * the hint and later revisit these bits.
- */
- bitno = extno & (EXTSPERSUM - 1);
- nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
- sword = extno >> L2EXTSPERSUM;
- /* mask any prior bits for the starting words of the
- * summary map.
- */
- mask = ONES << (EXTSPERSUM - bitno);
- inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
- extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
- /* scan the free inode and free extent summary maps for
- * free resources.
- */
- for (i = 0; i < nwords; i++) {
- /* check if this word of the free inode summary
- * map describes an extent with free inodes.
- */
- if (~inosmap) {
- /* an extent with free inodes has been
- * found. determine the extent number
- * and the inode number within the extent.
- */
- rem = diFindFree(inosmap, 0);
- extno = (sword << L2EXTSPERSUM) + rem;
- rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
- 0);
- if (rem >= INOSPEREXT) {
- IREAD_UNLOCK(ipimap);
- release_metapage(mp);
- AG_UNLOCK(imap, agno);
- jfs_error(ip->i_sb,
- "diAlloc: can't find free bit "
- "in wmap");
- return -EIO;
- }
- /* determine the inode number within the
- * iag and allocate the inode from the
- * map.
- */
- ino = (extno << L2INOSPEREXT) + rem;
- rc = diAllocBit(imap, iagp, ino);
- IREAD_UNLOCK(ipimap);
- if (rc)
- assert(rc == -EIO);
- else {
- /* set the results of the allocation
- * and write the iag.
- */
- diInitInode(ip, iagno, ino, extno,
- iagp);
- mark_metapage_dirty(mp);
- }
- release_metapage(mp);
- /* free the AG lock and return.
- */
- AG_UNLOCK(imap, agno);
- return (rc);
- }
- /* check if we may allocate an extent of free
- * inodes and whether this word of the free
- * extents summary map describes a free extent.
- */
- if (addext && ~extsmap) {
- /* a free extent has been found. determine
- * the extent number.
- */
- rem = diFindFree(extsmap, 0);
- extno = (sword << L2EXTSPERSUM) + rem;
- /* allocate an extent of free inodes.
- */
- if ((rc = diNewExt(imap, iagp, extno))) {
- /* if there is no disk space for a
- * new extent, try to allocate the
- * disk inode from somewhere else.
- */
- if (rc == -ENOSPC)
- break;
- assert(rc == -EIO);
- } else {
- /* set the results of the allocation
- * and write the iag.
- */
- diInitInode(ip, iagno,
- extno << L2INOSPEREXT,
- extno, iagp);
- mark_metapage_dirty(mp);
- }
- release_metapage(mp);
- /* free the imap inode & the AG lock & return.
- */
- IREAD_UNLOCK(ipimap);
- AG_UNLOCK(imap, agno);
- return (rc);
- }
- /* move on to the next set of summary map words.
- */
- sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
- inosmap = le32_to_cpu(iagp->inosmap[sword]);
- extsmap = le32_to_cpu(iagp->extsmap[sword]);
- }
- }
- /* unlock imap inode */
- IREAD_UNLOCK(ipimap);
- /* nothing doing in this iag, so release it. */
- release_metapage(mp);
- tryag:
- /*
- * try to allocate anywhere within the same AG as the parent inode.
- */
- rc = diAllocAG(imap, agno, dir, ip);
- AG_UNLOCK(imap, agno);
- if (rc != -ENOSPC)
- return (rc);
- /*
- * try to allocate in any AG.
- */
- return (diAllocAny(imap, agno, dir, ip));
- }
- /*
- * NAME: diAllocAG(imap,agno,dir,ip)
- *
- * FUNCTION: allocate a disk inode from the allocation group.
- *
- * this routine first determines if a new extent of free
- * inodes should be added for the allocation group, with
- * the current request satisfied from this extent. if this
- * is the case, an attempt will be made to do just that. if
- * this attempt fails or it has been determined that a new
- * extent should not be added, an attempt is made to satisfy
- * the request by allocating an existing (backed) free inode
- * from the allocation group.
- *
- * PRE CONDITION: Already have the AG lock for this AG.
- *
- * PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - allocation group to allocate from.
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to the new inode to be filled in on successful return
- * with the disk inode number allocated, its extent address
- * and the start of the ag.
- *
- * RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
- */
- static int
- diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
- {
- int rc, addext, numfree, numinos;
- /* get the number of free and the number of backed disk
- * inodes currently within the ag.
- */
- numfree = imap->im_agctl[agno].numfree;
- numinos = imap->im_agctl[agno].numinos;
- if (numfree > numinos) {
- jfs_error(ip->i_sb, "diAllocAG: numfree > numinos");
- return -EIO;
- }
- /* determine if we should allocate a new extent of free inodes
- * within the ag: for directory inodes, add a new extent
- * if there are a small number of free inodes or number of free
- * inodes is a small percentage of the number of backed inodes.
- */
- if (dir)
- addext = (numfree < 64 ||
- (numfree < 256
- && ((numfree * 100) / numinos) <= 20));
- else
- addext = (numfree == 0);
- /*
- * try to allocate a new extent of free inodes.
- */
- if (addext) {
- /* if free space is not avaliable for this new extent, try
- * below to allocate a free and existing (already backed)
- * inode from the ag.
- */
- if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
- return (rc);
- }
- /*
- * try to allocate an existing free inode from the ag.
- */
- return (diAllocIno(imap, agno, ip));
- }
- /*
- * NAME: diAllocAny(imap,agno,dir,iap)
- *
- * FUNCTION: allocate a disk inode from any other allocation group.
- *
- * this routine is called when an allocation attempt within
- * the primary allocation group has failed. if attempts to
- * allocate an inode from any allocation group other than the
- * specified primary group.
- *
- * PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - primary allocation group (to avoid).
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to a new inode to be filled in on successful return
- * with the disk inode number allocated, its extent address
- * and the start of the ag.
- *
- * RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
- */
- static int
- diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
- {
- int ag, rc;
- int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
- /* try to allocate from the ags following agno up to
- * the maximum ag number.
- */
- for (ag = agno + 1; ag <= maxag; ag++) {
- AG_LOCK(imap, ag);
- rc = diAllocAG(imap, ag, dir, ip);
- AG_UNLOCK(imap, ag);
- if (rc != -ENOSPC)
- return (rc);
- }
- /* try to allocate from the ags in front of agno.
- */
- for (ag = 0; ag < agno; ag++) {
- AG_LOCK(imap, ag);
- rc = diAllocAG(imap, ag, dir, ip);
- AG_UNLOCK(imap, ag);
- if (rc != -ENOSPC)
- return (rc);
- }
- /* no free disk inodes.
- */
- return -ENOSPC;
- }
- /*
- * NAME: diAllocIno(imap,agno,ip)
- *
- * FUNCTION: allocate a disk inode from the allocation group's free
- * inode list, returning an error if this free list is
- * empty (i.e. no iags on the list).
- *
- * allocation occurs from the first iag on the list using
- * the iag's free inode summary map to find the leftmost
- * free inode in the iag.
- *
- * PRE CONDITION: Already have AG lock for this AG.
- *
- * PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - allocation group.
- * ip - pointer to new inode to be filled in on successful return
- * with the disk inode number allocated, its extent address
- * and the start of the ag.
- *
- * RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
- */
- static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
- {
- int iagno, ino, rc, rem, extno, sword;
- struct metapage *mp;
- struct iag *iagp;
- /* check if there are iags on the ag's free inode list.
- */
- if ((iagno = imap->im_agctl[agno].inofree) < 0)
- return -ENOSPC;
- /* obtain read lock on imap inode */
- IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
- /* read the iag at the head of the list.
- */
- if ((rc = diIAGRead(imap, iagno, &mp))) {
- IREAD_UNLOCK(imap->im_ipimap);
- return (rc);
- }
- iagp = (struct iag *) mp->data;
- /* better be free inodes in this iag if it is on the
- * list.
- */
- if (!iagp->nfreeinos) {
- IREAD_UNLOCK(imap->im_ipimap);
- release_metapage(mp);
- jfs_error(ip->i_sb,
- "diAllocIno: nfreeinos = 0, but iag on freelist");
- return -EIO;
- }
- /* scan the free inode summary map to find an extent
- * with free inodes.
- */
- for (sword = 0;; sword++) {
- if (sword >= SMAPSZ) {
- IREAD_UNLOCK(imap->im_ipimap);
- release_metapage(mp);
- jfs_error(ip->i_sb,
- "diAllocIno: free inode not found in summary map");
- return -EIO;
- }
- if (~iagp->inosmap[sword])
- break;
- }
- /* found a extent with free inodes. determine
- * the extent number.
- */
- rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
- if (rem >= EXTSPERSUM) {
- IREAD_UNLOCK(imap->im_ipimap);
- release_metapage(mp);
- jfs_error(ip->i_sb, "diAllocIno: no free extent found");
- return -EIO;
- }
- extno = (sword << L2EXTSPERSUM) + rem;
- /* find the first free inode in the extent.
- */
- rem…