Author: lulf
Date: Thu Jan 14 14:30:54 2010
New Revision: 202283
URL: http://svn.freebsd.org/changeset/base/202283

Log:
  Bring in the ext2fs work done by Aditya Sarawgi during and after Google Summer
  of Code 2009:
  
  - BSDL block and inode allocation policies for ext2fs. This involves the use
    of FFS1-style block and inode allocation for ext2fs. Preallocation was
    removed since it was GPL'd.
  - Make ext2fs MPSAFE by introducing locks to per-mount data structures.
  - Fixes for kern/122047 PR.
  - Various small bugfixes.
  - Move out of gnu/ directory.
  
  Sponsored by:   Google Inc.
  Submitted by: Aditya Sarawgi <sarawgi.aditya AT SPAMFREE gmail DOT com>

Added:
  head/sys/fs/ext2fs/
     - copied from r201427, head/sys/gnu/fs/ext2fs/
  head/sys/fs/ext2fs/ext2_dinode.h   (contents, props changed)
  head/sys/fs/ext2fs/ext2_dir.h   (contents, props changed)
  head/sys/fs/ext2fs/ext2fs.h   (contents, props changed)
Deleted:
  head/sys/fs/ext2fs/COPYRIGHT.INFO
  head/sys/fs/ext2fs/ext2_bitops.h
  head/sys/fs/ext2fs/ext2_fs.h
  head/sys/fs/ext2fs/ext2_fs_sb.h
  head/sys/fs/ext2fs/ext2_linux_balloc.c
  head/sys/fs/ext2fs/ext2_linux_ialloc.c
  head/sys/fs/ext2fs/i386-bitops.h
  head/sys/gnu/fs/ext2fs/
Modified:
  head/sys/conf/files
  head/sys/fs/ext2fs/ext2_alloc.c
  head/sys/fs/ext2fs/ext2_balloc.c
  head/sys/fs/ext2fs/ext2_bmap.c
  head/sys/fs/ext2fs/ext2_extern.h
  head/sys/fs/ext2fs/ext2_inode.c
  head/sys/fs/ext2fs/ext2_inode_cnv.c
  head/sys/fs/ext2fs/ext2_lookup.c
  head/sys/fs/ext2fs/ext2_mount.h
  head/sys/fs/ext2fs/ext2_readwrite.c
  head/sys/fs/ext2fs/ext2_subr.c
  head/sys/fs/ext2fs/ext2_vfsops.c
  head/sys/fs/ext2fs/ext2_vnops.c
  head/sys/fs/ext2fs/fs.h
  head/sys/fs/ext2fs/inode.h
  head/sys/gnu/fs/reiserfs/reiserfs_fs.h
  head/sys/modules/ext2fs/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Jan 14 11:03:26 2010        (r202282)
+++ head/sys/conf/files Thu Jan 14 14:30:54 2010        (r202283)
@@ -1956,18 +1956,15 @@ geom/virstor/binstream.c        optional geom_v
 geom/virstor/g_virstor.c       optional geom_virstor
 geom/virstor/g_virstor_md.c    optional geom_virstor
 geom/zero/g_zero.c             optional geom_zero
-gnu/fs/ext2fs/ext2_alloc.c             optional ext2fs \
-       warning "kernel contains GPL contaminated ext2fs filesystem"
-gnu/fs/ext2fs/ext2_balloc.c    optional ext2fs
-gnu/fs/ext2fs/ext2_bmap.c      optional ext2fs
-gnu/fs/ext2fs/ext2_inode.c     optional ext2fs
-gnu/fs/ext2fs/ext2_inode_cnv.c optional ext2fs
-gnu/fs/ext2fs/ext2_linux_balloc.c      optional ext2fs
-gnu/fs/ext2fs/ext2_linux_ialloc.c      optional ext2fs
-gnu/fs/ext2fs/ext2_lookup.c    optional ext2fs
-gnu/fs/ext2fs/ext2_subr.c      optional ext2fs
-gnu/fs/ext2fs/ext2_vfsops.c    optional ext2fs
-gnu/fs/ext2fs/ext2_vnops.c     optional ext2fs
+fs/ext2fs/ext2_alloc.c         optional ext2fs
+fs/ext2fs/ext2_balloc.c                optional ext2fs
+fs/ext2fs/ext2_bmap.c          optional ext2fs
+fs/ext2fs/ext2_inode.c         optional ext2fs
+fs/ext2fs/ext2_inode_cnv.c     optional ext2fs
+fs/ext2fs/ext2_lookup.c                optional ext2fs
+fs/ext2fs/ext2_subr.c          optional ext2fs
+fs/ext2fs/ext2_vfsops.c                optional ext2fs
+fs/ext2fs/ext2_vnops.c         optional ext2fs
 gnu/fs/reiserfs/reiserfs_hashes.c      optional reiserfs \
        warning "kernel contains GPL contaminated ReiserFS filesystem"
 gnu/fs/reiserfs/reiserfs_inode.c       optional reiserfs

Modified: head/sys/fs/ext2fs/ext2_alloc.c
==============================================================================
--- head/sys/gnu/fs/ext2fs/ext2_alloc.c Sun Jan  3 12:17:51 2010        (r201427)
+++ head/sys/fs/ext2fs/ext2_alloc.c     Thu Jan 14 14:30:54 2010        (r202283)
@@ -43,52 +43,54 @@
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/syslog.h>
+#include <sys/buf.h>
 
-#include <gnu/fs/ext2fs/inode.h>
-#include <gnu/fs/ext2fs/ext2_mount.h>
-#include <gnu/fs/ext2fs/ext2_fs.h>
-#include <gnu/fs/ext2fs/ext2_fs_sb.h>
-#include <gnu/fs/ext2fs/fs.h>
-#include <gnu/fs/ext2fs/ext2_extern.h>
-
-static void    ext2_fserr(struct ext2_sb_info *, u_int, char *);
-
-/*
- * Linux calls this functions at the following locations:
- * (1) the inode is freed
- * (2) a preallocation miss occurs
- * (3) truncate is called
- * (4) release_file is called and f_mode & 2
- *
- * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2)
- * the call in vfree might be redundant
- */
-void
-ext2_discard_prealloc(ip)
-       struct inode * ip;
-{
-#ifdef EXT2_PREALLOCATE
-        if (ip->i_prealloc_count) {
-                int i = ip->i_prealloc_count;
-                ip->i_prealloc_count = 0;
-                ext2_free_blocks (ITOV(ip)->v_mount,
-                                  ip->i_prealloc_block,
-                                  i);
-        }
-#endif
-}
-
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+
+static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int);
+static u_long  ext2_dirpref(struct inode *);
+static void    ext2_fserr(struct m_ext2fs *, uid_t, char *);
+static u_long  ext2_hashalloc(struct inode *, int, long, int,
+                               daddr_t (*)(struct inode *, int, daddr_t, 
+                                               int));
+static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int);
+static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
 /*
  * Allocate a block in the file system.
- * 
- * this takes the framework from ffs_alloc. To implement the
- * actual allocation, it calls ext2_new_block, the ported version
- * of the same Linux routine.
  *
- * we note that this is always called in connection with ext2_blkpref
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *        inode for the file.
+ *   2) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
  *
- * preallocation is done as Linux does it
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *        inode for the file.
+ *   2) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
  */
+
 int
 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
        struct inode *ip;
@@ -97,75 +99,46 @@ ext2_alloc(ip, lbn, bpref, size, cred, b
        struct ucred *cred;
        int32_t *bnp;
 {
-       struct ext2_sb_info *fs;
+       struct m_ext2fs *fs;
+       struct ext2mount *ump;
        int32_t bno;
-       
+       int cg; 
        *bnp = 0;
        fs = ip->i_e2fs;
+       ump = ip->i_ump;
+       mtx_assert(EXT2_MTX(ump), MA_OWNED);
 #ifdef DIAGNOSTIC
-       if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) {
+       if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
                vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
-                   fs->s_blocksize, size, fs->fs_fsmnt);
+                   (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
                panic("ext2_alloc: bad size");
        }
        if (cred == NOCRED)
                panic("ext2_alloc: missing credential");
 #endif /* DIAGNOSTIC */
-       if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0)
+       if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
                goto nospace;
        if (cred->cr_uid != 0 && 
-               fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count)
+               fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
                goto nospace;
-       if (bpref >= fs->s_es->s_blocks_count)
+       if (bpref >= fs->e2fs->e2fs_bcount)
                bpref = 0;
-       /* call the Linux code */
-#ifdef EXT2_PREALLOCATE
-       /* To have a preallocation hit, we must
-        * - have at least one block preallocated
-        * - and our preferred block must have that block number or one below
-        */
-        if (ip->i_prealloc_count &&
-            (bpref == ip->i_prealloc_block ||
-             bpref + 1 == ip->i_prealloc_block))
-        {
-                bno = ip->i_prealloc_block++;
-                ip->i_prealloc_count--;
-                /* ext2_debug ("preallocation hit (%lu/%lu).\n",
-                            ++alloc_hits, ++alloc_attempts); */
-
-               /* Linux gets, clears, and releases the buffer at this
-                  point - we don't have to that; we leave it to the caller
-                */
-        } else {
-                ext2_discard_prealloc (ip);
-                /* ext2_debug ("preallocation miss (%lu/%lu).\n",
-                            alloc_hits, ++alloc_attempts); */
-                if (S_ISREG(ip->i_mode))
-                        bno = ext2_new_block
-                                (ITOV(ip)->v_mount, bpref,
-                                 &ip->i_prealloc_count,
-                                 &ip->i_prealloc_block);
-                else
-                       bno = (int32_t)ext2_new_block(ITOV(ip)->v_mount, 
-                                       bpref, 0, 0);
+        if (bpref == 0)
+                cg = ino_to_cg(fs, ip->i_number);
+        else
+                cg = dtog(fs, bpref);
+        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
+                                                 ext2_alloccg);
+        if (bno > 0) {
+                ip->i_blocks += btodb(fs->e2fs_bsize);
+                ip->i_flag |= IN_CHANGE | IN_UPDATE;
+                *bnp = bno;
+                return (0);
         }
-#else
-       bno = (int32_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0);
-#endif
-
-       if (bno > 0) {
-               /* set next_alloc fields as done in block_getblk */
-               ip->i_next_alloc_block = lbn;
-               ip->i_next_alloc_goal = bno;
-
-               ip->i_blocks += btodb(size);
-               ip->i_flag |= IN_CHANGE | IN_UPDATE;
-               *bnp = bno;
-               return (0);
-       }
 nospace:
+       EXT2_UNLOCK(ump);
        ext2_fserr(fs, cred->cr_uid, "file system full");
-       uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+       uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
        return (ENOSPC);
 }
 
@@ -187,6 +160,8 @@ nospace:
 #ifdef FANCY_REALLOC
 #include <sys/sysctl.h>
 static int doasyncfree = 1;
+static int doreallocblks = 1;
+
 #ifdef OPT_DEBUG
 SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
 #endif /* OPT_DEBUG */
@@ -204,19 +179,21 @@ ext2_reallocblks(ap)
 return ENOSPC;
 #else
 
-       struct ext2_sb_info *fs;
+       struct m_ext2fs *fs;
        struct inode *ip;
        struct vnode *vp;
        struct buf *sbp, *ebp;
-       int32_t *bap, *sbap, *ebap;
+       int32_t *bap, *sbap, *ebap = 0;
+       struct ext2mount *ump;
        struct cluster_save *buflist;
-       int32_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
        struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
+       int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
        int i, len, start_lvl, end_lvl, pref, ssize;
 
        vp = ap->a_vp;
        ip = VTOI(vp);
        fs = ip->i_e2fs;
+       ump = ip->i_ump;
 #ifdef UNKLAR
        if (fs->fs_contigsumsize <= 0)
                return (ENOSPC);
@@ -238,8 +215,8 @@ return ENOSPC;
        if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
            dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
                return (ENOSPC);
-       if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
-           ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
+       if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
+           ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
                return (ENOSPC);
        /*
         * Get the starting offset and block map for the first block.
@@ -249,7 +226,7 @@ return ENOSPC;
                soff = start_lbn;
        } else {
                idp = &start_ap[start_lvl - 1];
-               if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) {
+               if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
                        brelse(sbp);
                        return (ENOSPC);
                }
@@ -259,7 +236,8 @@ return ENOSPC;
        /*
         * Find the preferred location for the cluster.
         */
-       pref = ext2_blkpref(ip, start_lbn, soff, sbap);
+       EXT2_LOCK(ump); 
+       pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
        /*
         * If the block range spans two block maps, get the second map.
         */
@@ -271,16 +249,20 @@ return ENOSPC;
                        panic("ext2_reallocblk: start == end");
 #endif
                ssize = len - (idp->in_off + 1);
-               if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp))
+               if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
+                       EXT2_UNLOCK(ump);       
                        goto fail;
+               }
                ebap = (int32_t *)ebp->b_data;
        }
        /*
         * Search the block map looking for an allocation of the desired size.
         */
-       if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref,
-           len, (u_long (*)())ext2_clusteralloc)) == 0)
+       if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
+           len, ext2_clusteralloc)) == 0){
+               EXT2_UNLOCK(ump);
                goto fail;
+       }       
        /*
         * We have found a new contiguous block.
         *
@@ -289,9 +271,10 @@ return ENOSPC;
         * with the file.
         */
        blkno = newblk;
-       for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
+       for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
                if (i == ssize)
                        bap = ebap;
+                       soff = -i;
 #ifdef DIAGNOSTIC
                if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
                        panic("ext2_reallocblks: alloc mismatch");
@@ -322,17 +305,18 @@ return ENOSPC;
                if (!doasyncfree)
                        ext2_update(vp, 1);
        }
-       if (ssize < len)
+       if (ssize < len) {
                if (doasyncfree)
                        bdwrite(ebp);
                else
                        bwrite(ebp);
+       }
        /*
         * Last, free the old blocks and assign the new blocks to the buffers.
         */
-       for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
+       for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
                ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
-                   fs->s_blocksize);
+                   fs->e2fs_bsize);
                buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
        }
        return (0);
@@ -350,8 +334,6 @@ fail:
 /*
  * Allocate an inode in the file system.
  * 
- * we leave the actual allocation strategy to the (modified)
- * ext2_new_inode(), to make sure we get the policies right
  */
 int
 ext2_valloc(pvp, mode, cred, vpp)
@@ -361,21 +343,38 @@ ext2_valloc(pvp, mode, cred, vpp)
        struct vnode **vpp;
 {
        struct inode *pip;
-       struct ext2_sb_info *fs;
+       struct m_ext2fs *fs;
        struct inode *ip;
-       ino_t ino;
-       int i, error;
+       struct ext2mount *ump;
+       ino_t ino, ipref;
+       int i, error, cg;
        
        *vpp = NULL;
        pip = VTOI(pvp);
        fs = pip->i_e2fs;
-       if (fs->s_es->s_free_inodes_count == 0)
-               goto noinodes;
+       ump = pip->i_ump;
 
-       /* call the Linux routine - it returns the inode number only */
-       ino = ext2_new_inode(pip, mode);
+       EXT2_LOCK(ump);
+       if (fs->e2fs->e2fs_ficount == 0)
+               goto noinodes;
+       /*
+        * If it is a directory then obtain a cylinder group based on
+        * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
+        * always the next inode.
+        */
+       if((mode & IFMT) == IFDIR) {
+               cg = ext2_dirpref(pip);
+               if (fs->e2fs_contigdirs[cg] < 255)
+                       fs->e2fs_contigdirs[cg]++;
+       } else {
+               cg = ino_to_cg(fs, pip->i_number);
+               if (fs->e2fs_contigdirs[cg] > 0)
+                       fs->e2fs_contigdirs[cg]--;
+       }
+       ipref = cg * fs->e2fs->e2fs_ipg + 1;
+       ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
 
-       if (ino == 0)
+       if (ino == 0) 
                goto noinodes;
        error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
        if (error) {
@@ -410,12 +409,126 @@ printf("ext2_valloc: allocated inode %d\
 */
        return (0);
 noinodes:
+       EXT2_UNLOCK(ump);
        ext2_fserr(fs, cred->cr_uid, "out of inodes");
-       uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
+       uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
        return (ENOSPC);
 }
 
 /*
+ * Find a cylinder to place a directory.
+ *
+ * The policy implemented by this algorithm is to allocate a
+ * directory inode in the same cylinder group as its parent
+ * directory, but also to reserve space for its files inodes
+ * and data. Restrict the number of directories which may be
+ * allocated one after another in the same cylinder group
+ * without intervening allocation of files.
+ *
+ * If we allocate a first level directory then force allocation
+ * in another cylinder group.
+ *
+ */
+static u_long
+ext2_dirpref(struct inode *pip)
+{
+       struct m_ext2fs *fs;
+        int cg, prefcg, dirsize, cgsize;
+       int avgifree, avgbfree, avgndir, curdirsize;
+       int minifree, minbfree, maxndir;
+       int mincg, minndir;
+       int maxcontigdirs;
+
+       mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
+       fs = pip->i_e2fs;
+
+       avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
+       avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
+       avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
+
+       /*
+        * Force allocation in another cg if creating a first level dir.
+        */
+       ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
+       if (ITOV(pip)->v_vflag & VV_ROOT) {
+               prefcg = arc4random() % fs->e2fs_gcount;
+               mincg = prefcg;
+               minndir = fs->e2fs_ipg;
+               for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+                       if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
+                           fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
+                           fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
+                               mincg = cg;
+                               minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
+                       }
+               for (cg = 0; cg < prefcg; cg++)
+                       if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
+                            fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
+                            fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
+                                mincg = cg;
+                                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
+                        }
+
+               return (mincg);
+       }
+
+       /*
+        * Count various limits which used for
+        * optimal allocation of a directory inode.
+        */
+       maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
+       minifree = avgifree - avgifree / 4;
+       if (minifree < 1)
+               minifree = 1;
+       minbfree = avgbfree - avgbfree / 4;
+       if (minbfree < 1)
+               minbfree = 1;
+       cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
+       dirsize = AVGDIRSIZE;
+       curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
+       if (dirsize < curdirsize)
+               dirsize = curdirsize;
+       if (dirsize <= 0)
+               maxcontigdirs = 0;              /* dirsize overflowed */
+       else
+               maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
+       maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
+       if (maxcontigdirs == 0)
+               maxcontigdirs = 1;
+
+       /*
+        * Limit number of dirs in one cg and reserve space for 
+        * regular files, but only if we have no deficit in
+        * inodes or space.
+        */
+       prefcg = ino_to_cg(fs, pip->i_number);
+       for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+               if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
+                   fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
+                   fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
+                       if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
+                               return (cg);
+               }
+       for (cg = 0; cg < prefcg; cg++)
+               if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
+                   fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
+                   fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
+                       if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
+                               return (cg);
+               }
+       /*
+        * This is a backstop when we have deficit in space.
+        */
+       for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+               if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
+                       return (cg);
+       for (cg = 0; cg < prefcg; cg++)
+               if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
+                       break;
+       return (cg);
+}
+
+/*
  * Select the desired position for the next block in a file.  
  *
  * we try to mimic what Remy does in inode_getblk/block_getblk
@@ -437,11 +550,12 @@ ext2_blkpref(ip, lbn, indx, bap, blocknr
        int32_t blocknr;
 {
        int     tmp;
+       mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
 
        /* if the next block is actually what we thought it is,
           then set the goal to what we thought it should be
        */
-       if(ip->i_next_alloc_block == lbn)
+       if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
                return ip->i_next_alloc_goal;
 
        /* now check whether we were provided with an array that basically
@@ -458,13 +572,230 @@ ext2_blkpref(ip, lbn, indx, bap, blocknr
        return blocknr ? blocknr :
                        (int32_t)(ip->i_block_group * 
                        EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 
-                       ip->i_e2fs->s_es->s_first_data_block;
+                       ip->i_e2fs->e2fs->e2fs_first_dblock;
+}
+
+/*
+ * Implement the cylinder overflow algorithm.
+ *
+ * The policy implemented by this algorithm is:
+ *   1) allocate the block in its requested cylinder group.
+ *   2) quadradically rehash on the cylinder group number.
+ *   3) brute force search for a free block.
+ */
+static u_long
+ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
+                daddr_t (*allocator)(struct inode *, int, daddr_t, int))
+{
+       struct m_ext2fs *fs;
+       ino_t result;
+       int i, icg = cg;
+
+       mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
+       fs = ip->i_e2fs;
+       /*
+        * 1: preferred cylinder group
+        */
+       result = (*allocator)(ip, cg, pref, size);
+       if (result)
+               return (result);
+       /*
+        * 2: quadratic rehash
+        */
+       for (i = 1; i < fs->e2fs_gcount; i *= 2) {
+               cg += i;
+               if (cg >= fs->e2fs_gcount)
+                       cg -= fs->e2fs_gcount;
+               result = (*allocator)(ip, cg, 0, size);
+               if (result)
+                       return (result);
+       }
+       /*
+        * 3: brute force search
+        * Note that we start at i == 2, since 0 was checked initially,
+        * and 1 is always checked in the quadratic rehash.
+        */
+       cg = (icg + 2) % fs->e2fs_gcount;
+       for (i = 2; i < fs->e2fs_gcount; i++) {
+               result = (*allocator)(ip, cg, 0, size);
+               if (result)
+                       return (result);
+               cg++;
+               if (cg == fs->e2fs_gcount)
+                       cg = 0;
+       }
+       return (0);
+}
+
+/*
+ * Determine whether a block can be allocated.
+ *
+ * Check to see if a block of the appropriate size is available,
+ * and if it is, allocate it.
+ */
+static daddr_t
+ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
+{
+       struct m_ext2fs *fs;
+       struct buf *bp;
+       struct ext2mount *ump;
+       int error, bno, start, end, loc;
+       char *bbp;
+       /* XXX ondisk32 */
+       fs = ip->i_e2fs;
+       ump = ip->i_ump;
+       if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+               return (0);
+       EXT2_UNLOCK(ump);
+       error = bread(ip->i_devvp, fsbtodb(fs,
+               fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+               (int)fs->e2fs_bsize, NOCRED, &bp);
+       if (error) {
+               brelse(bp);
+               EXT2_LOCK(ump);
+               return (0);
+       }
+       bbp = (char *)bp->b_data;
+
+       if (dtog(fs, bpref) != cg)
+               bpref = 0;
+       if (bpref != 0) {
+               bpref = dtogd(fs, bpref);
+               /*
+                * if the requested block is available, use it
+                */
+               if (isclr(bbp, bpref)) {
+                       bno = bpref;
+                       goto gotit;
+               }
+       }
+       /*
+        * no blocks in the requested cylinder, so take next
+        * available one in this cylinder group.
+        * first try to get 8 contigous blocks, then fall back to a single
+        * block.
+        */
+       if (bpref)
+               start = dtogd(fs, bpref) / NBBY;
+       else
+               start = 0;
+       end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+       for (loc = start; loc < end; loc++) {
+               if (bbp[loc] == 0) {
+                       bno = loc * NBBY;
+                       goto gotit;
+               }
+       }
+       for (loc = 0; loc < start; loc++) {
+               if (bbp[loc] == 0) {
+                       bno = loc * NBBY;
+                       goto gotit;
+               }
+       }
+
+       bno = ext2_mapsearch(fs, bbp, bpref);
+       if (bno < 0){
+               brelse(bp);
+               EXT2_LOCK(ump);
+               return (0);
+       }
+gotit:
+#ifdef DIAGNOSTIC
+       if (isset(bbp, (daddr_t)bno)) {
+               printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
+                       cg, bno, fs->e2fs_fsmnt);
+               panic("ext2fs_alloccg: dup alloc");
+       }
+#endif
+       setbit(bbp, (daddr_t)bno);
+       EXT2_LOCK(ump);
+       fs->e2fs->e2fs_fbcount--;
+       fs->e2fs_gd[cg].ext2bgd_nbfree--;
+       fs->e2fs_fmod = 1;
+       EXT2_UNLOCK(ump);
+       bdwrite(bp);
+       return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
+}
+
+/*
+ * Determine whether an inode can be allocated.
+ *
+ * Check to see if an inode is available, and if it is,
+ * allocate it using tode in the specified cylinder group.
+ */
+static daddr_t
+ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
+{
+       struct m_ext2fs *fs;
+       struct buf *bp;
+       struct ext2mount *ump;
+       int error, start, len, loc, map, i;
+       char *ibp;
+       ipref--; /* to avoid a lot of (ipref -1) */
+       if (ipref == -1)
+               ipref = 0;
+       fs = ip->i_e2fs;
+       ump = ip->i_ump;
+       if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
+               return (0);
+       EXT2_UNLOCK(ump);       
+       error = bread(ip->i_devvp, fsbtodb(fs,
+               fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+               (int)fs->e2fs_bsize, NOCRED, &bp);
+       if (error) {
+               brelse(bp);
+               EXT2_LOCK(ump);
+               return (0);
+       }
+       ibp = (char *)bp->b_data;
+       if (ipref) {
+               ipref %= fs->e2fs->e2fs_ipg;
+               if (isclr(ibp, ipref))
+                       goto gotit;
+       }
+       start = ipref / NBBY;
+       len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
+       loc = skpc(0xff, len, &ibp[start]);
+       if (loc == 0) {
+               len = start + 1;
+               start = 0;
+               loc = skpc(0xff, len, &ibp[0]);
+               if (loc == 0) {
+                       printf("cg = %d, ipref = %lld, fs = %s\n",
+                               cg, (long long)ipref, fs->e2fs_fsmnt);
+                       panic("ext2fs_nodealloccg: map corrupted");
+                       /* NOTREACHED */
+               }
+       } 
+       i = start + len - loc;
+       map = ibp[i];
+       ipref = i * NBBY;
+       for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
+               if ((map & i) == 0) {
+                       goto gotit;
+               }
+       }
+       printf("fs = %s\n", fs->e2fs_fsmnt);
+       panic("ext2fs_nodealloccg: block not in map");
+       /* NOTREACHED */
+gotit:
+       setbit(ibp, ipref);
+       EXT2_LOCK(ump);
+       fs->e2fs_gd[cg].ext2bgd_nifree--;
+       fs->e2fs->e2fs_ficount--;
+       fs->e2fs_fmod = 1;
+       if ((mode & IFMT) == IFDIR) {
+               fs->e2fs_gd[cg].ext2bgd_ndirs++;
+               fs->e2fs_total_dir++;
+       }
+       EXT2_UNLOCK(ump);
+       bdwrite(bp);
+       return (cg * fs->e2fs->e2fs_ipg + ipref +1);
 }
 
 /*
  * Free a block or fragment.
  *
- * pass on to the Linux code
  */
 void
 ext2_blkfree(ip, bno, size)
@@ -472,19 +803,47 @@ ext2_blkfree(ip, bno, size)
        int32_t bno;
        long size;
 {
-       struct ext2_sb_info *fs;
+       struct m_ext2fs *fs;
+       struct buf *bp;
+       struct ext2mount *ump;
+       int cg, error;
+       char *bbp;
 
        fs = ip->i_e2fs;
-       /*
-        *      call Linux code with mount *, block number, count
-        */
-       ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size);
+       ump = ip->i_ump;
+       cg = dtog(fs, bno);
+       if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
+                printf("bad block %lld, ino %llu\n", (long long)bno,
+                    (unsigned long long)ip->i_number);
+                ext2_fserr(fs, ip->i_uid, "bad block");
+                return;
+        }
+        error = bread(ip->i_devvp,
+                fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+                (int)fs->e2fs_bsize, NOCRED, &bp);
+        if (error) {
+                brelse(bp);
+                return;
+        }
+        bbp = (char *)bp->b_data;
+        bno = dtogd(fs, bno);
+        if (isclr(bbp, bno)) {
+                printf("block = %lld, fs = %s\n",
+                     (long long)bno, fs->e2fs_fsmnt);
+                panic("blkfree: freeing free block");
+        }
+        clrbit(bbp, bno);
+       EXT2_LOCK(ump);
+        fs->e2fs->e2fs_fbcount++;
+        fs->e2fs_gd[cg].ext2bgd_nbfree++;
+        fs->e2fs_fmod = 1;
+       EXT2_UNLOCK(ump);
+        bdwrite(bp);
 }
 
 /*
  * Free an inode.
  *
- * the maintenance of the actual bitmaps is again up to the linux code
  */
 int
 ext2_vfree(pvp, ino, mode)
@@ -492,30 +851,94 @@ ext2_vfree(pvp, ino, mode)
        ino_t ino;
        int mode;
 {
-       struct ext2_sb_info *fs;
+       struct m_ext2fs *fs;
        struct inode *pip;
-       mode_t save_i_mode;
+       struct buf *bp;
+       struct ext2mount *ump;
+       int error, cg;
+       char * ibp;
+/*     mode_t save_i_mode; */
 
        pip = VTOI(pvp);
        fs = pip->i_e2fs;
-       if ((u_int)ino > fs->s_inodes_per_group * fs->s_groups_count)
+       ump = pip->i_ump;
+       if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
                panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
-                   pip->i_devvp, ino, fs->fs_fsmnt);
+                   pip->i_devvp, ino, fs->e2fs_fsmnt);
 
-/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode);
+       cg = ino_to_cg(fs, ino);
+       error = bread(pip->i_devvp,
+               fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+               (int)fs->e2fs_bsize, NOCRED, &bp);
+       if (error) {
+               brelse(bp);
+               return (0);
+       }
+       ibp = (char *)bp->b_data;
+       ino = (ino - 1) % fs->e2fs->e2fs_ipg;
+       if (isclr(ibp, ino)) {
+               printf("ino = %llu, fs = %s\n",
+                        (unsigned long long)ino, fs->e2fs_fsmnt);
+               if (fs->e2fs_ronly == 0)
+                       panic("ifree: freeing free inode");
+       }
+       clrbit(ibp, ino);
+       EXT2_LOCK(ump);
+       fs->e2fs->e2fs_ficount++;
+       fs->e2fs_gd[cg].ext2bgd_nifree++;
+       if ((mode & IFMT) == IFDIR) {
+               fs->e2fs_gd[cg].ext2bgd_ndirs--;
+               fs->e2fs_total_dir--;
+       }
+       fs->e2fs_fmod = 1;
+       EXT2_UNLOCK(ump);
+       bdwrite(bp);
+       return (0);
+}
+
+/*
+ * Find a block in the specified cylinder group.
+ *
+ * It is a panic if a request is made to find a block if none are
+ * available.
  */
-       ext2_discard_prealloc(pip);
+static daddr_t
+ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
+{
+       daddr_t bno;
+       int start, len, loc, i, map;
 
-       /* we need to make sure that ext2_free_inode can adjust the
-          used_dir_counts in the group summary information - I'd
-          really like to know what the rationale behind this
-          'set i_mode to zero to denote an unused inode' is
+       /*
+        * find the fragment by searching through the free block
+        * map for an appropriate bit pattern
         */
-       save_i_mode = pip->i_mode;
-       pip->i_mode = mode;
-       ext2_free_inode(pip);   
-       pip->i_mode = save_i_mode;
-       return (0);
+       if (bpref)
+               start = dtogd(fs, bpref) / NBBY;
+       else
+               start = 0;
+       len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+       loc = skpc(0xff, len, &bbp[start]);
+       if (loc == 0) {
+               len = start + 1;
+               start = 0;
+               loc = skpc(0xff, len, &bbp[start]);
+               if (loc == 0) {
+                       printf("start = %d, len = %d, fs = %s\n",
+                               start, len, fs->e2fs_fsmnt);
+                       panic("ext2fs_alloccg: map corrupted");
+                       /* NOTREACHED */
+               }
+       }
+       i = start + len - loc;
+       map = bbp[i];
+       bno = i * NBBY;
+       for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
+               if ((map & i) == 0)
+                       return (bno);
+       }
+       printf("fs = %s\n", fs->e2fs_fsmnt);
+       panic("ext2fs_mapsearch: block not in map");
+       /* NOTREACHED */
 }
 
 /*
@@ -526,10 +949,25 @@ ext2_vfree(pvp, ino, mode)
  */
 static void
 ext2_fserr(fs, uid, cp)
-       struct ext2_sb_info *fs;
-       u_int uid;
+       struct m_ext2fs *fs;
+       uid_t uid;
        char *cp;
 {
 
-       log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
+       log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
+}
+
+int
+cg_has_sb(int i)
+{
+        int a3, a5, a7;
+
+        if (i == 0 || i == 1)
+                return 1;
+        for (a3 = 3, a5 = 5, a7 = 7;
+            a3 <= i || a5 <= i || a7 <= i;
+            a3 *= 3, a5 *= 5, a7 *= 7)
+                if (i == a3 || i == a5 || i == a7)
+                        return 1;
+        return 0;
 }

Modified: head/sys/fs/ext2fs/ext2_balloc.c
==============================================================================
--- head/sys/gnu/fs/ext2fs/ext2_balloc.c        Sun Jan  3 12:17:51 2010        (r201427)
+++ head/sys/fs/ext2fs/ext2_balloc.c    Thu Jan 14 14:30:54 2010        (r202283)
@@ -44,42 +44,39 @@
 #include <sys/ucred.h>
 #include <sys/vnode.h>
 
-#include <gnu/fs/ext2fs/inode.h>
-#include <gnu/fs/ext2fs/ext2_fs.h>
-#include <gnu/fs/ext2fs/ext2_fs_sb.h>
-#include <gnu/fs/ext2fs/fs.h>
-#include <gnu/fs/ext2fs/ext2_extern.h>
-
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_mount.h>
 /*
  * Balloc defines the structure of file system storage
  * by allocating the physical blocks on a device given
  * the inode and the logical block number in a file.
  */
 int
-ext2_balloc(ip, bn, size, cred, bpp, flags)
+ext2_balloc(ip, lbn, size, cred, bpp, flags)
        struct inode *ip;
-       int32_t bn;
+       int32_t lbn;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to