[PATCH 01/14] GFS: headers

Central header files that are widely used.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/gfs2.h              |   77 +++
 fs/gfs2/incore.h            |  691 +++++++++++++++++++++++++++
 include/linux/gfs2_ioctl.h  |   30 +
 include/linux/gfs2_ondisk.h | 1119 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1917 insertions(+)

--- a/fs/gfs2/gfs2.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/gfs2.h	2005-09-01 17:36:55.202132648 +0800
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __GFS2_DOT_H__
+#define __GFS2_DOT_H__
+
+#include <linux/gfs2_ondisk.h>
+
+#include "locking/harness/lm_interface.h"
+#include "lvb.h"
+#include "incore.h"
+#include "util.h"
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define NO_CREATE 0
+#define CREATE 1
+
+#define NO_WAIT 0
+#define WAIT 1
+
+#define NO_FORCE 0
+#define FORCE 1
+
+#if (BITS_PER_LONG == 64)
+#define PRIu64 "lu"
+#define PRId64 "ld"
+#define PRIx64 "lx"
+#define PRIX64 "lX"
+#else
+#define PRIu64 "Lu"
+#define PRId64 "Ld"
+#define PRIx64 "Lx"
+#define PRIX64 "LX"
+#endif
+
+/*  Divide num by den.  Round up if there is a remainder.  */
+#define DIV_RU(num, den) (((num) + (den) - 1) / (den))
+#define MAKE_MULT8(x) (((x) + 7) & ~7)
+
+#define GFS2_FAST_NAME_SIZE 8
+
+#define get_v2sdp(sb) ((struct gfs2_sbd *)(sb)->s_fs_info)
+#define set_v2sdp(sb, sdp) (sb)->s_fs_info = (sdp)
+#define get_v2ip(inode) ((struct gfs2_inode *)(inode)->u.generic_ip)
+#define set_v2ip(inode, ip) (inode)->u.generic_ip = (ip)
+#define get_v2fp(file) ((struct gfs2_file *)(file)->private_data)
+#define set_v2fp(file, fp) (file)->private_data = (fp)
+#define get_v2bd(bh) ((struct gfs2_bufdata *)(bh)->b_private)
+#define set_v2bd(bh, bd) (bh)->b_private = (bd)
+#define get_v2db(bh) ((struct gfs2_databuf *)(bh)->b_private)
+#define set_v2db(bh, db) (bh)->b_private = (db)
+
+#define get_transaction ((struct gfs2_trans *)(current->journal_info))
+#define set_transaction(tr) (current->journal_info) = (tr)
+
+#define get_gl2ip(gl) ((struct gfs2_inode *)(gl)->gl_object)
+#define set_gl2ip(gl, ip) (gl)->gl_object = (ip)
+#define get_gl2rgd(gl) ((struct gfs2_rgrpd *)(gl)->gl_object)
+#define set_gl2rgd(gl, rgd) (gl)->gl_object = (rgd)
+#define get_gl2gl(gl) ((struct gfs2_glock *)(gl)->gl_object)
+#define set_gl2gl(gl, gl2) (gl)->gl_object = (gl2)
+
+#endif /* __GFS2_DOT_H__ */
+
--- a/fs/gfs2/incore.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/incore.h	2005-09-01 17:36:55.283120336 +0800
@@ -0,0 +1,691 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __INCORE_DOT_H__
+#define __INCORE_DOT_H__
+
+#define DIO_FORCE	0x00000001
+#define DIO_CLEAN	0x00000002
+#define DIO_DIRTY	0x00000004
+#define DIO_START	0x00000008
+#define DIO_WAIT	0x00000010
+#define DIO_METADATA	0x00000020
+#define DIO_DATA	0x00000040
+#define DIO_RELEASE	0x00000080
+#define DIO_ALL		0x00000100
+
+struct gfs2_log_operations;
+struct gfs2_log_element;
+struct gfs2_bitmap;
+struct gfs2_rgrpd;
+struct gfs2_bufdata;
+struct gfs2_databuf;
+struct gfs2_glock_operations;
+struct gfs2_holder;
+struct gfs2_glock;
+struct gfs2_alloc;
+struct gfs2_inode;
+struct gfs2_file;
+struct gfs2_revoke;
+struct gfs2_revoke_replay;
+struct gfs2_unlinked;
+struct gfs2_quota_data;
+struct gfs2_log_buf;
+struct gfs2_trans;
+struct gfs2_ail;
+struct gfs2_jdesc;
+struct gfs2_args;
+struct gfs2_tune;
+struct gfs2_gl_hash_bucket;
+struct gfs2_sbd;
+
+typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
+
+/*
+ * Structure of operations that are associated with each
+ * type of element in the log.
+ */
+
+struct gfs2_log_operations {
+	void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
+	void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
+	void (*lo_before_commit) (struct gfs2_sbd *sdp);
+	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
+	void (*lo_before_scan) (struct gfs2_jdesc *jd,
+				struct gfs2_log_header *head, int pass);
+	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
+				 struct gfs2_log_descriptor *ld, int pass);
+	void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
+	char *lo_name;
+};
+
+struct gfs2_log_element {
+	struct list_head le_list;
+	struct gfs2_log_operations *le_ops;
+};
+
+struct gfs2_bitmap {
+	struct buffer_head *bi_bh;
+	char *bi_clone;
+	uint32_t bi_offset;
+	uint32_t bi_start;
+	uint32_t bi_len;
+};
+
+struct gfs2_rgrpd {
+	struct list_head rd_list;	/* Link with superblock */
+	struct list_head rd_list_mru;
+	struct list_head rd_recent;	/* Recently used rgrps */
+	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
+	struct gfs2_rindex rd_ri;
+	struct gfs2_rgrp rd_rg;
+	uint64_t rd_rg_vn;
+	struct gfs2_bitmap *rd_bits;
+	unsigned int rd_bh_count;
+	struct semaphore rd_mutex;
+	uint32_t rd_free_clone;
+	struct gfs2_log_element rd_le;
+	uint32_t rd_last_alloc_data;
+	uint32_t rd_last_alloc_meta;
+	struct gfs2_sbd *rd_sbd;
+};
+
+enum gfs2_state_bits {
+	BH_Pinned = BH_PrivateStart,
+};
+
+BUFFER_FNS(Pinned, pinned)
+TAS_BUFFER_FNS(Pinned, pinned)
+
+struct gfs2_bufdata {
+	struct buffer_head *bd_bh;
+	struct gfs2_glock *bd_gl;
+
+	struct list_head bd_list_tr;
+	struct gfs2_log_element bd_le;
+
+	struct gfs2_ail *bd_ail;
+	struct list_head bd_ail_st_list;
+	struct list_head bd_ail_gl_list;
+};
+
+struct gfs2_databuf {
+	struct gfs2_log_element db_le;
+	struct buffer_head *db_bh;
+};
+
+struct gfs2_glock_operations {
+	void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
+			     int flags);
+	void (*go_xmote_bh) (struct gfs2_glock * gl);
+	void (*go_drop_th) (struct gfs2_glock * gl);
+	void (*go_drop_bh) (struct gfs2_glock * gl);
+	void (*go_sync) (struct gfs2_glock * gl, int flags);
+	void (*go_inval) (struct gfs2_glock * gl, int flags);
+	int (*go_demote_ok) (struct gfs2_glock * gl);
+	int (*go_lock) (struct gfs2_holder * gh);
+	void (*go_unlock) (struct gfs2_holder * gh);
+	void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
+	void (*go_greedy) (struct gfs2_glock * gl);
+	int go_type;
+};
+
+/* Actions */
+#define HIF_MUTEX		0
+#define HIF_PROMOTE		1
+#define HIF_DEMOTE		2
+#define HIF_GREEDY		3
+
+/* States */
+#define HIF_ALLOCED		4
+#define HIF_DEALLOC		5
+#define HIF_HOLDER		6
+#define HIF_FIRST		7
+#define HIF_RECURSE		8
+#define HIF_ABORTED		9
+
+struct gfs2_holder {
+	struct list_head gh_list;
+
+	struct gfs2_glock *gh_gl;
+	struct task_struct *gh_owner;
+	unsigned int gh_state;
+	int gh_flags;
+
+	int gh_error;
+	unsigned long gh_iflags;
+	struct completion gh_wait;
+};
+
+#define GLF_PLUG		0
+#define GLF_LOCK		1
+#define GLF_STICKY		2
+#define GLF_PREFETCH		3
+#define GLF_SYNC		4
+#define GLF_DIRTY		5
+#define GLF_SKIP_WAITERS2	6
+#define GLF_GREEDY		7
+
+struct gfs2_glock {
+	struct list_head gl_list;
+	unsigned long gl_flags;		/* GLF_... */
+	struct lm_lockname gl_name;
+	atomic_t gl_count;
+
+	spinlock_t gl_spin;
+
+	unsigned int gl_state;
+	struct list_head gl_holders;
+	struct list_head gl_waiters1;	/* HIF_MUTEX */
+	struct list_head gl_waiters2;	/* HIF_DEMOTE, HIF_GREEDY */
+	struct list_head gl_waiters3;	/* HIF_PROMOTE */
+
+	struct gfs2_glock_operations *gl_ops;
+
+	struct gfs2_holder *gl_req_gh;
+	gfs2_glop_bh_t gl_req_bh;
+
+	lm_lock_t *gl_lock;
+	char *gl_lvb;
+	atomic_t gl_lvb_count;
+
+	uint64_t gl_vn;
+	unsigned long gl_stamp;
+	void *gl_object;
+
+	struct gfs2_gl_hash_bucket *gl_bucket;
+	struct list_head gl_reclaim;
+
+	struct gfs2_sbd *gl_sbd;
+
+	struct inode *gl_aspace;
+	struct gfs2_log_element gl_le;
+	struct list_head gl_ail_list;
+	atomic_t gl_ail_count;
+};
+
+struct gfs2_alloc {
+	/* Quota stuff */
+
+	unsigned int al_qd_num;
+	struct gfs2_quota_data *al_qd[4];
+	struct gfs2_holder al_qd_ghs[4];
+
+	/* Filled in by the caller to gfs2_inplace_reserve() */
+
+	uint32_t al_requested;
+
+	/* Filled in by gfs2_inplace_reserve() */
+
+	char *al_file;
+	unsigned int al_line;
+	struct gfs2_holder al_ri_gh;
+	struct gfs2_holder al_rgd_gh;
+	struct gfs2_rgrpd *al_rgd;
+
+	/* Filled in by gfs2_alloc_*() */
+
+	uint32_t al_alloced;
+};
+
+#define GIF_MIN_INIT		0
+#define GIF_QD_LOCKED		1
+#define GIF_PAGED		2
+#define GIF_SW_PAGED		3
+
+struct gfs2_inode {
+	struct gfs2_inum i_num;
+
+	atomic_t i_count;
+	unsigned long i_flags;		/* GIF_... */
+
+	uint64_t i_vn;
+	struct gfs2_dinode i_di;
+
+	struct gfs2_glock *i_gl;
+	struct gfs2_sbd *i_sbd;
+	struct inode *i_vnode;
+
+	struct gfs2_holder i_iopen_gh;
+
+	struct gfs2_alloc *i_alloc;
+	uint64_t i_last_rg_alloc;
+
+	spinlock_t i_spin;
+	struct rw_semaphore i_rw_mutex;
+
+	unsigned int i_greedy;
+	unsigned long i_last_pfault;
+
+	struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
+};
+
+#define GFF_DID_DIRECT_ALLOC	0
+
+struct gfs2_file {
+	unsigned long f_flags;		/* GFF_... */
+
+	struct semaphore f_fl_mutex;
+	struct gfs2_holder f_fl_gh;
+
+	struct gfs2_inode *f_inode;
+	struct file *f_vfile;
+};
+
+struct gfs2_revoke {
+	struct gfs2_log_element rv_le;
+	uint64_t rv_blkno;
+};
+
+struct gfs2_revoke_replay {
+	struct list_head rr_list;
+	uint64_t rr_blkno;
+	unsigned int rr_where;
+};
+
+#define ULF_LOCKED		0
+
+struct gfs2_unlinked {
+	struct list_head ul_list;
+	unsigned int ul_count;
+	struct gfs2_unlinked_tag ul_ut;
+	unsigned long ul_flags;		/* ULF_... */
+	unsigned int ul_slot;
+};
+
+#define QDF_USER		0
+#define QDF_CHANGE		1
+#define QDF_LOCKED		2
+
+struct gfs2_quota_data {
+	struct list_head qd_list;
+	unsigned int qd_count;
+
+	uint32_t qd_id;
+	unsigned long qd_flags;		/* QDF_... */
+
+	int64_t qd_change;
+	int64_t qd_change_sync;
+
+	unsigned int qd_slot;
+	unsigned int qd_slot_count;
+
+	struct buffer_head *qd_bh;
+	struct gfs2_quota_change *qd_bh_qc;
+	unsigned int qd_bh_count;
+
+	struct gfs2_glock *qd_gl;
+	struct gfs2_quota_lvb qd_qb;
+
+	uint64_t qd_sync_gen;
+	unsigned long qd_last_warn;
+	unsigned long qd_last_touched;
+};
+
+struct gfs2_log_buf {
+	struct list_head lb_list;
+	struct buffer_head *lb_bh;
+	struct buffer_head *lb_real;
+};
+
+struct gfs2_trans {
+	char *tr_file;
+	unsigned int tr_line;
+
+	unsigned int tr_blocks;
+	unsigned int tr_revokes;
+	unsigned int tr_reserved;
+
+	struct gfs2_holder *tr_t_gh;
+
+	int tr_touched;
+
+	unsigned int tr_num_buf;
+	unsigned int tr_num_buf_new;
+	unsigned int tr_num_buf_rm;
+	struct list_head tr_list_buf;
+
+	unsigned int tr_num_revoke;
+	unsigned int tr_num_revoke_rm;
+};
+
+struct gfs2_ail {
+	struct list_head ai_list;
+
+	unsigned int ai_first;
+	struct list_head ai_ail1_list;
+	struct list_head ai_ail2_list;
+
+	uint64_t ai_sync_gen;
+};
+
+struct gfs2_jdesc {
+	struct list_head jd_list;
+
+	struct gfs2_inode *jd_inode;
+	unsigned int jd_jid;
+	int jd_dirty;
+
+	unsigned int jd_blocks;
+};
+
+#define GFS2_GLOCKD_DEFAULT	1
+#define GFS2_GLOCKD_MAX		16
+
+#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
+#define GFS2_QUOTA_OFF		0
+#define GFS2_QUOTA_ACCOUNT	1
+#define GFS2_QUOTA_ON		2
+
+#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
+#define GFS2_DATA_WRITEBACK	1
+#define GFS2_DATA_ORDERED	2
+
+struct gfs2_args {
+	char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
+	char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
+	char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
+	int ar_spectator; /* Don't get a journal because we're always RO */
+	int ar_ignore_local_fs; /* Don't optimize even if local_fs is TRUE */
+	int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
+	int ar_localcaching; /* Local-style caching (dangerous on multihost) */
+	int ar_oopses_ok; /* Allow oopses */
+	int ar_debug; /* Oops on errors instead of trying to be graceful */
+	int ar_upgrade; /* Upgrade ondisk/multihost format */
+	unsigned int ar_num_glockd; /* Number of glockd threads */
+	int ar_posix_acl; /* Enable posix acls */
+	int ar_quota; /* off/account/on */
+	int ar_suiddir; /* suiddir support */
+	int ar_data; /* ordered/writeback */
+};
+
+struct gfs2_tune {
+	spinlock_t gt_spin;
+
+	unsigned int gt_ilimit;
+	unsigned int gt_ilimit_tries;
+	unsigned int gt_ilimit_min;
+	unsigned int gt_demote_secs; /* Cache retention for unheld glock */
+	unsigned int gt_incore_log_blocks;
+	unsigned int gt_log_flush_secs;
+	unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
+
+	unsigned int gt_scand_secs;
+	unsigned int gt_recoverd_secs;
+	unsigned int gt_logd_secs;
+	unsigned int gt_quotad_secs;
+	unsigned int gt_inoded_secs;
+
+	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
+	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
+	unsigned int gt_quota_scale_num; /* Numerator */
+	unsigned int gt_quota_scale_den; /* Denominator */
+	unsigned int gt_quota_cache_secs;
+	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
+	unsigned int gt_atime_quantum; /* Min secs between atime updates */
+	unsigned int gt_new_files_jdata;
+	unsigned int gt_new_files_directio;
+	unsigned int gt_max_atomic_write; /* Split big writes into this size */
+	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
+	unsigned int gt_lockdump_size;
+	unsigned int gt_stall_secs; /* Detects trouble! */
+	unsigned int gt_complain_secs;
+	unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
+	unsigned int gt_entries_per_readdir;
+	unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
+	unsigned int gt_greedy_default;
+	unsigned int gt_greedy_quantum;
+	unsigned int gt_greedy_max;
+	unsigned int gt_statfs_quantum;
+	unsigned int gt_statfs_slow;
+};
+
+struct gfs2_gl_hash_bucket {
+	rwlock_t hb_lock;
+	struct list_head hb_list;
+};
+
+#define SDF_JOURNAL_CHECKED	0
+#define SDF_JOURNAL_LIVE	1
+#define SDF_SHUTDOWN		2
+#define SDF_NOATIME		3
+
+#define GFS2_GL_HASH_SHIFT	13
+#define GFS2_GL_HASH_SIZE	(1 << GFS2_GL_HASH_SHIFT)
+#define GFS2_GL_HASH_MASK	(GFS2_GL_HASH_SIZE - 1)
+
+struct gfs2_sbd {
+	struct super_block *sd_vfs;
+	struct kobject sd_kobj;
+	unsigned long sd_flags;	/* SDF_... */
+	struct gfs2_sb sd_sb;
+
+	/* Constants computed on mount */
+
+	uint32_t sd_fsb2bb;
+	uint32_t sd_fsb2bb_shift;
+	uint32_t sd_diptrs;	/* Number of pointers in a dinode */
+	uint32_t sd_inptrs;	/* Number of pointers in a indirect block */
+	uint32_t sd_jbsize;	/* Size of a journaled data block */
+	uint32_t sd_hash_bsize;	/* sizeof(exhash block) */
+	uint32_t sd_hash_bsize_shift;
+	uint32_t sd_hash_ptrs;	/* Number of pointers in a hash block */
+	uint32_t sd_ut_per_block;
+	uint32_t sd_qc_per_block;
+	uint32_t sd_max_dirres;	/* Max blocks needed to add a directory entry */
+	uint32_t sd_max_height;	/* Max height of a file's metadata tree */
+	uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT];
+	uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */
+	uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT];
+
+	struct gfs2_args sd_args;	/* Mount arguments */
+	struct gfs2_tune sd_tune;	/* Filesystem tuning structure */
+
+	/* Lock Stuff */
+
+	struct lm_lockstruct sd_lockstruct;
+	struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE];
+	struct list_head sd_reclaim_list;
+	spinlock_t sd_reclaim_lock;
+	wait_queue_head_t sd_reclaim_wq;
+	atomic_t sd_reclaim_count;
+	struct gfs2_holder sd_live_gh;
+	struct gfs2_glock *sd_rename_gl;
+	struct gfs2_glock *sd_trans_gl;
+
+	/* Inode Stuff */
+
+	struct gfs2_inode *sd_master_dir;
+	struct gfs2_inode *sd_jindex;
+	struct gfs2_inode *sd_inum_inode;
+	struct gfs2_inode *sd_statfs_inode;
+	struct gfs2_inode *sd_ir_inode;
+	struct gfs2_inode *sd_sc_inode;
+	struct gfs2_inode *sd_ut_inode;
+	struct gfs2_inode *sd_qc_inode;
+	struct gfs2_inode *sd_rindex;
+	struct gfs2_inode *sd_quota_inode;
+	struct gfs2_inode *sd_root_dir;
+
+	/* Inum stuff */
+
+	struct semaphore sd_inum_mutex;
+
+	/* StatFS stuff */
+
+	spinlock_t sd_statfs_spin;
+	struct semaphore sd_statfs_mutex;
+	struct gfs2_statfs_change sd_statfs_master;
+	struct gfs2_statfs_change sd_statfs_local;
+	unsigned long sd_statfs_sync_time;
+
+	/* Resource group stuff */
+
+	uint64_t sd_rindex_vn;
+	spinlock_t sd_rindex_spin;
+	struct semaphore sd_rindex_mutex;
+	struct list_head sd_rindex_list;
+	struct list_head sd_rindex_mru_list;
+	struct list_head sd_rindex_recent_list;
+	struct gfs2_rgrpd *sd_rindex_forward;
+	unsigned int sd_rgrps;
+
+	/* Journal index stuff */
+
+	struct list_head sd_jindex_list;
+	spinlock_t sd_jindex_spin;
+	struct semaphore sd_jindex_mutex;
+	unsigned int sd_journals;
+	unsigned long sd_jindex_refresh_time;
+
+	struct gfs2_jdesc *sd_jdesc;
+	struct gfs2_holder sd_journal_gh;
+	struct gfs2_holder sd_jinode_gh;
+
+	struct gfs2_holder sd_ir_gh;
+	struct gfs2_holder sd_sc_gh;
+	struct gfs2_holder sd_ut_gh;
+	struct gfs2_holder sd_qc_gh;
+
+	/* Daemon stuff */
+
+	struct task_struct *sd_scand_process;
+	struct task_struct *sd_recoverd_process;
+	struct task_struct *sd_logd_process;
+	struct task_struct *sd_quotad_process;
+	struct task_struct *sd_inoded_process;
+	struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
+	unsigned int sd_glockd_num;
+
+	/* Unlinked inode stuff */
+
+	struct list_head sd_unlinked_list;
+	atomic_t sd_unlinked_count;
+	spinlock_t sd_unlinked_spin;
+	struct semaphore sd_unlinked_mutex;
+
+	unsigned int sd_unlinked_slots;
+	unsigned int sd_unlinked_chunks;
+	unsigned char **sd_unlinked_bitmap;
+
+	/* Quota stuff */
+
+	struct list_head sd_quota_list;
+	atomic_t sd_quota_count;
+	spinlock_t sd_quota_spin;
+	struct semaphore sd_quota_mutex;
+
+	unsigned int sd_quota_slots;
+	unsigned int sd_quota_chunks;
+	unsigned char **sd_quota_bitmap;
+
+	uint64_t sd_quota_sync_gen;
+	unsigned long sd_quota_sync_time;
+
+	/* Log stuff */
+
+	spinlock_t sd_log_lock;
+	atomic_t sd_log_trans_count;
+	wait_queue_head_t sd_log_trans_wq;
+	atomic_t sd_log_flush_count;
+	wait_queue_head_t sd_log_flush_wq;
+
+	unsigned int sd_log_blks_reserved;
+	unsigned int sd_log_commited_buf;
+	unsigned int sd_log_commited_revoke;
+
+	unsigned int sd_log_num_gl;
+	unsigned int sd_log_num_buf;
+	unsigned int sd_log_num_revoke;
+	unsigned int sd_log_num_rg;
+	unsigned int sd_log_num_databuf;
+	struct list_head sd_log_le_gl;
+	struct list_head sd_log_le_buf;
+	struct list_head sd_log_le_revoke;
+	struct list_head sd_log_le_rg;
+	struct list_head sd_log_le_databuf;
+
+	unsigned int sd_log_blks_free;
+	struct list_head sd_log_blks_list;
+	wait_queue_head_t sd_log_blks_wait;
+
+	uint64_t sd_log_sequence;
+	unsigned int sd_log_head;
+	unsigned int sd_log_tail;
+	uint64_t sd_log_wraps;
+	int sd_log_idle;
+
+	unsigned long sd_log_flush_time;
+	struct semaphore sd_log_flush_lock;
+	struct list_head sd_log_flush_list;
+
+	unsigned int sd_log_flush_head;
+	uint64_t sd_log_flush_wrapped;
+
+	struct list_head sd_ail1_list;
+	struct list_head sd_ail2_list;
+	uint64_t sd_ail_sync_gen;
+
+	/* Replay stuff */
+
+	struct list_head sd_revoke_list;
+	unsigned int sd_replay_tail;
+
+	unsigned int sd_found_blocks;
+	unsigned int sd_found_revokes;
+	unsigned int sd_replayed_blocks;
+
+	/* For quiescing the filesystem */
+
+	struct gfs2_holder sd_freeze_gh;
+	struct semaphore sd_freeze_lock;
+	unsigned int sd_freeze_count;
+
+	/* Counters */
+
+	atomic_t sd_glock_count;
+	atomic_t sd_glock_held_count;
+	atomic_t sd_inode_count;
+	atomic_t sd_bufdata_count;
+
+	atomic_t sd_fh2dentry_misses;
+	atomic_t sd_reclaimed;
+	atomic_t sd_log_flush_incore;
+	atomic_t sd_log_flush_ondisk;
+
+	atomic_t sd_glock_nq_calls;
+	atomic_t sd_glock_dq_calls;
+	atomic_t sd_glock_prefetch_calls;
+	atomic_t sd_lm_lock_calls;
+	atomic_t sd_lm_unlock_calls;
+	atomic_t sd_lm_callbacks;
+
+	atomic_t sd_bio_reads;
+	atomic_t sd_bio_writes;
+	atomic_t sd_bio_outstanding;
+
+	atomic_t sd_ops_address;
+	atomic_t sd_ops_dentry;
+	atomic_t sd_ops_export;
+	atomic_t sd_ops_file;
+	atomic_t sd_ops_inode;
+	atomic_t sd_ops_super;
+	atomic_t sd_ops_vm;
+
+	char sd_fsname[256];
+
+	/* Debugging crud */
+
+	unsigned long sd_last_warning;
+
+	struct list_head sd_list;
+};
+
+#endif /* __INCORE_DOT_H__ */
+
--- a/include/linux/gfs2_ioctl.h	1970-01-01 07:30:00.000000000 +0730
+++ b/include/linux/gfs2_ioctl.h	2005-09-01 17:36:55.202132648 +0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __GFS2_IOCTL_DOT_H__
+#define __GFS2_IOCTL_DOT_H__
+
+#define _GFS2C_(x)               (('G' << 16) | ('2' << 8) | (x))
+
+/* Ioctls implemented */
+
+#define GFS2_IOCTL_IDENTIFY      _GFS2C_(1)
+#define GFS2_IOCTL_SUPER         _GFS2C_(2)
+
+struct gfs2_ioctl {
+	unsigned int gi_argc;
+	char **gi_argv;
+
+        char __user *gi_data;
+	unsigned int gi_size;
+	uint64_t gi_offset;
+};
+
+#endif /* ___GFS2_IOCTL_DOT_H__ */
+
--- a/include/linux/gfs2_ondisk.h	1970-01-01 07:30:00.000000000 +0730
+++ b/include/linux/gfs2_ondisk.h	2005-09-01 17:36:55.208131736 +0800
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __GFS2_ONDISK_DOT_H__
+#define __GFS2_ONDISK_DOT_H__
+
+#define GFS2_MAGIC		0x07131974
+#define GFS2_BASIC_BLOCK	512
+#define GFS2_BASIC_BLOCK_SHIFT	9
+
+/* Lock numbers of the LM_TYPE_NONDISK type */
+
+#define GFS2_MOUNT_LOCK		0
+#define GFS2_LIVE_LOCK		1
+#define GFS2_TRANS_LOCK		2
+#define GFS2_RENAME_LOCK	3
+
+/* Format numbers for various metadata types */
+
+#define GFS2_FORMAT_NONE	0
+#define GFS2_FORMAT_SB		100
+#define GFS2_FORMAT_RG		200
+#define GFS2_FORMAT_RB		300
+#define GFS2_FORMAT_DI		400
+#define GFS2_FORMAT_IN		500
+#define GFS2_FORMAT_LF		600
+#define GFS2_FORMAT_JD		700
+#define GFS2_FORMAT_LH		800
+#define GFS2_FORMAT_LD		900
+#define GFS2_FORMAT_LB		1000
+#define GFS2_FORMAT_EA		1100
+#define GFS2_FORMAT_ED		1200
+#define GFS2_FORMAT_UT		1300
+#define GFS2_FORMAT_QC		1400
+/* These are format numbers for entities contained in files */
+#define GFS2_FORMAT_RI		1500
+#define GFS2_FORMAT_DE		1600
+#define GFS2_FORMAT_QU		1700
+/* These are part of the superblock */
+#define GFS2_FORMAT_FS		1801
+#define GFS2_FORMAT_MULTI	1900
+
+/*
+ * An on-disk inode number
+ */
+
+#define gfs2_inum_equal(ino1, ino2) \
+	(((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \
+	((ino1)->no_addr == (ino2)->no_addr))
+
+struct gfs2_inum {
+	uint64_t no_formal_ino;
+	uint64_t no_addr;
+};
+
+/*
+ * Generic metadata head structure
+ * Every inplace buffer logged in the journal must start with this.
+ */
+
+#define GFS2_METATYPE_NONE	0
+#define GFS2_METATYPE_SB	1
+#define GFS2_METATYPE_RG	2
+#define GFS2_METATYPE_RB	3
+#define GFS2_METATYPE_DI	4
+#define GFS2_METATYPE_IN	5
+#define GFS2_METATYPE_LF	6
+#define GFS2_METATYPE_JD	7
+#define GFS2_METATYPE_LH	8
+#define GFS2_METATYPE_LD	9
+#define GFS2_METATYPE_LB	10
+#define GFS2_METATYPE_EA	11
+#define GFS2_METATYPE_ED	12
+#define GFS2_METATYPE_UT	13
+#define GFS2_METATYPE_QC	14
+
+struct gfs2_meta_header {
+	uint32_t mh_magic;
+	uint16_t mh_type;
+	uint16_t mh_format;
+	uint64_t mh_blkno;
+};
+
+/*
+ * super-block structure
+ *
+ * It's probably good if SIZEOF_SB <= GFS2_BASIC_BLOCK (512 bytes)
+ *
+ * Order is important, need to be able to read old superblocks to do on-disk
+ * version upgrades.
+ */
+
+/* Address of superblock in GFS2 basic blocks */
+#define GFS2_SB_ADDR		128
+
+/* The lock number for the superblock (must be zero) */
+#define GFS2_SB_LOCK		0
+
+/* Requirement:  GFS2_LOCKNAME_LEN % 8 == 0
+   Includes: the fencing zero at the end */
+#define GFS2_LOCKNAME_LEN	64
+
+struct gfs2_sb {
+	struct gfs2_meta_header sb_header;
+
+	uint32_t sb_fs_format;
+	uint32_t sb_multihost_format;
+
+	uint32_t sb_bsize;
+	uint32_t sb_bsize_shift;
+
+	struct gfs2_inum sb_master_dir;
+
+	char sb_lockproto[GFS2_LOCKNAME_LEN];
+	char sb_locktable[GFS2_LOCKNAME_LEN];
+};
+
+/*
+ * resource index structure
+ */
+
+struct gfs2_rindex {
+	uint64_t ri_addr;	/* grp block disk address */
+	uint32_t ri_length;	/* length of rgrp header in fs blocks */
+	uint32_t ri_pad;
+
+	uint64_t ri_data0;	/* first data location */
+	uint32_t ri_data;	/* num of data blocks in rgrp */
+
+	uint32_t ri_bitbytes;	/* number of bytes in data bitmaps */
+
+	char ri_reserved[32];
+};
+
+/*
+ * resource group header structure
+ */
+
+/* Number of blocks per byte in rgrp */
+#define GFS2_NBBY		4
+#define GFS2_BIT_SIZE		2
+#define GFS2_BIT_MASK		0x00000003
+
+#define GFS2_BLKST_FREE		0
+#define GFS2_BLKST_USED		1
+#define GFS2_BLKST_INVALID	2
+#define GFS2_BLKST_DINODE	3
+
+#define GFS2_RGF_JOURNAL	0x00000001
+#define GFS2_RGF_METAONLY	0x00000002
+#define GFS2_RGF_DATAONLY	0x00000004
+#define GFS2_RGF_NOALLOC	0x00000008
+
+struct gfs2_rgrp {
+	struct gfs2_meta_header rg_header;
+
+	uint32_t rg_flags;
+	uint32_t rg_free;
+	uint32_t rg_dinodes;
+
+	char rg_reserved[36];
+};
+
+/*
+ * quota structure
+ */
+
+struct gfs2_quota {
+	uint64_t qu_limit;
+	uint64_t qu_warn;
+	int64_t qu_value;
+};
+
+/*
+ * dinode structure
+ */
+
+#define GFS2_MAX_META_HEIGHT	10
+#define GFS2_DIR_MAX_DEPTH	17
+
+#define DT2IF(dt) (((dt) << 12) & S_IFMT)
+#define IF2DT(sif) (((sif) & S_IFMT) >> 12)
+
+/* Dinode flags */
+#define GFS2_DIF_SYSTEM			0x00000001
+#define GFS2_DIF_JDATA			0x00000002
+#define GFS2_DIF_EXHASH			0x00000004
+#define GFS2_DIF_EA_INDIRECT		0x00000008
+#define GFS2_DIF_DIRECTIO		0x00000010
+#define GFS2_DIF_IMMUTABLE		0x00000020
+#define GFS2_DIF_APPENDONLY		0x00000040
+#define GFS2_DIF_NOATIME		0x00000080
+#define GFS2_DIF_SYNC			0x00000100
+#define GFS2_DIF_INHERIT_DIRECTIO	0x00000200
+#define GFS2_DIF_INHERIT_JDATA		0x00000400
+#define GFS2_DIF_TRUNC_IN_PROG		0x00000800
+
+struct gfs2_dinode {
+	struct gfs2_meta_header di_header;
+
+	struct gfs2_inum di_num;
+
+	uint32_t di_mode;	/* mode of file */
+	uint32_t di_uid;	/* owner's user id */
+	uint32_t di_gid;	/* owner's group id */
+	uint32_t di_nlink;	/* number of links to this file */
+	uint64_t di_size;	/* number of bytes in file */
+	uint64_t di_blocks;	/* number of blocks in file */
+	int64_t di_atime;	/* time last accessed */
+	int64_t di_mtime;	/* time last modified */
+	int64_t di_ctime;	/* time last changed */
+	uint32_t di_major;	/* device major number */
+	uint32_t di_minor;	/* device minor number */
+
+	uint64_t di_goal_meta;	/* rgrp to alloc from next */
+	uint64_t di_goal_data;	/* data block goal */
+
+	uint32_t di_flags;	/* GFS2_DIF_... */
+	uint32_t di_payload_format;  /* GFS2_FORMAT_... */
+	uint16_t di_height;	/* height of metadata */
+
+	/* These only apply to directories  */
+	uint16_t di_depth;	/* Number of bits in the table */
+	uint32_t di_entries;	/* The number of entries in the directory */
+
+	uint64_t di_eattr;	/* extended attribute block number */
+
+	char di_reserved[32];
+};
+
+/*
+ * directory structure - many of these per directory file
+ */
+
+#define GFS2_FNAMESIZE		255
+#define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7)
+
+struct gfs2_dirent {
+	struct gfs2_inum de_inum;
+	uint32_t de_hash;
+	uint32_t de_rec_len;
+	uint8_t de_name_len;
+	uint8_t de_type;
+	uint16_t de_pad1;
+	uint32_t de_pad2;
+};
+
+/*
+ * Header of leaf directory nodes
+ */
+
+struct gfs2_leaf {
+	struct gfs2_meta_header lf_header;
+
+	uint16_t lf_depth;		/* Depth of leaf */
+	uint16_t lf_entries;		/* Number of dirents in leaf */
+	uint32_t lf_dirent_format;	/* Format of the dirents */
+	uint64_t lf_next;		/* Next leaf, if overflow */
+
+	char lf_reserved[32];
+};
+
+/*
+ * Extended attribute header format
+ */
+
+#define GFS2_EA_MAX_NAME_LEN	255
+#define GFS2_EA_MAX_DATA_LEN	65536
+
+#define GFS2_EATYPE_UNUSED	0
+#define GFS2_EATYPE_USR		1
+#define GFS2_EATYPE_SYS		2
+
+#define GFS2_EATYPE_LAST	2
+#define GFS2_EATYPE_VALID(x)	((x) <= GFS2_EATYPE_LAST)
+
+#define GFS2_EAFLAG_LAST	0x01	/* last ea in block */
+
+struct gfs2_ea_header {
+	uint32_t ea_rec_len;
+	uint32_t ea_data_len;
+	uint8_t ea_name_len;	/* no NULL pointer after the string */
+	uint8_t ea_type;	/* GFS2_EATYPE_... */
+	uint8_t ea_flags;	/* GFS2_EAFLAG_... */
+	uint8_t ea_num_ptrs;
+	uint32_t ea_pad;
+};
+
+/*
+ * Log header structure
+ */
+
+#define GFS2_LOG_HEAD_UNMOUNT	0x00000001	/* log is clean */
+
+struct gfs2_log_header {
+	struct gfs2_meta_header lh_header;
+
+	uint64_t lh_sequence;	/* Sequence number of this transaction */
+	uint32_t lh_flags;	/* GFS2_LOG_HEAD_... */
+	uint32_t lh_tail;	/* Block number of log tail */
+	uint32_t lh_blkno;
+	uint32_t lh_hash;
+};
+
+/*
+ * Log type descriptor
+ */
+
+#define GFS2_LOG_DESC_METADATA	300
+/* ld_data1 is the number of metadata blocks in the descriptor.
+   ld_data2 is unused. */
+
+#define GFS2_LOG_DESC_REVOKE	301
+/* ld_data1 is the number of revoke blocks in the descriptor.
+   ld_data2 is unused. */
+
+struct gfs2_log_descriptor {
+	struct gfs2_meta_header ld_header;
+
+	uint32_t ld_type;	/* GFS2_LOG_DESC_... */
+	uint32_t ld_length;	/* Number of buffers in this chunk */
+	uint32_t ld_data1;	/* descriptor-specific field */
+	uint32_t ld_data2;	/* descriptor-specific field */
+
+	char ld_reserved[32];
+};
+
+/*
+ * Inum Range
+ * Describe a range of formal inode numbers allocated to
+ * one machine to assign to inodes.
+ */
+
+#define GFS2_INUM_QUANTUM	1048576
+
+struct gfs2_inum_range {
+	uint64_t ir_start;
+	uint64_t ir_length;
+};
+
+/*
+ * Statfs change
+ * Describes an change to the pool of free and allocated
+ * blocks.
+ */
+
+struct gfs2_statfs_change {
+	int64_t sc_total;
+	int64_t sc_free;
+	int64_t sc_dinodes;
+};
+
+/*
+ * Unlinked Tag
+ * Describes an allocated inode that isn't linked into
+ * the directory tree and might need to be deallocated.
+ */
+
+#define GFS2_UTF_UNINIT		0x00000001
+
+struct gfs2_unlinked_tag {
+	struct gfs2_inum ut_inum;
+	uint32_t ut_flags;	/* GFS2_UTF_... */
+	uint32_t ut_pad;
+};
+
+/*
+ * Quota change
+ * Describes an allocation change for a particular
+ * user or group.
+ */
+
+#define GFS2_QCF_USER		0x00000001
+
+struct gfs2_quota_change {
+	int64_t qc_change;
+	uint32_t qc_flags;	/* GFS2_QCF_... */
+	uint32_t qc_id;
+};
+
+/* Endian functions */
+
+#undef GFS2_ENDIAN_BIG
+
+#ifdef GFS2_ENDIAN_BIG
+
+#define gfs2_16_to_cpu be16_to_cpu
+#define gfs2_32_to_cpu be32_to_cpu
+#define gfs2_64_to_cpu be64_to_cpu
+
+#define cpu_to_gfs2_16 cpu_to_be16
+#define cpu_to_gfs2_32 cpu_to_be32
+#define cpu_to_gfs2_64 cpu_to_be64
+
+#else /* GFS2_ENDIAN_BIG */
+
+#define gfs2_16_to_cpu le16_to_cpu
+#define gfs2_32_to_cpu le32_to_cpu
+#define gfs2_64_to_cpu le64_to_cpu
+
+#define cpu_to_gfs2_16 cpu_to_le16
+#define cpu_to_gfs2_32 cpu_to_le32
+#define cpu_to_gfs2_64 cpu_to_le64
+
+#endif /* GFS2_ENDIAN_BIG */
+
+/* Translation functions */
+
+void gfs2_inum_in(struct gfs2_inum *no, char *buf);
+void gfs2_inum_out(struct gfs2_inum *no, char *buf);
+void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf);
+void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf);
+void gfs2_sb_in(struct gfs2_sb *sb, char *buf);
+void gfs2_sb_out(struct gfs2_sb *sb, char *buf);
+void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf);
+void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf);
+void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf);
+void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf);
+void gfs2_quota_in(struct gfs2_quota *qu, char *buf);
+void gfs2_quota_out(struct gfs2_quota *qu, char *buf);
+void gfs2_dinode_in(struct gfs2_dinode *di, char *buf);
+void gfs2_dinode_out(struct gfs2_dinode *di, char *buf);
+void gfs2_dirent_in(struct gfs2_dirent *de, char *buf);
+void gfs2_dirent_out(struct gfs2_dirent *de, char *buf);
+void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf);
+void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf);
+void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf);
+void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf);
+void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf);
+void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf);
+void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf);
+void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf);
+void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf);
+void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf);
+void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf);
+void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf);
+void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf);
+void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf);
+void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf);
+void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf);
+
+/* Printing functions */
+
+void gfs2_inum_print(struct gfs2_inum *no);
+void gfs2_meta_header_print(struct gfs2_meta_header *mh);
+void gfs2_sb_print(struct gfs2_sb *sb);
+void gfs2_rindex_print(struct gfs2_rindex *ri);
+void gfs2_rgrp_print(struct gfs2_rgrp *rg);
+void gfs2_quota_print(struct gfs2_quota *qu);
+void gfs2_dinode_print(struct gfs2_dinode *di);
+void gfs2_dirent_print(struct gfs2_dirent *de, char *name);
+void gfs2_leaf_print(struct gfs2_leaf *lf);
+void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name);
+void gfs2_log_header_print(struct gfs2_log_header *lh);
+void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld);
+void gfs2_inum_range_print(struct gfs2_inum_range *ir);
+void gfs2_statfs_change_print(struct gfs2_statfs_change *sc);
+void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut);
+void gfs2_quota_change_print(struct gfs2_quota_change *qc);
+
+#endif /* __GFS2_ONDISK_DOT_H__ */
+
+
+
+#ifdef WANT_GFS2_CONVERSION_FUNCTIONS
+
+#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
+#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
+#define CPIN_16(s1, s2, member) {(s1->member) = gfs2_16_to_cpu((s2->member));}
+#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs2_16((s1->member));}
+#define CPIN_32(s1, s2, member) {(s1->member) = gfs2_32_to_cpu((s2->member));}
+#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs2_32((s1->member));}
+#define CPIN_64(s1, s2, member) {(s1->member) = gfs2_64_to_cpu((s2->member));}
+#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs2_64((s1->member));}
+
+#define pv(struct, member, fmt) printk("  "#member" = "fmt"\n", struct->member);
+#define pa(struct, member, count) print_array(#member, struct->member, count);
+
+/**
+ * print_array - Print out an array of bytes
+ * @title: what to print before the array
+ * @buf: the array
+ * @count: the number of bytes
+ *
+ */
+
+static void print_array(char *title, char *buf, int count)
+{
+	int x;
+
+	printk("  %s =\n", title);
+	for (x = 0; x < count; x++) {
+		printk("%.2X ", (unsigned char)buf[x]);
+		if (x % 16 == 15)
+			printk("\n");
+	}
+	if (x % 16)
+		printk("\n");
+}
+
+/*
+ * gfs2_xxx_in - read in an xxx struct
+ * first arg: the cpu-order structure
+ * buf: the disk-order buffer
+ *
+ * gfs2_xxx_out - write out an xxx struct
+ * first arg: the cpu-order structure
+ * buf: the disk-order buffer
+ *
+ * gfs2_xxx_print - print out an xxx struct
+ * first arg: the cpu-order structure
+ */
+
+void gfs2_inum_in(struct gfs2_inum *no, char *buf)
+{
+	struct gfs2_inum *str = (struct gfs2_inum *)buf;
+
+	CPIN_64(no, str, no_formal_ino);
+	CPIN_64(no, str, no_addr);
+}
+
+void gfs2_inum_out(struct gfs2_inum *no, char *buf)
+{
+	struct gfs2_inum *str = (struct gfs2_inum *)buf;
+
+	CPOUT_64(no, str, no_formal_ino);
+	CPOUT_64(no, str, no_addr);
+}
+
+void gfs2_inum_print(struct gfs2_inum *no)
+{
+	pv(no, no_formal_ino, "%"PRIu64);
+	pv(no, no_addr, "%"PRIu64);
+}
+
+void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
+{
+	struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
+
+	CPIN_32(mh, str, mh_magic);
+	CPIN_16(mh, str, mh_type);
+	CPIN_16(mh, str, mh_format);
+	CPIN_64(mh, str, mh_blkno);
+}
+
+void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
+{
+	struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
+
+	CPOUT_32(mh, str, mh_magic);
+	CPOUT_16(mh, str, mh_type);
+	CPOUT_16(mh, str, mh_format);
+	CPOUT_64(mh, str, mh_blkno);
+}
+
+void gfs2_meta_header_print(struct gfs2_meta_header *mh)
+{
+	pv(mh, mh_magic, "0x%.8X");
+	pv(mh, mh_type, "%u");
+	pv(mh, mh_format, "%u");
+	pv(mh, mh_blkno, "%"PRIu64);
+}
+
+void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
+{
+	struct gfs2_sb *str = (struct gfs2_sb *)buf;
+
+	gfs2_meta_header_in(&sb->sb_header, buf);
+
+	CPIN_32(sb, str, sb_fs_format);
+	CPIN_32(sb, str, sb_multihost_format);
+
+	CPIN_32(sb, str, sb_bsize);
+	CPIN_32(sb, str, sb_bsize_shift);
+
+	gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
+
+	CPIN_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN);
+	CPIN_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
+void gfs2_sb_out(struct gfs2_sb *sb, char *buf)
+{
+	struct gfs2_sb *str = (struct gfs2_sb *)buf;
+
+	gfs2_meta_header_out(&sb->sb_header, buf);
+
+	CPOUT_32(sb, str, sb_fs_format);
+	CPOUT_32(sb, str, sb_multihost_format);
+
+	CPOUT_32(sb, str, sb_bsize);
+	CPOUT_32(sb, str, sb_bsize_shift);
+
+	gfs2_inum_out(&sb->sb_master_dir, (char *)&str->sb_master_dir);
+
+	CPOUT_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN);
+	CPOUT_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
+void gfs2_sb_print(struct gfs2_sb *sb)
+{
+	gfs2_meta_header_print(&sb->sb_header);
+
+	pv(sb, sb_fs_format, "%u");
+	pv(sb, sb_multihost_format, "%u");
+
+	pv(sb, sb_bsize, "%u");
+	pv(sb, sb_bsize_shift, "%u");
+
+	gfs2_inum_print(&sb->sb_master_dir);
+
+	pv(sb, sb_lockproto, "%s");
+	pv(sb, sb_locktable, "%s");
+}
+
+void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf)
+{
+	struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
+
+	CPIN_64(ri, str, ri_addr);
+	CPIN_32(ri, str, ri_length);
+	CPIN_32(ri, str, ri_pad);
+
+	CPIN_64(ri, str, ri_data0);
+	CPIN_32(ri, str, ri_data);
+
+	CPIN_32(ri, str, ri_bitbytes);
+
+	CPIN_08(ri, str, ri_reserved, 32);
+}
+
+void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf)
+{
+	struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
+
+	CPOUT_64(ri, str, ri_addr);
+	CPOUT_32(ri, str, ri_length);
+	CPOUT_32(ri, str, ri_pad);
+
+	CPOUT_64(ri, str, ri_data0);
+	CPOUT_32(ri, str, ri_data);
+
+	CPOUT_32(ri, str, ri_bitbytes);
+
+	CPOUT_08(ri, str, ri_reserved, 32);
+}
+
+void gfs2_rindex_print(struct gfs2_rindex *ri)
+{
+	pv(ri, ri_addr, "%"PRIu64);
+	pv(ri, ri_length, "%u");
+	pv(ri, ri_pad, "%u");
+
+	pv(ri, ri_data0, "%"PRIu64);
+	pv(ri, ri_data, "%u");
+
+	pv(ri, ri_bitbytes, "%u");
+
+	pa(ri, ri_reserved, 32);
+}
+
+void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
+{
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
+
+	gfs2_meta_header_in(&rg->rg_header, buf);
+	CPIN_32(rg, str, rg_flags);
+	CPIN_32(rg, str, rg_free);
+	CPIN_32(rg, str, rg_dinodes);
+
+	CPIN_08(rg, str, rg_reserved, 36);
+}
+
+void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
+{
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
+
+	gfs2_meta_header_out(&rg->rg_header, buf);
+	CPOUT_32(rg, str, rg_flags);
+	CPOUT_32(rg, str, rg_free);
+	CPOUT_32(rg, str, rg_dinodes);
+
+	CPOUT_08(rg, str, rg_reserved, 36);
+}
+
+void gfs2_rgrp_print(struct gfs2_rgrp *rg)
+{
+	gfs2_meta_header_print(&rg->rg_header);
+	pv(rg, rg_flags, "%u");
+	pv(rg, rg_free, "%u");
+	pv(rg, rg_dinodes, "%u");
+
+	pa(rg, rg_reserved, 36);
+}
+
+void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
+{
+	struct gfs2_quota *str = (struct gfs2_quota *)buf;
+
+	CPIN_64(qu, str, qu_limit);
+	CPIN_64(qu, str, qu_warn);
+	CPIN_64(qu, str, qu_value);
+}
+
+void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
+{
+	struct gfs2_quota *str = (struct gfs2_quota *)buf;
+
+	CPOUT_64(qu, str, qu_limit);
+	CPOUT_64(qu, str, qu_warn);
+	CPOUT_64(qu, str, qu_value);
+}
+
+void gfs2_quota_print(struct gfs2_quota *qu)
+{
+	pv(qu, qu_limit, "%"PRIu64);
+	pv(qu, qu_warn, "%"PRIu64);
+	pv(qu, qu_value, "%"PRId64);
+}
+
+void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
+{
+	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
+
+	gfs2_meta_header_in(&di->di_header, buf);
+	gfs2_inum_in(&di->di_num, (char *)&str->di_num);
+
+	CPIN_32(di, str, di_mode);
+	CPIN_32(di, str, di_uid);
+	CPIN_32(di, str, di_gid);
+	CPIN_32(di, str, di_nlink);
+	CPIN_64(di, str, di_size);
+	CPIN_64(di, str, di_blocks);
+	CPIN_64(di, str, di_atime);
+	CPIN_64(di, str, di_mtime);
+	CPIN_64(di, str, di_ctime);
+	CPIN_32(di, str, di_major);
+	CPIN_32(di, str, di_minor);
+
+	CPIN_64(di, str, di_goal_meta);
+	CPIN_64(di, str, di_goal_data);
+
+	CPIN_32(di, str, di_flags);
+	CPIN_32(di, str, di_payload_format);
+	CPIN_16(di, str, di_height);
+
+	CPIN_16(di, str, di_depth);
+	CPIN_32(di, str, di_entries);
+
+	CPIN_64(di, str, di_eattr);
+
+	CPIN_08(di, str, di_reserved, 32);
+}
+
+void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
+{
+	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
+
+	gfs2_meta_header_out(&di->di_header, buf);
+	gfs2_inum_out(&di->di_num, (char *)&str->di_num);
+
+	CPOUT_32(di, str, di_mode);
+	CPOUT_32(di, str, di_uid);
+	CPOUT_32(di, str, di_gid);
+	CPOUT_32(di, str, di_nlink);
+	CPOUT_64(di, str, di_size);
+	CPOUT_64(di, str, di_blocks);
+	CPOUT_64(di, str, di_atime);
+	CPOUT_64(di, str, di_mtime);
+	CPOUT_64(di, str, di_ctime);
+	CPOUT_32(di, str, di_major);
+	CPOUT_32(di, str, di_minor);
+
+	CPOUT_64(di, str, di_goal_meta);
+	CPOUT_64(di, str, di_goal_data);
+
+	CPOUT_32(di, str, di_flags);
+	CPOUT_32(di, str, di_payload_format);
+	CPOUT_16(di, str, di_height);
+
+	CPOUT_16(di, str, di_depth);
+	CPOUT_32(di, str, di_entries);
+
+	CPOUT_64(di, str, di_eattr);
+
+	CPOUT_08(di, str, di_reserved, 32);
+}
+
+void gfs2_dinode_print(struct gfs2_dinode *di)
+{
+	gfs2_meta_header_print(&di->di_header);
+	gfs2_inum_print(&di->di_num);
+
+	pv(di, di_mode, "0%o");
+	pv(di, di_uid, "%u");
+	pv(di, di_gid, "%u");
+	pv(di, di_nlink, "%u");
+	pv(di, di_size, "%"PRIu64);
+	pv(di, di_blocks, "%"PRIu64);
+	pv(di, di_atime, "%"PRId64);
+	pv(di, di_mtime, "%"PRId64);
+	pv(di, di_ctime, "%"PRId64);
+	pv(di, di_major, "%u");
+	pv(di, di_minor, "%u");
+
+	pv(di, di_goal_meta, "%"PRIu64);
+	pv(di, di_goal_data, "%"PRIu64);
+
+	pv(di, di_flags, "0x%.8X");
+	pv(di, di_payload_format, "%u");
+	pv(di, di_height, "%u");
+
+	pv(di, di_depth, "%u");
+	pv(di, di_entries, "%u");
+
+	pv(di, di_eattr, "%"PRIu64);
+
+	pa(di, di_reserved, 32);
+}
+
+void gfs2_dirent_in(struct gfs2_dirent *de, char *buf)
+{
+	struct gfs2_dirent *str = (struct gfs2_dirent *)buf;
+
+	gfs2_inum_in(&de->de_inum, buf);
+	CPIN_32(de, str, de_hash);
+	CPIN_32(de, str, de_rec_len);
+	de->de_name_len = str->de_name_len;
+	de->de_type = str->de_type;
+	CPIN_16(de, str, de_pad1);
+	CPIN_32(de, str, de_pad2);
+}
+
+void gfs2_dirent_out(struct gfs2_dirent *de, char *buf)
+{
+	struct gfs2_dirent *str = (struct gfs2_dirent *)buf;
+
+	gfs2_inum_out(&de->de_inum, buf);
+	CPOUT_32(de, str, de_hash);
+	CPOUT_32(de, str, de_rec_len);
+	str->de_name_len = de->de_name_len;
+	str->de_type = de->de_type;
+	CPOUT_16(de, str, de_pad1);
+	CPOUT_32(de, str, de_pad2);
+}
+
+void gfs2_dirent_print(struct gfs2_dirent *de, char *name)
+{
+	char buf[GFS2_FNAMESIZE + 1];
+
+	gfs2_inum_print(&de->de_inum);
+	pv(de, de_hash, "0x%.8X");
+	pv(de, de_rec_len, "%u");
+	pv(de, de_name_len, "%u");
+	pv(de, de_type, "%u");
+	pv(de, de_pad1, "%u");
+	pv(de, de_pad2, "%u");
+
+	memset(buf, 0, GFS2_FNAMESIZE + 1);
+	memcpy(buf, name, de->de_name_len);
+	printk("  name = %s\n", buf);
+}
+
+void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf)
+{
+	struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
+
+	gfs2_meta_header_in(&lf->lf_header, buf);
+	CPIN_16(lf, str, lf_depth);
+	CPIN_16(lf, str, lf_entries);
+	CPIN_32(lf, str, lf_dirent_format);
+	CPIN_64(lf, str, lf_next);
+
+	CPIN_08(lf, str, lf_reserved, 32);
+}
+
+void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf)
+{
+	struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
+
+	gfs2_meta_header_out(&lf->lf_header, buf);
+	CPOUT_16(lf, str, lf_depth);
+	CPOUT_16(lf, str, lf_entries);
+	CPOUT_32(lf, str, lf_dirent_format);
+	CPOUT_64(lf, str, lf_next);
+
+	CPOUT_08(lf, str, lf_reserved, 32);
+}
+
+void gfs2_leaf_print(struct gfs2_leaf *lf)
+{
+	gfs2_meta_header_print(&lf->lf_header);
+	pv(lf, lf_depth, "%u");
+	pv(lf, lf_entries, "%u");
+	pv(lf, lf_dirent_format, "%u");
+	pv(lf, lf_next, "%"PRIu64);
+
+	pa(lf, lf_reserved, 32);
+}
+
+void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf)
+{
+	struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
+
+	CPIN_32(ea, str, ea_rec_len);
+	CPIN_32(ea, str, ea_data_len);
+	ea->ea_name_len = str->ea_name_len;
+	ea->ea_type = str->ea_type;
+	ea->ea_flags = str->ea_flags;
+	ea->ea_num_ptrs = str->ea_num_ptrs;
+	CPIN_32(ea, str, ea_pad);
+}
+
+void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf)
+{
+	struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
+
+	CPOUT_32(ea, str, ea_rec_len);
+	CPOUT_32(ea, str, ea_data_len);
+	str->ea_name_len = ea->ea_name_len;
+	str->ea_type = ea->ea_type;
+	str->ea_flags = ea->ea_flags;
+	str->ea_num_ptrs = ea->ea_num_ptrs;
+	CPOUT_32(ea, str, ea_pad);
+}
+
+void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name)
+{
+	char buf[GFS2_EA_MAX_NAME_LEN + 1];
+
+	pv(ea, ea_rec_len, "%u");
+	pv(ea, ea_data_len, "%u");
+	pv(ea, ea_name_len, "%u");
+	pv(ea, ea_type, "%u");
+	pv(ea, ea_flags, "%u");
+	pv(ea, ea_num_ptrs, "%u");
+	pv(ea, ea_pad, "%u");
+
+	memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1);
+	memcpy(buf, name, ea->ea_name_len);
+	printk("  name = %s\n", buf);
+}
+
+void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
+{
+	struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
+
+	gfs2_meta_header_in(&lh->lh_header, buf);
+	CPIN_64(lh, str, lh_sequence);
+	CPIN_32(lh, str, lh_flags);
+	CPIN_32(lh, str, lh_tail);
+	CPIN_32(lh, str, lh_blkno);
+	CPIN_32(lh, str, lh_hash);
+}
+
+void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf)
+{
+	struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
+
+	gfs2_meta_header_out(&lh->lh_header, buf);
+	CPOUT_64(lh, str, lh_sequence);
+	CPOUT_32(lh, str, lh_flags);
+	CPOUT_32(lh, str, lh_tail);
+	CPOUT_32(lh, str, lh_blkno);
+	CPOUT_32(lh, str, lh_hash);
+}
+
+void gfs2_log_header_print(struct gfs2_log_header *lh)
+{
+	gfs2_meta_header_print(&lh->lh_header);
+	pv(lh, lh_sequence, "%"PRIu64);
+	pv(lh, lh_flags, "0x%.8X");
+	pv(lh, lh_tail, "%u");
+	pv(lh, lh_blkno, "%u");
+	pv(lh, lh_hash, "0x%.8X");
+}
+
+void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf)
+{
+	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf;
+
+	gfs2_meta_header_in(&ld->ld_header, buf);
+	CPIN_32(ld, str, ld_type);
+	CPIN_32(ld, str, ld_length);
+	CPIN_32(ld, str, ld_data1);
+	CPIN_32(ld, str, ld_data2);
+
+	CPIN_08(ld, str, ld_reserved, 32);
+}
+
+void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf)
+{
+	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf;
+
+	gfs2_meta_header_out(&ld->ld_header, buf);
+	CPOUT_32(ld, str, ld_type);
+	CPOUT_32(ld, str, ld_length);
+	CPOUT_32(ld, str, ld_data1);
+	CPOUT_32(ld, str, ld_data2);
+
+	CPOUT_08(ld, str, ld_reserved, 32);
+}
+
+void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
+{
+	gfs2_meta_header_print(&ld->ld_header);
+	pv(ld, ld_type, "%u");
+	pv(ld, ld_length, "%u");
+	pv(ld, ld_data1, "%u");
+	pv(ld, ld_data2, "%u");
+
+	pa(ld, ld_reserved, 32);
+}
+
+void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
+{
+	struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
+
+	CPIN_64(ir, str, ir_start);
+	CPIN_64(ir, str, ir_length);
+}
+
+void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
+{
+	struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
+
+	CPOUT_64(ir, str, ir_start);
+	CPOUT_64(ir, str, ir_length);
+}
+
+void gfs2_inum_range_print(struct gfs2_inum_range *ir)
+{
+	pv(ir, ir_start, "%"PRIu64);
+	pv(ir, ir_length, "%"PRIu64);
+}
+
+void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
+{
+	struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
+
+	CPIN_64(sc, str, sc_total);
+	CPIN_64(sc, str, sc_free);
+	CPIN_64(sc, str, sc_dinodes);
+}
+
+void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf)
+{
+	struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
+
+	CPOUT_64(sc, str, sc_total);
+	CPOUT_64(sc, str, sc_free);
+	CPOUT_64(sc, str, sc_dinodes);
+}
+
+void gfs2_statfs_change_print(struct gfs2_statfs_change *sc)
+{
+	pv(sc, sc_total, "%"PRId64);
+	pv(sc, sc_free, "%"PRId64);
+	pv(sc, sc_dinodes, "%"PRId64);
+}
+
+void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf)
+{
+	struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
+
+	gfs2_inum_in(&ut->ut_inum, buf);
+	CPIN_32(ut, str, ut_flags);
+	CPIN_32(ut, str, ut_pad);
+}
+
+void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf)
+{
+	struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
+
+	gfs2_inum_out(&ut->ut_inum, buf);
+	CPOUT_32(ut, str, ut_flags);
+	CPOUT_32(ut, str, ut_pad);
+}
+
+void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut)
+{
+	gfs2_inum_print(&ut->ut_inum);
+	pv(ut, ut_flags, "%u");
+	pv(ut, ut_pad, "%u");
+}
+
+void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
+{
+	struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
+
+	CPIN_64(qc, str, qc_change);
+	CPIN_32(qc, str, qc_flags);
+	CPIN_32(qc, str, qc_id);
+}
+
+void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf)
+{
+	struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
+
+	CPOUT_64(qc, str, qc_change);
+	CPOUT_32(qc, str, qc_flags);
+	CPOUT_32(qc, str, qc_id);
+}
+
+void gfs2_quota_change_print(struct gfs2_quota_change *qc)
+{
+	pv(qc, qc_change, "%"PRId64);
+	pv(qc, qc_flags, "0x%.8X");
+	pv(qc, qc_id, "%u");
+}
+
+#endif /* WANT_GFS2_CONVERSION_FUNCTIONS */
+

[PATCH 02/13] GFS: core fs

Core file system functions.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/bmap.c        | 1213 +++++++++++++++++++++++++++++++++
 fs/gfs2/bmap.h        |   39 +
 fs/gfs2/daemon.c      |  226 ++++++
 fs/gfs2/daemon.h      |   20 
 fs/gfs2/format.h      |   21 
 fs/gfs2/glops.c       |  488 +++++++++++++
 fs/gfs2/glops.h       |   23 
 fs/gfs2/inode.c       | 1793 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/inode.h       |   74 ++
 fs/gfs2/jdata.c       |  383 ++++++++++
 fs/gfs2/jdata.h       |   52 +
 fs/gfs2/lops.c        |  510 ++++++++++++++
 fs/gfs2/lops.h        |   95 ++
 fs/gfs2/main.c        |  102 ++
 fs/gfs2/meta_io.c     |  884 ++++++++++++++++++++++++
 fs/gfs2/meta_io.h     |   88 ++
 fs/gfs2/ondisk.c      |   28 
 fs/gfs2/ops_address.c |  516 ++++++++++++++
 fs/gfs2/ops_address.h |   15 
 fs/gfs2/ops_dentry.c  |  117 +++
 fs/gfs2/ops_dentry.h  |   15 
 fs/gfs2/ops_export.c  |  311 ++++++++
 fs/gfs2/ops_export.h  |   15 
 fs/gfs2/ops_file.c    | 1522 ++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/ops_file.h    |   16 
 fs/gfs2/ops_fstype.c  |  801 ++++++++++++++++++++++
 fs/gfs2/ops_fstype.h  |   15 
 fs/gfs2/ops_inode.c   | 1270 +++++++++++++++++++++++++++++++++++
 fs/gfs2/ops_inode.h   |   18 
 fs/gfs2/ops_super.c   |  404 +++++++++++
 fs/gfs2/ops_super.h   |   15 
 fs/gfs2/ops_vm.c      |  200 +++++
 fs/gfs2/ops_vm.h      |   16 
 fs/gfs2/page.c        |  274 +++++++
 fs/gfs2/page.h        |   23 
 fs/gfs2/super.c       |  945 ++++++++++++++++++++++++++
 fs/gfs2/super.h       |   55 +
 fs/gfs2/trans.c       |  215 +++++
 fs/gfs2/trans.h       |   40 +
 fs/gfs2/unlinked.c    |  454 ++++++++++++
 fs/gfs2/unlinked.h    |   25 
 fs/gfs2/util.c        |  355 +++++++++
 fs/gfs2/util.h        |  201 +++++
 43 files changed, 13892 insertions(+)

--- a/fs/gfs2/bmap.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/bmap.c	2005-09-01 17:36:55.156139640 +0800
@@ -0,0 +1,1213 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "inode.h"
+#include "jdata.h"
+#include "meta_io.h"
+#include "page.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+
+struct metapath {
+	unsigned int mp_list[GFS2_MAX_META_HEIGHT];
+};
+
+typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
+			     struct buffer_head *bh, uint64_t *top,
+			     uint64_t *bottom, unsigned int height,
+			     void *data);
+
+struct strip_mine {
+	int sm_first;
+	unsigned int sm_height;
+};
+
+/**
+ * @gfs2_unstuffer_sync - Synchronously unstuff a dinode
+ * @ip:
+ * @dibh:
+ * @block:
+ * @private:
+ *
+ * Cheat and use a metadata buffer instead of a data page.
+ *
+ * Returns: errno
+ */
+
+int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
+			uint64_t block, void *private)
+{
+	struct buffer_head *bh;
+	int error;
+
+	bh = gfs2_meta_new(ip->i_gl, block);
+
+	gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode));
+
+	set_buffer_dirty(bh);
+	error = sync_dirty_buffer(bh);
+
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
+ * @ip: The GFS2 inode to unstuff
+ * @unstuffer: the routine that handles unstuffing a non-zero length file
+ * @private: private data for the unstuffer
+ *
+ * This routine unstuffs a dinode and returns it to a "normal" state such
+ * that the height can be grown in the traditional way.
+ *
+ * Returns: errno
+ */
+
+int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
+			void *private)
+{
+	struct buffer_head *bh, *dibh;
+	uint64_t block = 0;
+	int journaled = gfs2_is_jdata(ip);
+	int error;
+
+	down_write(&ip->i_rw_mutex);
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+		
+	if (ip->i_di.di_size) {
+		/* Get a free block, fill it with the stuffed data,
+		   and write it out to disk */
+
+		if (journaled) {
+			block = gfs2_alloc_meta(ip);
+
+			error = gfs2_jdata_get_buffer(ip, block, TRUE, &bh);
+			if (error)
+				goto out_brelse;
+			gfs2_buffer_copy_tail(bh,
+					      sizeof(struct gfs2_meta_header),
+					      dibh, sizeof(struct gfs2_dinode));
+			brelse(bh);
+		} else {
+			block = gfs2_alloc_data(ip);
+
+			error = unstuffer(ip, dibh, block, private);
+			if (error)
+				goto out_brelse;
+		}
+	}
+
+	/*  Set up the pointer to the new block  */
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+
+	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+
+	if (ip->i_di.di_size) {
+		*(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_gfs2_64(block);
+		ip->i_di.di_blocks++;
+	}
+
+	ip->i_di.di_height = 1;
+
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+
+ out_brelse:
+	brelse(dibh);
+
+ out:
+	up_write(&ip->i_rw_mutex);
+
+	return error;
+}
+
+/**
+ * calc_tree_height - Calculate the height of a metadata tree
+ * @ip: The GFS2 inode
+ * @size: The proposed size of the file
+ *
+ * Work out how tall a metadata tree needs to be in order to accommodate a
+ * file of a particular size. If size is less than the current size of
+ * the inode, then the current size of the inode is used instead of the
+ * supplied one.
+ *
+ * Returns: the height the tree should be
+ */
+
+static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	uint64_t *arr;
+	unsigned int max, height;
+
+	if (ip->i_di.di_size > size)
+		size = ip->i_di.di_size;
+
+	if (gfs2_is_jdata(ip)) {
+		arr = sdp->sd_jheightsize;
+		max = sdp->sd_max_jheight;
+	} else {
+		arr = sdp->sd_heightsize;
+		max = sdp->sd_max_height;
+	}
+
+	for (height = 0; height < max; height++)
+		if (arr[height] >= size)
+			break;
+
+	return height;
+}
+
+/**
+ * build_height - Build a metadata tree of the requested height
+ * @ip: The GFS2 inode
+ * @height: The height to build to
+ *
+ * This routine makes sure that the metadata tree is tall enough to hold
+ * "size" bytes of data.
+ *
+ * Returns: errno
+ */
+
+static int build_height(struct gfs2_inode *ip, int height)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *bh, *dibh;
+	uint64_t block = 0, *bp;
+	unsigned int x;
+	int new_block;
+	int error;
+
+	while (ip->i_di.di_height < height) {
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			return error;
+
+		new_block = FALSE;
+		bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
+		for (x = 0; x < sdp->sd_diptrs; x++, bp++)
+			if (*bp) {
+				new_block = TRUE;
+				break;
+			}
+
+		if (new_block) {
+			/* Get a new block, fill it with the old direct
+			   pointers, and write it out */
+
+			block = gfs2_alloc_meta(ip);
+
+			bh = gfs2_meta_new(ip->i_gl, block);
+			gfs2_trans_add_bh(ip->i_gl, bh);
+			gfs2_metatype_set(bh,
+					  GFS2_METATYPE_IN,
+					  GFS2_FORMAT_IN);
+			gfs2_buffer_copy_tail(bh,
+					      sizeof(struct gfs2_meta_header),
+					      dibh, sizeof(struct gfs2_dinode));
+
+			brelse(bh);
+		}
+
+		/*  Set up the new direct pointer and write it out to disk  */
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+
+		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+
+		if (new_block) {
+			*(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_gfs2_64(block);
+			ip->i_di.di_blocks++;
+		}
+
+		ip->i_di.di_height++;
+
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	return 0;
+}
+
+/**
+ * find_metapath - Find path through the metadata tree
+ * @ip: The inode pointer
+ * @mp: The metapath to return the result in
+ * @block: The disk block to look up
+ *
+ *   This routine returns a struct metapath structure that defines a path
+ *   through the metadata of inode "ip" to get to block "block".
+ *
+ *   Example:
+ *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
+ *   filesystem with a blocksize of 4096.
+ *
+ *   find_metapath() would return a struct metapath structure set to:
+ *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
+ *   and mp_list[2] = 165.
+ *
+ *   That means that in order to get to the block containing the byte at
+ *   offset 101342453, we would load the indirect block pointed to by pointer
+ *   0 in the dinode.  We would then load the indirect block pointed to by
+ *   pointer 48 in that indirect block.  We would then load the data block
+ *   pointed to by pointer 165 in that indirect block.
+ *
+ *             ----------------------------------------
+ *             | Dinode |                             |
+ *             |        |                            4|
+ *             |        |0 1 2 3 4 5                 9|
+ *             |        |                            6|
+ *             ----------------------------------------
+ *                       |
+ *                       |
+ *                       V
+ *             ----------------------------------------
+ *             | Indirect Block                       |
+ *             |                                     5|
+ *             |            4 4 4 4 4 5 5            1|
+ *             |0           5 6 7 8 9 0 1            2|
+ *             ----------------------------------------
+ *                                |
+ *                                |
+ *                                V
+ *             ----------------------------------------
+ *             | Indirect Block                       |
+ *             |                         1 1 1 1 1   5|
+ *             |                         6 6 6 6 6   1|
+ *             |0                        3 4 5 6 7   2|
+ *             ----------------------------------------
+ *                                           |
+ *                                           |
+ *                                           V
+ *             ----------------------------------------
+ *             | Data block containing offset         |
+ *             |            101342453                 |
+ *             |                                      |
+ *             |                                      |
+ *             ----------------------------------------
+ *
+ */
+
+static struct metapath *find_metapath(struct gfs2_inode *ip, uint64_t block)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct metapath *mp;
+	uint64_t b = block;
+	unsigned int i;
+
+	mp = kzalloc(sizeof(struct metapath), GFP_KERNEL | __GFP_NOFAIL);
+
+	for (i = ip->i_di.di_height; i--;)
+		mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
+
+	return mp;
+}
+
+/**
+ * metapointer - Return pointer to start of metadata in a buffer
+ * @bh: The buffer
+ * @height: The metadata height (0 = dinode)
+ * @mp: The metapath
+ *
+ * Return a pointer to the block number of the next height of the metadata
+ * tree given a buffer containing the pointer to the current height of the
+ * metadata tree.
+ */
+
+static inline uint64_t *metapointer(struct buffer_head *bh,
+				    unsigned int height, struct metapath *mp)
+{
+	unsigned int head_size = (height > 0) ?
+		sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
+
+	return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
+}
+
+/**
+ * lookup_block - Get the next metadata block in metadata tree
+ * @ip: The GFS2 inode
+ * @bh: Buffer containing the pointers to metadata blocks
+ * @height: The height of the tree (0 = dinode)
+ * @mp: The metapath
+ * @create: Non-zero if we may create a new meatdata block
+ * @new: Used to indicate if we did create a new metadata block
+ * @block: the returned disk block number
+ *
+ * Given a metatree, complete to a particular height, checks to see if the next
+ * height of the tree exists. If not the next height of the tree is created.
+ * The block number of the next height of the metadata tree is returned.
+ *
+ */
+
+static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
+			 unsigned int height, struct metapath *mp, int create,
+			 int *new, uint64_t *block)
+{
+	uint64_t *ptr = metapointer(bh, height, mp);
+
+	if (*ptr) {
+		*block = gfs2_64_to_cpu(*ptr);
+		return;
+	}
+
+	*block = 0;
+
+	if (!create)
+		return;
+
+	if (height == ip->i_di.di_height - 1 &&
+	    !gfs2_is_jdata(ip))
+		*block = gfs2_alloc_data(ip);
+	else
+		*block = gfs2_alloc_meta(ip);
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+
+	*ptr = cpu_to_gfs2_64(*block);
+	ip->i_di.di_blocks++;
+
+	*new = 1;
+}
+
+/**
+ * gfs2_block_map - Map a block from an inode to a disk block
+ * @ip: The GFS2 inode
+ * @lblock: The logical block number
+ * @new: Value/Result argument (1 = may create/did create new blocks)
+ * @dblock: the disk block number of the start of an extent
+ * @extlen: the size of the extent
+ *
+ * Find the block number on the current device which corresponds to an
+ * inode's block. If the block had to be created, "new" will be set.
+ *
+ * Returns: errno
+ */
+
+int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
+		   uint64_t *dblock, uint32_t *extlen)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *bh;
+	struct metapath *mp;
+	int create = *new;
+	unsigned int bsize;
+	unsigned int height;
+	unsigned int end_of_metadata;
+	unsigned int x;
+	int error = 0;
+
+	*new = 0;
+	*dblock = 0;
+	if (extlen)
+		*extlen = 0;
+
+	if (create)
+		down_write(&ip->i_rw_mutex);
+	else
+		down_read(&ip->i_rw_mutex);
+
+	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+		goto out;
+
+	bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
+
+	height = calc_tree_height(ip, (lblock + 1) * bsize);
+	if (ip->i_di.di_height < height) {
+		if (!create)
+			goto out;
+
+		error = build_height(ip, height);
+		if (error)
+			goto out;
+	}
+
+	mp = find_metapath(ip, lblock);
+	end_of_metadata = ip->i_di.di_height - 1;
+
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error)
+		goto out_kfree;
+
+	for (x = 0; x < end_of_metadata; x++) {
+		lookup_block(ip, bh, x, mp, create, new, dblock);
+		brelse(bh);
+		if (!*dblock)
+			goto out_kfree;
+
+		error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh);
+		if (error)
+			goto out_kfree;
+	}
+
+	lookup_block(ip, bh, end_of_metadata, mp, create, new, dblock);
+
+	if (extlen && *dblock) {
+		*extlen = 1;
+
+		if (!*new) {
+			uint64_t tmp_dblock;
+			int tmp_new;
+			unsigned int nptrs;
+
+			nptrs = (end_of_metadata) ? sdp->sd_inptrs :
+						    sdp->sd_diptrs;
+
+			while (++mp->mp_list[end_of_metadata] < nptrs) {
+				lookup_block(ip, bh, end_of_metadata, mp,
+					     FALSE, &tmp_new, &tmp_dblock);
+
+				if (*dblock + *extlen != tmp_dblock)
+					break;
+
+				(*extlen)++;
+			}
+		}
+	}
+
+	brelse(bh);
+
+	if (*new) {
+		error = gfs2_meta_inode_buffer(ip, &bh);
+		if (!error) {
+			gfs2_trans_add_bh(ip->i_gl, bh);
+			gfs2_dinode_out(&ip->i_di, bh->b_data);
+			brelse(bh);
+		}
+	}
+
+ out_kfree:
+	kfree(mp);
+
+ out:
+	if (create)
+		up_write(&ip->i_rw_mutex);
+	else
+		up_read(&ip->i_rw_mutex);
+
+	return error;
+}
+
+/**
+ * recursive_scan - recursively scan through the end of a file
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @mp: the path through the metadata to the point to start
+ * @height: the height the recursion is at
+ * @block: the indirect block to look at
+ * @first: TRUE if this is the first block
+ * @bc: the call to make for each piece of metadata
+ * @data: data opaque to this function to pass to @bc
+ *
+ * When this is first called @height and @block should be zero and
+ * @first should be TRUE.
+ *
+ * Returns: errno
+ */
+
+static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
+			  struct metapath *mp, unsigned int height,
+			  uint64_t block, int first, block_call_t bc,
+			  void *data)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *bh = NULL;
+	uint64_t *top, *bottom;
+	uint64_t bn;
+	int error;
+	int mh_size = sizeof(struct gfs2_meta_header);
+
+	if (!height) {
+		error = gfs2_meta_inode_buffer(ip, &bh);
+		if (error)
+			return error;
+		dibh = bh;
+
+		top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
+			mp->mp_list[0];
+		bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
+			sdp->sd_diptrs;
+	} else {
+		error = gfs2_meta_indirect_buffer(ip, height, block, FALSE,
+						  &bh);
+		if (error)
+			return error;
+
+		top = (uint64_t *)(bh->b_data + mh_size) +
+				  ((first) ? mp->mp_list[height] : 0);
+
+		bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+	}
+
+	error = bc(ip, dibh, bh, top, bottom, height, data);
+	if (error)
+		goto out;
+
+	if (height < ip->i_di.di_height - 1)
+		for (; top < bottom; top++, first = FALSE) {
+			if (!*top)
+				continue;
+
+			bn = gfs2_64_to_cpu(*top);
+
+			error = recursive_scan(ip, dibh, mp, height + 1, bn,
+					       first, bc, data);
+			if (error)
+				break;
+		}
+
+ out:
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * do_strip - Look for a layer a particular layer of the file and strip it off
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @bh: A buffer of pointers
+ * @top: The first pointer in the buffer
+ * @bottom: One more than the last pointer
+ * @height: the height this buffer is at
+ * @data: a pointer to a struct strip_mine
+ *
+ * Returns: errno
+ */
+
+static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
+		    struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
+		    unsigned int height, void *data)
+{
+	struct strip_mine *sm = (struct strip_mine *)data;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrp_list rlist;
+	uint64_t bn, bstart;
+	uint32_t blen;
+	uint64_t *p;
+	unsigned int rg_blocks = 0;
+	int metadata;
+	unsigned int revokes = 0;
+	int x;
+	int error;
+
+	if (!*top)
+		sm->sm_first = FALSE;
+
+	if (height != sm->sm_height)
+		return 0;
+
+	if (sm->sm_first) {
+		top++;
+		sm->sm_first = FALSE;
+	}
+
+	metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip);
+	if (metadata)
+		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
+
+	error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+	if (error)
+		return error;
+
+	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
+	bstart = 0;
+	blen = 0;
+
+	for (p = top; p < bottom; p++) {
+		if (!*p)
+			continue;
+
+		bn = gfs2_64_to_cpu(*p);
+
+		if (bstart + blen == bn)
+			blen++;
+		else {
+			if (bstart)
+				gfs2_rlist_add(sdp, &rlist, bstart);
+
+			bstart = bn;
+			blen = 1;
+		}
+	}
+
+	if (bstart)
+		gfs2_rlist_add(sdp, &rlist, bstart);
+	else
+		goto out; /* Nothing to do */
+
+	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
+
+	for (x = 0; x < rlist.rl_rgrps; x++) {
+		struct gfs2_rgrpd *rgd;
+		rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
+		rg_blocks += rgd->rd_ri.ri_length;
+	}
+
+	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
+	if (error)
+		goto out_rlist;
+
+	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
+				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
+				 revokes);
+	if (error)
+		goto out_rg_gunlock;
+
+	down_write(&ip->i_rw_mutex);
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_trans_add_bh(ip->i_gl, bh);
+
+	bstart = 0;
+	blen = 0;
+
+	for (p = top; p < bottom; p++) {
+		if (!*p)
+			continue;
+
+		bn = gfs2_64_to_cpu(*p);
+
+		if (bstart + blen == bn)
+			blen++;
+		else {
+			if (bstart) {
+				if (metadata)
+					gfs2_free_meta(ip, bstart, blen);
+				else
+					gfs2_free_data(ip, bstart, blen);
+			}
+
+			bstart = bn;
+			blen = 1;
+		}
+
+		*p = 0;
+		if (!ip->i_di.di_blocks)
+			gfs2_consist_inode(ip);
+		ip->i_di.di_blocks--;
+	}
+	if (bstart) {
+		if (metadata)
+			gfs2_free_meta(ip, bstart, blen);
+		else
+			gfs2_free_data(ip, bstart, blen);
+	}
+
+	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+
+	up_write(&ip->i_rw_mutex);
+
+	gfs2_trans_end(sdp);
+
+ out_rg_gunlock:
+	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
+
+ out_rlist:
+	gfs2_rlist_free(&rlist);
+
+ out:
+	gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+
+	return error;
+}
+
+/**
+ * do_grow - Make a file look bigger than it is
+ * @ip: the inode
+ * @size: the size to set the file to
+ *
+ * Called with an exclusive lock on @ip.
+ *
+ * Returns: errno
+ */
+
+static int do_grow(struct gfs2_inode *ip, uint64_t size)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al;
+	struct buffer_head *dibh;
+	unsigned int h;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	if (error)
+		goto out_gunlock_q;
+
+	al->al_requested = sdp->sd_max_height + RES_DATA;
+
+	error = gfs2_inplace_reserve(ip);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_trans_begin(sdp,
+			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
+	if (error)
+		goto out_ipres;
+
+	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+		if (gfs2_is_stuffed(ip)) {
+			error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
+						    NULL);
+			if (error)
+				goto out_end_trans;
+		}
+
+		h = calc_tree_height(ip, size);
+		if (ip->i_di.di_height < h) {
+			down_write(&ip->i_rw_mutex);
+			error = build_height(ip, h);
+			up_write(&ip->i_rw_mutex);
+			if (error)
+				goto out_end_trans;
+		}
+	}
+
+	ip->i_di.di_size = size;
+	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_end_trans;
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipres:
+	gfs2_inplace_release(ip);
+
+ out_gunlock_q:
+	gfs2_quota_unlock(ip);
+
+ out:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+static int truncator_journaled(struct gfs2_inode *ip, uint64_t size)
+{
+	uint64_t lbn, dbn;
+	uint32_t off;
+	struct buffer_head *bh;
+	int new = FALSE;
+	int error;
+
+	lbn = size;
+	off = do_div(lbn, ip->i_sbd->sd_jbsize);
+
+	error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
+	if (error || !dbn)
+		return error;
+
+	error = gfs2_jdata_get_buffer(ip, dbn, FALSE, &bh);
+	if (error)
+		return error;
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+	gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off);
+
+	brelse(bh);
+
+	return 0;
+}
+
+static int trunc_start(struct gfs2_inode *ip, uint64_t size,
+		       gfs2_truncator_t truncator)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh;
+	int journaled = gfs2_is_jdata(ip);
+	int error;
+
+	error = gfs2_trans_begin(sdp,
+				 RES_DINODE + ((journaled) ? RES_JDATA : 0), 0);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		ip->i_di.di_size = size;
+		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
+		error = 1;
+
+	} else {
+		if (journaled) {
+			uint64_t junk = size;
+			/* we're just interested in the modulus */
+			if (do_div(junk, sdp->sd_jbsize))
+				error = truncator_journaled(ip, size);
+		} else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
+			error = truncator(ip, size);
+
+		if (!error) {
+			ip->i_di.di_size = size;
+			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
+			gfs2_trans_add_bh(ip->i_gl, dibh);
+			gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		}
+	}
+
+	brelse(dibh);
+
+ out:
+	gfs2_trans_end(sdp);
+
+	return error;
+}
+
+static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
+{
+	unsigned int height = ip->i_di.di_height;
+	uint64_t lblock;
+	struct metapath *mp;
+	int error;
+
+	if (!size)
+		lblock = 0;
+	else if (gfs2_is_jdata(ip)) {
+		lblock = size - 1;
+		do_div(lblock, ip->i_sbd->sd_jbsize);
+	} else
+		lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
+
+	mp = find_metapath(ip, lblock);
+	gfs2_alloc_get(ip);
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	while (height--) {
+		struct strip_mine sm;
+		sm.sm_first = !!size;
+		sm.sm_height = height;
+
+		error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_strip, &sm);
+		if (error)
+			break;
+	}
+
+	gfs2_quota_unhold(ip);
+
+ out:
+	gfs2_alloc_put(ip);
+	kfree(mp);
+
+	return error;
+}
+
+static int trunc_end(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		return error;
+
+	down_write(&ip->i_rw_mutex);
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	if (!ip->i_di.di_size) {
+		ip->i_di.di_height = 0;
+		ip->i_di.di_goal_meta =
+			ip->i_di.di_goal_data =
+			ip->i_num.no_addr;
+		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+	}
+	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+ out:
+	up_write(&ip->i_rw_mutex);
+
+	gfs2_trans_end(sdp);
+
+	return error;
+}
+
+/**
+ * do_shrink - make a file smaller
+ * @ip: the inode
+ * @size: the size to make the file
+ * @truncator: function to truncate the last partial block
+ *
+ * Called with an exclusive lock on @ip.
+ *
+ * Returns: errno
+ */
+
+static int do_shrink(struct gfs2_inode *ip, uint64_t size,
+		     gfs2_truncator_t truncator)
+{
+	int error;
+
+	error = trunc_start(ip, size, truncator);
+	if (error < 0)
+		return error;
+	if (error > 0)
+		return 0;
+
+	error = trunc_dealloc(ip, size);
+	if (!error)
+		error = trunc_end(ip);
+
+	return error;
+}
+
+/**
+ * gfs2_truncatei - make a file a give size
+ * @ip: the inode
+ * @size: the size to make the file
+ * @truncator: function to truncate the last partial block
+ *
+ * The file size can grow, shrink, or stay the same size.
+ *
+ * Returns: errno
+ */
+
+int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size,
+		   gfs2_truncator_t truncator)
+{
+	int error;
+
+	if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode)))
+		return -EINVAL;
+
+	if (size > ip->i_di.di_size)
+		error = do_grow(ip, size);
+	else
+		error = do_shrink(ip, size, truncator);
+
+	return error;
+}
+
+int gfs2_truncatei_resume(struct gfs2_inode *ip)
+{
+	int error;
+	error = trunc_dealloc(ip, ip->i_di.di_size);
+	if (!error)
+		error = trunc_end(ip);
+	return error;
+}
+
+int gfs2_file_dealloc(struct gfs2_inode *ip)
+{
+	return trunc_dealloc(ip, 0);
+}
+
+/**
+ * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
+ * @ip: the file
+ * @len: the number of bytes to be written to the file
+ * @data_blocks: returns the number of data blocks required
+ * @ind_blocks: returns the number of indirect blocks required
+ *
+ */
+
+void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
+			    unsigned int *data_blocks, unsigned int *ind_blocks)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int tmp;
+
+	if (gfs2_is_jdata(ip)) {
+		*data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
+		*ind_blocks = 3 * (sdp->sd_max_jheight - 1);
+	} else {
+		*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
+		*ind_blocks = 3 * (sdp->sd_max_height - 1);
+	}
+
+	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
+		tmp = DIV_RU(tmp, sdp->sd_inptrs);
+		*ind_blocks += tmp;
+	}
+}
+
+/**
+ * gfs2_write_alloc_required - figure out if a write will require an allocation
+ * @ip: the file being written to
+ * @offset: the offset to write to
+ * @len: the number of bytes being written
+ * @alloc_required: set to TRUE if an alloc is required, FALSE otherwise
+ *
+ * Returns: errno
+ */
+
+int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
+			      unsigned int len, int *alloc_required)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	uint64_t lblock, lblock_stop, dblock;
+	uint32_t extlen;
+	int new = FALSE;
+	int error = 0;
+
+	*alloc_required = FALSE;
+
+	if (!len)
+		return 0;
+
+	if (gfs2_is_stuffed(ip)) {
+		if (offset + len >
+		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
+			*alloc_required = TRUE;
+		return 0;
+	}
+
+	if (gfs2_is_jdata(ip)) {
+		unsigned int bsize = sdp->sd_jbsize;
+		lblock = offset;
+		do_div(lblock, bsize);
+		lblock_stop = offset + len + bsize - 1;
+		do_div(lblock_stop, bsize);
+	} else {
+		unsigned int shift = sdp->sd_sb.sb_bsize_shift;
+		lblock = offset >> shift;
+		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
+	}
+
+	for (; lblock < lblock_stop; lblock += extlen) {
+		error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
+		if (error)
+			return error;
+
+		if (!dblock) {
+			*alloc_required = TRUE;
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * do_gfm - Copy out the dinode/indirect blocks of a file
+ * @ip: the file
+ * @dibh: the dinode buffer
+ * @bh: the indirect buffer we're looking at
+ * @top: the first pointer in the block
+ * @bottom: one more than the last pointer in the block
+ * @height: the height the block is at
+ * @data: a pointer to a struct gfs2_user_buffer structure
+ *
+ * If this is a journaled file, copy out the data too.
+ *
+ * Returns: errno
+ */
+
+static int do_gfm(struct gfs2_inode *ip, struct buffer_head *dibh,
+		  struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
+		  unsigned int height, void *data)
+{
+	struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data;
+	int error;
+
+	error = gfs2_add_bh_to_ub(ub, bh);
+	if (error)
+		return error;
+
+	if (!S_ISDIR(ip->i_di.di_mode) ||
+	    height + 1 != ip->i_di.di_height)
+		return 0;
+
+	for (; top < bottom; top++)
+		if (*top) {
+			struct buffer_head *data_bh;
+
+			error = gfs2_meta_read(ip->i_gl, gfs2_64_to_cpu(*top),
+					       DIO_START | DIO_WAIT,
+					       &data_bh);
+			if (error)
+				return error;
+
+			error = gfs2_add_bh_to_ub(ub, data_bh);
+
+			brelse(data_bh);
+
+			if (error)
+				return error;
+		}
+
+	return 0;
+}
+
+/**
+ * gfs2_get_file_meta - return all the metadata for a file
+ * @ip: the file
+ * @ub: the structure representing the meta
+ *
+ * Returns: errno
+ */
+
+int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub)
+{
+	int error;
+
+	if (gfs2_is_stuffed(ip)) {
+		struct buffer_head *dibh;
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (!error) {
+			error = gfs2_add_bh_to_ub(ub, dibh);
+			brelse(dibh);
+		}
+	} else {
+		struct metapath *mp = find_metapath(ip, 0);
+		error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub);
+		kfree(mp);
+	}
+
+	return error;
+}
+
--- a/fs/gfs2/bmap.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/bmap.h	2005-09-01 17:36:55.156139640 +0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __BMAP_DOT_H__
+#define __BMAP_DOT_H__
+
+typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip,
+				 struct buffer_head * dibh, uint64_t block,
+				 void *private);
+int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
+			uint64_t block, void *private);
+int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
+			void *private);
+
+int gfs2_block_map(struct gfs2_inode *ip,
+		   uint64_t lblock, int *new,
+		   uint64_t *dblock, uint32_t *extlen);
+
+typedef int (*gfs2_truncator_t) (struct gfs2_inode * ip, uint64_t size);
+int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size,
+		   gfs2_truncator_t truncator);
+int gfs2_truncatei_resume(struct gfs2_inode *ip);
+int gfs2_file_dealloc(struct gfs2_inode *ip);
+
+void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
+			    unsigned int *data_blocks,
+			    unsigned int *ind_blocks);
+int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
+			      unsigned int len, int *alloc_required);
+
+int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);
+
+#endif /* __BMAP_DOT_H__ */
--- a/fs/gfs2/daemon.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/daemon.c	2005-09-01 17:36:55.167137968 +0800
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "daemon.h"
+#include "glock.h"
+#include "log.h"
+#include "quota.h"
+#include "recovery.h"
+#include "super.h"
+#include "unlinked.h"
+
+/* This uses schedule_timeout() instead of msleep() because it's good for
+   the daemons to wake up more often than the timeout when unmounting so
+   the user's unmount doesn't sit there forever.
+   
+   The kthread functions used to start these daemons block and flush signals. */
+
+/**
+ * gfs2_scand - Look for cached glocks and inodes to toss from memory
+ * @sdp: Pointer to GFS2 superblock
+ *
+ * One of these daemons runs, finding candidates to add to sd_reclaim_list.
+ * See gfs2_glockd()
+ */
+
+int gfs2_scand(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	unsigned long t;
+
+	while (!kthread_should_stop()) {
+		gfs2_scand_internal(sdp);
+		t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_glockd - Reclaim unused glock structures
+ * @sdp: Pointer to GFS2 superblock
+ *
+ * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
+ * Number of daemons can be set by user, with num_glockd mount option.
+ */
+
+int gfs2_glockd(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	DECLARE_WAITQUEUE(wait_chan, current);
+
+	while (!kthread_should_stop()) {
+		while (atomic_read(&sdp->sd_reclaim_count))
+			gfs2_reclaim_glock(sdp);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
+		if (!atomic_read(&sdp->sd_reclaim_count) &&
+		    !kthread_should_stop())
+			schedule();
+		remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
+		set_current_state(TASK_RUNNING);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_recoverd - Recover dead machine's journals
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_recoverd(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	unsigned long t;
+
+	while (!kthread_should_stop()) {
+		gfs2_check_journals(sdp);
+		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
+ * @sdp: Pointer to GFS2 superblock
+ *
+ * Also, periodically check to make sure that we're using the most recent
+ * journal index.
+ */
+
+int gfs2_logd(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	struct gfs2_holder ji_gh;
+	unsigned long t;
+
+	while (!kthread_should_stop()) {
+		/* Advance the log tail */
+
+		t = sdp->sd_log_flush_time +
+		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
+
+		gfs2_ail1_empty(sdp, DIO_ALL);
+
+		if (time_after_eq(jiffies, t)) {
+			gfs2_log_flush(sdp);
+			sdp->sd_log_flush_time = jiffies;
+		}
+
+		/* Check for latest journal index */
+
+		t = sdp->sd_jindex_refresh_time +
+		    gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
+
+		if (time_after_eq(jiffies, t)) {
+			if (!gfs2_jindex_hold(sdp, &ji_gh))
+				gfs2_glock_dq_uninit(&ji_gh);
+			sdp->sd_jindex_refresh_time = jiffies;
+		}
+
+		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_quotad - Write cached quota changes into the quota file
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_quotad(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	unsigned long t;
+	int error;
+
+	while (!kthread_should_stop()) {
+		/* Update the master statfs file */
+
+		t = sdp->sd_statfs_sync_time +
+		    gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
+
+		if (time_after_eq(jiffies, t)) {
+			error = gfs2_statfs_sync(sdp);
+			if (error &&
+			    error != -EROFS &&
+			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+				fs_err(sdp, "quotad: (1) error=%d\n", error);
+			sdp->sd_statfs_sync_time = jiffies;
+		}
+
+		/* Update quota file */
+
+		t = sdp->sd_quota_sync_time +
+		    gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
+
+		if (time_after_eq(jiffies, t)) {
+			error = gfs2_quota_sync(sdp);
+			if (error &&
+			    error != -EROFS &&
+			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+				fs_err(sdp, "quotad: (2) error=%d\n", error);
+			sdp->sd_quota_sync_time = jiffies;
+		}
+
+		gfs2_quota_scan(sdp);
+
+		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_inoded - Deallocate unlinked inodes
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_inoded(void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
+	unsigned long t;
+	int error;
+
+	while (!kthread_should_stop()) {
+		error = gfs2_unlinked_dealloc(sdp);
+		if (error &&
+		    error != -EROFS &&
+		    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+			fs_err(sdp, "inoded: error = %d\n", error);
+
+		t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ;
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
--- a/fs/gfs2/daemon.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/daemon.h	2005-09-01 17:36:55.172137208 +0800
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __DAEMON_DOT_H__
+#define __DAEMON_DOT_H__
+
+int gfs2_scand(void *data);
+int gfs2_glockd(void *data);
+int gfs2_recoverd(void *data);
+int gfs2_logd(void *data);
+int gfs2_quotad(void *data);
+int gfs2_inoded(void *data);
+
+#endif /* __DAEMON_DOT_H__ */
--- a/fs/gfs2/format.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/format.h	2005-09-01 17:36:55.202132648 +0800
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __FORMAT_DOT_H__
+#define __FORMAT_DOT_H__
+
+static const uint32_t gfs2_old_fs_formats[] = {
+	0
+};
+
+static const uint32_t gfs2_old_multihost_formats[] = {
+	0
+};
+
+#endif /* __FORMAT_DOT_H__ */
--- a/fs/gfs2/glops.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/glops.c	2005-09-01 17:36:55.262123528 +0800
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "page.h"
+#include "recovery.h"
+#include "rgrp.h"
+
+/**
+ * meta_go_sync - sync out the metadata for this glock
+ * @gl: the glock
+ * @flags: DIO_*
+ *
+ * Called when demoting or unlocking an EX glock.  We must flush
+ * to disk all dirty buffers/pages relating to this glock, and must not
+ * not return to caller to demote/unlock the glock until I/O is complete.
+ */
+
+static void meta_go_sync(struct gfs2_glock *gl, int flags)
+{
+	if (!(flags & DIO_METADATA))
+		return;
+
+	if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
+		gfs2_log_flush_glock(gl);
+		gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
+		if (flags & DIO_RELEASE)
+			gfs2_ail_empty_gl(gl);
+	}
+
+	clear_bit(GLF_SYNC, &gl->gl_flags);
+}
+
+/**
+ * meta_go_inval - invalidate the metadata for this glock
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void meta_go_inval(struct gfs2_glock *gl, int flags)
+{
+	if (!(flags & DIO_METADATA))
+		return;
+
+	gfs2_meta_inval(gl);
+	gl->gl_vn++;
+}
+
+/**
+ * meta_go_demote_ok - Check to see if it's ok to unlock a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if we have no cached data; ok to demote meta glock
+ */
+
+static int meta_go_demote_ok(struct gfs2_glock *gl)
+{
+	return !gl->gl_aspace->i_mapping->nrpages;
+}
+
+/**
+ * inode_go_xmote_th - promote/demote a glock
+ * @gl: the glock
+ * @state: the requested state
+ * @flags:
+ *
+ */
+
+static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
+			      int flags)
+{
+	if (gl->gl_state != LM_ST_UNLOCKED)
+		gfs2_pte_inval(gl);
+	gfs2_glock_xmote_th(gl, state, flags);
+}
+
+/**
+ * inode_go_xmote_bh - After promoting/demoting a glock
+ * @gl: the glock
+ *
+ */
+
+static void inode_go_xmote_bh(struct gfs2_glock *gl)
+{
+	struct gfs2_holder *gh = gl->gl_req_gh;
+	struct buffer_head *bh;
+	int error;
+
+	if (gl->gl_state != LM_ST_UNLOCKED &&
+	    (!gh || !(gh->gh_flags & GL_SKIP))) {
+		error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START,
+				       &bh);
+		if (!error)
+			brelse(bh);
+	}
+}
+
+/**
+ * inode_go_drop_th - unlock a glock
+ * @gl: the glock
+ *
+ * Invoked from rq_demote().
+ * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
+ * is being purged from our node's glock cache; we're dropping lock.
+ */
+
+static void inode_go_drop_th(struct gfs2_glock *gl)
+{
+	gfs2_pte_inval(gl);
+	gfs2_glock_drop_th(gl);
+}
+
+/**
+ * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
+ * @gl: the glock protecting the inode
+ * @flags:
+ *
+ */
+
+static void inode_go_sync(struct gfs2_glock *gl, int flags)
+{
+	int meta = (flags & DIO_METADATA);
+	int data = (flags & DIO_DATA);
+
+	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
+		if (meta && data) {
+			gfs2_page_sync(gl, flags | DIO_START);
+			gfs2_log_flush_glock(gl);
+			gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
+			gfs2_page_sync(gl, flags | DIO_WAIT);
+			clear_bit(GLF_DIRTY, &gl->gl_flags);
+		} else if (meta) {
+			gfs2_log_flush_glock(gl);
+			gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
+		} else if (data)
+			gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT);
+		if (flags & DIO_RELEASE)
+			gfs2_ail_empty_gl(gl);
+	}
+
+	clear_bit(GLF_SYNC, &gl->gl_flags);
+}
+
+/**
+ * inode_go_inval - prepare a inode glock to be released
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void inode_go_inval(struct gfs2_glock *gl, int flags)
+{
+	int meta = (flags & DIO_METADATA);
+	int data = (flags & DIO_DATA);
+
+	if (meta) {
+		gfs2_meta_inval(gl);
+		gl->gl_vn++;
+	}
+	if (data)
+		gfs2_page_inval(gl);
+}
+
+/**
+ * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int inode_go_demote_ok(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	int demote = FALSE;
+
+	if (!get_gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
+		demote = TRUE;
+	else if (!sdp->sd_args.ar_localcaching &&
+		 time_after_eq(jiffies, gl->gl_stamp +
+			       gfs2_tune_get(sdp, gt_demote_secs) * HZ))
+		demote = TRUE;
+
+	return demote;
+}
+
+/**
+ * inode_go_lock - operation done after an inode lock is locked by a process
+ * @gl: the glock
+ * @flags:
+ *
+ * Returns: errno
+ */
+
+static int inode_go_lock(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_inode *ip = get_gl2ip(gl);
+	int error = 0;
+
+	if (!ip)
+		return 0;
+
+	if (ip->i_vn != gl->gl_vn) {
+		error = gfs2_inode_refresh(ip);
+		if (error)
+			return error;
+		gfs2_inode_attr_in(ip);
+	}
+
+	if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
+	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
+	    (gh->gh_flags & GL_LOCAL_EXCL))
+		error = gfs2_truncatei_resume(ip);
+
+	return error;
+}
+
+/**
+ * inode_go_unlock - operation done before an inode lock is unlocked by a
+ *		     process
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void inode_go_unlock(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_inode *ip = get_gl2ip(gl);
+
+	if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
+		gfs2_inode_attr_in(ip);
+
+	if (ip)
+		gfs2_meta_cache_flush(ip);
+}
+
+/**
+ * inode_greedy -
+ * @gl: the glock
+ *
+ */
+
+static void inode_greedy(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_inode *ip = get_gl2ip(gl);
+	unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
+	unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
+	unsigned int new_time;
+
+	spin_lock(&ip->i_spin);
+
+	if (time_after(ip->i_last_pfault + quantum, jiffies)) {
+		new_time = ip->i_greedy + quantum;
+		if (new_time > max)
+			new_time = max;
+	} else {
+		new_time = ip->i_greedy - quantum;
+		if (!new_time || new_time > max)
+			new_time = 1;
+	}
+
+	ip->i_greedy = new_time;
+
+	spin_unlock(&ip->i_spin);
+
+	gfs2_inode_put(ip);
+}
+
+/**
+ * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int rgrp_go_demote_ok(struct gfs2_glock *gl)
+{
+	return !gl->gl_aspace->i_mapping->nrpages;
+}
+
+/**
+ * rgrp_go_lock - operation done after an rgrp lock is locked by
+ *    a first holder on this node.
+ * @gl: the glock
+ * @flags:
+ *
+ * Returns: errno
+ */
+
+static int rgrp_go_lock(struct gfs2_holder *gh)
+{
+	return gfs2_rgrp_bh_get(get_gl2rgd(gh->gh_gl));
+}
+
+/**
+ * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
+ *    a last holder on this node.
+ * @gl: the glock
+ * @flags:
+ *
+ */
+
+static void rgrp_go_unlock(struct gfs2_holder *gh)
+{
+	gfs2_rgrp_bh_put(get_gl2rgd(gh->gh_gl));
+}
+
+/**
+ * trans_go_xmote_th - promote/demote the transaction glock
+ * @gl: the glock
+ * @state: the requested state
+ * @flags:
+ *
+ */
+
+static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
+			      int flags)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+
+	if (gl->gl_state != LM_ST_UNLOCKED &&
+	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+		gfs2_meta_syncfs(sdp);
+		gfs2_log_shutdown(sdp);
+	}
+
+	gfs2_glock_xmote_th(gl, state, flags);
+}
+
+/**
+ * trans_go_xmote_bh - After promoting/demoting the transaction glock
+ * @gl: the glock
+ *
+ */
+
+static void trans_go_xmote_bh(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl;
+	struct gfs2_log_header head;
+	int error;
+
+	if (gl->gl_state != LM_ST_UNLOCKED &&
+	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+		gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode);
+		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+
+		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
+		if (error)
+			gfs2_consist(sdp);
+		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
+			gfs2_consist(sdp);
+
+		/*  Initialize some head of the log stuff  */
+		if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+			sdp->sd_log_sequence = head.lh_sequence + 1;
+			gfs2_log_pointers_init(sdp, head.lh_blkno);
+		}
+	}
+}
+
+/**
+ * trans_go_drop_th - unlock the transaction glock
+ * @gl: the glock
+ *
+ * We want to sync the device even with localcaching.  Remember
+ * that localcaching journal replay only marks buffers dirty.
+ */
+
+static void trans_go_drop_th(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+
+	if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+		gfs2_meta_syncfs(sdp);
+		gfs2_log_shutdown(sdp);
+	}
+
+	gfs2_glock_drop_th(gl);
+}
+
+/**
+ * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int quota_go_demote_ok(struct gfs2_glock *gl)
+{
+	return !atomic_read(&gl->gl_lvb_count);
+}
+
+struct gfs2_glock_operations gfs2_meta_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_sync = meta_go_sync,
+	.go_inval = meta_go_inval,
+	.go_demote_ok = meta_go_demote_ok,
+	.go_type = LM_TYPE_META
+};
+
+struct gfs2_glock_operations gfs2_inode_glops = {
+	.go_xmote_th = inode_go_xmote_th,
+	.go_xmote_bh = inode_go_xmote_bh,
+	.go_drop_th = inode_go_drop_th,
+	.go_sync = inode_go_sync,
+	.go_inval = inode_go_inval,
+	.go_demote_ok = inode_go_demote_ok,
+	.go_lock = inode_go_lock,
+	.go_unlock = inode_go_unlock,
+	.go_greedy = inode_greedy,
+	.go_type = LM_TYPE_INODE
+};
+
+struct gfs2_glock_operations gfs2_rgrp_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_sync = meta_go_sync,
+	.go_inval = meta_go_inval,
+	.go_demote_ok = rgrp_go_demote_ok,
+	.go_lock = rgrp_go_lock,
+	.go_unlock = rgrp_go_unlock,
+	.go_type = LM_TYPE_RGRP
+};
+
+struct gfs2_glock_operations gfs2_trans_glops = {
+	.go_xmote_th = trans_go_xmote_th,
+	.go_xmote_bh = trans_go_xmote_bh,
+	.go_drop_th = trans_go_drop_th,
+	.go_type = LM_TYPE_NONDISK
+};
+
+struct gfs2_glock_operations gfs2_iopen_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_callback = gfs2_iopen_go_callback,
+	.go_type = LM_TYPE_IOPEN
+};
+
+struct gfs2_glock_operations gfs2_flock_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_type = LM_TYPE_FLOCK
+};
+
+struct gfs2_glock_operations gfs2_nondisk_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_type = LM_TYPE_NONDISK
+};
+
+struct gfs2_glock_operations gfs2_quota_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_demote_ok = quota_go_demote_ok,
+	.go_type = LM_TYPE_QUOTA
+};
+
+struct gfs2_glock_operations gfs2_journal_glops = {
+	.go_xmote_th = gfs2_glock_xmote_th,
+	.go_drop_th = gfs2_glock_drop_th,
+	.go_type = LM_TYPE_JOURNAL
+};
+
--- a/fs/gfs2/glops.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/glops.h	2005-09-01 17:36:55.262123528 +0800
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __GLOPS_DOT_H__
+#define __GLOPS_DOT_H__
+
+extern struct gfs2_glock_operations gfs2_meta_glops;
+extern struct gfs2_glock_operations gfs2_inode_glops;
+extern struct gfs2_glock_operations gfs2_rgrp_glops;
+extern struct gfs2_glock_operations gfs2_trans_glops;
+extern struct gfs2_glock_operations gfs2_iopen_glops;
+extern struct gfs2_glock_operations gfs2_flock_glops;
+extern struct gfs2_glock_operations gfs2_nondisk_glops;
+extern struct gfs2_glock_operations gfs2_quota_glops;
+extern struct gfs2_glock_operations gfs2_journal_glops;
+
+#endif /* __GLOPS_DOT_H__ */
--- a/fs/gfs2/inode.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/inode.c	2005-09-01 17:36:55.303117296 +0800
@@ -0,0 +1,1793 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/posix_acl.h>
+#include <linux/sort.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "acl.h"
+#include "bmap.h"
+#include "dir.h"
+#include "eattr.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "ops_address.h"
+#include "ops_file.h"
+#include "ops_inode.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "unlinked.h"
+
+/**
+ * inode_attr_in - Copy attributes from the dinode into the VFS inode
+ * @ip: The GFS2 inode (with embedded disk inode data)
+ * @inode:  The Linux VFS inode
+ *
+ */
+
+static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
+{
+	inode->i_ino = ip->i_num.no_formal_ino;
+
+	switch (ip->i_di.di_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
+		break;
+	default:
+		inode->i_rdev = 0;
+		break;
+	};
+
+	inode->i_mode = ip->i_di.di_mode;
+	inode->i_nlink = ip->i_di.di_nlink;
+	inode->i_uid = ip->i_di.di_uid;
+	inode->i_gid = ip->i_di.di_gid;
+	i_size_write(inode, ip->i_di.di_size);
+	inode->i_atime.tv_sec = ip->i_di.di_atime;
+	inode->i_mtime.tv_sec = ip->i_di.di_mtime;
+	inode->i_ctime.tv_sec = ip->i_di.di_ctime;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_nsec = 0;
+	inode->i_blksize = PAGE_SIZE;
+	inode->i_blocks = ip->i_di.di_blocks <<
+		(ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
+
+	if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	else
+		inode->i_flags &= ~S_IMMUTABLE;
+
+	if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
+		inode->i_flags |= S_APPEND;
+	else
+		inode->i_flags &= ~S_APPEND;
+}
+
+/**
+ * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
+ * @ip: The GFS2 inode (with embedded disk inode data)
+ *
+ */
+
+void gfs2_inode_attr_in(struct gfs2_inode *ip)
+{
+	struct inode *inode;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (inode) {
+		inode_attr_in(ip, inode);
+		iput(inode);
+	}
+}
+
+/**
+ * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
+ * @ip: The GFS2 inode
+ *
+ * Only copy out the attributes that we want the VFS layer
+ * to be able to modify.
+ */
+
+void gfs2_inode_attr_out(struct gfs2_inode *ip)
+{
+	struct inode *inode = ip->i_vnode;
+
+	gfs2_assert_withdraw(ip->i_sbd,
+		(ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
+	ip->i_di.di_mode = inode->i_mode;
+	ip->i_di.di_uid = inode->i_uid;
+	ip->i_di.di_gid = inode->i_gid;
+	ip->i_di.di_atime = inode->i_atime.tv_sec;
+	ip->i_di.di_mtime = inode->i_mtime.tv_sec;
+	ip->i_di.di_ctime = inode->i_ctime.tv_sec;
+}
+
+/**
+ * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
+ * @ip: the struct gfs2_inode to get the struct inode for
+ *
+ * Returns: A VFS inode, or NULL if none
+ */
+
+struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
+{
+	struct inode *inode = NULL;
+
+	gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
+
+	spin_lock(&ip->i_spin);
+	if (ip->i_vnode)
+		inode = igrab(ip->i_vnode);
+	spin_unlock(&ip->i_spin);
+
+	return inode;
+}
+
+/**
+ * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
+ * @ip: the struct gfs2_inode to get the struct inode for
+ *
+ * Returns: A VFS inode, or NULL if no mem
+ */
+
+struct inode *gfs2_ip2v(struct gfs2_inode *ip)
+{
+	struct inode *inode, *tmp;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (inode)
+		return inode;
+
+	tmp = new_inode(ip->i_sbd->sd_vfs);
+	if (!tmp)
+		return NULL;
+
+	inode_attr_in(ip, tmp);
+
+	if (S_ISREG(ip->i_di.di_mode)) {
+		tmp->i_op = &gfs2_file_iops;
+		tmp->i_fop = &gfs2_file_fops;
+		tmp->i_mapping->a_ops = &gfs2_file_aops;
+	} else if (S_ISDIR(ip->i_di.di_mode)) {
+		tmp->i_op = &gfs2_dir_iops;
+		tmp->i_fop = &gfs2_dir_fops;
+	} else if (S_ISLNK(ip->i_di.di_mode)) {
+		tmp->i_op = &gfs2_symlink_iops;
+	} else {
+		tmp->i_op = &gfs2_dev_iops;
+		init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
+	}
+
+	set_v2ip(tmp, NULL);
+
+	for (;;) {
+		spin_lock(&ip->i_spin);
+		if (!ip->i_vnode)
+			break;
+		inode = igrab(ip->i_vnode);
+		spin_unlock(&ip->i_spin);
+
+		if (inode) {
+			iput(tmp);
+			return inode;
+		}
+		yield();
+	}
+
+	inode = tmp;
+
+	gfs2_inode_hold(ip);
+	ip->i_vnode = inode;
+	set_v2ip(inode, ip);
+
+	spin_unlock(&ip->i_spin);
+
+	insert_inode_hash(inode);
+
+	return inode;
+}
+
+static int iget_test(struct inode *inode, void *opaque)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
+
+	if (ip && ip->i_num.no_addr == inum->no_addr)
+		return 1;
+
+	return 0;
+}
+
+struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
+{
+	return ilookup5(sb, (unsigned long)inum->no_formal_ino,
+			iget_test, inum);
+}
+
+void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
+{
+	spin_lock(&ip->i_spin);
+	if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
+		ip->i_di.di_nlink = 1;
+		ip->i_di.di_mode = DT2IF(type);
+	}
+	spin_unlock(&ip->i_spin);
+}
+
+/**
+ * gfs2_inode_refresh - Refresh the incore copy of the dinode
+ * @ip: The GFS2 inode
+ *
+ * Returns: errno
+ */
+
+int gfs2_inode_refresh(struct gfs2_inode *ip)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
+		brelse(dibh);
+		return -EIO;
+	}
+
+	spin_lock(&ip->i_spin);
+	gfs2_dinode_in(&ip->i_di, dibh->b_data);
+	set_bit(GIF_MIN_INIT, &ip->i_flags);
+	spin_unlock(&ip->i_spin);
+
+	brelse(dibh);
+
+	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		return -EIO;
+	}
+	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
+		return -ESTALE;
+
+	ip->i_vn = ip->i_gl->gl_vn;
+
+	return 0;
+}
+
+/**
+ * inode_create - create a struct gfs2_inode
+ * @i_gl: The glock covering the inode
+ * @inum: The inode number
+ * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
+ * @io_state: the state the iopen glock should be acquired in
+ * @ipp: pointer to put the returned inode in
+ *
+ * Returns: errno
+ */
+
+static int inode_create(struct gfs2_glock *i_gl, struct gfs2_inum *inum,
+			struct gfs2_glock *io_gl, unsigned int io_state,
+			struct gfs2_inode **ipp)
+{
+	struct gfs2_sbd *sdp = i_gl->gl_sbd;
+	struct gfs2_inode *ip;
+	int error = 0;
+
+	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
+	if (!ip)
+		return -ENOMEM;
+	memset(ip, 0, sizeof(struct gfs2_inode));
+
+	ip->i_num = *inum;
+
+	atomic_set(&ip->i_count, 1);
+
+	ip->i_vn = i_gl->gl_vn - 1;
+
+	ip->i_gl = i_gl;
+	ip->i_sbd = sdp;
+
+	spin_lock_init(&ip->i_spin);
+	init_rwsem(&ip->i_rw_mutex);
+
+	ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
+
+	error = gfs2_glock_nq_init(io_gl,
+				   io_state, GL_LOCAL_EXCL | GL_EXACT,
+				   &ip->i_iopen_gh);
+	if (error)
+		goto fail;
+	ip->i_iopen_gh.gh_owner = NULL;
+
+	spin_lock(&io_gl->gl_spin);
+	gfs2_glock_hold(i_gl);
+	set_gl2gl(io_gl, i_gl);
+	spin_unlock(&io_gl->gl_spin);
+
+	gfs2_glock_hold(i_gl);
+	set_gl2ip(i_gl, ip);
+
+	atomic_inc(&sdp->sd_inode_count);
+
+	*ipp = ip;
+
+	return 0;
+
+ fail:
+	gfs2_meta_cache_flush(ip);
+	kmem_cache_free(gfs2_inode_cachep, ip);
+	*ipp = NULL;
+
+	return error;
+}
+
+/**
+ * gfs2_inode_get - Create or get a reference on an inode
+ * @i_gl: The glock covering the inode
+ * @inum: The inode number
+ * @create:
+ * @ipp: pointer to put the returned inode in
+ *
+ * Returns: errno
+ */
+
+int gfs2_inode_get(struct gfs2_glock *i_gl, struct gfs2_inum *inum, int create,
+		   struct gfs2_inode **ipp)
+{
+	struct gfs2_sbd *sdp = i_gl->gl_sbd;
+	struct gfs2_glock *io_gl;
+	int error = 0;
+
+	gfs2_glmutex_lock(i_gl);
+
+	*ipp = get_gl2ip(i_gl);
+	if (*ipp) {
+		error = -ESTALE;
+		if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
+			goto out;
+		atomic_inc(&(*ipp)->i_count);
+		error = 0;
+		goto out;
+	}
+
+	if (!create)
+		goto out;
+
+	error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
+			       CREATE, &io_gl);
+	if (!error) {
+		error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp);
+		gfs2_glock_put(io_gl);
+	}
+
+ out:
+	gfs2_glmutex_unlock(i_gl);
+
+	return error;
+}
+
+void gfs2_inode_hold(struct gfs2_inode *ip)
+{
+	gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0,);
+	atomic_inc(&ip->i_count);
+}
+
+void gfs2_inode_put(struct gfs2_inode *ip)
+{
+	gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0,);
+	atomic_dec(&ip->i_count);
+}
+
+void gfs2_inode_destroy(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
+	struct gfs2_glock *i_gl = ip->i_gl;
+
+	gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
+	gfs2_assert(sdp, get_gl2gl(io_gl) == i_gl,);
+
+	spin_lock(&io_gl->gl_spin);
+	set_gl2gl(io_gl, NULL);
+	gfs2_glock_put(i_gl);
+	spin_unlock(&io_gl->gl_spin);
+
+	gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+
+	gfs2_meta_cache_flush(ip);
+	kmem_cache_free(gfs2_inode_cachep, ip);
+
+	set_gl2ip(i_gl, NULL);
+	gfs2_glock_put(i_gl);
+
+	atomic_dec(&sdp->sd_inode_count);
+}
+
+static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al;
+	struct gfs2_rgrpd *rgd;
+	int error;
+
+	if (ip->i_di.di_blocks != 1) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		return -EIO;
+	}
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	if (error)
+		goto out_qs;
+
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	if (!rgd) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out_rindex_relse;
+	}
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+				   &al->al_rgd_gh);
+	if (error)
+		goto out_rindex_relse;
+
+	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
+				 RES_STATFS + RES_QUOTA, 1);
+	if (error)
+		goto out_rg_gunlock;
+
+	gfs2_trans_add_gl(ip->i_gl);
+
+	gfs2_free_di(rgd, ip);
+
+	error = gfs2_unlinked_ondisk_rm(sdp, ul);
+
+	gfs2_trans_end(sdp);
+	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
+
+ out_rg_gunlock:
+	gfs2_glock_dq_uninit(&al->al_rgd_gh);
+
+ out_rindex_relse:
+	gfs2_glock_dq_uninit(&al->al_ri_gh);
+
+ out_qs:
+	gfs2_quota_unhold(ip);
+
+ out:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
+ * @sdp: the filesystem
+ * @inum: the inode number to deallocate
+ * @io_gh: a holder for the iopen glock for this inode
+ *
+ * Returns: errno
+ */
+
+static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
+			 struct gfs2_holder *io_gh)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_holder i_gh;
+	int error;
+
+	error = gfs2_glock_nq_num(sdp,
+				  ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops,
+				  LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	/* We reacquire the iopen lock here to avoid a race with the NFS server
+	   calling gfs2_read_inode() with the inode number of a inode we're in
+	   the process of deallocating.  And we can't keep our hold on the lock
+	   from inode_dealloc_init() for deadlock reasons. */
+
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
+	error = gfs2_glock_nq(io_gh);
+	switch (error) {
+	case 0:
+		break;
+	case GLR_TRYFAILED:
+		error = 1;
+	default:
+		goto out;
+	}
+
+	gfs2_assert_warn(sdp, !get_gl2ip(i_gh.gh_gl));
+	error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
+			     LM_ST_EXCLUSIVE, &ip);
+
+	gfs2_glock_dq(io_gh);
+
+	if (error)
+		goto out;
+
+	error = gfs2_inode_refresh(ip);
+	if (error)
+		goto out_iput;
+
+	if (ip->i_di.di_nlink) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		error = -EIO;
+		goto out_iput;
+	}
+
+	if (S_ISDIR(ip->i_di.di_mode) &&
+	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+		error = gfs2_dir_exhash_dealloc(ip);
+		if (error)
+			goto out_iput;
+	}
+
+	if (ip->i_di.di_eattr) {
+		error = gfs2_ea_dealloc(ip);
+		if (error)
+			goto out_iput;
+	}
+
+	if (!gfs2_is_stuffed(ip)) {
+		error = gfs2_file_dealloc(ip);
+		if (error)
+			goto out_iput;
+	}
+
+	error = dinode_dealloc(ip, ul);
+	if (error)
+		goto out_iput;
+
+ out_iput:
+	gfs2_glmutex_lock(i_gh.gh_gl);
+	gfs2_inode_put(ip);
+	gfs2_inode_destroy(ip);
+	gfs2_glmutex_unlock(i_gh.gh_gl);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * try_inode_dealloc - Try to deallocate an inode and all its blocks
+ * @sdp: the filesystem
+ *
+ * Returns: 0 on success, -errno on error, 1 on busy (inode open)
+ */
+
+static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	struct gfs2_holder io_gh;
+	int error = 0;
+
+	gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);
+
+	error = gfs2_glock_nq_num(sdp,
+				  ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops,
+				  LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
+	switch (error) {
+	case 0:
+		break;
+	case GLR_TRYFAILED:
+		return 1;
+	default:
+		return error;
+	}
+
+	gfs2_glock_dq(&io_gh);
+	error = inode_dealloc(sdp, ul, &io_gh);
+	gfs2_holder_uninit(&io_gh);
+
+	return error;
+}
+
+static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_holder ri_gh, rgd_gh;
+	int error;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
+	if (!rgd) {
+		gfs2_consist(sdp);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
+	if (error)
+		goto out;
+
+	error = gfs2_trans_begin(sdp,
+				 RES_RG_BIT + RES_UNLINKED + RES_STATFS,
+				 0);
+	if (error)
+		goto out_gunlock;
+
+	gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
+	gfs2_unlinked_ondisk_rm(sdp, ul);
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_uninit(&rgd_gh);
+ out:
+	gfs2_glock_dq_uninit(&ri_gh);
+
+	return error;
+}
+
+int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
+		return inode_dealloc_uninit(sdp, ul);
+	else
+		return try_inode_dealloc(sdp, ul);
+}
+
+/**
+ * gfs2_change_nlink - Change nlink count on inode
+ * @ip: The GFS2 inode
+ * @diff: The change in the nlink count required
+ *
+ * Returns: errno
+ */
+
+int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
+{
+	struct buffer_head *dibh;
+	uint32_t nlink;
+	int error;
+
+	nlink = ip->i_di.di_nlink + diff;
+
+	/* If we are reducing the nlink count, but the new value ends up being
+	   bigger than the old one, we must have underflowed. */
+	if (diff < 0 && nlink > ip->i_di.di_nlink) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		return -EIO;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	ip->i_di.di_nlink = nlink;
+	ip->i_di.di_ctime = get_seconds();
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * gfs2_lookupi - Look up a filename in a directory and return its inode
+ * @d_gh: An initialized holder for the directory glock
+ * @name: The name of the inode to look for
+ * @is_root: If TRUE, ignore the caller's permissions
+ * @i_gh: An uninitialized holder for the new inode glock
+ *
+ * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
+ * @is_root is true.
+ *
+ * Returns: errno
+ */
+
+int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root,
+		 struct gfs2_inode **ipp)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_holder d_gh;
+	struct gfs2_inum inum;
+	unsigned int type;
+	struct gfs2_glock *gl;
+	int error;
+
+	if (!name->len || name->len > GFS2_FNAMESIZE)
+		return -ENAMETOOLONG;
+
+	if (gfs2_filecmp(name, ".", 1) ||
+	    (gfs2_filecmp(name, "..", 2) && dip == sdp->sd_root_dir)) {
+		gfs2_inode_hold(dip);
+		*ipp = dip;
+		return 0;
+	}
+
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	if (error)
+		return error;
+
+	if (!is_root) {
+		error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL);
+		if (error)
+			goto out;
+	}
+
+	error = gfs2_dir_search(dip, name, &inum, &type);
+	if (error)
+		goto out;
+
+	error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
+			       CREATE, &gl);
+	if (error)
+		goto out;
+
+	error = gfs2_inode_get(gl, &inum, CREATE, ipp);
+	if (!error)
+		gfs2_inode_min_init(*ipp, type);
+
+	gfs2_glock_put(gl);
+
+ out:
+	gfs2_glock_dq_uninit(&d_gh);
+
+	return error;
+}
+
+static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
+{
+	struct gfs2_inode *ip = sdp->sd_ir_inode;
+	struct buffer_head *bh;
+	struct gfs2_inum_range ir;
+	int error;
+
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		return error;
+	down(&sdp->sd_inum_mutex);
+
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error) {
+		up(&sdp->sd_inum_mutex);
+		gfs2_trans_end(sdp);
+		return error;
+	}
+
+	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
+
+	if (ir.ir_length) {
+		*formal_ino = ir.ir_start++;
+		ir.ir_length--;
+		gfs2_trans_add_bh(ip->i_gl, bh);
+		gfs2_inum_range_out(&ir,
+				    bh->b_data + sizeof(struct gfs2_dinode));
+		brelse(bh);
+		up(&sdp->sd_inum_mutex);
+		gfs2_trans_end(sdp);
+		return 0;
+	}
+
+	brelse(bh);
+
+	up(&sdp->sd_inum_mutex);
+	gfs2_trans_end(sdp);
+
+	return 1;
+}
+
+static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
+{
+	struct gfs2_inode *ip = sdp->sd_ir_inode;
+	struct gfs2_inode *m_ip = sdp->sd_inum_inode;
+	struct gfs2_holder gh;
+	struct buffer_head *bh;
+	struct gfs2_inum_range ir;
+	int error;
+
+	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (error)
+		return error;
+
+	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
+	if (error)
+		goto out;
+	down(&sdp->sd_inum_mutex);
+
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error)
+		goto out_end_trans;
+	
+	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
+
+	if (!ir.ir_length) {
+		struct buffer_head *m_bh;
+		uint64_t x, y;
+
+		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
+		if (error)
+			goto out_brelse;
+
+		x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
+		x = y = gfs2_64_to_cpu(x);
+		ir.ir_start = x;
+		ir.ir_length = GFS2_INUM_QUANTUM;
+		x += GFS2_INUM_QUANTUM;
+		if (x < y)
+			gfs2_consist_inode(m_ip);
+		x = cpu_to_gfs2_64(x);
+		gfs2_trans_add_bh(m_ip->i_gl, m_bh);
+		*(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
+
+		brelse(m_bh);
+	}
+
+	*formal_ino = ir.ir_start++;
+	ir.ir_length--;
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+	gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
+
+ out_brelse:
+	brelse(bh);
+
+ out_end_trans:
+	up(&sdp->sd_inum_mutex);
+	gfs2_trans_end(sdp);
+
+ out:
+	gfs2_glock_dq_uninit(&gh);
+
+	return error;
+}
+
+static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
+{
+	int error;
+
+	error = pick_formal_ino_1(sdp, inum);
+	if (error <= 0)
+		return error;
+
+	error = pick_formal_ino_2(sdp, inum);
+
+	return error;
+}
+
+/**
+ * create_ok - OK to create a new on-disk inode here?
+ * @dip:  Directory in which dinode is to be created
+ * @name:  Name of new dinode
+ * @mode:
+ *
+ * Returns: errno
+ */
+
+static int create_ok(struct gfs2_inode *dip, struct qstr *name,
+		     unsigned int mode)
+{
+	int error;
+
+	error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
+	if (error)
+		return error;
+
+	/*  Don't create entries in an unlinked directory  */
+	if (!dip->i_di.di_nlink)
+		return -EPERM;
+
+	error = gfs2_dir_search(dip, name, NULL, NULL);
+	switch (error) {
+	case -ENOENT:
+		error = 0;
+		break;
+	case 0:
+		return -EEXIST;
+	default:
+		return error;
+	}
+
+	if (dip->i_di.di_entries == (uint32_t)-1)
+		return -EFBIG;
+	if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
+		return -EMLINK;
+
+	return 0;
+}
+
+static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
+			       unsigned int *uid, unsigned int *gid)
+{
+	if (dip->i_sbd->sd_args.ar_suiddir &&
+	    (dip->i_di.di_mode & S_ISUID) &&
+	    dip->i_di.di_uid) {
+		if (S_ISDIR(*mode))
+			*mode |= S_ISUID;
+		else if (dip->i_di.di_uid != current->fsuid)
+			*mode &= ~07111;
+		*uid = dip->i_di.di_uid;
+	} else
+		*uid = current->fsuid;
+
+	if (dip->i_di.di_mode & S_ISGID) {
+		if (S_ISDIR(*mode))
+			*mode |= S_ISGID;
+		*gid = dip->i_di.di_gid;
+	} else
+		*gid = current->fsgid;
+}
+
+static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	int error;
+
+	gfs2_alloc_get(dip);
+
+	dip->i_alloc->al_requested = RES_DINODE;
+	error = gfs2_inplace_reserve(dip);
+	if (error)
+		goto out;
+
+	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
+				 RES_STATFS, 0);
+	if (error)
+		goto out_ipreserv;
+
+	ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);
+
+	ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
+	error = gfs2_unlinked_ondisk_add(sdp, ul);
+
+	gfs2_trans_end(sdp);
+
+ out_ipreserv:
+	gfs2_inplace_release(dip);
+
+ out:
+	gfs2_alloc_put(dip);
+
+	return error;
+}
+
+/**
+ * init_dinode - Fill in a new dinode structure
+ * @dip: the directory this inode is being created in
+ * @gl: The glock covering the new inode
+ * @inum: the inode number
+ * @mode: the file permissions
+ * @uid:
+ * @gid:
+ *
+ */
+
+static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
+			struct gfs2_inum *inum, unsigned int mode,
+			unsigned int uid, unsigned int gid)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_dinode di;
+	struct buffer_head *dibh;
+
+	dibh = gfs2_meta_new(gl, inum->no_addr);
+	gfs2_trans_add_bh(gl, dibh);
+	gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
+	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+
+	memset(&di, 0, sizeof(struct gfs2_dinode));
+	gfs2_meta_header_in(&di.di_header, dibh->b_data);
+	di.di_num = *inum;
+	di.di_mode = mode;
+	di.di_uid = uid;
+	di.di_gid = gid;
+	di.di_blocks = 1;
+	di.di_atime = di.di_mtime = di.di_ctime = get_seconds();
+	di.di_goal_meta = di.di_goal_data = inum->no_addr;
+
+	if (S_ISREG(mode)) {
+		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
+		    gfs2_tune_get(sdp, gt_new_files_jdata))
+			di.di_flags |= GFS2_DIF_JDATA;
+		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
+		    gfs2_tune_get(sdp, gt_new_files_directio))
+			di.di_flags |= GFS2_DIF_DIRECTIO;
+	} else if (S_ISDIR(mode)) {
+		di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO);
+		di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA);
+	}
+
+	gfs2_dinode_out(&di, dibh->b_data);
+	brelse(dibh);
+}
+
+static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
+		       unsigned int mode, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	unsigned int uid, gid;
+	int error;
+
+	munge_mode_uid_gid(dip, &mode, &uid, &gid);
+
+	gfs2_alloc_get(dip);
+
+	error = gfs2_quota_lock(dip, uid, gid);
+	if (error)
+		goto out;
+
+	error = gfs2_quota_check(dip, uid, gid);
+	if (error)
+		goto out_quota;
+
+	error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED +
+				 RES_QUOTA, 0);
+	if (error)
+		goto out_quota;
+
+	ul->ul_ut.ut_flags = 0;
+	error = gfs2_unlinked_ondisk_munge(sdp, ul);
+
+	init_dinode(dip, gl, &ul->ul_ut.ut_inum,
+		     mode, uid, gid);
+
+	gfs2_quota_change(dip, +1, uid, gid);
+
+	gfs2_trans_end(sdp);
+
+ out_quota:
+	gfs2_quota_unlock(dip);
+
+ out:
+	gfs2_alloc_put(dip);
+
+	return error;
+}
+
+static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
+		       struct gfs2_inode *ip, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_alloc *al;
+	int alloc_required;
+	struct buffer_head *dibh;
+	int error;
+
+	al = gfs2_alloc_get(dip);
+
+	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto fail;
+
+	error = gfs2_diradd_alloc_required(dip, name, &alloc_required);
+	if (alloc_required) {
+		error = gfs2_quota_check(dip, dip->i_di.di_uid,
+					 dip->i_di.di_gid);
+		if (error)
+			goto fail_quota_locks;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve(dip);
+		if (error)
+			goto fail_quota_locks;
+
+		error = gfs2_trans_begin(sdp,
+					 sdp->sd_max_dirres +
+					 al->al_rgd->rd_ri.ri_length +
+					 2 * RES_DINODE + RES_UNLINKED +
+					 RES_STATFS + RES_QUOTA, 0);
+		if (error)
+			goto fail_ipreserv;
+	} else {
+		error = gfs2_trans_begin(sdp,
+					 RES_LEAF +
+					 2 * RES_DINODE +
+					 RES_UNLINKED, 0);
+		if (error)
+			goto fail_quota_locks;
+	}
+
+	error = gfs2_dir_add(dip, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
+	if (error)
+		goto fail_end_trans;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto fail_end_trans;
+	ip->i_di.di_nlink = 1;
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	error = gfs2_unlinked_ondisk_rm(sdp, ul);
+	if (error)
+		goto fail_end_trans;
+
+	return 0;
+
+ fail_end_trans:
+	gfs2_trans_end(sdp);
+
+ fail_ipreserv:
+	if (dip->i_alloc->al_rgd)
+		gfs2_inplace_release(dip);
+
+ fail_quota_locks:
+	gfs2_quota_unlock(dip);
+
+ fail:
+	gfs2_alloc_put(dip);
+
+	return error;
+}
+
+/**
+ * gfs2_createi - Create a new inode
+ * @ghs: An array of two holders
+ * @name: The name of the new file
+ * @mode: the permissions on the new inode
+ *
+ * @ghs[0] is an initialized holder for the directory
+ * @ghs[1] is the holder for the inode lock
+ *
+ * If the return value is 0, the glocks on both the directory and the new
+ * file are held.  A transaction has been started and an inplace reservation
+ * is held, as well.
+ *
+ * Returns: errno
+ */
+
+int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode)
+{
+	struct gfs2_inode *dip = get_gl2ip(ghs->gh_gl);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_unlinked *ul;
+	struct gfs2_inode *ip;
+	int error;
+
+	if (!name->len || name->len > GFS2_FNAMESIZE)
+		return -ENAMETOOLONG;
+
+	error = gfs2_unlinked_get(sdp, &ul);
+	if (error)
+		return error;
+
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
+	error = gfs2_glock_nq(ghs);
+	if (error)
+		goto fail;
+
+	error = create_ok(dip, name, mode);
+	if (error)
+		goto fail_gunlock;
+
+	error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
+	if (error)
+		goto fail_gunlock;
+
+	error = alloc_dinode(dip, ul);
+	if (error)
+		goto fail_gunlock;
+
+	if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
+		gfs2_glock_dq(ghs);
+
+		error = gfs2_glock_nq_num(sdp,
+					  ul->ul_ut.ut_inum.no_addr,
+					  &gfs2_inode_glops,
+					  LM_ST_EXCLUSIVE, GL_SKIP,
+					  ghs + 1);
+		if (error) {
+			gfs2_unlinked_put(sdp, ul);
+			return error;
+		}
+
+		gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
+		error = gfs2_glock_nq(ghs);
+		if (error) {
+			gfs2_glock_dq_uninit(ghs + 1);
+			gfs2_unlinked_put(sdp, ul);
+			return error;
+		}
+
+		error = create_ok(dip, name, mode);
+		if (error)
+			goto fail_gunlock2;
+	} else {
+		error = gfs2_glock_nq_num(sdp,
+					  ul->ul_ut.ut_inum.no_addr,
+					  &gfs2_inode_glops,
+					  LM_ST_EXCLUSIVE, GL_SKIP,
+					  ghs + 1);
+		if (error)
+			goto fail_gunlock;
+	}
+
+	error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
+	if (error)
+		goto fail_gunlock2;
+
+	error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
+	if (error)
+		goto fail_gunlock2;
+
+	error = gfs2_inode_refresh(ip);
+	if (error)
+		goto fail_iput;
+
+	error = gfs2_acl_create(dip, ip);
+	if (error)
+		goto fail_iput;
+
+	error = link_dinode(dip, name, ip, ul);
+	if (error)
+		goto fail_iput;
+
+	gfs2_unlinked_put(sdp, ul);
+
+	return 0;
+
+ fail_iput:
+	gfs2_inode_put(ip);
+
+ fail_gunlock2:
+	gfs2_glock_dq_uninit(ghs + 1);
+
+ fail_gunlock:
+	gfs2_glock_dq(ghs);
+
+ fail:
+	gfs2_unlinked_put(sdp, ul);
+
+	return error;
+}
+
+/**
+ * gfs2_unlinki - Unlink a file
+ * @dip: The inode of the directory
+ * @name: The name of the file to be unlinked
+ * @ip: The inode of the file to be removed
+ *
+ * Assumes Glocks on both dip and ip are held.
+ *
+ * Returns: errno
+ */
+
+int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
+		 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	int error;
+
+	error = gfs2_dir_del(dip, name);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	/* If this inode is being unlinked from the directory structure,
+	   we need to mark that in the log so that it isn't lost during
+	   a crash. */
+
+	if (!ip->i_di.di_nlink) {
+		ul->ul_ut.ut_inum = ip->i_num;
+		error = gfs2_unlinked_ondisk_add(sdp, ul);
+		if (!error)
+			set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_rmdiri - Remove a directory
+ * @dip: The parent directory of the directory to be removed
+ * @name: The name of the directory to be removed
+ * @ip: The GFS2 inode of the directory to be removed
+ *
+ * Assumes Glocks on dip and ip are held
+ *
+ * Returns: errno
+ */
+
+int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
+		struct gfs2_inode *ip, struct gfs2_unlinked *ul)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct qstr dotname;
+	int error;
+
+	if (ip->i_di.di_entries != 2) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		return -EIO;
+	}
+
+	error = gfs2_dir_del(dip, name);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(dip, -1);
+	if (error)
+		return error;
+
+	dotname.len = 1;
+	dotname.name = ".";
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	dotname.len = 2;
+	dotname.name = "..";
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -2);
+	if (error)
+		return error;
+
+	/* This inode is being unlinked from the directory structure and
+	   we need to mark that in the log so that it isn't lost during
+	   a crash. */
+
+	ul->ul_ut.ut_inum = ip->i_num;
+	error = gfs2_unlinked_ondisk_add(sdp, ul);
+	if (!error)
+		set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
+
+	return error;
+}
+
+/*
+ * gfs2_unlink_ok - check to see that a inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
+		   struct gfs2_inode *ip)
+{
+	struct gfs2_inum inum;
+	unsigned int type;
+	int error;
+
+	if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
+		return -EPERM;
+
+	if ((dip->i_di.di_mode & S_ISVTX) &&
+	    dip->i_di.di_uid != current->fsuid &&
+	    ip->i_di.di_uid != current->fsuid &&
+	    !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (IS_APPEND(dip->i_vnode))
+		return -EPERM;
+
+	error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
+	if (error)
+		return error;
+
+	error = gfs2_dir_search(dip, name, &inum, &type);
+	if (error)
+		return error;
+
+	if (!gfs2_inum_equal(&inum, &ip->i_num))
+		return -ENOENT;
+
+	if (IF2DT(ip->i_di.di_mode) != type) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+
+int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+	struct gfs2_sbd *sdp = this->i_sbd;
+	struct gfs2_inode *tmp;
+	struct qstr dotdot;
+	int error = 0;
+
+	memset(&dotdot, 0, sizeof(struct qstr));
+	dotdot.name = "..";
+	dotdot.len = 2;
+
+	gfs2_inode_hold(to);
+
+	for (;;) {
+		if (to == this) {
+			error = -EINVAL;
+			break;
+		}
+		if (to == sdp->sd_root_dir) {
+			error = 0;
+			break;
+		}
+
+		error = gfs2_lookupi(to, &dotdot, TRUE, &tmp);
+		if (error)
+			break;
+
+		gfs2_inode_put(to);
+		to = tmp;
+	}
+
+	gfs2_inode_put(to);
+
+	return error;
+}
+
+/**
+ * gfs2_readlinki - return the contents of a symlink
+ * @ip: the symlink's inode
+ * @buf: a pointer to the buffer to be filled
+ * @len: a pointer to the length of @buf
+ *
+ * If @buf is too small, a piece of memory is kmalloc()ed and needs
+ * to be freed by the caller.
+ *
+ * Returns: errno
+ */
+
+int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
+{
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int x;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
+	error = gfs2_glock_nq_atime(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		return error;
+	}
+
+	if (!ip->i_di.di_size) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	x = ip->i_di.di_size + 1;
+	if (x > *len) {
+		*buf = kmalloc(x, GFP_KERNEL);
+		if (!*buf) {
+			error = -ENOMEM;
+			goto out_brelse;
+		}
+	}
+
+	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
+	*len = x;
+
+ out_brelse:
+	brelse(dibh);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
+ *       conditionally update the inode's atime
+ * @gh: the holder to acquire
+ *
+ * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
+ * Update if the difference between the current time and the inode's current
+ * atime is greater than an interval specified at mount.
+ *
+ * Returns: errno
+ */
+
+int gfs2_glock_nq_atime(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_inode *ip = get_gl2ip(gl);
+	int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+	unsigned int state;
+	int flags;
+	int error;
+
+	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
+	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
+	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
+		return -EINVAL;
+
+	state = gh->gh_state;
+	flags = gh->gh_flags;
+
+	error = gfs2_glock_nq(gh);
+	if (error)
+		return error;
+
+	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
+	    (sdp->sd_vfs->s_flags & MS_RDONLY))
+		return 0;
+
+	curtime = get_seconds();
+	if (curtime - ip->i_di.di_atime >= quantum) {
+		gfs2_glock_dq(gh);
+		gfs2_holder_reinit(LM_ST_EXCLUSIVE,
+				  gh->gh_flags & ~LM_FLAG_ANY,
+				  gh);
+		error = gfs2_glock_nq(gh);
+		if (error)
+			return error;
+
+		/* Verify that atime hasn't been updated while we were
+		   trying to get exclusive lock. */
+
+		curtime = get_seconds();
+		if (curtime - ip->i_di.di_atime >= quantum) {
+			struct buffer_head *dibh;
+
+			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+			if (error == -EROFS)
+				return 0;
+			if (error)
+				goto fail;
+
+			error = gfs2_meta_inode_buffer(ip, &dibh);
+			if (error)
+				goto fail_end_trans;
+
+			ip->i_di.di_atime = curtime;
+
+			gfs2_trans_add_bh(ip->i_gl, dibh);
+			gfs2_dinode_out(&ip->i_di, dibh->b_data);
+			brelse(dibh);
+
+			gfs2_trans_end(sdp);
+		}
+
+		/* If someone else has asked for the glock,
+		   unlock and let them have it. Then reacquire
+		   in the original state. */
+		if (gfs2_glock_is_blocking(gl)) {
+			gfs2_glock_dq(gh);
+			gfs2_holder_reinit(state, flags, gh);
+			return gfs2_glock_nq(gh);
+		}
+	}
+
+	return 0;
+
+ fail_end_trans:
+	gfs2_trans_end(sdp);
+
+ fail:
+	gfs2_glock_dq(gh);
+
+	return error;
+}
+
+/**
+ * glock_compare_atime - Compare two struct gfs2_glock structures for sort
+ * @arg_a: the first structure
+ * @arg_b: the second structure
+ *
+ * Returns: 1 if A > B
+ *         -1 if A < B
+ *          0 if A = B
+ */
+
+static int glock_compare_atime(const void *arg_a, const void *arg_b)
+{
+	struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
+	struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
+	struct lm_lockname *a = &gh_a->gh_gl->gl_name;
+	struct lm_lockname *b = &gh_b->gh_gl->gl_name;
+	int ret = 0;
+
+	if (a->ln_number > b->ln_number)
+		ret = 1;
+	else if (a->ln_number < b->ln_number)
+		ret = -1;
+	else {
+		if (gh_a->gh_state == LM_ST_SHARED &&
+		    gh_b->gh_state == LM_ST_EXCLUSIVE)
+			ret = 1;
+		else if (gh_a->gh_state == LM_ST_SHARED &&
+			 (gh_b->gh_flags & GL_ATIME))
+			ret = 1;
+	}
+
+	return ret;
+}
+
+/**
+ * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
+ *      atime update
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs2_holder structures
+ *
+ * Returns: 0 on success (all glocks acquired),
+ *          errno on failure (no glocks acquired)
+ */
+
+int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+	struct gfs2_holder **p;
+	unsigned int x;
+	int error = 0;
+
+	if (!num_gh)
+		return 0;
+
+	if (num_gh == 1) {
+		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+		if (ghs->gh_flags & GL_ATIME)
+			error = gfs2_glock_nq_atime(ghs);
+		else
+			error = gfs2_glock_nq(ghs);
+		return error;
+	}
+
+	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	for (x = 0; x < num_gh; x++)
+		p[x] = &ghs[x];
+
+	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
+
+	for (x = 0; x < num_gh; x++) {
+		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+
+		if (p[x]->gh_flags & GL_ATIME)
+			error = gfs2_glock_nq_atime(p[x]);
+		else
+			error = gfs2_glock_nq(p[x]);
+
+		if (error) {
+			while (x--)
+				gfs2_glock_dq(p[x]);
+			break;
+		}
+	}
+
+	kfree(p);
+
+	return error;
+}
+
+/**
+ * gfs2_try_toss_vnode - See if we can toss a vnode from memory
+ * @ip: the inode
+ *
+ * Returns:  TRUE if the vnode was tossed
+ */
+
+void gfs2_try_toss_vnode(struct gfs2_inode *ip)
+{
+	struct inode *inode;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (!inode)
+		return;
+
+	d_prune_aliases(inode);
+
+	if (S_ISDIR(ip->i_di.di_mode)) {
+		struct list_head *head = &inode->i_dentry;
+		struct dentry *d = NULL;
+
+		spin_lock(&dcache_lock);
+		if (list_empty(head))
+			spin_unlock(&dcache_lock);
+		else {
+			d = list_entry(head->next, struct dentry, d_alias);
+			dget_locked(d);
+			spin_unlock(&dcache_lock);
+
+			if (have_submounts(d))
+				dput(d);
+			else {
+				shrink_dcache_parent(d);
+				dput(d);
+				d_prune_aliases(inode);
+			}
+		}
+	}
+
+	inode->i_nlink = 0;
+	iput(inode);
+}
+
+/**
+ * gfs2_setattr_simple -
+ * @ip:
+ * @attr:
+ *
+ * Called with a reference on the vnode.
+ *
+ * Returns: errno
+ */
+
+int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		error = inode_setattr(ip->i_vnode, attr);
+		gfs2_assert_warn(ip->i_sbd, !error);
+		gfs2_inode_attr_out(ip);
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(ip->i_sbd);
+
+	return error;
+}
+
+int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	return permission(inode, mask, nd);
+}
+
--- a/fs/gfs2/inode.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/inode.h	2005-09-01 17:36:55.314115624 +0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __INODE_DOT_H__
+#define __INODE_DOT_H__
+
+static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+{
+	return !ip->i_di.di_height;
+}
+
+static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+{
+	return ip->i_di.di_flags & GFS2_DIF_JDATA;
+}
+
+void gfs2_inode_attr_in(struct gfs2_inode *ip);
+void gfs2_inode_attr_out(struct gfs2_inode *ip);
+struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
+struct inode *gfs2_ip2v(struct gfs2_inode *ip);
+struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum);
+
+void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type);
+int gfs2_inode_refresh(struct gfs2_inode *ip);
+
+int gfs2_inode_get(struct gfs2_glock *i_gl,
+		   struct gfs2_inum *inum, int create,
+		   struct gfs2_inode **ipp);
+void gfs2_inode_hold(struct gfs2_inode *ip);
+void gfs2_inode_put(struct gfs2_inode *ip);
+void gfs2_inode_destroy(struct gfs2_inode *ip);
+
+int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
+
+int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
+int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root,
+		 struct gfs2_inode **ipp);
+int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode);
+int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
+		 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
+int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
+		struct gfs2_inode *ip, struct gfs2_unlinked *ul);
+int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
+		   struct gfs2_inode *ip);
+int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
+int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
+
+int gfs2_glock_nq_atime(struct gfs2_holder *gh);
+int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
+
+void gfs2_try_toss_vnode(struct gfs2_inode *ip);
+
+int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
+
+int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd);
+
+static inline int gfs2_lookup_simple(struct gfs2_inode *dip, char *name,
+				     struct gfs2_inode **ipp)
+{
+	struct qstr qstr;
+	memset(&qstr, 0, sizeof(struct qstr));
+	qstr.name = name;
+	qstr.len = strlen(name);
+	return gfs2_lookupi(dip, &qstr, TRUE, ipp);
+}
+
+#endif /* __INODE_DOT_H__ */
+
--- a/fs/gfs2/jdata.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/jdata.c	2005-09-01 17:36:55.324114104 +0800
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "inode.h"
+#include "jdata.h"
+#include "meta_io.h"
+#include "trans.h"
+
+int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+			  struct buffer_head **bhp)
+{
+	struct buffer_head *bh;
+	int error = 0;
+
+	if (new) {
+		bh = gfs2_meta_new(ip->i_gl, block);
+		gfs2_trans_add_bh(ip->i_gl, bh);
+		gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
+		gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
+	} else {
+		error = gfs2_meta_read(ip->i_gl, block,
+				       DIO_START | DIO_WAIT, &bh);
+		if (error)
+			return error;
+		if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
+			brelse(bh);
+			return -EIO;
+		}
+	}
+
+	*bhp = bh;
+
+	return 0;
+}
+
+/**
+ * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
+ * @bh: The buffer to copy from, or NULL meaning zero the buffer
+ * @buf: The buffer to copy/zero
+ * @offset: The offset in the buffer to copy from
+ * @size: The amount of data to copy/zero
+ *
+ * Returns: errno
+ */
+
+int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
+		  unsigned int size)
+{
+	if (bh)
+		memcpy(*buf, bh->b_data + offset, size);
+	else
+		memset(*buf, 0, size);
+	*buf += size;
+	return 0;
+}
+
+/**
+ * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
+ * @bh: The buffer
+ * @buf: The destination of the data
+ * @offset: The offset into the buffer
+ * @size: The amount of data to copy
+ *
+ * Returns: errno
+ */
+
+int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
+		   unsigned int size)
+{
+	int error;
+
+	if (bh)
+		error = copy_to_user(*buf, bh->b_data + offset, size);
+	else
+		error = clear_user(*buf, size);
+
+	if (error)
+		error = -EFAULT;
+	else
+		*buf += size;
+
+	return error;
+}
+
+static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
+			      unsigned int offset, unsigned int size,
+			      read_copy_fn_t copy_fn)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		error = copy_fn(dibh, &buf,
+				offset + sizeof(struct gfs2_dinode), size);
+		brelse(dibh);
+	}
+
+	return (error) ? error : size;
+}
+
+/**
+ * gfs2_jdata_read - Read a jdata file
+ * @ip: The GFS2 Inode
+ * @buf: The buffer to place result into
+ * @offset: File offset to begin jdata_readng from
+ * @size: Amount of data to transfer
+ * @copy_fn: Function to actually perform the copy
+ *
+ * The @copy_fn only copies a maximum of a single block at once so
+ * we are safe calling it with int arguments. It is done so that
+ * we don't needlessly put 64bit arguments on the stack and it
+ * also makes the code in the @copy_fn nicer too.
+ *
+ * Returns: The amount of data actually copied or the error
+ */
+
+int gfs2_jdata_read(struct gfs2_inode *ip, char *buf, uint64_t offset,
+		    unsigned int size, read_copy_fn_t copy_fn)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	uint64_t lblock, dblock;
+	uint32_t extlen = 0;
+	unsigned int o;
+	int copied = 0;
+	int error = 0;
+
+	if (offset >= ip->i_di.di_size)
+		return 0;
+
+	if ((offset + size) > ip->i_di.di_size)
+		size = ip->i_di.di_size - offset;
+
+	if (!size)
+		return 0;
+
+	if (gfs2_is_stuffed(ip))
+		return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
+					  copy_fn);
+
+	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
+		return -EINVAL;
+
+	lblock = offset;
+	o = do_div(lblock, sdp->sd_jbsize) +
+		sizeof(struct gfs2_meta_header);
+
+	while (copied < size) {
+		unsigned int amount;
+		struct buffer_head *bh;
+		int new;
+
+		amount = size - copied;
+		if (amount > sdp->sd_sb.sb_bsize - o)
+			amount = sdp->sd_sb.sb_bsize - o;
+
+		if (!extlen) {
+			new = FALSE;
+			error = gfs2_block_map(ip, lblock, &new,
+					       &dblock, &extlen);
+			if (error)
+				goto fail;
+		}
+
+		if (extlen > 1)
+			gfs2_meta_ra(ip->i_gl, dblock, extlen);
+
+		if (dblock) {
+			error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
+			if (error)
+				goto fail;
+			dblock++;
+			extlen--;
+		} else
+			bh = NULL;
+
+		error = copy_fn(bh, &buf, o, amount);
+		brelse(bh);
+		if (error)
+			goto fail;
+
+		copied += amount;
+		lblock++;
+
+		o = sizeof(struct gfs2_meta_header);
+	}
+
+	return copied;
+
+ fail:
+	return (copied) ? copied : error;
+}
+
+/**
+ * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
+ * @bh: The buffer to copy to or clear
+ * @buf: The buffer to copy from
+ * @offset: The offset in the buffer to write to
+ * @size: The amount of data to write
+ *
+ * Returns: errno
+ */
+
+int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
+		       char **buf, unsigned int offset, unsigned int size)
+{
+	gfs2_trans_add_bh(ip->i_gl, bh);
+	memcpy(bh->b_data + offset, *buf, size);
+
+	*buf += size;
+
+	return 0;
+}
+
+/**
+ * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
+ * @bh: The buffer to copy to or clear
+ * @buf: The buffer to copy from
+ * @offset: The offset in the buffer to write to
+ * @size: The amount of data to write
+ *
+ * Returns: errno
+ */
+
+int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
+			char **buf, unsigned int offset, unsigned int size)
+{
+	int error = 0;
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+	if (copy_from_user(bh->b_data + offset, *buf, size))
+		error = -EFAULT;
+	else
+		*buf += size;
+
+	return error;
+}
+
+static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
+			       unsigned int offset, unsigned int size,
+			       write_copy_fn_t copy_fn)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	error = copy_fn(ip,
+			dibh, &buf,
+			offset + sizeof(struct gfs2_dinode), size);
+	if (!error) {
+		if (ip->i_di.di_size < offset + size)
+			ip->i_di.di_size = offset + size;
+		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	}
+
+	brelse(dibh);
+
+	return (error) ? error : size;
+}
+
+/**
+ * gfs2_jdata_write - Write bytes to a file
+ * @ip: The GFS2 inode
+ * @buf: The buffer containing information to be written
+ * @offset: The file offset to start writing at
+ * @size: The amount of data to write
+ * @copy_fn: Function to do the actual copying
+ *
+ * Returns: The number of bytes correctly written or error code
+ */
+
+int gfs2_jdata_write(struct gfs2_inode *ip, char *buf, uint64_t offset,
+		     unsigned int size, write_copy_fn_t copy_fn)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh;
+	uint64_t lblock, dblock;
+	uint32_t extlen = 0;
+	unsigned int o;
+	int copied = 0;
+	int error = 0;
+
+	if (!size)
+		return 0;
+
+	if (gfs2_is_stuffed(ip) &&
+	    offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
+		return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
+					   copy_fn);
+
+	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
+		return -EINVAL;
+
+	if (gfs2_is_stuffed(ip)) {
+		error = gfs2_unstuff_dinode(ip, NULL, NULL);
+		if (error)
+			return error;
+	}
+
+	lblock = offset;
+	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
+
+	while (copied < size) {
+		unsigned int amount;
+		struct buffer_head *bh;
+		int new;
+
+		amount = size - copied;
+		if (amount > sdp->sd_sb.sb_bsize - o)
+			amount = sdp->sd_sb.sb_bsize - o;
+
+		if (!extlen) {
+			new = TRUE;
+			error = gfs2_block_map(ip, lblock, &new,
+					       &dblock, &extlen);
+			if (error)
+				goto fail;
+			error = -EIO;
+			if (gfs2_assert_withdraw(sdp, dblock))
+				goto fail;
+		}
+
+		error = gfs2_jdata_get_buffer(ip, dblock,
+				(amount == sdp->sd_jbsize) ? TRUE : new,
+				&bh);
+		if (error)
+			goto fail;
+
+		error = copy_fn(ip, bh, &buf, o, amount);
+		brelse(bh);
+		if (error)
+			goto fail;
+
+		copied += amount;
+		lblock++;
+		dblock++;
+		extlen--;
+
+		o = sizeof(struct gfs2_meta_header);
+	}
+
+ out:
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	if (ip->i_di.di_size < offset + copied)
+		ip->i_di.di_size = offset + copied;
+	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return copied;
+
+ fail:
+	if (copied)
+		goto out;
+	return error;
+}
+
--- a/fs/gfs2/jdata.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/jdata.h	2005-09-01 17:36:55.325113952 +0800
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __FILE_DOT_H__
+#define __FILE_DOT_H__
+
+int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+			  struct buffer_head **bhp);
+
+typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
+			       unsigned int offset, unsigned int size);
+typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
+				struct buffer_head *bh, char **buf,
+				unsigned int offset, unsigned int size);
+
+int gfs2_copy2mem(struct buffer_head *bh, char **buf,
+		  unsigned int offset, unsigned int size);
+int gfs2_copy2user(struct buffer_head *bh, char **buf,
+		   unsigned int offset, unsigned int size);
+int gfs2_jdata_read(struct gfs2_inode *ip, char *buf,
+		    uint64_t offset, unsigned int size,
+		    read_copy_fn_t copy_fn);
+
+int gfs2_copy_from_mem(struct gfs2_inode *ip,
+		       struct buffer_head *bh, char **buf,
+		       unsigned int offset, unsigned int size);
+int gfs2_copy_from_user(struct gfs2_inode *ip,
+			struct buffer_head *bh, char **buf,
+			unsigned int offset, unsigned int size);
+int gfs2_jdata_write(struct gfs2_inode *ip, char *buf,
+		     uint64_t offset, unsigned int size,
+		     write_copy_fn_t copy_fn);
+
+static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
+				      uint64_t offset, unsigned int size)
+{
+	return gfs2_jdata_read(ip, buf, offset, size, gfs2_copy2mem);
+}
+
+static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, char *buf,
+				       uint64_t offset, unsigned int size)
+{
+	return gfs2_jdata_write(ip, buf, offset, size, gfs2_copy_from_mem);
+}
+
+#endif /* __FILE_DOT_H__ */
--- a/fs/gfs2/lops.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lops.c	2005-09-01 17:36:55.359108784 +0800
@@ -0,0 +1,510 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "log.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "recovery.h"
+#include "rgrp.h"
+#include "trans.h"
+
+static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	struct gfs2_glock *gl;
+
+	get_transaction->tr_touched = TRUE;
+
+	if (!list_empty(&le->le_list))
+		return;
+
+	gl = container_of(le, struct gfs2_glock, gl_le);
+	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
+		return;
+	gfs2_glock_hold(gl);
+	set_bit(GLF_DIRTY, &gl->gl_flags);
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_num_gl++;
+	list_add(&le->le_list, &sdp->sd_log_le_gl);
+	gfs2_log_unlock(sdp);
+}
+
+static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &sdp->sd_log_le_gl;
+	struct gfs2_glock *gl;
+
+	while (!list_empty(head)) {
+		gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
+		list_del_init(&gl->gl_le.le_list);
+		sdp->sd_log_num_gl--;
+
+		gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
+		gfs2_glock_put(gl);
+	}
+	gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
+}
+
+static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+	struct gfs2_trans *tr;
+
+	if (!list_empty(&bd->bd_list_tr))
+		return;
+
+	tr = get_transaction;
+	tr->tr_touched = TRUE;
+	tr->tr_num_buf++;
+	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+
+	if (!list_empty(&le->le_list))
+		return;
+
+	gfs2_trans_add_gl(bd->bd_gl);
+
+	gfs2_meta_check(sdp, bd->bd_bh);
+	gfs2_meta_pin(sdp, bd->bd_bh);
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_num_buf++;
+	list_add(&le->le_list, &sdp->sd_log_le_buf);
+	gfs2_log_unlock(sdp);
+
+	tr->tr_num_buf_new++;
+}
+
+static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+	struct list_head *head = &tr->tr_list_buf;
+	struct gfs2_bufdata *bd;
+
+	while (!list_empty(head)) {
+		bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
+		list_del_init(&bd->bd_list_tr);
+		tr->tr_num_buf--;
+	}
+	gfs2_assert_warn(sdp, !tr->tr_num_buf);
+}
+
+static void buf_lo_before_commit(struct gfs2_sbd *sdp)
+{
+	struct buffer_head *bh;
+	struct gfs2_log_descriptor ld;
+	struct gfs2_bufdata *bd;
+
+	if (!sdp->sd_log_num_buf)
+		return;
+
+	bh = gfs2_log_get_buf(sdp);
+	memset(&ld, 0, sizeof(struct gfs2_log_descriptor));
+	ld.ld_header.mh_magic = GFS2_MAGIC;
+	ld.ld_header.mh_type = GFS2_METATYPE_LD;
+	ld.ld_header.mh_format = GFS2_FORMAT_LD;
+	ld.ld_header.mh_blkno = bh->b_blocknr;
+	ld.ld_type = GFS2_LOG_DESC_METADATA;
+	ld.ld_length = sdp->sd_log_num_buf + 1;
+	ld.ld_data1 = sdp->sd_log_num_buf;
+	gfs2_log_descriptor_out(&ld, bh->b_data);
+
+	set_buffer_dirty(bh);
+	ll_rw_block(WRITE, 1, &bh);
+
+	list_for_each_entry(bd, &sdp->sd_log_le_buf, bd_le.le_list) {
+		bh = gfs2_log_fake_buf(sdp, bd->bd_bh);
+		set_buffer_dirty(bh);
+		ll_rw_block(WRITE, 1, &bh);
+	}
+}
+
+static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &sdp->sd_log_le_buf;
+	struct gfs2_bufdata *bd;
+
+	while (!list_empty(head)) {
+		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+		list_del_init(&bd->bd_le.le_list);
+		sdp->sd_log_num_buf--;
+
+		gfs2_meta_unpin(sdp, bd->bd_bh, ai);
+	}
+	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
+}
+
+static void buf_lo_before_scan(struct gfs2_jdesc *jd,
+			       struct gfs2_log_header *head, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+	if (pass != 0)
+		return;
+
+	sdp->sd_found_blocks = 0;
+	sdp->sd_replayed_blocks = 0;
+}
+
+static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+				struct gfs2_log_descriptor *ld, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+	struct gfs2_glock *gl = jd->jd_inode->i_gl;
+	unsigned int blks = ld->ld_data1;
+	struct buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	int error = 0;
+
+	if (pass != 1 || ld->ld_type != GFS2_LOG_DESC_METADATA)
+		return 0;
+
+	gfs2_replay_incr_blk(sdp, &start);
+
+	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+		error = gfs2_replay_read_block(jd, start, &bh_log);
+		if (error)
+			return error;
+
+		blkno = ((struct gfs2_meta_header *)bh_log->b_data)->mh_blkno;
+		blkno = gfs2_64_to_cpu(blkno);
+
+		sdp->sd_found_blocks++;
+
+		if (gfs2_revoke_check(sdp, blkno, start)) {
+			brelse(bh_log);
+			continue;
+		}
+
+		bh_ip = gfs2_meta_new(gl, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		if (gfs2_meta_check(sdp, bh_ip))
+			error = -EIO;
+		else
+			mark_buffer_dirty(bh_ip);
+
+		brelse(bh_log);
+		brelse(bh_ip);
+
+		if (error)
+			break;
+
+		sdp->sd_replayed_blocks++;
+	}
+
+	return error;
+}
+
+static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+	if (error) {
+		gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+		return;
+	}
+	if (pass != 1)
+		return;
+
+	gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+
+	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
+	        jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
+}
+
+static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	struct gfs2_trans *tr;
+
+	tr = get_transaction;
+	tr->tr_touched = TRUE;
+	tr->tr_num_revoke++;
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_num_revoke++;
+	list_add(&le->le_list, &sdp->sd_log_le_revoke);
+	gfs2_log_unlock(sdp);
+}
+
+static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
+{
+	struct gfs2_log_descriptor ld;
+	struct gfs2_meta_header *mh = &ld.ld_header;
+	struct buffer_head *bh;
+	unsigned int offset;
+	struct list_head *head = &sdp->sd_log_le_revoke;
+	struct gfs2_revoke *rv;
+
+	if (!sdp->sd_log_num_revoke)
+		return;
+
+	bh = gfs2_log_get_buf(sdp);
+	memset(&ld, 0, sizeof(struct gfs2_log_descriptor));
+	ld.ld_header.mh_magic = GFS2_MAGIC;
+	ld.ld_header.mh_type = GFS2_METATYPE_LD;
+	ld.ld_header.mh_format = GFS2_FORMAT_LD;
+	ld.ld_header.mh_blkno = bh->b_blocknr;
+	ld.ld_type = GFS2_LOG_DESC_REVOKE;
+	ld.ld_length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(uint64_t));
+	ld.ld_data1 = sdp->sd_log_num_revoke;
+	gfs2_log_descriptor_out(&ld, bh->b_data);
+	offset = sizeof(struct gfs2_log_descriptor);
+
+	while (!list_empty(head)) {
+		rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
+		list_del(&rv->rv_le.le_list);
+		sdp->sd_log_num_revoke--;
+
+		if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) {
+			set_buffer_dirty(bh);
+			ll_rw_block(WRITE, 1, &bh);
+
+			bh = gfs2_log_get_buf(sdp);
+			mh->mh_type = GFS2_METATYPE_LB;
+			mh->mh_format = GFS2_FORMAT_LB;
+			mh->mh_blkno = bh->b_blocknr;
+			gfs2_meta_header_out(mh, bh->b_data);
+			offset = sizeof(struct gfs2_meta_header);
+		}
+
+		*(uint64_t *)(bh->b_data + offset) = cpu_to_gfs2_64(rv->rv_blkno);
+		kfree(rv);
+
+		offset += sizeof(uint64_t);
+	}
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
+
+	set_buffer_dirty(bh);
+	ll_rw_block(WRITE, 1, &bh);
+}
+
+static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
+				  struct gfs2_log_header *head, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+	if (pass != 0)
+		return;
+
+	sdp->sd_found_revokes = 0;
+	sdp->sd_replay_tail = head->lh_tail;
+}
+
+static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+				   struct gfs2_log_descriptor *ld, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+	unsigned int blks = ld->ld_length;
+	unsigned int revokes = ld->ld_data1;
+	struct buffer_head *bh;
+	unsigned int offset;
+	uint64_t blkno;
+	int first = TRUE;
+	int error;
+
+	if (pass != 0 || ld->ld_type != GFS2_LOG_DESC_REVOKE)
+		return 0;
+
+	offset = sizeof(struct gfs2_log_descriptor);
+
+	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+		error = gfs2_replay_read_block(jd, start, &bh);
+		if (error)
+			return error;
+
+		if (!first)
+			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
+
+		while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
+			blkno = *(uint64_t *)(bh->b_data + offset);
+			blkno = gfs2_64_to_cpu(blkno);
+
+			error = gfs2_revoke_add(sdp, blkno, start);
+			if (error < 0)
+				return error;
+			else if (error)
+				sdp->sd_found_revokes++;
+
+			if (!--revokes)
+				break;
+			offset += sizeof(uint64_t);
+		}
+
+		brelse(bh);
+		offset = sizeof(struct gfs2_meta_header);
+		first = FALSE;
+	}
+
+	return 0;
+}
+
+static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+	if (error) {
+		gfs2_revoke_clean(sdp);
+		return;
+	}
+	if (pass != 1)
+		return;
+
+	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
+	        jd->jd_jid, sdp->sd_found_revokes);
+
+	gfs2_revoke_clean(sdp);
+}
+
+static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	struct gfs2_rgrpd *rgd;
+
+	get_transaction->tr_touched = TRUE;
+
+	if (!list_empty(&le->le_list))
+		return;
+
+	rgd = container_of(le, struct gfs2_rgrpd, rd_le);
+	gfs2_rgrp_bh_hold(rgd);
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_num_rg++;
+	list_add(&le->le_list, &sdp->sd_log_le_rg);
+	gfs2_log_unlock(sdp);	
+}
+
+static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &sdp->sd_log_le_rg;
+	struct gfs2_rgrpd *rgd;
+
+	while (!list_empty(head)) {
+		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
+		list_del_init(&rgd->rd_le.le_list);
+		sdp->sd_log_num_rg--;
+
+		gfs2_rgrp_repolish_clones(rgd);
+		gfs2_rgrp_bh_put(rgd);
+	}
+	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
+}
+
+static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	get_transaction->tr_touched = TRUE;
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_num_databuf++;
+	list_add(&le->le_list, &sdp->sd_log_le_databuf);
+	gfs2_log_unlock(sdp);
+}
+
+static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
+{
+	struct list_head *head = &sdp->sd_log_le_databuf;
+	LIST_HEAD(started);
+	struct gfs2_databuf *db;
+	struct buffer_head *bh;
+
+	while (!list_empty(head)) {
+		db = list_entry(head->prev, struct gfs2_databuf, db_le.le_list);
+		list_move(&db->db_le.le_list, &started);
+
+		gfs2_log_lock(sdp);
+		bh = db->db_bh;
+		if (bh) {
+			get_bh(bh);
+			gfs2_log_unlock(sdp);
+			if (buffer_dirty(bh)) {
+				wait_on_buffer(bh);
+				ll_rw_block(WRITE, 1, &bh);
+			}
+			brelse(bh);
+		} else
+			gfs2_log_unlock(sdp);
+	}
+
+	while (!list_empty(&started)) {
+		db = list_entry(started.next, struct gfs2_databuf,
+				db_le.le_list);
+		list_del(&db->db_le.le_list);
+		sdp->sd_log_num_databuf--;
+
+		gfs2_log_lock(sdp);
+		bh = db->db_bh;
+		if (bh) {
+			set_v2db(bh, NULL);
+			gfs2_log_unlock(sdp);
+			wait_on_buffer(bh);
+			brelse(bh);
+		} else
+			gfs2_log_unlock(sdp);
+
+		kfree(db);
+	}
+
+	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
+}
+
+struct gfs2_log_operations gfs2_glock_lops = {
+	.lo_add = glock_lo_add,
+	.lo_after_commit = glock_lo_after_commit,
+	.lo_name = "glock"
+};
+
+struct gfs2_log_operations gfs2_buf_lops = {
+	.lo_add = buf_lo_add,
+	.lo_incore_commit = buf_lo_incore_commit,
+	.lo_before_commit = buf_lo_before_commit,
+	.lo_after_commit = buf_lo_after_commit,
+	.lo_before_scan = buf_lo_before_scan,
+	.lo_scan_elements = buf_lo_scan_elements,
+	.lo_after_scan = buf_lo_after_scan,
+	.lo_name = "buf"
+};
+
+struct gfs2_log_operations gfs2_revoke_lops = {
+	.lo_add = revoke_lo_add,
+	.lo_before_commit = revoke_lo_before_commit,
+	.lo_before_scan = revoke_lo_before_scan,
+	.lo_scan_elements = revoke_lo_scan_elements,
+	.lo_after_scan = revoke_lo_after_scan,
+	.lo_name = "revoke"
+};
+
+struct gfs2_log_operations gfs2_rg_lops = {
+	.lo_add = rg_lo_add,
+	.lo_after_commit = rg_lo_after_commit,
+	.lo_name = "rg"
+};
+
+struct gfs2_log_operations gfs2_databuf_lops = {
+	.lo_add = databuf_lo_add,
+	.lo_before_commit = databuf_lo_before_commit,
+	.lo_name = "databuf"
+};
+
+struct gfs2_log_operations *gfs2_log_ops[] = {
+	&gfs2_glock_lops,
+	&gfs2_buf_lops,
+	&gfs2_revoke_lops,
+	&gfs2_rg_lops,
+	&gfs2_databuf_lops,
+	NULL
+};
+
--- a/fs/gfs2/lops.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lops.h	2005-09-01 17:36:55.360108632 +0800
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LOPS_DOT_H__
+#define __LOPS_DOT_H__
+
+extern struct gfs2_log_operations gfs2_glock_lops;
+extern struct gfs2_log_operations gfs2_buf_lops;
+extern struct gfs2_log_operations gfs2_revoke_lops;
+extern struct gfs2_log_operations gfs2_rg_lops;
+extern struct gfs2_log_operations gfs2_databuf_lops;
+
+extern struct gfs2_log_operations *gfs2_log_ops[];
+
+static inline void lops_init_le(struct gfs2_log_element *le,
+				struct gfs2_log_operations *lops)
+{
+	INIT_LIST_HEAD(&le->le_list);
+	le->le_ops = lops;
+}
+
+static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+{
+	if (le->le_ops->lo_add)
+		le->le_ops->lo_add(sdp, le);
+}
+
+static inline void lops_incore_commit(struct gfs2_sbd *sdp,
+				      struct gfs2_trans *tr)
+{
+	int x;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_incore_commit)
+			gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
+}
+
+static inline void lops_before_commit(struct gfs2_sbd *sdp)
+{
+	int x;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_before_commit)
+			gfs2_log_ops[x]->lo_before_commit(sdp);
+}
+
+static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	int x;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_after_commit)
+			gfs2_log_ops[x]->lo_after_commit(sdp, ai);
+}
+
+static inline void lops_before_scan(struct gfs2_jdesc *jd,
+				    struct gfs2_log_header *head,
+				    unsigned int pass)
+{
+	int x;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_before_scan)
+			gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
+}
+
+static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+				     struct gfs2_log_descriptor *ld,
+				     unsigned int pass)
+{
+	int x, error;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_scan_elements) {
+			error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
+								  ld, pass);
+			if (error)
+				return error;
+		}
+
+	return 0;
+}
+
+static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
+				   unsigned int pass)
+{
+	int x;
+	for (x = 0; gfs2_log_ops[x]; x++)
+		if (gfs2_log_ops[x]->lo_before_scan)
+			gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
+}
+
+#endif /* __LOPS_DOT_H__ */
+
--- a/fs/gfs2/main.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/main.c	2005-09-01 17:36:55.367107568 +0800
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "ops_fstype.h"
+#include "sys.h"
+
+/**
+ * init_gfs2_fs - Register GFS2 as a filesystem
+ *
+ * Returns: 0 on success, error code on failure
+ */
+
+static int __init init_gfs2_fs(void)
+{
+	int error;
+
+	error = gfs2_sys_init();
+	if (error)
+		return error;
+
+	error = -ENOMEM;
+
+	gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
+					      sizeof(struct gfs2_glock),
+					      0, 0, NULL, NULL);
+	if (!gfs2_glock_cachep)
+		goto fail;
+
+	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
+					      sizeof(struct gfs2_inode),
+					      0, 0, NULL, NULL);
+	if (!gfs2_inode_cachep)
+		goto fail;
+
+	gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
+						sizeof(struct gfs2_bufdata),
+					        0, 0, NULL, NULL);
+	if (!gfs2_bufdata_cachep)
+		goto fail;
+
+	error = register_filesystem(&gfs2_fs_type);
+	if (error)
+		goto fail;
+
+	printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
+
+	return 0;
+
+ fail:
+	if (gfs2_bufdata_cachep)
+		kmem_cache_destroy(gfs2_bufdata_cachep);
+
+	if (gfs2_inode_cachep)
+		kmem_cache_destroy(gfs2_inode_cachep);
+
+	if (gfs2_glock_cachep)
+		kmem_cache_destroy(gfs2_glock_cachep);
+
+	gfs2_sys_uninit();
+	return error;
+}
+
+/**
+ * exit_gfs2_fs - Unregister the file system
+ *
+ */
+
+static void __exit exit_gfs2_fs(void)
+{
+	unregister_filesystem(&gfs2_fs_type);
+
+	kmem_cache_destroy(gfs2_bufdata_cachep);
+	kmem_cache_destroy(gfs2_inode_cachep);
+	kmem_cache_destroy(gfs2_glock_cachep);
+
+	gfs2_sys_uninit();
+}
+
+MODULE_DESCRIPTION("Global File System");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+module_init(init_gfs2_fs);
+module_exit(exit_gfs2_fs);
+
--- a/fs/gfs2/meta_io.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/meta_io.c	2005-09-01 17:36:55.367107568 +0800
@@ -0,0 +1,884 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/swap.h>
+#include <linux/delay.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "rgrp.h"
+#include "trans.h"
+
+#define buffer_busy(bh) \
+((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
+#define buffer_in_io(bh) \
+((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
+
+static int aspace_get_block(struct inode *inode, sector_t lblock,
+			    struct buffer_head *bh_result, int create)
+{
+	gfs2_assert_warn(get_v2sdp(inode->i_sb), FALSE);
+	return -EOPNOTSUPP;
+}
+
+static int gfs2_aspace_writepage(struct page *page,
+				 struct writeback_control *wbc)
+{
+	return block_write_full_page(page, aspace_get_block, wbc);
+}
+
+/**
+ * stuck_releasepage - We're stuck in gfs2_releasepage().  Print stuff out.
+ * @bh: the buffer we're stuck on
+ *
+ */
+
+static void stuck_releasepage(struct buffer_head *bh)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb);
+	struct gfs2_bufdata *bd = get_v2bd(bh);
+	struct gfs2_glock *gl;
+
+	fs_warn(sdp, "stuck in gfs2_releasepage()\n");
+	fs_warn(sdp, "blkno = %"PRIu64", bh->b_count = %d\n",
+		(uint64_t)bh->b_blocknr, atomic_read(&bh->b_count));
+	fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
+	fs_warn(sdp, "get_v2bd(bh) = %s\n", (bd) ? "!NULL" : "NULL");
+
+	if (!bd)
+		return;
+
+	gl = bd->bd_gl;
+
+	fs_warn(sdp, "gl = (%u, %"PRIu64")\n", 
+		gl->gl_name.ln_type, gl->gl_name.ln_number);
+
+	fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
+		(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
+		(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
+
+	if (gl->gl_ops == &gfs2_inode_glops) {
+		struct gfs2_inode *ip = get_gl2ip(gl);
+		unsigned int x;
+
+		if (!ip)
+			return;
+
+		fs_warn(sdp, "ip = %"PRIu64"/%"PRIu64"\n",
+			ip->i_num.no_formal_ino, ip->i_num.no_addr);
+		fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n",
+			atomic_read(&ip->i_count),
+			(ip->i_vnode) ? "!NULL" : "NULL");
+
+		for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
+			fs_warn(sdp, "ip->i_cache[%u] = %s\n",
+				x, (ip->i_cache[x]) ? "!NULL" : "NULL");
+	}
+}
+
+/**
+ * gfs2_aspace_releasepage - free the metadata associated with a page
+ * @page: the page that's being released
+ * @gfp_mask: passed from Linux VFS, ignored by us
+ *
+ * Call try_to_free_buffers() if the buffers in this page can be
+ * released.
+ *
+ * Returns: 0
+ */
+
+static int gfs2_aspace_releasepage(struct page *page, int gfp_mask)
+{
+	struct inode *aspace = page->mapping->host;
+	struct gfs2_sbd *sdp = get_v2sdp(aspace->i_sb);
+	struct buffer_head *bh, *head;
+	struct gfs2_bufdata *bd;
+	unsigned long t;
+
+	if (!page_has_buffers(page))
+		goto out;
+
+	head = bh = page_buffers(page);
+	do {
+		t = jiffies;
+
+		while (atomic_read(&bh->b_count)) {
+			if (atomic_read(&aspace->i_writecount)) {
+				if (time_after_eq(jiffies, t +
+				    gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
+					stuck_releasepage(bh);
+					t = jiffies;
+				}
+
+				yield();
+				continue;
+			}
+
+			return 0;
+		}
+
+		gfs2_assert_warn(sdp, !buffer_pinned(bh));
+
+		bd = get_v2bd(bh);
+		if (bd) {
+			gfs2_assert_warn(sdp, bd->bd_bh == bh);
+			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
+			gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list));
+			gfs2_assert_warn(sdp, !bd->bd_ail);
+			kmem_cache_free(gfs2_bufdata_cachep, bd);
+			atomic_dec(&sdp->sd_bufdata_count);
+			set_v2bd(bh, NULL);
+		}
+
+		bh = bh->b_this_page;
+	}
+	while (bh != head);
+
+ out:
+	return try_to_free_buffers(page);
+}
+
+static struct address_space_operations aspace_aops = {
+	.writepage = gfs2_aspace_writepage,
+	.releasepage = gfs2_aspace_releasepage,
+};
+
+/**
+ * gfs2_aspace_get - Create and initialize a struct inode structure
+ * @sdp: the filesystem the aspace is in
+ *
+ * Right now a struct inode is just a struct inode.  Maybe Linux
+ * will supply a more lightweight address space construct (that works)
+ * in the future.
+ *
+ * Make sure pages/buffers in this aspace aren't in high memory.
+ *
+ * Returns: the aspace
+ */
+
+struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
+{
+	struct inode *aspace;
+
+	aspace = new_inode(sdp->sd_vfs);
+	if (aspace) {
+		mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
+		aspace->i_mapping->a_ops = &aspace_aops;
+		aspace->i_size = ~0ULL;
+		set_v2ip(aspace, NULL);
+		insert_inode_hash(aspace);
+	}
+
+	return aspace;
+}
+
+void gfs2_aspace_put(struct inode *aspace)
+{
+	remove_inode_hash(aspace);
+	iput(aspace);
+}
+
+/**
+ * gfs2_ail1_start_one - Start I/O on a part of the AIL
+ * @sdp: the filesystem
+ * @tr: the part of the AIL
+ *
+ */
+
+void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head, *tmp, *prev;
+	struct gfs2_bufdata *bd;
+	struct buffer_head *bh;
+	int retry;
+
+	do {
+		retry = FALSE;
+
+		for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev;
+		     tmp != head;
+		     tmp = prev, prev = tmp->prev) {
+			bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list);
+			bh = bd->bd_bh;
+
+			gfs2_assert(sdp, bd->bd_ail == ai,);
+
+			if (!buffer_busy(bh)) {
+				if (!buffer_uptodate(bh))
+					gfs2_io_error_bh(sdp, bh);
+				list_move(&bd->bd_ail_st_list,
+					  &ai->ai_ail2_list);
+				continue;
+			}
+
+			if (!buffer_dirty(bh))
+				continue;
+
+			list_move(&bd->bd_ail_st_list, head);
+
+			gfs2_log_unlock(sdp);
+			wait_on_buffer(bh);
+			ll_rw_block(WRITE, 1, &bh);
+			gfs2_log_lock(sdp);
+
+			retry = TRUE;
+			break;
+		}
+	} while (retry);
+}
+
+/**
+ * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
+ * @sdp: the filesystem
+ * @ai: the AIL entry
+ *
+ */
+
+int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
+{
+	struct list_head *head, *tmp, *prev;
+	struct gfs2_bufdata *bd;
+	struct buffer_head *bh;
+
+	for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev;
+	     tmp != head;
+	     tmp = prev, prev = tmp->prev) {
+		bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list);
+		bh = bd->bd_bh;
+
+		gfs2_assert(sdp, bd->bd_ail == ai,);
+
+		if (buffer_busy(bh)) {
+			if (flags & DIO_ALL)
+				continue;
+			else
+				break;
+		}
+
+		if (!buffer_uptodate(bh))
+			gfs2_io_error_bh(sdp, bh);
+
+		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+	}
+
+	return list_empty(head);
+}
+
+/**
+ * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
+ * @sdp: the filesystem
+ * @ai: the AIL entry
+ *
+ */
+
+void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &ai->ai_ail2_list;
+	struct gfs2_bufdata *bd;
+
+	while (!list_empty(head)) {
+		bd = list_entry(head->prev, struct gfs2_bufdata,
+				bd_ail_st_list);
+		gfs2_assert(sdp, bd->bd_ail == ai,);
+		bd->bd_ail = NULL;
+		list_del(&bd->bd_ail_st_list);
+		list_del(&bd->bd_ail_gl_list);
+		atomic_dec(&bd->bd_gl->gl_ail_count);
+		brelse(bd->bd_bh);
+	}
+}
+
+/**
+ * ail_empty_gl - remove all buffers for a given lock from the AIL
+ * @gl: the glock
+ *
+ * None of the buffers should be dirty, locked, or pinned.
+ */
+
+void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	unsigned int blocks;
+	struct list_head *head = &gl->gl_ail_list;
+	struct gfs2_bufdata *bd;
+	struct buffer_head *bh;
+	uint64_t blkno;
+	int error;
+
+	blocks = atomic_read(&gl->gl_ail_count);
+	if (!blocks)
+		return;
+
+	error = gfs2_trans_begin(sdp, 0, blocks);
+	if (gfs2_assert_withdraw(sdp, !error))
+		return;
+
+	gfs2_log_lock(sdp);
+	while (!list_empty(head)) {
+		bd = list_entry(head->next, struct gfs2_bufdata,
+				bd_ail_gl_list);
+		bh = bd->bd_bh;
+		blkno = bh->b_blocknr;
+		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
+
+		bd->bd_ail = NULL;
+		list_del(&bd->bd_ail_st_list);
+		list_del(&bd->bd_ail_gl_list);
+		atomic_dec(&gl->gl_ail_count);
+		brelse(bh);
+		gfs2_log_unlock(sdp);
+
+		gfs2_trans_add_revoke(sdp, blkno);
+
+		gfs2_log_lock(sdp);
+	}
+	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
+	gfs2_log_unlock(sdp);
+
+	gfs2_trans_end(sdp);
+	gfs2_log_flush(sdp);
+}
+
+/**
+ * gfs2_meta_inval - Invalidate all buffers associated with a glock
+ * @gl: the glock
+ *
+ */
+
+void gfs2_meta_inval(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct inode *aspace = gl->gl_aspace;
+	struct address_space *mapping = gl->gl_aspace->i_mapping;
+
+	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
+
+	atomic_inc(&aspace->i_writecount);
+	truncate_inode_pages(mapping, 0);
+	atomic_dec(&aspace->i_writecount);
+
+	gfs2_assert_withdraw(sdp, !mapping->nrpages);
+}
+
+/**
+ * gfs2_meta_sync - Sync all buffers associated with a glock
+ * @gl: The glock
+ * @flags: DIO_START | DIO_WAIT
+ *
+ */
+
+void gfs2_meta_sync(struct gfs2_glock *gl, int flags)
+{
+	struct address_space *mapping = gl->gl_aspace->i_mapping;
+	int error = 0;
+
+	if (flags & DIO_START)
+		filemap_fdatawrite(mapping);
+	if (!error && (flags & DIO_WAIT))
+		error = filemap_fdatawait(mapping);
+
+	if (error)
+		gfs2_io_error(gl->gl_sbd);
+}
+
+/**
+ * getbuf - Get a buffer with a given address space
+ * @sdp: the filesystem
+ * @aspace: the address space
+ * @blkno: the block number (filesystem scope)
+ * @create: TRUE if the buffer should be created
+ *
+ * Returns: the buffer
+ */
+
+static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
+				  uint64_t blkno, int create)
+{
+	struct page *page;
+	struct buffer_head *bh;
+	unsigned int shift;
+	unsigned long index;
+	unsigned int bufnum;
+
+	shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+	index = blkno >> shift;             /* convert block to page */
+	bufnum = blkno - (index << shift);  /* block buf index within page */
+
+	if (create) {
+		for (;;) {
+			page = grab_cache_page(aspace->i_mapping, index);
+			if (page)
+				break;
+			yield();
+		}
+	} else {
+		page = find_lock_page(aspace->i_mapping, index);
+		if (!page)
+			return NULL;
+	}
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
+
+	/* Locate header for our buffer within our page */
+	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
+		/* Do nothing */;
+	get_bh(bh);
+
+	if (!buffer_mapped(bh))
+		map_bh(bh, sdp->sd_vfs, blkno);
+
+	unlock_page(page);
+	mark_page_accessed(page);
+	page_cache_release(page);
+
+	return bh;
+}
+
+static void meta_prep_new(struct buffer_head *bh)
+{
+	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
+
+	lock_buffer(bh);
+	clear_buffer_dirty(bh);
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+
+	mh->mh_magic = cpu_to_gfs2_32(GFS2_MAGIC);
+	mh->mh_blkno = cpu_to_gfs2_64(bh->b_blocknr);
+}
+
+/**
+ * gfs2_meta_new - Get a block
+ * @gl: The glock associated with this block
+ * @blkno: The block number
+ *
+ * Returns: The buffer
+ */
+
+struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno)
+{
+	struct buffer_head *bh;
+	bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	meta_prep_new(bh);
+	return bh;
+}
+
+/**
+ * gfs2_meta_read - Read a block from disk
+ * @gl: The glock covering the block
+ * @blkno: The block number
+ * @flags: flags to gfs2_dreread()
+ * @bhp: the place where the buffer is returned (NULL on failure)
+ *
+ * Returns: errno
+ */
+
+int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags,
+		   struct buffer_head **bhp)
+{
+	int error;
+
+	*bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags);
+	if (error)
+		brelse(*bhp);
+
+	return error;
+}
+
+/**
+ * gfs2_meta_reread - Reread a block from disk
+ * @sdp: the filesystem
+ * @bh: The block to read
+ * @flags: Flags that control the read
+ *
+ * Returns: errno
+ */
+
+int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags)
+{
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		return -EIO;
+
+	if (flags & DIO_FORCE)
+		clear_buffer_uptodate(bh);
+
+	if ((flags & DIO_START) && !buffer_uptodate(bh))
+		ll_rw_block(READ, 1, &bh);
+
+	if (flags & DIO_WAIT) {
+		wait_on_buffer(bh);
+
+		if (!buffer_uptodate(bh)) {
+			struct gfs2_trans *tr = get_transaction;
+			if (tr && tr->tr_touched)
+				gfs2_io_error_bh(sdp, bh);
+			return -EIO;
+		}
+		if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_meta_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
+ * @gl: the glock the buffer belongs to
+ * @bh: The buffer to be attached to
+ *
+ */
+
+void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh)
+{
+	struct gfs2_bufdata *bd;
+
+	lock_page(bh->b_page);
+
+	if (get_v2bd(bh)) {
+		unlock_page(bh->b_page);
+		return;
+	}
+
+	bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_KERNEL | __GFP_NOFAIL),
+	atomic_inc(&gl->gl_sbd->sd_bufdata_count);
+
+	memset(bd, 0, sizeof(struct gfs2_bufdata));
+
+	bd->bd_bh = bh;
+	bd->bd_gl = gl;
+
+	INIT_LIST_HEAD(&bd->bd_list_tr);
+	lops_init_le(&bd->bd_le, &gfs2_buf_lops);
+
+	set_v2bd(bh, bd);
+
+	unlock_page(bh->b_page);
+}
+
+/**
+ * gfs2_meta_pin - Pin a metadata buffer in memory
+ * @sdp: the filesystem the buffer belongs to
+ * @bh: The buffer to be pinned
+ *
+ */
+
+void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+	struct gfs2_bufdata *bd = get_v2bd(bh);
+
+	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+
+	if (test_set_buffer_pinned(bh))
+		gfs2_assert_withdraw(sdp, FALSE);
+
+	wait_on_buffer(bh);
+
+	/* If this buffer is in the AIL and it has already been written
+	   to in-place disk block, remove it from the AIL. */
+
+	gfs2_log_lock(sdp);
+	if (bd->bd_ail && !buffer_in_io(bh))
+		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+	gfs2_log_unlock(sdp);
+
+	clear_buffer_dirty(bh);
+	wait_on_buffer(bh);
+
+	if (!buffer_uptodate(bh))
+		gfs2_io_error_bh(sdp, bh);
+
+	get_bh(bh);
+}
+
+/**
+ * gfs2_meta_unpin - Unpin a buffer
+ * @sdp: the filesystem the buffer belongs to
+ * @bh: The buffer to unpin
+ * @ai:
+ *
+ */
+
+void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		     struct gfs2_ail *ai)
+{
+	struct gfs2_bufdata *bd = get_v2bd(bh);
+
+	gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
+
+	if (!buffer_pinned(bh))
+		gfs2_assert_withdraw(sdp, FALSE);
+
+	mark_buffer_dirty(bh);
+	clear_buffer_pinned(bh);
+
+	gfs2_log_lock(sdp);
+	if (bd->bd_ail) {
+		list_del(&bd->bd_ail_st_list);
+		brelse(bh);
+	} else {
+		struct gfs2_glock *gl = bd->bd_gl;
+		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
+		atomic_inc(&gl->gl_ail_count);
+	}
+	bd->bd_ail = ai;
+	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+	gfs2_log_unlock(sdp);
+}
+
+/**
+ * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
+ * @ip: the inode who owns the buffers
+ * @bstart: the first buffer in the run
+ * @blen: the number of buffers in the run
+ *
+ */
+
+void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct inode *aspace = ip->i_gl->gl_aspace;
+	struct buffer_head *bh;
+
+	while (blen) {
+		bh = getbuf(sdp, aspace, bstart, NO_CREATE);
+		if (bh) {
+			struct gfs2_bufdata *bd = get_v2bd(bh);
+
+			if (test_clear_buffer_pinned(bh)) {
+				gfs2_log_lock(sdp);
+				list_del_init(&bd->bd_le.le_list);
+				gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
+				sdp->sd_log_num_buf--;
+				gfs2_log_unlock(sdp);
+				get_transaction->tr_num_buf_rm++;
+				brelse(bh);
+			}
+			if (bd) {
+				gfs2_log_lock(sdp);
+				if (bd->bd_ail) {
+					uint64_t blkno = bh->b_blocknr;
+					bd->bd_ail = NULL;
+					list_del(&bd->bd_ail_st_list);
+					list_del(&bd->bd_ail_gl_list);
+					atomic_dec(&bd->bd_gl->gl_ail_count);
+					brelse(bh);
+					gfs2_log_unlock(sdp);
+					gfs2_trans_add_revoke(sdp, blkno);
+				} else
+					gfs2_log_unlock(sdp);
+			}
+
+			lock_buffer(bh);
+			clear_buffer_dirty(bh);
+			clear_buffer_uptodate(bh);
+			unlock_buffer(bh);
+
+			brelse(bh);
+		}
+
+		bstart++;
+		blen--;
+	}
+}
+
+/**
+ * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
+ * @ip: The GFS2 inode
+ *
+ * This releases buffers that are in the most-recently-used array of
+ * blocks used for indirect block addressing for this inode.
+ */
+
+void gfs2_meta_cache_flush(struct gfs2_inode *ip)
+{
+	struct buffer_head **bh_slot;
+	unsigned int x;
+
+	spin_lock(&ip->i_spin);
+
+	for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
+		bh_slot = &ip->i_cache[x];
+		if (!*bh_slot)
+			break;
+		brelse(*bh_slot);
+		*bh_slot = NULL;
+	}
+
+	spin_unlock(&ip->i_spin);
+}
+
+/**
+ * gfs2_meta_indirect_buffer - Get a metadata buffer
+ * @ip: The GFS2 inode
+ * @height: The level of this buf in the metadata (indir addr) tree (if any)
+ * @num: The block number (device relative) of the buffer
+ * @new: Non-zero if we may create a new buffer
+ * @bhp: the buffer is returned here
+ *
+ * Try to use the gfs2_inode's MRU metadata tree cache.
+ *
+ * Returns: errno
+ */
+
+int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
+			      int new, struct buffer_head **bhp)
+{
+	struct buffer_head *bh, **bh_slot = ip->i_cache + height;
+	int error;
+
+	spin_lock(&ip->i_spin);
+	bh = *bh_slot;
+	if (bh) {
+		if (bh->b_blocknr == num)
+			get_bh(bh);
+		else
+			bh = NULL;
+	}
+	spin_unlock(&ip->i_spin);
+
+	if (bh) {
+		if (new)
+			meta_prep_new(bh);
+		else {
+			error = gfs2_meta_reread(ip->i_sbd, bh,
+						 DIO_START | DIO_WAIT);
+			if (error) {
+				brelse(bh);
+				return error;
+			}
+		}
+	} else {
+		if (new)
+			bh = gfs2_meta_new(ip->i_gl, num);
+		else {
+			error = gfs2_meta_read(ip->i_gl, num,
+					       DIO_START | DIO_WAIT, &bh);
+			if (error)
+				return error;
+		}
+
+		spin_lock(&ip->i_spin);
+		if (*bh_slot != bh) {
+			brelse(*bh_slot);
+			*bh_slot = bh;
+			get_bh(bh);
+		}
+		spin_unlock(&ip->i_spin);
+	}
+
+	if (new) {
+		if (gfs2_assert_warn(ip->i_sbd, height)) {
+			brelse(bh);
+			return -EIO;
+		}
+		gfs2_trans_add_bh(ip->i_gl, bh);
+		gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
+		gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
+
+	} else if (gfs2_metatype_check(ip->i_sbd, bh,
+			     (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) {
+		brelse(bh);
+		return -EIO;
+	}
+
+	*bhp = bh;
+
+	return 0;
+}
+
+/**
+ * gfs2_meta_ra - start readahead on an extent of a file
+ * @gl: the glock the blocks belong to
+ * @dblock: the starting disk block
+ * @extlen: the number of blocks in the extent
+ *
+ */
+
+void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct inode *aspace = gl->gl_aspace;
+	struct buffer_head *first_bh, *bh;
+	uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift;
+	int error;
+
+	if (!extlen || !max_ra)
+		return;
+	if (extlen > max_ra)
+		extlen = max_ra;
+
+	first_bh = getbuf(sdp, aspace, dblock, CREATE);
+
+	if (buffer_uptodate(first_bh))
+		goto out;
+	if (!buffer_locked(first_bh)) {
+		error = gfs2_meta_reread(sdp, first_bh, DIO_START);
+		if (error)
+			goto out;
+	}
+
+	dblock++;
+	extlen--;
+
+	while (extlen) {
+		bh = getbuf(sdp, aspace, dblock, CREATE);
+
+		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
+			error = gfs2_meta_reread(sdp, bh, DIO_START);
+			brelse(bh);
+			if (error)
+				goto out;
+		} else
+			brelse(bh);
+
+		dblock++;
+		extlen--;
+
+		if (buffer_uptodate(first_bh))
+			break;
+	}
+
+ out:
+	brelse(first_bh);
+}
+
+/**
+ * gfs2_meta_syncfs - sync all the buffers in a filesystem
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
+{
+	gfs2_log_flush(sdp);
+	for (;;) {
+		gfs2_ail1_start(sdp, DIO_ALL);
+		if (gfs2_ail1_empty(sdp, DIO_ALL))
+			break;
+		msleep(100);
+	}
+}
+
--- a/fs/gfs2/meta_io.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/meta_io.h	2005-09-01 17:36:55.368107416 +0800
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __DIO_DOT_H__
+#define __DIO_DOT_H__
+
+static inline void gfs2_buffer_clear(struct buffer_head *bh)
+{
+	memset(bh->b_data, 0, bh->b_size);
+}
+
+static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
+{
+	memset(bh->b_data + head, 0, bh->b_size - head);
+}
+
+static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset,
+					  int amount, int journaled)
+{
+	int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
+	int z_len1 = offset - z_off1;
+	int z_off2 = offset + amount;
+	int z_len2 = (bh)->b_size - z_off2;
+
+	if (z_len1)
+		memset(bh->b_data + z_off1, 0, z_len1);
+
+	if (z_len2)
+		memset(bh->b_data + z_off2, 0, z_len2);
+}
+
+static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
+					 int to_head,
+					 struct buffer_head *from_bh,
+					 int from_head)
+{
+	memcpy(to_bh->b_data + to_head,
+	       from_bh->b_data + from_head,
+	       from_bh->b_size - from_head);
+	memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
+	       0,
+	       from_head - to_head);
+}
+
+struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
+void gfs2_aspace_put(struct inode *aspace);
+
+void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
+int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
+void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
+void gfs2_ail_empty_gl(struct gfs2_glock *gl);
+
+void gfs2_meta_inval(struct gfs2_glock *gl);
+void gfs2_meta_sync(struct gfs2_glock *gl, int flags);
+
+struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno);
+int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno,
+		   int flags, struct buffer_head **bhp);
+int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags);
+
+void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		 struct gfs2_ail *ai);
+
+void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
+
+void gfs2_meta_cache_flush(struct gfs2_inode *ip);
+int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
+			      int new, struct buffer_head **bhp);
+
+static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
+					 struct buffer_head **bhp)
+{
+	return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, FALSE, bhp);
+}
+
+void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen);
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
+
+#endif /* __DIO_DOT_H__ */
+
--- a/fs/gfs2/ondisk.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ondisk.c	2005-09-01 17:36:55.391103920 +0800
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+
+#define pv(struct, member, fmt) printk("  "#member" = "fmt"\n", struct->member);
+
+#define WANT_GFS2_CONVERSION_FUNCTIONS
+#include <linux/gfs2_ondisk.h>
+
+#ifdef GFS2_ENDIAN_BIG
+#warning Big endian is set.
+#endif
+
--- a/fs/gfs2/ops_address.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_address.c	2005-09-01 17:36:55.392103768 +0800
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "inode.h"
+#include "jdata.h"
+#include "log.h"
+#include "meta_io.h"
+#include "ops_address.h"
+#include "page.h"
+#include "quota.h"
+#include "trans.h"
+
+/**
+ * get_block - Fills in a buffer head with details about a block
+ * @inode: The inode
+ * @lblock: The block number to look up
+ * @bh_result: The buffer head to return the result in
+ * @create: Non-zero if we may add block to the file
+ *
+ * Returns: errno
+ */
+
+static int get_block(struct inode *inode, sector_t lblock,
+		     struct buffer_head *bh_result, int create)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	int new = create;
+	uint64_t dblock;
+	int error;
+
+	error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
+	if (error)
+		return error;
+
+	if (!dblock)
+		return 0;
+
+	map_bh(bh_result, inode->i_sb, dblock);
+	if (new)
+		set_buffer_new(bh_result);
+
+	return 0;
+}
+
+/**
+ * get_block_noalloc - Fills in a buffer head with details about a block
+ * @inode: The inode
+ * @lblock: The block number to look up
+ * @bh_result: The buffer head to return the result in
+ * @create: Non-zero if we may add block to the file
+ *
+ * Returns: errno
+ */
+
+static int get_block_noalloc(struct inode *inode, sector_t lblock,
+			     struct buffer_head *bh_result, int create)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	int new = FALSE;
+	uint64_t dblock;
+	int error;
+
+	error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
+	if (error)
+		return error;
+
+	if (dblock)
+		map_bh(bh_result, inode->i_sb, dblock);
+	else if (gfs2_assert_withdraw(ip->i_sbd, !create))
+		error = -EIO;
+
+	return error;
+}
+
+static int get_blocks(struct inode *inode, sector_t lblock,
+		      unsigned long max_blocks, struct buffer_head *bh_result,
+		      int create)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	int new = create;
+	uint64_t dblock;
+	uint32_t extlen;
+	int error;
+
+	error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
+	if (error)
+		return error;
+
+	if (!dblock)
+		return 0;
+
+	map_bh(bh_result, inode->i_sb, dblock);
+	if (new)
+		set_buffer_new(bh_result);
+
+	if (extlen > max_blocks)
+		extlen = max_blocks;
+	bh_result->b_size = extlen << inode->i_blkbits;
+
+	return 0;
+}
+
+static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
+			      unsigned long max_blocks,
+			      struct buffer_head *bh_result, int create)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	int new = FALSE;
+	uint64_t dblock;
+	uint32_t extlen;
+	int error;
+
+	error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
+	if (error)
+		return error;
+
+	if (dblock) {
+		map_bh(bh_result, inode->i_sb, dblock);
+		if (extlen > max_blocks)
+			extlen = max_blocks;
+		bh_result->b_size = extlen << inode->i_blkbits;
+	} else if (gfs2_assert_withdraw(ip->i_sbd, !create))
+		error = -EIO;
+
+	return error;
+}
+
+/**
+ * gfs2_writepage - Write complete page
+ * @page: Page to write
+ *
+ * Returns: errno
+ *
+ * Use Linux VFS block_write_full_page() to write one page,
+ *   using GFS2's get_block_noalloc to find which blocks to write.
+ */
+
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct gfs2_inode *ip = get_v2ip(page->mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_address);
+
+	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
+		unlock_page(page);
+		return -EIO;
+	}
+	if (get_transaction) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+
+	error = block_write_full_page(page, get_block_noalloc, wbc);
+
+	gfs2_meta_cache_flush(ip);
+
+	return error;
+}
+
+/**
+ * stuffed_readpage - Fill in a Linux page with stuffed file data
+ * @ip: the inode
+ * @page: the page
+ *
+ * Returns: errno
+ */
+
+static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+{
+	struct buffer_head *dibh;
+	void *kaddr;
+	int error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	kaddr = kmap(page);
+	memcpy((char *)kaddr,
+	       dibh->b_data + sizeof(struct gfs2_dinode),
+	       ip->i_di.di_size);
+	memset((char *)kaddr + ip->i_di.di_size,
+	       0,
+	       PAGE_CACHE_SIZE - ip->i_di.di_size);
+	kunmap(page);
+
+	brelse(dibh);
+
+	SetPageUptodate(page);
+
+	return 0;
+}
+
+static int zero_readpage(struct page *page)
+{
+	void *kaddr;
+
+	kaddr = kmap(page);
+	memset(kaddr, 0, PAGE_CACHE_SIZE);
+	kunmap(page);
+
+	SetPageUptodate(page);
+	unlock_page(page);
+
+	return 0;
+}
+
+/**
+ * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
+ * @ip:
+ * @page: The page to read
+ *
+ * Returns: errno
+ */
+
+static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
+{
+	void *kaddr;
+	int ret;
+
+	kaddr = kmap(page);
+
+	ret = gfs2_jdata_read_mem(ip, kaddr,
+				  (uint64_t)page->index << PAGE_CACHE_SHIFT,
+				  PAGE_CACHE_SIZE);
+	if (ret >= 0) {
+		if (ret < PAGE_CACHE_SIZE)
+			memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
+		SetPageUptodate(page);
+		ret = 0;
+	}
+
+	kunmap(page);
+
+	unlock_page(page);
+
+	return ret;
+}
+
+/**
+ * gfs2_readpage - readpage with locking
+ * @file: The file to read a page for
+ * @page: The page to read
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readpage(struct file *file, struct page *page)
+{
+	struct gfs2_inode *ip = get_v2ip(page->mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_address);
+
+	if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) {
+		unlock_page(page);
+		return -EOPNOTSUPP;
+	}
+
+	if (!gfs2_is_jdata(ip)) {
+		if (gfs2_is_stuffed(ip)) {
+			if (!page->index) {
+				error = stuffed_readpage(ip, page);
+				unlock_page(page);
+			} else
+				error = zero_readpage(page);
+		} else
+			error = block_read_full_page(page, get_block);
+	} else
+		error = jdata_readpage(ip, page);
+
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+
+	return error;
+}
+
+/**
+ * gfs2_prepare_write - Prepare to write a page to a file
+ * @file: The file to write to
+ * @page: The page which is to be prepared for writing
+ * @from: From (byte range within page)
+ * @to: To (byte range within page)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_prepare_write(struct file *file, struct page *page,
+			      unsigned from, unsigned to)
+{
+	struct gfs2_inode *ip = get_v2ip(page->mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int error = 0;
+
+	atomic_inc(&sdp->sd_ops_address);
+
+	if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
+		return -EOPNOTSUPP;
+
+	if (gfs2_is_stuffed(ip)) {
+		uint64_t file_size;
+		file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+		if (file_size > sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_dinode)) {
+			error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
+						    page);
+			if (!error)
+				error = block_prepare_write(page, from, to,
+							    get_block);
+		} else if (!PageUptodate(page))
+			error = stuffed_readpage(ip, page);
+	} else
+		error = block_prepare_write(page, from, to, get_block);
+
+	return error;
+}
+
+/**
+ * gfs2_commit_write - Commit write to a file
+ * @file: The file to write to
+ * @page: The page containing the data
+ * @from: From (byte range within page)
+ * @to: To (byte range within page)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_commit_write(struct file *file, struct page *page,
+			     unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_address);
+
+	if (gfs2_is_stuffed(ip)) {
+		struct buffer_head *dibh;
+		uint64_t file_size;
+		void *kaddr;
+
+		file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto fail;
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+
+		kaddr = kmap(page);
+		memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
+		       (char *)kaddr + from,
+		       to - from);
+		kunmap(page);
+
+		brelse(dibh);
+
+		SetPageUptodate(page);
+
+		if (inode->i_size < file_size)
+			i_size_write(inode, file_size);
+	} else {
+		if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+			gfs2_page_add_databufs(sdp, page, from, to);
+		error = generic_commit_write(file, page, from, to);
+		if (error)
+			goto fail;
+	}
+
+	return 0;
+
+ fail:
+	ClearPageUptodate(page);
+
+	return error;
+}
+
+/**
+ * gfs2_bmap - Block map function
+ * @mapping: Address space info
+ * @lblock: The block to map
+ *
+ * Returns: The disk address for the block or 0 on hole or error
+ */
+
+static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
+{
+	struct gfs2_inode *ip = get_v2ip(mapping->host);
+	struct gfs2_holder i_gh;
+	sector_t dblock = 0;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_address);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return 0;
+
+	if (!gfs2_is_stuffed(ip))
+		dblock = generic_block_bmap(mapping, lblock, get_block);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return dblock;
+}
+
+static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+	struct gfs2_databuf *db;
+
+	gfs2_log_lock(sdp);
+	db = get_v2db(bh);
+	if (db) {
+		db->db_bh = NULL;
+		set_v2db(bh, NULL);
+		gfs2_log_unlock(sdp);
+		brelse(bh);
+	} else
+		gfs2_log_unlock(sdp);
+
+	lock_buffer(bh);
+	clear_buffer_dirty(bh);
+	bh->b_bdev = NULL;
+	clear_buffer_mapped(bh);
+	clear_buffer_req(bh);
+	clear_buffer_new(bh);
+	clear_buffer_delay(bh);
+	unlock_buffer(bh);
+}
+
+static int gfs2_invalidatepage(struct page *page, unsigned long offset)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(page->mapping->host->i_sb);
+	struct buffer_head *head, *bh, *next;
+	unsigned int curr_off = 0;
+	int ret = 1;
+
+	BUG_ON(!PageLocked(page));
+	if (!page_has_buffers(page))
+		return 1;
+
+	bh = head = page_buffers(page);
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+		next = bh->b_this_page;
+
+		if (offset <= curr_off)
+			discard_buffer(sdp, bh);
+
+		curr_off = next_off;
+		bh = next;
+	} while (bh != head);
+
+	if (!offset)
+		ret = try_to_release_page(page, 0);
+
+	return ret;
+}
+
+static int gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+			  loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	get_blocks_t *gb = get_blocks;
+
+	atomic_inc(&sdp->sd_ops_address);
+
+	if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+	    gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+		return -EINVAL;
+
+	if (rw == WRITE && !get_transaction)
+		gb = get_blocks_noalloc;
+
+	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+				  offset, nr_segs, gb, NULL);
+}
+
+struct address_space_operations gfs2_file_aops = {
+	.writepage = gfs2_writepage,
+	.readpage = gfs2_readpage,
+	.sync_page = block_sync_page,
+	.prepare_write = gfs2_prepare_write,
+	.commit_write = gfs2_commit_write,
+	.bmap = gfs2_bmap,
+	.invalidatepage = gfs2_invalidatepage,
+	.direct_IO = gfs2_direct_IO,
+};
+
--- a/fs/gfs2/ops_address.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_address.h	2005-09-01 17:36:55.392103768 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_ADDRESS_DOT_H__
+#define __OPS_ADDRESS_DOT_H__
+
+extern struct address_space_operations gfs2_file_aops;
+
+#endif /* __OPS_ADDRESS_DOT_H__ */
--- a/fs/gfs2/ops_dentry.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_dentry.c	2005-09-01 17:36:55.392103768 +0800
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "dir.h"
+#include "glock.h"
+#include "ops_dentry.h"
+
+/**
+ * gfs2_drevalidate - Check directory lookup consistency
+ * @dentry: the mapping to check
+ * @nd:
+ *
+ * Check to make sure the lookup necessary to arrive at this inode from its
+ * parent is still good.
+ *
+ * Returns: 1 if the dentry is ok, 0 if it isn't
+ */
+
+static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct gfs2_inode *dip = get_v2ip(parent->d_inode);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct inode *inode;
+	struct gfs2_holder d_gh;
+	struct gfs2_inode *ip;
+	struct gfs2_inum inum;
+	unsigned int type;
+	int error;
+
+	lock_kernel();
+
+	atomic_inc(&sdp->sd_ops_dentry);
+
+	inode = dentry->d_inode;
+	if (inode && is_bad_inode(inode))
+		goto invalid;
+
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	if (error)
+		goto fail;
+
+	error = gfs2_dir_search(dip, &dentry->d_name, &inum, &type);
+	switch (error) {
+	case 0:
+		if (!inode)
+			goto invalid_gunlock;
+		break;
+	case -ENOENT:
+		if (!inode)
+			goto valid_gunlock;
+		goto invalid_gunlock;
+	default:
+		goto fail_gunlock;
+	}
+
+	ip = get_v2ip(inode);
+
+	if (!gfs2_inum_equal(&ip->i_num, &inum))
+		goto invalid_gunlock;
+
+	if (IF2DT(ip->i_di.di_mode) != type) {
+		gfs2_consist_inode(dip);
+		goto fail_gunlock;
+	}
+
+ valid_gunlock:
+	gfs2_glock_dq_uninit(&d_gh);
+
+ valid:
+	unlock_kernel();
+	dput(parent);
+	return 1;
+
+ invalid_gunlock:
+	gfs2_glock_dq_uninit(&d_gh);
+
+ invalid:
+	if (inode && S_ISDIR(inode->i_mode)) {
+		if (have_submounts(dentry))
+			goto valid;
+		shrink_dcache_parent(dentry);
+	}
+	d_drop(dentry);
+
+	unlock_kernel();
+	dput(parent);
+	return 0;
+
+ fail_gunlock:
+	gfs2_glock_dq_uninit(&d_gh);
+
+ fail:
+	unlock_kernel();
+	dput(parent);
+	return 0;
+}
+
+struct dentry_operations gfs2_dops = {
+	.d_revalidate = gfs2_drevalidate,
+};
+
--- a/fs/gfs2/ops_dentry.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_dentry.h	2005-09-01 17:36:55.392103768 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_DENTRY_DOT_H__
+#define __OPS_DENTRY_DOT_H__
+
+extern struct dentry_operations gfs2_dops;
+
+#endif /* __OPS_DENTRY_DOT_H__ */
--- a/fs/gfs2/ops_export.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_export.c	2005-09-01 17:36:55.403102096 +0800
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "ops_export.h"
+#include "rgrp.h"
+
+static struct dentry *gfs2_decode_fh(struct super_block *sb,
+				     __u32 *fh,
+				     int fh_len,
+				     int fh_type,
+				     int (*acceptable)(void *context,
+						       struct dentry *dentry),
+				     void *context)
+{
+	struct gfs2_inum this, parent;
+
+	atomic_inc(&get_v2sdp(sb)->sd_ops_export);
+
+	if (fh_type != fh_len)
+		return NULL;
+
+	memset(&parent, 0, sizeof(struct gfs2_inum));
+
+	switch (fh_type) {
+	case 8:
+		parent.no_formal_ino = ((uint64_t)gfs2_32_to_cpu(fh[4])) << 32;
+		parent.no_formal_ino |= gfs2_32_to_cpu(fh[5]);
+		parent.no_addr = ((uint64_t)gfs2_32_to_cpu(fh[6])) << 32;
+		parent.no_addr |= gfs2_32_to_cpu(fh[7]);
+	case 4:
+		this.no_formal_ino = ((uint64_t)gfs2_32_to_cpu(fh[0])) << 32;
+		this.no_formal_ino |= gfs2_32_to_cpu(fh[1]);
+		this.no_addr = ((uint64_t)gfs2_32_to_cpu(fh[2])) << 32;
+		this.no_addr |= gfs2_32_to_cpu(fh[3]);
+		break;
+	default:
+		return NULL;
+	}
+
+	return gfs2_export_ops.find_exported_dentry(sb, &this, &parent,
+						    acceptable, context);
+}
+
+static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+			  int connectable)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+
+	atomic_inc(&sdp->sd_ops_export);
+
+	if (*len < 4 || (connectable && *len < 8))
+		return 255;
+
+	fh[0] = ip->i_num.no_formal_ino >> 32;
+	fh[0] = cpu_to_gfs2_32(fh[0]);
+	fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
+	fh[1] = cpu_to_gfs2_32(fh[1]);
+	fh[2] = ip->i_num.no_addr >> 32;
+	fh[2] = cpu_to_gfs2_32(fh[2]);
+	fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
+	fh[3] = cpu_to_gfs2_32(fh[3]);
+	*len = 4;
+
+	if (!connectable || ip == sdp->sd_root_dir)
+		return *len;
+
+	spin_lock(&dentry->d_lock);
+	inode = dentry->d_parent->d_inode;
+	ip = get_v2ip(inode);
+	gfs2_inode_hold(ip);
+	spin_unlock(&dentry->d_lock);
+
+	fh[4] = ip->i_num.no_formal_ino >> 32;
+	fh[4] = cpu_to_gfs2_32(fh[4]);
+	fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
+	fh[5] = cpu_to_gfs2_32(fh[5]);
+	fh[6] = ip->i_num.no_addr >> 32;
+	fh[6] = cpu_to_gfs2_32(fh[6]);
+	fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
+	fh[7] = cpu_to_gfs2_32(fh[7]);
+	*len = 8;
+
+	gfs2_inode_put(ip);
+
+	return *len;
+}
+
+struct get_name_filldir {
+	struct gfs2_inum inum;
+	char *name;
+};
+
+static int get_name_filldir(void *opaque, const char *name, unsigned int length,
+			    uint64_t offset, struct gfs2_inum *inum,
+			    unsigned int type)
+{
+	struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
+
+	if (!gfs2_inum_equal(inum, &gnfd->inum))
+		return 0;
+
+	memcpy(gnfd->name, name, length);
+	gnfd->name[length] = 0;
+
+	return 1;
+}
+
+static int gfs2_get_name(struct dentry *parent, char *name,
+			 struct dentry *child)
+{
+	struct inode *dir = parent->d_inode;
+	struct inode *inode = child->d_inode;
+	struct gfs2_inode *dip, *ip;
+	struct get_name_filldir gnfd;
+	struct gfs2_holder gh;
+	uint64_t offset = 0;
+	int error;
+
+	if (!dir)
+		return -EINVAL;
+
+	atomic_inc(&get_v2sdp(dir->i_sb)->sd_ops_export);
+
+	if (!S_ISDIR(dir->i_mode) || !inode)
+		return -EINVAL;
+
+	dip = get_v2ip(dir);
+	ip = get_v2ip(inode);
+
+	*name = 0;
+	gnfd.inum = ip->i_num;
+	gnfd.name = name;
+
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (error)
+		return error;
+
+	error = gfs2_dir_read(dip, &offset, &gnfd, get_name_filldir);
+
+	gfs2_glock_dq_uninit(&gh);
+
+	if (!error && !*name)
+		error = -ENOENT;
+
+	return error;
+}
+
+static struct dentry *gfs2_get_parent(struct dentry *child)
+{
+	struct gfs2_inode *dip = get_v2ip(child->d_inode);
+	struct qstr dotdot = { .name = "..", .len = 2 };
+	struct gfs2_inode *ip;
+	struct inode *inode;
+	struct dentry *dentry;
+	int error;
+
+	atomic_inc(&dip->i_sbd->sd_ops_export);
+
+	error = gfs2_lookupi(dip, &dotdot, TRUE, &ip);
+	if (error)
+		return ERR_PTR(error);
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	dentry = d_alloc_anon(inode);
+	if (!dentry) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return dentry;
+}
+
+static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	struct gfs2_inum *inum = (struct gfs2_inum *)inum_p;
+	struct gfs2_holder i_gh, ri_gh, rgd_gh;
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_inode *ip;
+	struct inode *inode;
+	struct dentry *dentry;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_export);
+
+	/* System files? */
+
+	inode = gfs2_iget(sb, inum);
+	if (inode) {
+		ip = get_v2ip(inode);
+		if (ip->i_num.no_formal_ino != inum->no_formal_ino) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
+		goto out_inode;
+	}
+
+	error = gfs2_glock_nq_num(sdp,
+				  inum->no_addr, &gfs2_inode_glops,
+				  LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
+				  &i_gh);
+	if (error)
+		return ERR_PTR(error);
+
+	error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip);
+	if (error)
+		goto fail;
+	if (ip)
+		goto out_ip;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto fail;
+
+	error = -EINVAL;
+	rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
+	if (!rgd)
+		goto fail_rindex;
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
+	if (error)
+		goto fail_rindex;
+
+	error = -ESTALE;
+	if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
+		goto fail_rgd;
+
+	gfs2_glock_dq_uninit(&rgd_gh);
+	gfs2_glock_dq_uninit(&ri_gh);
+
+	error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip);
+	if (error)
+		goto fail;
+
+	error = gfs2_inode_refresh(ip);
+	if (error) {
+		gfs2_inode_put(ip);
+		goto fail;
+	}
+
+	atomic_inc(&sdp->sd_fh2dentry_misses);
+
+ out_ip:
+	error = -EIO;
+	if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) {
+		gfs2_inode_put(ip);
+		goto fail;
+	}
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+ out_inode:
+	dentry = d_alloc_anon(inode);
+	if (!dentry) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return dentry;
+
+ fail_rgd:
+	gfs2_glock_dq_uninit(&rgd_gh);
+
+ fail_rindex:
+	gfs2_glock_dq_uninit(&ri_gh);
+
+ fail:
+	gfs2_glock_dq_uninit(&i_gh);
+	return ERR_PTR(error);
+}
+
+struct export_operations gfs2_export_ops = {
+	.decode_fh = gfs2_decode_fh,
+	.encode_fh = gfs2_encode_fh,
+	.get_name = gfs2_get_name,
+	.get_parent = gfs2_get_parent,
+	.get_dentry = gfs2_get_dentry,
+};
+
--- a/fs/gfs2/ops_export.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_export.h	2005-09-01 17:36:55.403102096 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_EXPORT_DOT_H__
+#define __OPS_EXPORT_DOT_H__
+
+extern struct export_operations gfs2_export_ops;
+
+#endif /* __OPS_EXPORT_DOT_H__ */
--- a/fs/gfs2/ops_file.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_file.c	2005-09-01 17:36:55.414100424 +0800
@@ -0,0 +1,1522 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/gfs2_ioctl.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "ioctl.h"
+#include "jdata.h"
+#include "lm.h"
+#include "log.h"
+#include "meta_io.h"
+#include "ops_file.h"
+#include "ops_vm.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+
+/* "bad" is for NFS support */
+struct filldir_bad_entry {
+	char *fbe_name;
+	unsigned int fbe_length;
+	uint64_t fbe_offset;
+	struct gfs2_inum fbe_inum;
+	unsigned int fbe_type;
+};
+
+struct filldir_bad {
+	struct gfs2_sbd *fdb_sbd;
+
+	struct filldir_bad_entry *fdb_entry;
+	unsigned int fdb_entry_num;
+	unsigned int fdb_entry_off;
+
+	char *fdb_name;
+	unsigned int fdb_name_size;
+	unsigned int fdb_name_off;
+};
+
+/* For regular, non-NFS */
+struct filldir_reg {
+	struct gfs2_sbd *fdr_sbd;
+	int fdr_prefetch;
+
+	filldir_t fdr_filldir;
+	void *fdr_opaque;
+};
+
+typedef ssize_t(*do_rw_t) (struct file *file,
+			   char *buf,
+			   size_t size, loff_t *offset,
+			   unsigned int num_gh, struct gfs2_holder *ghs);
+
+/**
+ * gfs2_llseek - seek to a location in a file
+ * @file: the file
+ * @offset: the offset
+ * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
+ *
+ * SEEK_END requires the glock for the file because it references the
+ * file's size.
+ *
+ * Returns: The new offset, or errno
+ */
+
+static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_holder i_gh;
+	loff_t error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	if (origin == 2) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
+					   &i_gh);
+		if (!error) {
+			error = remote_llseek(file, offset, origin);
+			gfs2_glock_dq_uninit(&i_gh);
+		}
+	} else
+		error = remote_llseek(file, offset, origin);
+
+	return error;
+}
+
+static inline unsigned int vma2state(struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
+	    (VM_MAYWRITE | VM_MAYSHARE))
+		return LM_ST_EXCLUSIVE;
+	return LM_ST_SHARED;
+}
+
+static ssize_t walk_vm_hard(struct file *file, char *buf, size_t size,
+			    loff_t *offset, do_rw_t operation)
+{
+	struct gfs2_holder *ghs;
+	unsigned int num_gh = 0;
+	ssize_t count;
+	struct super_block *sb = file->f_dentry->d_inode->i_sb;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start = (unsigned long)buf;
+	unsigned long end = start + size;
+	int dumping = (current->flags & PF_DUMPCORE);
+	unsigned int x = 0;
+
+	for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+		if (end <= vma->vm_start)
+			break;
+		if (vma->vm_file &&
+		    vma->vm_file->f_dentry->d_inode->i_sb == sb) {
+			num_gh++;
+		}
+	}
+
+	ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!ghs) {
+		if (!dumping)
+			up_read(&mm->mmap_sem);
+		return -ENOMEM;
+	}
+
+	for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+		if (end <= vma->vm_start)
+			break;
+		if (vma->vm_file) {
+			struct inode *inode = vma->vm_file->f_dentry->d_inode;
+			if (inode->i_sb == sb)
+				gfs2_holder_init(get_v2ip(inode)->i_gl,
+						 vma2state(vma), 0, &ghs[x++]);
+		}
+	}
+
+	if (!dumping)
+		up_read(&mm->mmap_sem);
+
+	gfs2_assert(get_v2sdp(sb), x == num_gh,);
+
+	count = operation(file, buf, size, offset, num_gh, ghs);
+
+	while (num_gh--)
+		gfs2_holder_uninit(&ghs[num_gh]);
+	kfree(ghs);
+
+	return count;
+}
+
+/**
+ * walk_vm - Walk the vmas associated with a buffer for read or write.
+ *    If any of them are gfs2, pass the gfs2 inode down to the read/write
+ *    worker function so that locks can be acquired in the correct order.
+ * @file: The file to read/write from/to
+ * @buf: The buffer to copy to/from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @operation: The read or write worker function
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t walk_vm(struct file *file, char *buf, size_t size,
+		       loff_t *offset, do_rw_t operation)
+{
+	struct gfs2_holder gh;
+
+	if (current->mm) {
+		struct super_block *sb = file->f_dentry->d_inode->i_sb;
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+		unsigned long start = (unsigned long)buf;
+		unsigned long end = start + size;
+		int dumping = (current->flags & PF_DUMPCORE);
+
+		if (!dumping)
+			down_read(&mm->mmap_sem);
+
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (end <= vma->vm_start)
+				break;
+			if (vma->vm_file &&
+			    vma->vm_file->f_dentry->d_inode->i_sb == sb)
+				goto do_locks;
+		}
+
+		if (!dumping)
+			up_read(&mm->mmap_sem);
+	}
+
+	return operation(file, buf, size, offset, 0, &gh);
+
+ do_locks:
+	return walk_vm_hard(file, buf, size, offset, operation);
+}
+
+static ssize_t do_jdata_read(struct file *file, char *buf, size_t size,
+			     loff_t *offset)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	ssize_t count = 0;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, buf, size))
+		return -EFAULT;
+
+	if (!(file->f_flags & O_LARGEFILE)) {
+		if (*offset >= MAX_NON_LFS)
+			return -EFBIG;
+		if (*offset + size > MAX_NON_LFS)
+			size = MAX_NON_LFS - *offset;
+	}
+
+	count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
+
+	if (count > 0)
+		*offset += count;
+
+	return count;
+}
+
+/**
+ * do_read_direct - Read bytes from a file
+ * @file: The file to read from
+ * @buf: The buffer to copy into
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @num_gh: The number of other locks we need to do the read
+ * @ghs: the locks we need plus one for our lock
+ *
+ * Outputs: Offset - updated according to number of bytes read
+ *
+ * Returns: The number of bytes read, errno on failure
+ */
+
+static ssize_t do_read_direct(struct file *file, char *buf, size_t size,
+			      loff_t *offset, unsigned int num_gh,
+			      struct gfs2_holder *ghs)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	unsigned int state = LM_ST_DEFERRED;
+	int flags = 0;
+	unsigned int x;
+	ssize_t count = 0;
+	int error;
+
+	for (x = 0; x < num_gh; x++)
+		if (ghs[x].gh_gl == ip->i_gl) {
+			state = LM_ST_SHARED;
+			flags |= GL_LOCAL_EXCL;
+			break;
+		}
+
+	gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
+
+	error = gfs2_glock_nq_m(num_gh + 1, ghs);
+	if (error)
+		goto out;
+
+	error = -EINVAL;
+	if (gfs2_is_jdata(ip))
+		goto out_gunlock;
+
+	if (gfs2_is_stuffed(ip)) {
+		size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
+
+		if (((*offset) & mask) || (((unsigned long)buf) & mask))
+			goto out_gunlock;
+
+		count = do_jdata_read(file, buf, size & ~mask, offset);
+	} else
+		count = generic_file_read(file, buf, size, offset);
+
+	error = 0;
+
+ out_gunlock:
+	gfs2_glock_dq_m(num_gh + 1, ghs);
+
+ out:
+	gfs2_holder_uninit(&ghs[num_gh]);
+
+	return (count) ? count : error;
+}
+
+/**
+ * do_read_buf - Read bytes from a file
+ * @file: The file to read from
+ * @buf: The buffer to copy into
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @num_gh: The number of other locks we need to do the read
+ * @ghs: the locks we need plus one for our lock
+ *
+ * Outputs: Offset - updated according to number of bytes read
+ *
+ * Returns: The number of bytes read, errno on failure
+ */
+
+static ssize_t do_read_buf(struct file *file, char *buf, size_t size,
+			   loff_t *offset, unsigned int num_gh,
+			   struct gfs2_holder *ghs)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	ssize_t count = 0;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
+
+	error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
+	if (error)
+		goto out;
+
+	if (gfs2_is_jdata(ip) ||
+	    (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags)))
+		count = do_jdata_read(file, buf, size, offset);
+	else
+		count = generic_file_read(file, buf, size, offset);
+
+	gfs2_glock_dq_m(num_gh + 1, ghs);
+
+ out:
+	gfs2_holder_uninit(&ghs[num_gh]);
+
+	return (count) ? count : error;
+}
+
+/**
+ * gfs2_read - Read bytes from a file
+ * @file: The file to read from
+ * @buf: The buffer to copy into
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ *
+ * Outputs: Offset - updated according to number of bytes read
+ *
+ * Returns: The number of bytes read, errno on failure
+ */
+
+static ssize_t gfs2_read(struct file *file, char *buf, size_t size,
+			 loff_t *offset)
+{
+	atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
+
+	if (file->f_flags & O_DIRECT)
+		return walk_vm(file, buf, size, offset, do_read_direct);
+	else
+		return walk_vm(file, buf, size, offset, do_read_buf);
+}
+
+/**
+ * grope_mapping - feel up a mapping that needs to be written
+ * @buf: the start of the memory to be written
+ * @size: the size of the memory to be written
+ *
+ * We do this after acquiring the locks on the mapping,
+ * but before starting the write transaction.  We need to make
+ * sure that we don't cause recursive transactions if blocks
+ * need to be allocated to the file backing the mapping.
+ *
+ * Returns: errno
+ */
+
+static int grope_mapping(char *buf, size_t size)
+{
+	unsigned long start = (unsigned long)buf;
+	unsigned long stop = start + size;
+	char c;
+
+	while (start < stop) {
+		if (copy_from_user(&c, (char *)start, 1))
+			return -EFAULT;
+		start += PAGE_CACHE_SIZE;
+		start &= PAGE_CACHE_MASK;
+	}
+
+	return 0;
+}
+
+/**
+ * do_write_direct_alloc - Write bytes to a file
+ * @file: The file to write to
+ * @buf: The buffer to copy from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t do_write_direct_alloc(struct file *file, char *buf, size_t size,
+				     loff_t *offset)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = NULL;
+	struct iovec local_iov = { .iov_base = buf, .iov_len = size };
+	struct buffer_head *dibh;
+	unsigned int data_blocks, ind_blocks;
+	ssize_t count;
+	int error;
+
+	gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto fail;
+
+	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	if (error)
+		goto fail_gunlock_q;
+
+	al->al_requested = data_blocks + ind_blocks;
+
+	error = gfs2_inplace_reserve(ip);
+	if (error)
+		goto fail_gunlock_q;
+
+	error = gfs2_trans_begin(sdp,
+				 al->al_rgd->rd_ri.ri_length + ind_blocks +
+				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
+	if (error)
+		goto fail_ipres;
+
+	if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto fail_end_trans;
+
+		ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
+			(~(S_ISUID | S_ISGID)) : (~S_ISUID);
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	if (gfs2_is_stuffed(ip)) {
+		error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
+		if (error)
+			goto fail_end_trans;
+	}
+
+	count = generic_file_write_nolock(file, &local_iov, 1, offset);
+	if (count < 0) {
+		error = count;
+		goto fail_end_trans;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto fail_end_trans;
+
+	if (ip->i_di.di_size < inode->i_size)
+		ip->i_di.di_size = inode->i_size;
+	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	gfs2_trans_end(sdp);
+
+	if (file->f_flags & O_SYNC)
+		gfs2_log_flush_glock(ip->i_gl);
+
+	gfs2_inplace_release(ip);
+	gfs2_quota_unlock(ip);
+	gfs2_alloc_put(ip);
+
+	return count;
+
+ fail_end_trans:
+	gfs2_trans_end(sdp);
+
+ fail_ipres:
+	gfs2_inplace_release(ip);
+
+ fail_gunlock_q:
+	gfs2_quota_unlock(ip);
+
+ fail:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * do_write_direct - Write bytes to a file
+ * @file: The file to write to
+ * @buf: The buffer to copy from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @num_gh: The number of other locks we need to do the read
+ * @gh: the locks we need plus one for our lock
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t do_write_direct(struct file *file, char *buf, size_t size,
+			       loff_t *offset, unsigned int num_gh,
+			       struct gfs2_holder *ghs)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_file *fp = get_v2fp(file);
+	unsigned int state = LM_ST_DEFERRED;
+	int alloc_required;
+	unsigned int x;
+	size_t s;
+	ssize_t count = 0;
+	int error;
+
+	if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
+		state = LM_ST_EXCLUSIVE;
+	else
+		for (x = 0; x < num_gh; x++)
+			if (ghs[x].gh_gl == ip->i_gl) {
+				state = LM_ST_EXCLUSIVE;
+				break;
+			}
+
+ restart:
+	gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
+
+	error = gfs2_glock_nq_m(num_gh + 1, ghs);
+	if (error)
+		goto out;
+
+	error = -EINVAL;
+	if (gfs2_is_jdata(ip))
+		goto out_gunlock;
+
+	if (num_gh) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out_gunlock;
+	}
+
+	if (file->f_flags & O_APPEND)
+		*offset = ip->i_di.di_size;
+
+	if (!(file->f_flags & O_LARGEFILE)) {
+		error = -EFBIG;
+		if (*offset >= MAX_NON_LFS)
+			goto out_gunlock;
+		if (*offset + size > MAX_NON_LFS)
+			size = MAX_NON_LFS - *offset;
+	}
+
+	if (gfs2_is_stuffed(ip) ||
+	    *offset + size > ip->i_di.di_size ||
+	    ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
+		alloc_required = TRUE;
+	else {
+		error = gfs2_write_alloc_required(ip, *offset, size,
+						 &alloc_required);
+		if (error)
+			goto out_gunlock;
+	}
+
+	if (alloc_required && state != LM_ST_EXCLUSIVE) {
+		gfs2_glock_dq_m(num_gh + 1, ghs);
+		gfs2_holder_uninit(&ghs[num_gh]);
+		state = LM_ST_EXCLUSIVE;
+		goto restart;
+	}
+
+	if (alloc_required) {
+		set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
+
+		/* split large writes into smaller atomic transactions */
+		while (size) {
+			s = gfs2_tune_get(sdp, gt_max_atomic_write);
+			if (s > size)
+				s = size;
+
+			error = do_write_direct_alloc(file, buf, s, offset);
+			if (error < 0)
+				goto out_gunlock;
+
+			buf += error;
+			size -= error;
+			count += error;
+		}
+	} else {
+		struct iovec local_iov = { .iov_base = buf, .iov_len = size };
+		struct gfs2_holder t_gh;
+
+		clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
+
+		error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
+					   GL_NEVER_RECURSE, &t_gh);
+		if (error)
+			goto out_gunlock;
+
+		count = generic_file_write_nolock(file, &local_iov, 1, offset);
+
+		gfs2_glock_dq_uninit(&t_gh);
+	}
+
+	error = 0;
+
+ out_gunlock:
+	gfs2_glock_dq_m(num_gh + 1, ghs);
+
+ out:
+	gfs2_holder_uninit(&ghs[num_gh]);
+
+	return (count) ? count : error;
+}
+
+/**
+ * do_do_write_buf - Write bytes to a file
+ * @file: The file to write to
+ * @buf: The buffer to copy from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t do_do_write_buf(struct file *file, char *buf, size_t size,
+			       loff_t *offset)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = NULL;
+	struct buffer_head *dibh;
+	unsigned int data_blocks, ind_blocks;
+	int alloc_required, journaled;
+	ssize_t count;
+	int error;
+
+	journaled = gfs2_is_jdata(ip);
+
+	gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
+
+	error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
+	if (error)
+		return error;
+
+	if (alloc_required) {
+		al = gfs2_alloc_get(ip);
+
+		error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+		if (error)
+			goto fail;
+
+		error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+		if (error)
+			goto fail_gunlock_q;
+
+		al->al_requested = data_blocks + ind_blocks;
+
+		error = gfs2_inplace_reserve(ip);
+		if (error)
+			goto fail_gunlock_q;
+
+		error = gfs2_trans_begin(sdp,
+					 al->al_rgd->rd_ri.ri_length +
+					 ind_blocks +
+					 ((journaled) ? data_blocks : 0) +
+					 RES_DINODE + RES_STATFS + RES_QUOTA,
+					 0);
+		if (error)
+			goto fail_ipres;
+	} else {
+		error = gfs2_trans_begin(sdp,
+					((journaled) ? data_blocks : 0) +
+					RES_DINODE,
+					0);
+		if (error)
+			goto fail_ipres;
+	}
+
+	if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto fail_end_trans;
+
+		ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
+					  (~(S_ISUID | S_ISGID)) : (~S_ISUID);
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	if (journaled ||
+	    (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) &&
+	     *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))) {
+
+		count = gfs2_jdata_write(ip, buf, *offset, size,
+					 gfs2_copy_from_user);
+		if (count < 0) {
+			error = count;
+			goto fail_end_trans;
+		}
+
+		*offset += count;
+	} else {
+		struct iovec local_iov = { .iov_base = buf, .iov_len = size };
+
+		count = generic_file_write_nolock(file, &local_iov, 1, offset);
+		if (count < 0) {
+			error = count;
+			goto fail_end_trans;
+		}
+
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto fail_end_trans;
+
+		if (ip->i_di.di_size < inode->i_size)
+			ip->i_di.di_size = inode->i_size;
+		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+
+	if (file->f_flags & O_SYNC)
+		gfs2_log_flush_glock(ip->i_gl);
+
+	if (alloc_required) {
+		gfs2_assert_warn(sdp, count != size ||
+				 al->al_alloced);
+		gfs2_inplace_release(ip);
+		gfs2_quota_unlock(ip);
+		gfs2_alloc_put(ip);
+	}
+
+	return count;
+
+ fail_end_trans:
+	gfs2_trans_end(sdp);
+
+ fail_ipres:
+	if (alloc_required)
+		gfs2_inplace_release(ip);
+
+ fail_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(ip);
+
+ fail:
+	if (alloc_required)
+		gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * do_write_buf - Write bytes to a file
+ * @file: The file to write to
+ * @buf: The buffer to copy from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ * @num_gh: The number of other locks we need to do the read
+ * @gh: the locks we need plus one for our lock
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t do_write_buf(struct file *file, char *buf, size_t size,
+			    loff_t *offset, unsigned int num_gh,
+			    struct gfs2_holder *ghs)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	size_t s;
+	ssize_t count = 0;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
+
+	error = gfs2_glock_nq_m(num_gh + 1, ghs);
+	if (error)
+		goto out;
+
+	if (num_gh) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out_gunlock;
+	}
+
+	if (file->f_flags & O_APPEND)
+		*offset = ip->i_di.di_size;
+
+	if (!(file->f_flags & O_LARGEFILE)) {
+		error = -EFBIG;
+		if (*offset >= MAX_NON_LFS)
+			goto out_gunlock;
+		if (*offset + size > MAX_NON_LFS)
+			size = MAX_NON_LFS - *offset;
+	}
+
+	/* split large writes into smaller atomic transactions */
+	while (size) {
+		s = gfs2_tune_get(sdp, gt_max_atomic_write);
+		if (s > size)
+			s = size;
+
+		error = do_do_write_buf(file, buf, s, offset);
+		if (error < 0)
+			goto out_gunlock;
+
+		buf += error;
+		size -= error;
+		count += error;
+	}
+
+	error = 0;
+
+ out_gunlock:
+	gfs2_glock_dq_m(num_gh + 1, ghs);
+
+ out:
+	gfs2_holder_uninit(&ghs[num_gh]);
+
+	return (count) ? count : error;
+}
+
+/**
+ * gfs2_write - Write bytes to a file
+ * @file: The file to write to
+ * @buf: The buffer to copy from
+ * @size: The amount of data requested
+ * @offset: The current file offset
+ *
+ * Outputs: Offset - updated according to number of bytes written
+ *
+ * Returns: The number of bytes written, errno on failure
+ */
+
+static ssize_t gfs2_write(struct file *file, const char *buf, size_t size,
+			  loff_t *offset)
+{
+	struct inode *inode = file->f_mapping->host;
+	ssize_t count;
+
+	atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (!access_ok(VERIFY_READ, buf, size))
+		return -EFAULT;
+
+	down(&inode->i_sem);
+	if (file->f_flags & O_DIRECT)
+		count = walk_vm(file, (char *)buf, size, offset,
+				do_write_direct);
+	else
+		count = walk_vm(file, (char *)buf, size, offset, do_write_buf);
+	up(&inode->i_sem);
+
+	return count;
+}
+
+/**
+ * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
+ * @opaque: opaque data used by the function
+ * @name: the name of the directory entry
+ * @length: the length of the name
+ * @offset: the entry's offset in the directory
+ * @inum: the inode number the entry points to
+ * @type: the type of inode the entry points to
+ *
+ * Returns: 0 on success, 1 if buffer full
+ */
+
+static int filldir_reg_func(void *opaque, const char *name, unsigned int length,
+			    uint64_t offset, struct gfs2_inum *inum,
+			    unsigned int type)
+{
+	struct filldir_reg *fdr = (struct filldir_reg *)opaque;
+	struct gfs2_sbd *sdp = fdr->fdr_sbd;
+	int error;
+
+	error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
+				 inum->no_formal_ino, type);
+	if (error)
+		return 1;
+
+	if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
+		gfs2_glock_prefetch_num(sdp,
+				       inum->no_addr, &gfs2_inode_glops,
+				       LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
+		gfs2_glock_prefetch_num(sdp,
+				       inum->no_addr, &gfs2_iopen_glops,
+				       LM_ST_SHARED, LM_FLAG_TRY);
+	}
+
+	return 0;
+}
+
+/**
+ * readdir_reg - Read directory entries from a directory
+ * @file: The directory to read from
+ * @dirent: Buffer for dirents
+ * @filldir: Function used to do the copying
+ *
+ * Returns: errno
+ */
+
+static int readdir_reg(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
+	struct filldir_reg fdr;
+	struct gfs2_holder d_gh;
+	uint64_t offset = file->f_pos;
+	int error;
+
+	fdr.fdr_sbd = dip->i_sbd;
+	fdr.fdr_prefetch = TRUE;
+	fdr.fdr_filldir = filldir;
+	fdr.fdr_opaque = dirent;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
+	error = gfs2_glock_nq_atime(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		return error;
+	}
+
+	error = gfs2_dir_read(dip, &offset, &fdr, filldir_reg_func);
+
+	gfs2_glock_dq_uninit(&d_gh);
+
+	file->f_pos = offset;
+
+	return error;
+}
+
+/**
+ * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
+ * @opaque: opaque data used by the function
+ * @name: the name of the directory entry
+ * @length: the length of the name
+ * @offset: the entry's offset in the directory
+ * @inum: the inode number the entry points to
+ * @type: the type of inode the entry points to
+ *
+ * For supporting NFS.
+ *
+ * Returns: 0 on success, 1 if buffer full
+ */
+
+static int filldir_bad_func(void *opaque, const char *name, unsigned int length,
+			    uint64_t offset, struct gfs2_inum *inum,
+			    unsigned int type)
+{
+	struct filldir_bad *fdb = (struct filldir_bad *)opaque;
+	struct gfs2_sbd *sdp = fdb->fdb_sbd;
+	struct filldir_bad_entry *fbe;
+
+	if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
+	    fdb->fdb_name_off + length > fdb->fdb_name_size)
+		return 1;
+
+	fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
+	fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
+	memcpy(fbe->fbe_name, name, length);
+	fbe->fbe_length = length;
+	fbe->fbe_offset = offset;
+	fbe->fbe_inum = *inum;
+	fbe->fbe_type = type;
+
+	fdb->fdb_entry_off++;
+	fdb->fdb_name_off += length;
+
+	if (!(length == 1 && *name == '.')) {
+		gfs2_glock_prefetch_num(sdp,
+				       inum->no_addr, &gfs2_inode_glops,
+				       LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
+		gfs2_glock_prefetch_num(sdp,
+				       inum->no_addr, &gfs2_iopen_glops,
+				       LM_ST_SHARED, LM_FLAG_TRY);
+	}
+
+	return 0;
+}
+
+/**
+ * readdir_bad - Read directory entries from a directory
+ * @file: The directory to read from
+ * @dirent: Buffer for dirents
+ * @filldir: Function used to do the copying
+ *
+ * For supporting NFS.
+ *
+ * Returns: errno
+ */
+
+static int readdir_bad(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct filldir_reg fdr;
+	unsigned int entries, size;
+	struct filldir_bad *fdb;
+	struct gfs2_holder d_gh;
+	uint64_t offset = file->f_pos;
+	unsigned int x;
+	struct filldir_bad_entry *fbe;
+	int error;
+
+	entries = gfs2_tune_get(sdp, gt_entries_per_readdir);
+	size = sizeof(struct filldir_bad) +
+	    entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE);
+
+	fdb = kzalloc(size, GFP_KERNEL);
+	if (!fdb)
+		return -ENOMEM;
+
+	fdb->fdb_sbd = sdp;
+	fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
+	fdb->fdb_entry_num = entries;
+	fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
+		entries * sizeof(struct filldir_bad_entry);
+	fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
+	error = gfs2_glock_nq_atime(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		goto out;
+	}
+
+	error = gfs2_dir_read(dip, &offset, fdb, filldir_bad_func);
+
+	gfs2_glock_dq_uninit(&d_gh);
+
+	fdr.fdr_sbd = sdp;
+	fdr.fdr_prefetch = FALSE;
+	fdr.fdr_filldir = filldir;
+	fdr.fdr_opaque = dirent;
+
+	for (x = 0; x < fdb->fdb_entry_off; x++) {
+		fbe = &fdb->fdb_entry[x];
+
+		error = filldir_reg_func(&fdr,
+					 fbe->fbe_name, fbe->fbe_length,
+					 fbe->fbe_offset,
+					 &fbe->fbe_inum, fbe->fbe_type);
+		if (error) {
+			file->f_pos = fbe->fbe_offset;
+			error = 0;
+			goto out;
+		}
+	}
+
+	file->f_pos = offset;
+
+ out:
+	kfree(fdb);
+
+	return error;
+}
+
+/**
+ * gfs2_readdir - Read directory entries from a directory
+ * @file: The directory to read from
+ * @dirent: Buffer for dirents
+ * @filldir: Function used to do the copying
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	int error;
+
+	atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
+
+	if (strcmp(current->comm, "nfsd") != 0)
+		error = readdir_reg(file, dirent, filldir);
+	else
+		error = readdir_bad(file, dirent, filldir);
+
+	return error;
+}
+
+/**
+ * gfs2_ioctl - do an ioctl on a file
+ * @inode: the inode
+ * @file: the file pointer
+ * @cmd: the ioctl command
+ * @arg: the argument
+ *
+ * Returns: errno
+ */
+
+static int gfs2_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+		      unsigned long arg)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	switch (cmd) {
+	case GFS2_IOCTL_IDENTIFY: {
+		unsigned int x = GFS2_MAGIC;
+		if (copy_to_user((unsigned int *)arg, &x, sizeof(unsigned int)))
+			return -EFAULT;
+		return 0;
+	}
+
+	case GFS2_IOCTL_SUPER:
+		return gfs2_ioctl_i(ip, (void *)arg);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+/**
+ * gfs2_mmap -
+ * @file: The file to map
+ * @vma: The VMA which described the mapping
+ *
+ * Returns: 0 or error code
+ */
+
+static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_holder i_gh;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
+	error = gfs2_glock_nq_atime(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		return error;
+	}
+
+	if (gfs2_is_jdata(ip)) {
+		if (vma->vm_flags & VM_MAYSHARE)
+			error = -EOPNOTSUPP;
+		else
+			vma->vm_ops = &gfs2_vm_ops_private;
+	} else {
+		/* This is VM_MAYWRITE instead of VM_WRITE because a call
+		   to mprotect() can turn on VM_WRITE later. */
+
+		if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
+		    (VM_MAYSHARE | VM_MAYWRITE))
+			vma->vm_ops = &gfs2_vm_ops_sharewrite;
+		else
+			vma->vm_ops = &gfs2_vm_ops_private;
+	}
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gfs2_open - open a file
+ * @inode: the inode to open
+ * @file: the struct file for this opening
+ *
+ * Returns: errno
+ */
+
+static int gfs2_open(struct inode *inode, struct file *file)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder i_gh;
+	struct gfs2_file *fp;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
+	if (!fp)
+		return -ENOMEM;
+
+	init_MUTEX(&fp->f_fl_mutex);
+
+	fp->f_inode = ip;
+	fp->f_vfile = file;
+
+	gfs2_assert_warn(ip->i_sbd, !get_v2fp(file));
+	set_v2fp(file, fp);
+
+	if (S_ISREG(ip->i_di.di_mode)) {
+		error = gfs2_glock_nq_init(ip->i_gl,
+					  LM_ST_SHARED, LM_FLAG_ANY,
+					  &i_gh);
+		if (error)
+			goto fail;
+
+		if (!(file->f_flags & O_LARGEFILE) &&
+		    ip->i_di.di_size > MAX_NON_LFS) {
+			error = -EFBIG;
+			goto fail_gunlock;
+		}
+
+		/* Listen to the Direct I/O flag */
+
+		if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
+			file->f_flags |= O_DIRECT;
+
+		/* Don't let the user open O_DIRECT on a jdata file */
+
+		if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
+			error = -EINVAL;
+			goto fail_gunlock;
+		}
+
+		gfs2_glock_dq_uninit(&i_gh);
+	}
+
+	return 0;
+
+ fail_gunlock:
+	gfs2_glock_dq_uninit(&i_gh);
+
+ fail:
+	set_v2fp(file, NULL);
+	kfree(fp);
+
+	return error;
+}
+
+/**
+ * gfs2_close - called to close a struct file
+ * @inode: the inode the struct file belongs to
+ * @file: the struct file being closed
+ *
+ * Returns: errno
+ */
+
+static int gfs2_close(struct inode *inode, struct file *file)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(inode->i_sb);
+	struct gfs2_file *fp;
+
+	atomic_inc(&sdp->sd_ops_file);
+
+	fp = get_v2fp(file);
+	set_v2fp(file, NULL);
+
+	if (gfs2_assert_warn(sdp, fp))
+		return -EIO;
+
+	kfree(fp);
+
+	return 0;
+}
+
+/**
+ * gfs2_fsync - sync the dirty data for a file (across the cluster)
+ * @file: the file that points to the dentry (we ignore this)
+ * @dentry: the dentry that points to the inode to sync
+ *
+ * Returns: errno
+ */
+
+static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+	gfs2_log_flush_glock(ip->i_gl);
+
+	return 0;
+}
+
+/**
+ * gfs2_lock - acquire/release a posix lock on a file
+ * @file: the file pointer
+ * @cmd: either modify or retrieve lock state, possibly wait
+ * @fl: type and range of lock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct lm_lockname name =
+		{ .ln_number = ip->i_num.no_addr,
+		  .ln_type = LM_TYPE_PLOCK };
+
+	atomic_inc(&sdp->sd_ops_file);
+
+	if (!(fl->fl_flags & FL_POSIX))
+		return -ENOLCK;
+	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if (sdp->sd_args.ar_localflocks) {
+		if (IS_GETLK(cmd)) {
+			struct file_lock *tmp;
+			lock_kernel();
+			tmp = posix_test_lock(file, fl);
+			fl->fl_type = F_UNLCK;
+			if (tmp)
+				memcpy(fl, tmp, sizeof(struct file_lock));
+			unlock_kernel();
+			return 0;
+		} else {
+			int error;
+			lock_kernel();
+			error = posix_lock_file_wait(file, fl);
+			unlock_kernel();
+			return error;
+		}
+	}
+
+	if (IS_GETLK(cmd))
+		return gfs2_lm_plock_get(sdp, &name, file, fl);
+	else if (fl->fl_type == F_UNLCK)
+		return gfs2_lm_punlock(sdp, &name, file, fl);
+	else
+		return gfs2_lm_plock(sdp, &name, file, cmd, fl);
+}
+
+/**
+ * gfs2_sendfile - Send bytes to a file or socket
+ * @in_file: The file to read from
+ * @out_file: The file to write to
+ * @count: The amount of data
+ * @offset: The beginning file offset
+ *
+ * Outputs: offset - updated according to number of bytes read
+ *
+ * Returns: The number of bytes sent, errno on failure
+ */
+
+static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
+			     read_actor_t actor, void __user *target)
+{
+	struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
+	struct gfs2_holder gh;
+	ssize_t retval;
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+
+	retval = gfs2_glock_nq_atime(&gh);
+	if (retval)
+		goto out;
+
+	if (gfs2_is_jdata(ip))
+		retval = -EOPNOTSUPP;
+	else
+		retval = generic_file_sendfile(in_file, offset, count, actor,
+					       target);
+
+	gfs2_glock_dq(&gh);
+
+ out:
+	gfs2_holder_uninit(&gh);
+
+	return retval;
+}
+
+static int do_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_file *fp = get_v2fp(file);
+	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
+	struct gfs2_inode *ip = fp->f_inode;
+	struct gfs2_glock *gl;
+	unsigned int state;
+	int flags;
+	int error = 0;
+
+	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+	flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
+
+	down(&fp->f_fl_mutex);
+
+	gl = fl_gh->gh_gl;
+	if (gl) {
+		if (fl_gh->gh_state == state)
+			goto out;
+		gfs2_glock_hold(gl);
+		flock_lock_file_wait(file,
+				     &(struct file_lock){.fl_type = F_UNLCK});		
+		gfs2_glock_dq_uninit(fl_gh);
+	} else {
+		error = gfs2_glock_get(ip->i_sbd,
+				      ip->i_num.no_addr, &gfs2_flock_glops,
+				      CREATE, &gl);
+		if (error)
+			goto out;
+	}
+
+	gfs2_holder_init(gl, state, flags, fl_gh);
+	gfs2_glock_put(gl);
+
+	error = gfs2_glock_nq(fl_gh);
+	if (error) {
+		gfs2_holder_uninit(fl_gh);
+		if (error == GLR_TRYFAILED)
+			error = -EAGAIN;
+	} else {
+		error = flock_lock_file_wait(file, fl);
+		gfs2_assert_warn(ip->i_sbd, !error);
+	}
+
+ out:
+	up(&fp->f_fl_mutex);
+
+	return error;
+}
+
+static void do_unflock(struct file *file, struct file_lock *fl)
+{
+	struct gfs2_file *fp = get_v2fp(file);
+	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
+
+	down(&fp->f_fl_mutex);
+	flock_lock_file_wait(file, fl);
+	if (fl_gh->gh_gl)
+		gfs2_glock_dq_uninit(fl_gh);
+	up(&fp->f_fl_mutex);
+}
+
+/**
+ * gfs2_flock - acquire/release a flock lock on a file
+ * @file: the file pointer
+ * @cmd: either modify or retrieve lock state, possibly wait
+ * @fl: type and range of lock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+
+	atomic_inc(&ip->i_sbd->sd_ops_file);
+
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if (sdp->sd_args.ar_localflocks)
+		return flock_lock_file_wait(file, fl);
+
+	if (fl->fl_type == F_UNLCK) {
+		do_unflock(file, fl);
+		return 0;
+	} else
+		return do_flock(file, cmd, fl);
+}
+
+struct file_operations gfs2_file_fops = {
+	.llseek = gfs2_llseek,
+	.read = gfs2_read,
+	.write = gfs2_write,
+	.ioctl = gfs2_ioctl,
+	.mmap = gfs2_mmap,
+	.open = gfs2_open,
+	.release = gfs2_close,
+	.fsync = gfs2_fsync,
+	.lock = gfs2_lock,
+	.sendfile = gfs2_sendfile,
+	.flock = gfs2_flock,
+};
+
+struct file_operations gfs2_dir_fops = {
+	.readdir = gfs2_readdir,
+	.ioctl = gfs2_ioctl,
+	.open = gfs2_open,
+	.release = gfs2_close,
+	.fsync = gfs2_fsync,
+	.lock = gfs2_lock,
+	.flock = gfs2_flock,
+};
+
--- a/fs/gfs2/ops_file.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_file.h	2005-09-01 17:36:55.414100424 +0800
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_FILE_DOT_H__
+#define __OPS_FILE_DOT_H__
+
+extern struct file_operations gfs2_file_fops;
+extern struct file_operations gfs2_dir_fops;
+
+#endif /* __OPS_FILE_DOT_H__ */
--- a/fs/gfs2/ops_fstype.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_fstype.c	2005-09-01 17:36:55.415100272 +0800
@@ -0,0 +1,801 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/kthread.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "daemon.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "lm.h"
+#include "mount.h"
+#include "ops_export.h"
+#include "ops_fstype.h"
+#include "ops_super.h"
+#include "recovery.h"
+#include "rgrp.h"
+#include "super.h"
+#include "unlinked.h"
+#include "sys.h"
+
+#define DO FALSE
+#define UNDO TRUE
+
+static struct gfs2_sbd *init_sbd(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp;
+	unsigned int x;
+
+	sdp = vmalloc(sizeof(struct gfs2_sbd));
+	if (!sdp)
+		return NULL;
+
+	memset(sdp, 0, sizeof(struct gfs2_sbd));
+
+	set_v2sdp(sb, sdp);
+	sdp->sd_vfs = sb;
+
+	gfs2_tune_init(&sdp->sd_tune);
+
+	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
+		sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
+		INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
+	}
+	INIT_LIST_HEAD(&sdp->sd_reclaim_list);
+	spin_lock_init(&sdp->sd_reclaim_lock);
+	init_waitqueue_head(&sdp->sd_reclaim_wq);
+
+	init_MUTEX(&sdp->sd_inum_mutex);
+	spin_lock_init(&sdp->sd_statfs_spin);
+	init_MUTEX(&sdp->sd_statfs_mutex);
+
+	spin_lock_init(&sdp->sd_rindex_spin);
+	init_MUTEX(&sdp->sd_rindex_mutex);
+	INIT_LIST_HEAD(&sdp->sd_rindex_list);
+	INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
+	INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
+
+	INIT_LIST_HEAD(&sdp->sd_jindex_list);
+	spin_lock_init(&sdp->sd_jindex_spin);
+	init_MUTEX(&sdp->sd_jindex_mutex);
+
+	INIT_LIST_HEAD(&sdp->sd_unlinked_list);
+	spin_lock_init(&sdp->sd_unlinked_spin);
+	init_MUTEX(&sdp->sd_unlinked_mutex);
+
+	INIT_LIST_HEAD(&sdp->sd_quota_list);
+	spin_lock_init(&sdp->sd_quota_spin);
+	init_MUTEX(&sdp->sd_quota_mutex);
+
+	spin_lock_init(&sdp->sd_log_lock);
+	init_waitqueue_head(&sdp->sd_log_trans_wq);
+	init_waitqueue_head(&sdp->sd_log_flush_wq);
+
+	INIT_LIST_HEAD(&sdp->sd_log_le_gl);
+	INIT_LIST_HEAD(&sdp->sd_log_le_buf);
+	INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
+	INIT_LIST_HEAD(&sdp->sd_log_le_rg);
+	INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
+
+	INIT_LIST_HEAD(&sdp->sd_log_blks_list);
+	init_waitqueue_head(&sdp->sd_log_blks_wait);
+
+	INIT_LIST_HEAD(&sdp->sd_ail1_list);
+	INIT_LIST_HEAD(&sdp->sd_ail2_list);
+
+	init_MUTEX(&sdp->sd_log_flush_lock);
+	INIT_LIST_HEAD(&sdp->sd_log_flush_list);
+
+	INIT_LIST_HEAD(&sdp->sd_revoke_list);
+
+	init_MUTEX(&sdp->sd_freeze_lock);
+
+	return sdp;
+}
+
+static void init_vfs(struct gfs2_sbd *sdp)
+{
+	struct super_block *sb = sdp->sd_vfs;
+
+	sb->s_magic = GFS2_MAGIC;
+	sb->s_op = &gfs2_super_ops;
+	sb->s_export_op = &gfs2_export_ops;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
+		set_bit(SDF_NOATIME, &sdp->sd_flags);
+
+	/* Don't let the VFS update atimes.  GFS2 handles this itself. */
+	sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
+
+	/* Set up the buffer cache and fill in some fake block size values
+	   to allow us to read-in the on-disk superblock. */
+	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
+	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
+	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT;
+	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+}
+
+static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
+			int undo)
+{
+	struct task_struct *p;
+	int error = 0;
+
+	if (undo)
+		goto fail_trans;
+
+	p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
+	error = IS_ERR(p);
+	if (error) {
+		fs_err(sdp, "can't start scand thread: %d\n", error);
+		return error;
+	}
+	sdp->sd_scand_process = p;
+
+	for (sdp->sd_glockd_num = 0;
+	     sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
+	     sdp->sd_glockd_num++) {
+		p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
+		error = IS_ERR(p);
+		if (error) {
+			fs_err(sdp, "can't start glockd thread: %d\n", error);
+			goto fail;
+		}
+		sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
+	}
+
+	error = gfs2_glock_nq_num(sdp,
+				  GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
+				  LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
+				  mount_gh);
+	if (error) {
+		fs_err(sdp, "can't acquire mount glock: %d\n", error);
+		goto fail;
+	}
+
+	error = gfs2_glock_nq_num(sdp,
+				  GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
+				  LM_ST_SHARED,
+				  LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE,
+				  &sdp->sd_live_gh);
+	if (error) {
+		fs_err(sdp, "can't acquire live glock: %d\n", error);
+		goto fail_mount;
+	}
+
+	error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
+			       CREATE, &sdp->sd_rename_gl);
+	if (error) {
+		fs_err(sdp, "can't create rename glock: %d\n", error);
+		goto fail_live;
+	}
+
+	error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
+			       CREATE, &sdp->sd_trans_gl);
+	if (error) {
+		fs_err(sdp, "can't create transaction glock: %d\n", error);
+		goto fail_rename;
+	}
+	set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
+
+	return 0;
+
+ fail_trans:
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+ fail_rename:
+	gfs2_glock_put(sdp->sd_rename_gl);
+
+ fail_live:
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+
+ fail_mount:
+	gfs2_glock_dq_uninit(mount_gh);
+
+ fail:
+	while (sdp->sd_glockd_num--)
+		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
+
+	kthread_stop(sdp->sd_scand_process);
+
+	return error;
+}
+
+static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
+{
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_holder sb_gh;
+	int error = 0;
+
+	if (undo) {
+		gfs2_inode_put(sdp->sd_master_dir);
+		return 0;
+	}
+	
+	error = gfs2_glock_nq_num(sdp,
+				 GFS2_SB_LOCK, &gfs2_meta_glops,
+				 LM_ST_SHARED, 0, &sb_gh);
+	if (error) {
+		fs_err(sdp, "can't acquire superblock glock: %d\n", error);
+		return error;
+	}
+
+	error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+	if (error) {
+		fs_err(sdp, "can't read superblock: %d\n", error);
+		goto out;
+	}
+
+	/* Set up the buffer cache and SB for real */
+	error = -EINVAL;
+	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
+		fs_err(sdp, "FS block size (%u) is too small for device "
+		       "block size (%u)\n",
+		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
+		goto out;
+	}
+	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
+		fs_err(sdp, "FS block size (%u) is too big for machine "
+		       "page size (%u)\n",
+		       sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
+		goto out;
+	}
+
+	/* Get rid of buffers from the original block size */
+	sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
+	sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
+
+	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
+
+	error = gfs2_lookup_master_dir(sdp);
+	if (error)
+		fs_err(sdp, "can't read in master directory: %d\n", error);
+
+ out:
+	gfs2_glock_dq_uninit(&sb_gh);
+
+	return error;
+}
+
+static int init_journal(struct gfs2_sbd *sdp, int undo)
+{
+	struct gfs2_holder ji_gh;
+	struct task_struct *p;
+	int jindex = TRUE;
+	int error = 0;
+
+	if (undo) {
+		jindex = FALSE;
+		goto fail_recoverd;
+	}
+
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "jindex",
+				   &sdp->sd_jindex);
+	if (error) {
+		fs_err(sdp, "can't lookup journal index: %d\n", error);
+		return error;
+	}
+	set_bit(GLF_STICKY, &sdp->sd_jindex->i_gl->gl_flags);
+
+	/* Load in the journal index special file */
+
+	error = gfs2_jindex_hold(sdp, &ji_gh);
+	if (error) {
+		fs_err(sdp, "can't read journal index: %d\n", error);
+		goto fail;
+	}
+
+	error = -EINVAL;
+	if (!gfs2_jindex_size(sdp)) {
+		fs_err(sdp, "no journals!\n");
+		goto fail_jindex;		
+	}
+
+	if (sdp->sd_args.ar_spectator) {
+		sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
+		sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
+	} else {
+		if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
+			fs_err(sdp, "can't mount journal #%u\n",
+			       sdp->sd_lockstruct.ls_jid);
+			fs_err(sdp, "there are only %u journals (0 - %u)\n",
+			       gfs2_jindex_size(sdp),
+			       gfs2_jindex_size(sdp) - 1);
+			goto fail_jindex;
+		}
+		sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
+
+		error = gfs2_glock_nq_num(sdp,
+					  sdp->sd_lockstruct.ls_jid,
+					  &gfs2_journal_glops,
+					  LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
+					  &sdp->sd_journal_gh);
+		if (error) {
+			fs_err(sdp, "can't acquire journal glock: %d\n", error);
+			goto fail_jindex;
+		}
+
+		error = gfs2_glock_nq_init(sdp->sd_jdesc->jd_inode->i_gl,
+					   LM_ST_SHARED,
+					   LM_FLAG_NOEXP | GL_EXACT,
+					   &sdp->sd_jinode_gh);
+		if (error) {
+			fs_err(sdp, "can't acquire journal inode glock: %d\n",
+			       error);
+			goto fail_journal_gh;
+		}
+
+		error = gfs2_jdesc_check(sdp->sd_jdesc);
+		if (error) {
+			fs_err(sdp, "my journal (%u) is bad: %d\n",
+			       sdp->sd_jdesc->jd_jid, error);
+			goto fail_jinode_gh;
+		}
+		sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
+	}
+
+	if (sdp->sd_lockstruct.ls_first) {
+		unsigned int x;
+		for (x = 0; x < sdp->sd_journals; x++) {
+			error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
+						     WAIT);
+			if (error) {
+				fs_err(sdp, "error recovering journal %u: %d\n",
+				       x, error);
+				goto fail_jinode_gh;
+			}
+		}
+
+		gfs2_lm_others_may_mount(sdp);
+	} else if (!sdp->sd_args.ar_spectator) {
+		error = gfs2_recover_journal(sdp->sd_jdesc, WAIT);
+		if (error) {
+			fs_err(sdp, "error recovering my journal: %d\n", error);
+			goto fail_jinode_gh;
+		}
+	}
+
+	set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
+	gfs2_glock_dq_uninit(&ji_gh);
+	jindex = FALSE;
+
+	/* Disown my Journal glock */
+
+	sdp->sd_journal_gh.gh_owner = NULL;
+	sdp->sd_jinode_gh.gh_owner = NULL;
+
+	p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
+	error = IS_ERR(p);
+	if (error) {
+		fs_err(sdp, "can't start recoverd thread: %d\n", error);
+		goto fail_jinode_gh;
+	}
+	sdp->sd_recoverd_process = p;
+
+	return 0;
+
+ fail_recoverd:
+	kthread_stop(sdp->sd_recoverd_process);
+
+ fail_jinode_gh:
+	if (!sdp->sd_args.ar_spectator)
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+
+ fail_journal_gh:
+	if (!sdp->sd_args.ar_spectator)
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+
+ fail_jindex:
+	gfs2_jindex_free(sdp);
+	if (jindex)
+		gfs2_glock_dq_uninit(&ji_gh);
+
+ fail:
+	gfs2_inode_put(sdp->sd_jindex);
+
+	return error;
+}
+
+static int init_inodes(struct gfs2_sbd *sdp, int undo)
+{
+	struct inode *inode;
+	struct dentry **dentry = &sdp->sd_vfs->s_root;
+	int error = 0;
+
+	if (undo)
+		goto fail_dput;
+
+	/* Read in the master inode number inode */
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "inum",
+				   &sdp->sd_inum_inode);
+	if (error) {
+		fs_err(sdp, "can't read in inum inode: %d\n", error);
+		return error;
+	}
+
+	/* Read in the master statfs inode */
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "statfs",
+				   &sdp->sd_statfs_inode);
+	if (error) {
+		fs_err(sdp, "can't read in statfs inode: %d\n", error);
+		goto fail;
+	}
+
+	/* Read in the resource index inode */
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "rindex",
+				   &sdp->sd_rindex);
+	if (error) {
+		fs_err(sdp, "can't get resource index inode: %d\n", error);
+		goto fail_statfs;
+	}
+	set_bit(GLF_STICKY, &sdp->sd_rindex->i_gl->gl_flags);
+	sdp->sd_rindex_vn = sdp->sd_rindex->i_gl->gl_vn - 1;
+
+	/* Read in the quota inode */
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "quota",
+				   &sdp->sd_quota_inode);
+	if (error) {
+		fs_err(sdp, "can't get quota file inode: %d\n", error);
+		goto fail_rindex;
+	}
+
+	/* Get the root inode */
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "root",
+				   &sdp->sd_root_dir);
+	if (error) {
+		fs_err(sdp, "can't read in root inode: %d\n", error);
+		goto fail_qinode;
+	}
+
+	/* Get the root inode/dentry */
+	inode = gfs2_ip2v(sdp->sd_root_dir);
+	if (!inode) {
+		fs_err(sdp, "can't get root inode\n");
+		error = -ENOMEM;
+		goto fail_rooti;
+	}
+
+	*dentry = d_alloc_root(inode);
+	if (!*dentry) {
+		iput(inode);
+		fs_err(sdp, "can't get root dentry\n");
+		error = -ENOMEM;
+		goto fail_rooti;
+	}
+
+	return 0;
+
+ fail_dput:
+	dput(*dentry);
+	*dentry = NULL;
+
+ fail_rooti:
+	gfs2_inode_put(sdp->sd_root_dir);
+
+ fail_qinode:
+	gfs2_inode_put(sdp->sd_quota_inode);
+
+ fail_rindex:
+	gfs2_clear_rgrpd(sdp);
+	gfs2_inode_put(sdp->sd_rindex);
+
+ fail_statfs:
+	gfs2_inode_put(sdp->sd_statfs_inode);
+
+ fail:
+	gfs2_inode_put(sdp->sd_inum_inode);
+
+	return error;
+}
+
+static int init_per_node(struct gfs2_sbd *sdp, int undo)
+{
+	struct gfs2_inode *pn = NULL;
+	char buf[30];
+	int error = 0;
+
+	if (sdp->sd_args.ar_spectator)
+		return 0;
+
+	if (undo)
+		goto fail_qc_gh;
+
+	error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node", &pn);
+	if (error) {
+		fs_err(sdp, "can't find per_node directory: %d\n", error);
+		return error;
+	}
+
+	sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
+	error = gfs2_lookup_simple(pn, buf, &sdp->sd_ir_inode);
+	if (error) {
+		fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
+		goto fail;
+	}
+
+	sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
+	error = gfs2_lookup_simple(pn, buf, &sdp->sd_sc_inode);
+	if (error) {
+		fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
+		goto fail_ir_i;
+	}
+
+	sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid);
+	error = gfs2_lookup_simple(pn, buf, &sdp->sd_ut_inode);
+	if (error) {
+		fs_err(sdp, "can't find local \"ut\" file: %d\n", error);
+		goto fail_sc_i;
+	}
+
+	sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
+	error = gfs2_lookup_simple(pn, buf, &sdp->sd_qc_inode);
+	if (error) {
+		fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
+		goto fail_ut_i;
+	}
+
+	gfs2_inode_put(pn);
+	pn = NULL;
+
+	error = gfs2_glock_nq_init(sdp->sd_ir_inode->i_gl,
+				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
+				   &sdp->sd_ir_gh);
+	if (error) {
+		fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
+		goto fail_qc_i;
+	}
+
+	error = gfs2_glock_nq_init(sdp->sd_sc_inode->i_gl,
+				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
+				   &sdp->sd_sc_gh);
+	if (error) {
+		fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
+		goto fail_ir_gh;
+	}
+
+	error = gfs2_glock_nq_init(sdp->sd_ut_inode->i_gl,
+				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
+				   &sdp->sd_ut_gh);
+	if (error) {
+		fs_err(sdp, "can't lock local \"ut\" file: %d\n", error);
+		goto fail_sc_gh;
+	}
+
+	error = gfs2_glock_nq_init(sdp->sd_qc_inode->i_gl,
+				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
+				   &sdp->sd_qc_gh);
+	if (error) {
+		fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
+		goto fail_ut_gh;
+	}
+
+	return 0;
+
+ fail_qc_gh:
+	gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+
+ fail_ut_gh:
+	gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
+
+ fail_sc_gh:
+	gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+
+ fail_ir_gh:
+	gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+
+ fail_qc_i:
+	gfs2_inode_put(sdp->sd_qc_inode);
+
+ fail_ut_i:
+	gfs2_inode_put(sdp->sd_ut_inode);
+
+ fail_sc_i:
+	gfs2_inode_put(sdp->sd_sc_inode);
+
+ fail_ir_i:
+	gfs2_inode_put(sdp->sd_ir_inode);
+
+ fail:
+	if (pn)
+		gfs2_inode_put(pn);
+	return error;
+}
+
+static int init_threads(struct gfs2_sbd *sdp, int undo)
+{
+	struct task_struct *p;
+	int error = 0;
+
+	if (undo)
+		goto fail_inoded;
+
+	sdp->sd_log_flush_time = jiffies;
+	sdp->sd_jindex_refresh_time = jiffies;
+
+	p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+	error = IS_ERR(p);
+	if (error) {
+		fs_err(sdp, "can't start logd thread: %d\n", error);
+		return error;
+	}
+	sdp->sd_logd_process = p;
+
+	sdp->sd_statfs_sync_time = jiffies;
+	sdp->sd_quota_sync_time = jiffies;
+
+	p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+	error = IS_ERR(p);
+	if (error) {
+		fs_err(sdp, "can't start quotad thread: %d\n", error);
+		goto fail;
+	}
+	sdp->sd_quotad_process = p;
+
+	p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded");
+	error = IS_ERR(p);
+	if (error) {
+		fs_err(sdp, "can't start inoded thread: %d\n", error);
+		goto fail_quotad;
+	}
+	sdp->sd_inoded_process = p;
+
+	return 0;
+
+ fail_inoded:
+	kthread_stop(sdp->sd_inoded_process);
+
+ fail_quotad:
+	kthread_stop(sdp->sd_quotad_process);
+
+ fail:
+	kthread_stop(sdp->sd_logd_process);
+	
+	return error;
+}
+
+/**
+ * fill_super - Read in superblock
+ * @sb: The VFS superblock
+ * @data: Mount options
+ * @silent: Don't complain if it's not a GFS2 filesystem
+ *
+ * Returns: errno
+ */
+
+static int fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct gfs2_sbd *sdp;
+	struct gfs2_holder mount_gh;
+	int error;
+
+	sdp = init_sbd(sb);
+	if (!sdp) {
+		printk("GFS2: can't alloc struct gfs2_sbd\n");
+		return -ENOMEM;
+	}
+
+	error = gfs2_mount_args(sdp, (char *)data, FALSE);
+	if (error) {
+		printk("GFS2: can't parse mount arguments\n");
+		goto fail;
+	}
+
+	init_vfs(sdp);
+
+	/* Mount an inter-node lock module, check for local optimizations */
+	error = gfs2_lm_mount(sdp, silent);
+	if (error)
+		goto fail;
+
+	error = init_locking(sdp, &mount_gh, DO);
+	if (error)
+		goto fail_lm;
+
+	error = init_sb(sdp, silent, DO);
+	if (error)
+		goto fail_locking;
+
+	error = init_journal(sdp, DO);
+	if (error)
+		goto fail_sb;
+
+	error = init_inodes(sdp, DO);
+	if (error)
+		goto fail_journals;
+
+	error = init_per_node(sdp, DO);
+	if (error)
+		goto fail_inodes;
+
+	error = gfs2_statfs_init(sdp);
+	if (error) {
+		fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
+		goto fail_per_node;
+	}
+
+	error = init_threads(sdp, DO);
+	if (error)
+		goto fail_per_node;
+
+	error = gfs2_sys_fs_add(sdp);
+	if (error)
+		goto fail_threads;
+
+	/* Make the FS read/write */
+	if (!(sb->s_flags & MS_RDONLY)) {
+		error = gfs2_make_fs_rw(sdp);
+		if (error) {
+			fs_err(sdp, "can't make FS RW: %d\n", error);
+			goto fail_sys;
+		}
+	}
+
+	gfs2_glock_dq_uninit(&mount_gh);
+
+	return 0;
+
+ fail_sys:
+	gfs2_sys_fs_del(sdp);
+
+ fail_threads:
+	init_threads(sdp, UNDO);
+
+ fail_per_node:
+	init_per_node(sdp, UNDO);
+
+ fail_inodes:
+	init_inodes(sdp, UNDO);
+
+ fail_journals:
+	init_journal(sdp, UNDO);
+
+ fail_sb:
+	init_sb(sdp, FALSE, UNDO);
+
+ fail_locking:
+	init_locking(sdp, &mount_gh, UNDO);
+
+ fail_lm:
+	gfs2_gl_hash_clear(sdp, WAIT);
+	gfs2_lm_unmount(sdp);
+	while (invalidate_inodes(sb))
+		yield();
+
+ fail:
+	vfree(sdp);
+	set_v2sdp(sb, NULL);
+
+	return error;
+}
+
+struct super_block *gfs2_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *data)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
+}
+
+struct file_system_type gfs2_fs_type = {
+	.name = "gfs2",
+	.fs_flags = FS_REQUIRES_DEV,
+	.get_sb = gfs2_get_sb,
+	.kill_sb = kill_block_super,
+	.owner = THIS_MODULE,
+};
+
--- a/fs/gfs2/ops_fstype.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_fstype.h	2005-09-01 17:36:55.418099816 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_FSTYPE_DOT_H__
+#define __OPS_FSTYPE_DOT_H__
+
+extern struct file_system_type gfs2_fs_type;
+
+#endif /* __OPS_FSTYPE_DOT_H__ */
--- a/fs/gfs2/ops_inode.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_inode.c	2005-09-01 17:36:55.426098600 +0800
@@ -0,0 +1,1270 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/namei.h>
+#include <linux/utsname.h>
+#include <linux/mm.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "acl.h"
+#include "bmap.h"
+#include "dir.h"
+#include "eaops.h"
+#include "eattr.h"
+#include "glock.h"
+#include "inode.h"
+#include "meta_io.h"
+#include "ops_dentry.h"
+#include "ops_inode.h"
+#include "page.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "unlinked.h"
+
+/**
+ * gfs2_create - Create a file
+ * @dir: The directory in which to create the file
+ * @dentry: The dentry of the new file
+ * @mode: The mode of the new file
+ *
+ * Returns: errno
+ */
+
+static int gfs2_create(struct inode *dir, struct dentry *dentry,
+		       int mode, struct nameidata *nd)
+{
+	struct gfs2_inode *dip = get_v2ip(dir), *ip;
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_holder ghs[2];
+	struct inode *inode;
+	int new = TRUE;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
+
+	for (;;) {
+		error = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
+		if (!error) {
+			ip = get_gl2ip(ghs[1].gh_gl);
+			gfs2_trans_end(sdp);
+			if (dip->i_alloc->al_rgd)
+				gfs2_inplace_release(dip);
+			gfs2_quota_unlock(dip);
+			gfs2_alloc_put(dip);
+			gfs2_glock_dq_uninit_m(2, ghs);
+			break;
+		} else if (error != -EEXIST ||
+			   (nd->intent.open.flags & O_EXCL)) {
+			gfs2_holder_uninit(ghs);
+			return error;
+		}
+
+		error = gfs2_lookupi(dip, &dentry->d_name, FALSE, &ip);
+		if (!error) {
+			new = FALSE;
+			gfs2_holder_uninit(ghs);
+			break;
+		} else if (error != -ENOENT) {
+			gfs2_holder_uninit(ghs);
+			return error;
+		}
+	}
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return -ENOMEM;
+
+	d_instantiate(dentry, inode);
+	if (new)
+		mark_inode_dirty(inode);
+
+	return 0;
+}
+
+/**
+ * gfs2_lookup - Look up a filename in a directory and return its inode
+ * @dir: The directory inode
+ * @dentry: The dentry of the new inode
+ * @nd: passed from Linux VFS, ignored by us
+ *
+ * Called by the VFS layer. Lock dir and call gfs2_lookupi()
+ *
+ * Returns: errno
+ */
+
+static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
+				  struct nameidata *nd)
+{
+	struct gfs2_inode *dip = get_v2ip(dir), *ip;
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct inode *inode = NULL;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	if (!sdp->sd_args.ar_localcaching)
+		dentry->d_op = &gfs2_dops;
+
+	error = gfs2_lookupi(dip, &dentry->d_name, FALSE, &ip);
+	if (!error) {
+		inode = gfs2_ip2v(ip);
+		gfs2_inode_put(ip);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+
+	} else if (error != -ENOENT)
+		return ERR_PTR(error);
+
+	if (inode)
+		return d_splice_alias(inode, dentry);
+	d_add(dentry, inode);
+
+	return NULL;
+}
+
+/**
+ * gfs2_link - Link to a file
+ * @old_dentry: The inode to link
+ * @dir: Add link to this directory
+ * @dentry: The name of the link
+ *
+ * Link the inode in "old_dentry" into the directory "dir" with the
+ * name in "dentry".
+ *
+ * Returns: errno
+ */
+
+static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
+		     struct dentry *dentry)
+{
+	struct gfs2_inode *dip = get_v2ip(dir);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct inode *inode = old_dentry->d_inode;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder ghs[2];
+	int alloc_required;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	if (S_ISDIR(ip->i_di.di_mode))
+		return -EPERM;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+	error = gfs2_glock_nq_m(2, ghs);
+	if (error)
+		goto out;
+
+	error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_dir_search(dip, &dentry->d_name, NULL, NULL);
+	switch (error) {
+	case -ENOENT:
+		break;
+	case 0:
+		error = -EEXIST;
+	default:
+		goto out_gunlock;
+	}
+
+	error = -EINVAL;
+	if (!dip->i_di.di_nlink)
+		goto out_gunlock;
+	error = -EFBIG;
+	if (dip->i_di.di_entries == (uint32_t)-1)
+		goto out_gunlock;
+	error = -EPERM;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		goto out_gunlock;
+	error = -EINVAL;
+	if (!ip->i_di.di_nlink)
+		goto out_gunlock;
+	error = -EMLINK;
+	if (ip->i_di.di_nlink == (uint32_t)-1)
+		goto out_gunlock;
+
+	error = gfs2_diradd_alloc_required(dip, &dentry->d_name,
+					   &alloc_required);
+	if (error)
+		goto out_gunlock;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(dip);
+
+		error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+		if (error)
+			goto out_alloc;
+
+		error = gfs2_quota_check(dip, dip->i_di.di_uid,
+					 dip->i_di.di_gid);
+		if (error)
+			goto out_gunlock_q;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve(dip);
+		if (error)
+			goto out_gunlock_q;
+
+		error = gfs2_trans_begin(sdp,
+					 sdp->sd_max_dirres +
+					 al->al_rgd->rd_ri.ri_length +
+					 2 * RES_DINODE + RES_STATFS +
+					 RES_QUOTA, 0);
+		if (error)
+			goto out_ipres;
+	} else {
+		error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
+		if (error)
+			goto out_ipres;
+	}
+
+	error = gfs2_dir_add(dip, &dentry->d_name, &ip->i_num,
+			     IF2DT(ip->i_di.di_mode));
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_change_nlink(ip, +1);
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipres:
+	if (alloc_required)
+		gfs2_inplace_release(dip);
+
+ out_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(dip);
+
+ out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(dip);
+
+ out_gunlock:
+	gfs2_glock_dq_m(2, ghs);
+
+ out:
+	gfs2_holder_uninit(ghs);
+	gfs2_holder_uninit(ghs + 1);
+
+	if (!error) {
+		atomic_inc(&inode->i_count);
+		d_instantiate(dentry, inode);
+		mark_inode_dirty(inode);
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_unlink - Unlink a file
+ * @dir: The inode of the directory containing the file to unlink
+ * @dentry: The file itself
+ *
+ * Unlink a file.  Call gfs2_unlinki()
+ *
+ * Returns: errno
+ */
+
+static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct gfs2_inode *dip = get_v2ip(dir);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+	struct gfs2_unlinked *ul;
+	struct gfs2_holder ghs[2];
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	error = gfs2_unlinked_get(sdp, &ul);
+	if (error)
+		return error;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+	error = gfs2_glock_nq_m(2, ghs);
+	if (error)
+		goto out;
+
+	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF +
+				RES_UNLINKED, 0);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_unlinki(dip, &dentry->d_name, ip,ul);
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_m(2, ghs);
+
+ out:
+	gfs2_holder_uninit(ghs);
+	gfs2_holder_uninit(ghs + 1);
+
+	gfs2_unlinked_put(sdp, ul);
+
+	return error;
+}
+
+/**
+ * gfs2_symlink - Create a symlink
+ * @dir: The directory to create the symlink in
+ * @dentry: The dentry to put the symlink in
+ * @symname: The thing which the link points to
+ *
+ * Returns: errno
+ */
+
+static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	struct gfs2_inode *dip = get_v2ip(dir), *ip;
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_holder ghs[2];
+	struct inode *inode;
+	struct buffer_head *dibh;
+	int size;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	/* Must be stuffed with a null terminator for gfs2_follow_link() */
+	size = strlen(symname);
+	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
+		return -ENAMETOOLONG;
+
+	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
+
+	error = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
+	if (error) {
+		gfs2_holder_uninit(ghs);
+		return error;
+	}
+
+	ip = get_gl2ip(ghs[1].gh_gl);
+
+	ip->i_di.di_size = size;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+
+	if (!gfs2_assert_withdraw(sdp, !error)) {
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
+		       size);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+	if (dip->i_alloc->al_rgd)
+		gfs2_inplace_release(dip);
+	gfs2_quota_unlock(dip);
+	gfs2_alloc_put(dip);
+
+	gfs2_glock_dq_uninit_m(2, ghs);
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return -ENOMEM;
+
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
+
+	return 0;
+}
+
+/**
+ * gfs2_mkdir - Make a directory
+ * @dir: The parent directory of the new one
+ * @dentry: The dentry of the new directory
+ * @mode: The mode of the new directory
+ *
+ * Returns: errno
+ */
+
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct gfs2_inode *dip = get_v2ip(dir), *ip;
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_holder ghs[2];
+	struct inode *inode;
+	struct buffer_head *dibh;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
+
+	error = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
+	if (error) {
+		gfs2_holder_uninit(ghs);
+		return error;
+	}
+
+	ip = get_gl2ip(ghs[1].gh_gl);
+
+	ip->i_di.di_nlink = 2;
+	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+	ip->i_di.di_flags |= GFS2_DIF_JDATA;
+	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
+	ip->i_di.di_entries = 2;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+
+	if (!gfs2_assert_withdraw(sdp, !error)) {
+		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+		struct gfs2_dirent *dent;
+
+		gfs2_dirent_alloc(ip, dibh, 1, &dent);
+
+		dent->de_inum = di->di_num; /* already GFS2 endian */
+		dent->de_hash = gfs2_disk_hash(".", 1);
+		dent->de_hash = cpu_to_gfs2_32(dent->de_hash);
+		dent->de_type = DT_DIR;
+		memcpy((char *) (dent + 1), ".", 1);
+		di->di_entries = cpu_to_gfs2_32(1);
+
+		gfs2_dirent_alloc(ip, dibh, 2, &dent);
+
+		gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
+		dent->de_hash = gfs2_disk_hash("..", 2);
+		dent->de_hash = cpu_to_gfs2_32(dent->de_hash);
+		dent->de_type = DT_DIR;
+		memcpy((char *) (dent + 1), "..", 2);
+
+		gfs2_dinode_out(&ip->i_di, (char *)di);
+
+		brelse(dibh);
+	}
+
+	error = gfs2_change_nlink(dip, +1);
+	gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
+
+	gfs2_trans_end(sdp);
+	if (dip->i_alloc->al_rgd)
+		gfs2_inplace_release(dip);
+	gfs2_quota_unlock(dip);
+	gfs2_alloc_put(dip);
+
+	gfs2_glock_dq_uninit_m(2, ghs);
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return -ENOMEM;
+
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
+
+	return 0;
+}
+
+/**
+ * gfs2_rmdir - Remove a directory
+ * @dir: The parent directory of the directory to be removed
+ * @dentry: The dentry of the directory to remove
+ *
+ * Remove a directory. Call gfs2_rmdiri()
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct gfs2_inode *dip = get_v2ip(dir);
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+	struct gfs2_unlinked *ul;
+	struct gfs2_holder ghs[2];
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	error = gfs2_unlinked_get(sdp, &ul);
+	if (error)
+		return error;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+	error = gfs2_glock_nq_m(2, ghs);
+	if (error)
+		goto out;
+
+	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	if (ip->i_di.di_entries < 2) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(&ip->i_di);
+		error = -EIO;
+		goto out_gunlock;
+	}
+	if (ip->i_di.di_entries > 2) {
+		error = -ENOTEMPTY;
+		goto out_gunlock;
+	}
+
+	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF +
+				RES_UNLINKED, 0);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul);
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_m(2, ghs);
+
+ out:
+	gfs2_holder_uninit(ghs);
+	gfs2_holder_uninit(ghs + 1);
+
+	gfs2_unlinked_put(sdp, ul);
+
+	return error;
+}
+
+/**
+ * gfs2_mknod - Make a special file
+ * @dir: The directory in which the special file will reside
+ * @dentry: The dentry of the special file
+ * @mode: The mode of the special file
+ * @rdev: The device specification of the special file
+ *
+ */
+
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+		      dev_t dev)
+{
+	struct gfs2_inode *dip = get_v2ip(dir), *ip;
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_holder ghs[2];
+	struct inode *inode;
+	struct buffer_head *dibh;
+	uint32_t major = 0, minor = 0;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		major = MAJOR(dev);
+		minor = MINOR(dev);
+		break;
+	case S_IFIFO:
+	case S_IFSOCK:
+		break;
+	default:
+		return -EOPNOTSUPP;		
+	};
+
+	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
+
+	error = gfs2_createi(ghs, &dentry->d_name, mode);
+	if (error) {
+		gfs2_holder_uninit(ghs);
+		return error;
+	}
+
+	ip = get_gl2ip(ghs[1].gh_gl);
+
+	ip->i_di.di_major = major;
+	ip->i_di.di_minor = minor;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+
+	if (!gfs2_assert_withdraw(sdp, !error)) {
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+	if (dip->i_alloc->al_rgd)
+		gfs2_inplace_release(dip);
+	gfs2_quota_unlock(dip);
+	gfs2_alloc_put(dip);
+
+	gfs2_glock_dq_uninit_m(2, ghs);
+
+	inode = gfs2_ip2v(ip);
+	gfs2_inode_put(ip);
+
+	if (!inode)
+		return -ENOMEM;
+
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
+
+	return 0;
+}
+
+/**
+ * gfs2_rename - Rename a file
+ * @odir: Parent directory of old file name
+ * @odentry: The old dentry of the file
+ * @ndir: Parent directory of new file name
+ * @ndentry: The new dentry of the file
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rename(struct inode *odir, struct dentry *odentry,
+		       struct inode *ndir, struct dentry *ndentry)
+{
+	struct gfs2_inode *odip = get_v2ip(odir);
+	struct gfs2_inode *ndip = get_v2ip(ndir);
+	struct gfs2_inode *ip = get_v2ip(odentry->d_inode);
+	struct gfs2_inode *nip = NULL;
+	struct gfs2_sbd *sdp = odip->i_sbd;
+	struct gfs2_unlinked *ul;
+	struct gfs2_holder ghs[4], r_gh;
+	unsigned int num_gh;
+	int dir_rename = FALSE;
+	int alloc_required;
+	unsigned int x;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_inode);
+
+	if (ndentry->d_inode) {
+		nip = get_v2ip(ndentry->d_inode);
+		if (ip == nip)
+			return 0;
+	}
+
+	error = gfs2_unlinked_get(sdp, &ul);
+	if (error)
+		return error;
+
+	/* Make sure we aren't trying to move a dirctory into it's subdir */
+
+	if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
+		dir_rename = TRUE;
+
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl,
+					   LM_ST_EXCLUSIVE, 0,
+					   &r_gh);
+		if (error)
+			goto out;
+
+		error = gfs2_ok_to_move(ip, ndip);
+		if (error)
+			goto out_gunlock_r;
+	}
+
+	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+	num_gh = 3;
+
+	if (nip)
+		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
+
+	error = gfs2_glock_nq_m(num_gh, ghs);
+	if (error)
+		goto out_uninit;
+
+	/* Check out the old directory */
+
+	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	/* Check out the new directory */
+
+	if (nip) {
+		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
+		if (error)
+			goto out_gunlock;
+
+		if (S_ISDIR(nip->i_di.di_mode)) {
+			if (nip->i_di.di_entries < 2) {
+				if (gfs2_consist_inode(nip))
+					gfs2_dinode_print(&nip->i_di);
+				error = -EIO;
+				goto out_gunlock;
+			}
+			if (nip->i_di.di_entries > 2) {
+				error = -ENOTEMPTY;
+				goto out_gunlock;
+			}
+		}
+	} else {
+		error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL);
+		if (error)
+			goto out_gunlock;
+
+		error = gfs2_dir_search(ndip, &ndentry->d_name, NULL, NULL);
+		switch (error) {
+		case -ENOENT:
+			error = 0;
+			break;
+		case 0:
+			error = -EEXIST;
+		default:
+			goto out_gunlock;
+		};
+
+		if (odip != ndip) {
+			if (!ndip->i_di.di_nlink) {
+				error = -EINVAL;
+				goto out_gunlock;
+			}
+			if (ndip->i_di.di_entries == (uint32_t)-1) {
+				error = -EFBIG;
+				goto out_gunlock;
+			}
+			if (S_ISDIR(ip->i_di.di_mode) &&
+			    ndip->i_di.di_nlink == (uint32_t)-1) {
+				error = -EMLINK;
+				goto out_gunlock;
+			}
+		}
+	}
+
+	/* Check out the dir to be renamed */
+
+	if (dir_rename) {
+		error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL);
+		if (error)
+			goto out_gunlock;
+	}
+
+	error = gfs2_diradd_alloc_required(ndip, &ndentry->d_name,
+					   &alloc_required);
+	if (error)
+		goto out_gunlock;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
+
+		error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+		if (error)
+			goto out_alloc;
+
+		error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
+					 ndip->i_di.di_gid);
+		if (error)
+			goto out_gunlock_q;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve(ndip);
+		if (error)
+			goto out_gunlock_q;
+
+		error = gfs2_trans_begin(sdp,
+					 sdp->sd_max_dirres +
+					 al->al_rgd->rd_ri.ri_length +
+					 4 * RES_DINODE + 4 * RES_LEAF +
+					 RES_UNLINKED + RES_STATFS +
+					 RES_QUOTA, 0);
+		if (error)
+			goto out_ipreserv;
+	} else {
+		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
+					 5 * RES_LEAF +
+					 RES_UNLINKED, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	/* Remove the target file, if it exists */
+
+	if (nip) {
+		if (S_ISDIR(nip->i_di.di_mode))
+			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul);
+		else
+			error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul);
+		if (error)
+			goto out_end_trans;
+	}
+
+	if (dir_rename) {
+		struct qstr name;
+		name.len = 2;
+		name.name = "..";
+
+		error = gfs2_change_nlink(ndip, +1);
+		if (error)
+			goto out_end_trans;
+		error = gfs2_change_nlink(odip, -1);
+		if (error)
+			goto out_end_trans;
+
+		error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+		if (error)
+			goto out_end_trans;
+	} else {
+		struct buffer_head *dibh;
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto out_end_trans;
+		ip->i_di.di_ctime = get_seconds();
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	error = gfs2_dir_del(odip, &odentry->d_name);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_dir_add(ndip, &ndentry->d_name, &ip->i_num,
+			     IF2DT(ip->i_di.di_mode));
+	if (error)
+		goto out_end_trans;
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipreserv:
+	if (alloc_required)
+		gfs2_inplace_release(ndip);
+
+ out_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(ndip);
+
+ out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(ndip);
+
+ out_gunlock:
+	gfs2_glock_dq_m(num_gh, ghs);
+
+ out_uninit:
+	for (x = 0; x < num_gh; x++)
+		gfs2_holder_uninit(ghs + x);
+
+ out_gunlock_r:
+	if (dir_rename)
+		gfs2_glock_dq_uninit(&r_gh);
+
+ out:
+	gfs2_unlinked_put(sdp, ul);
+
+	return error;
+}
+
+/**
+ * gfs2_readlink - Read the value of a symlink
+ * @dentry: the symlink
+ * @buf: the buffer to read the symlink data into
+ * @size: the size of the buffer
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
+			 int user_size)
+{
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+	char array[GFS2_FAST_NAME_SIZE], *buf = array;
+	unsigned int len = GFS2_FAST_NAME_SIZE;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs2_readlinki(ip, &buf, &len);
+	if (error)
+		return error;
+
+	if (user_size > len - 1)
+		user_size = len - 1;
+
+	if (copy_to_user(user_buf, buf, user_size))
+		error = -EFAULT;
+	else
+		error = user_size;
+
+	if (buf != array)
+		kfree(buf);
+
+	return error;
+}
+
+/**
+ * gfs2_follow_link - Follow a symbolic link
+ * @dentry: The dentry of the link
+ * @nd: Data that we pass to vfs_follow_link()
+ *
+ * This can handle symlinks of any size. It is optimised for symlinks
+ * under GFS2_FAST_NAME_SIZE.
+ *
+ * Returns: 0 on success or error code
+ */
+
+static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+	char array[GFS2_FAST_NAME_SIZE], *buf = array;
+	unsigned int len = GFS2_FAST_NAME_SIZE;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs2_readlinki(ip, &buf, &len);
+	if (!error) {
+		error = vfs_follow_link(nd, buf);
+		if (buf != array)
+			kfree(buf);
+	}
+
+	return ERR_PTR(error);
+}
+
+/**
+ * gfs2_permission -
+ * @inode:
+ * @mask:
+ * @nd: passed from Linux VFS, ignored by us
+ *
+ * Returns: errno
+ */
+
+static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder i_gh;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	if (ip->i_vn == ip->i_gl->gl_vn)
+		return generic_permission(inode, mask, gfs2_check_acl);
+
+	error = gfs2_glock_nq_init(ip->i_gl,
+				   LM_ST_SHARED, LM_FLAG_ANY,
+				   &i_gh);
+	if (!error) {
+		error = generic_permission(inode, mask, gfs2_check_acl_locked);
+		gfs2_glock_dq_uninit(&i_gh);
+	}
+
+	return error;
+}
+
+static int setattr_size(struct inode *inode, struct iattr *attr)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	int error;
+
+	error = gfs2_repermission(inode, MAY_WRITE, NULL);
+	if (error)
+		return error;
+
+	if (attr->ia_size != ip->i_di.di_size) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	error = gfs2_truncatei(ip, attr->ia_size, gfs2_truncator_page);
+	if (error)
+		return error;
+
+	return error;
+}
+
+static int setattr_chown(struct inode *inode, struct iattr *attr)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh;
+	uint32_t ouid, ogid, nuid, ngid;
+	int error;
+
+	ouid = ip->i_di.di_uid;
+	ogid = ip->i_di.di_gid;
+	nuid = attr->ia_uid;
+	ngid = attr->ia_gid;
+
+	if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
+		ouid = nuid = NO_QUOTA_CHANGE;
+	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
+		ogid = ngid = NO_QUOTA_CHANGE;
+
+	gfs2_alloc_get(ip);
+
+	error = gfs2_quota_lock(ip, nuid, ngid);
+	if (error)
+		goto out_alloc;
+
+	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
+		error = gfs2_quota_check(ip, nuid, ngid);
+		if (error)
+			goto out_gunlock_q;
+	}
+
+	error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_end_trans;
+
+	error = inode_setattr(inode, attr);
+	gfs2_assert_warn(sdp, !error);
+	gfs2_inode_attr_out(ip);
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
+		gfs2_quota_change(ip, -ip->i_di.di_blocks,
+				 ouid, ogid);
+		gfs2_quota_change(ip, ip->i_di.di_blocks,
+				 nuid, ngid);
+	}
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_gunlock_q:
+	gfs2_quota_unlock(ip);
+
+ out_alloc:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * gfs2_setattr - Change attributes on an inode
+ * @dentry: The dentry which is changing
+ * @attr: The structure describing the change
+ *
+ * The VFS layer wants to change one or more of an inodes attributes.  Write
+ * that change out to disk.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder i_gh;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	error = -EPERM;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		goto out;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		goto out;
+
+	if (attr->ia_valid & ATTR_SIZE)
+		error = setattr_size(inode, attr);
+	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
+		error = setattr_chown(inode, attr);
+	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
+		error = gfs2_acl_chmod(ip, attr);
+	else
+		error = gfs2_setattr_simple(ip, attr);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	if (!error)
+		mark_inode_dirty(inode);
+
+	return error;
+}
+
+/**
+ * gfs2_getattr - Read out an inode's attributes
+ * @mnt: ?
+ * @dentry: The dentry to stat
+ * @stat: The inode's stats
+ *
+ * Returns: errno
+ */
+
+static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder gh;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+	if (!error) {
+		generic_fillattr(inode, stat);
+		gfs2_glock_dq_uninit(&gh);
+	}
+
+	return error;
+}
+
+static int gfs2_setxattr(struct dentry *dentry, const char *name,
+			 const void *data, size_t size, int flags)
+{
+	struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
+	struct gfs2_ea_request er;
+
+	atomic_inc(&ip->i_sbd->sd_ops_inode);
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	er.er_type = gfs2_ea_name2type(name, &er.er_name);
+	if (er.er_type == GFS2_EATYPE_UNUSED)
+		return -EOPNOTSUPP;
+	er.er_data = (char *)data;
+	er.er_name_len = strlen(er.er_name);
+	er.er_data_len = size;
+	er.er_flags = flags;
+
+	gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE));
+
+	return gfs2_ea_set(ip, &er);
+}
+
+static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
+			     void *data, size_t size)
+{
+	struct gfs2_ea_request er;
+
+	atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	er.er_type = gfs2_ea_name2type(name, &er.er_name);
+	if (er.er_type == GFS2_EATYPE_UNUSED)
+		return -EOPNOTSUPP;
+	er.er_data = data;
+	er.er_name_len = strlen(er.er_name);
+	er.er_data_len = size;
+
+	return gfs2_ea_get(get_v2ip(dentry->d_inode), &er);
+}
+
+static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct gfs2_ea_request er;
+
+	atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	er.er_data = (size) ? buffer : NULL;
+	er.er_data_len = size;
+
+	return gfs2_ea_list(get_v2ip(dentry->d_inode), &er);
+}
+
+static int gfs2_removexattr(struct dentry *dentry, const char *name)
+{
+	struct gfs2_ea_request er;
+
+	atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	er.er_type = gfs2_ea_name2type(name, &er.er_name);
+	if (er.er_type == GFS2_EATYPE_UNUSED)
+		return -EOPNOTSUPP;
+	er.er_name_len = strlen(er.er_name);
+
+	return gfs2_ea_remove(get_v2ip(dentry->d_inode), &er);
+}
+
+struct inode_operations gfs2_file_iops = {
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+};
+
+struct inode_operations gfs2_dev_iops = {
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+};
+
+struct inode_operations gfs2_dir_iops = {
+	.create = gfs2_create,
+	.lookup = gfs2_lookup,
+	.link = gfs2_link,
+	.unlink = gfs2_unlink,
+	.symlink = gfs2_symlink,
+	.mkdir = gfs2_mkdir,
+	.rmdir = gfs2_rmdir,
+	.mknod = gfs2_mknod,
+	.rename = gfs2_rename,
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+};
+
+struct inode_operations gfs2_symlink_iops = {
+	.readlink = gfs2_readlink,
+	.follow_link = gfs2_follow_link,
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+};
+
--- a/fs/gfs2/ops_inode.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_inode.h	2005-09-01 17:36:55.427098448 +0800
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_INODE_DOT_H__
+#define __OPS_INODE_DOT_H__
+
+extern struct inode_operations gfs2_file_iops;
+extern struct inode_operations gfs2_dir_iops;
+extern struct inode_operations gfs2_symlink_iops;
+extern struct inode_operations gfs2_dev_iops;
+
+#endif /* __OPS_INODE_DOT_H__ */
--- a/fs/gfs2/ops_super.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_super.c	2005-09-01 17:36:55.433097536 +0800
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "inode.h"
+#include "lm.h"
+#include "log.h"
+#include "mount.h"
+#include "ops_super.h"
+#include "page.h"
+#include "quota.h"
+#include "recovery.h"
+#include "rgrp.h"
+#include "super.h"
+#include "sys.h"
+
+/**
+ * gfs2_write_inode - Make sure the inode is stable on the disk
+ * @inode: The inode
+ * @sync: synchronous write flag
+ *
+ * Returns: errno
+ */
+
+static int gfs2_write_inode(struct inode *inode, int sync)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+
+	atomic_inc(&ip->i_sbd->sd_ops_super);
+
+	if (current->flags & PF_MEMALLOC)
+		return 0;
+	if (ip && sync)
+		gfs2_log_flush_glock(ip->i_gl);
+
+	return 0;
+}
+
+/**
+ * gfs2_put_super - Unmount the filesystem
+ * @sb: The VFS superblock
+ *
+ */
+
+static void gfs2_put_super(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	int error;
+
+	if (!sdp)
+		return;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	gfs2_sys_fs_del(sdp);
+
+	/*  Unfreeze the filesystem, if we need to  */
+
+	down(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+	up(&sdp->sd_freeze_lock);
+
+	kthread_stop(sdp->sd_inoded_process);
+	kthread_stop(sdp->sd_quotad_process);
+	kthread_stop(sdp->sd_logd_process);
+	kthread_stop(sdp->sd_recoverd_process);
+	while (sdp->sd_glockd_num--)
+		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
+	kthread_stop(sdp->sd_scand_process);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		error = gfs2_make_fs_ro(sdp);
+		if (error)
+			gfs2_io_error(sdp);
+	}
+
+	/*  At this point, we're through modifying the disk  */
+
+	/*  Release stuff  */
+
+	gfs2_inode_put(sdp->sd_master_dir);
+	gfs2_inode_put(sdp->sd_jindex);
+	gfs2_inode_put(sdp->sd_inum_inode);
+	gfs2_inode_put(sdp->sd_statfs_inode);
+	gfs2_inode_put(sdp->sd_rindex);
+	gfs2_inode_put(sdp->sd_quota_inode);
+	gfs2_inode_put(sdp->sd_root_dir);
+
+	gfs2_glock_put(sdp->sd_rename_gl);
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+	if (!sdp->sd_args.ar_spectator) {
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+		gfs2_inode_put(sdp->sd_ir_inode);
+		gfs2_inode_put(sdp->sd_sc_inode);
+		gfs2_inode_put(sdp->sd_ut_inode);
+		gfs2_inode_put(sdp->sd_qc_inode);
+	}
+
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+
+	gfs2_clear_rgrpd(sdp);
+	gfs2_jindex_free(sdp);
+
+	/*  Take apart glock structures and buffer lists  */
+	gfs2_gl_hash_clear(sdp, WAIT);
+
+	/*  Unmount the locking protocol  */
+	gfs2_lm_unmount(sdp);
+
+	/*  At this point, we're through participating in the lockspace  */
+
+	/*  Get rid of any extra inodes  */
+	while (invalidate_inodes(sb))
+		yield();
+
+	vfree(sdp);
+
+	set_v2sdp(sb, NULL);
+}
+
+/**
+ * gfs2_write_super - disk commit all incore transactions
+ * @sb: the filesystem
+ *
+ * This function is called every time sync(2) is called.
+ * After this exits, all dirty buffers and synced.
+ */
+
+static void gfs2_write_super(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	atomic_inc(&sdp->sd_ops_super);
+	gfs2_log_flush(sdp);
+}
+
+/**
+ * gfs2_write_super_lockfs - prevent further writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static void gfs2_write_super_lockfs(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	int error;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	for (;;) {
+		error = gfs2_freeze_fs(sdp);
+		if (!error)
+			break;
+
+		switch (error) {
+		case -EBUSY:
+			fs_err(sdp, "waiting for recovery before freeze\n");
+			break;
+
+		default:
+			fs_err(sdp, "error freezing FS: %d\n", error);
+			break;
+		}
+
+		fs_err(sdp, "retrying...\n");
+		msleep(1000);
+	}
+}
+
+/**
+ * gfs2_unlockfs - reallow writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static void gfs2_unlockfs(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+
+	atomic_inc(&sdp->sd_ops_super);
+	gfs2_unfreeze_fs(sdp);
+}
+
+/**
+ * gfs2_statfs - Gather and return stats about the filesystem
+ * @sb: The superblock
+ * @statfsbuf: The buffer
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	struct gfs2_statfs_change sc;
+	int error;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	if (gfs2_tune_get(sdp, gt_statfs_slow))
+		error = gfs2_statfs_slow(sdp, &sc);
+	else
+		error = gfs2_statfs_i(sdp, &sc);
+
+	if (error)
+		return error;
+
+	memset(buf, 0, sizeof(struct kstatfs));
+
+	buf->f_type = GFS2_MAGIC;
+	buf->f_bsize = sdp->sd_sb.sb_bsize;
+	buf->f_blocks = sc.sc_total;
+	buf->f_bfree = sc.sc_free;
+	buf->f_bavail = sc.sc_free;
+	buf->f_files = sc.sc_dinodes + sc.sc_free;
+	buf->f_ffree = sc.sc_free;
+	buf->f_namelen = GFS2_FNAMESIZE;
+
+	return 0;
+}
+
+/**
+ * gfs2_remount_fs - called when the FS is remounted
+ * @sb:  the filesystem
+ * @flags:  the remount flags
+ * @data:  extra data passed in (not used right now)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(sb);
+	int error;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	error = gfs2_mount_args(sdp, data, TRUE);
+	if (error)
+		return error;
+
+	if (sdp->sd_args.ar_spectator)
+		*flags |= MS_RDONLY;
+	else {
+		if (*flags & MS_RDONLY) {
+			if (!(sb->s_flags & MS_RDONLY))
+				error = gfs2_make_fs_ro(sdp);
+		} else if (!(*flags & MS_RDONLY) &&
+			   (sb->s_flags & MS_RDONLY)) {
+			error = gfs2_make_fs_rw(sdp);
+		}
+	}
+
+	if (*flags & (MS_NOATIME | MS_NODIRATIME))
+		set_bit(SDF_NOATIME, &sdp->sd_flags);
+	else
+		clear_bit(SDF_NOATIME, &sdp->sd_flags);
+
+	/* Don't let the VFS update atimes.  GFS2 handles this itself. */
+	*flags |= MS_NOATIME | MS_NODIRATIME;
+
+	return error;
+}
+
+/**
+ * gfs2_clear_inode - Deallocate an inode when VFS is done with it
+ * @inode: The VFS inode
+ *
+ */
+
+static void gfs2_clear_inode(struct inode *inode)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+
+	atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_super);
+
+	if (ip) {
+		spin_lock(&ip->i_spin);
+		ip->i_vnode = NULL;
+		set_v2ip(inode, NULL);
+		spin_unlock(&ip->i_spin);
+
+		gfs2_glock_schedule_for_reclaim(ip->i_gl);
+		gfs2_inode_put(ip);
+	}
+}
+
+/**
+ * gfs2_show_options - Show mount options for /proc/mounts
+ * @s: seq_file structure
+ * @mnt: vfsmount
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+{
+	struct gfs2_sbd *sdp = get_v2sdp(mnt->mnt_sb);
+	struct gfs2_args *args = &sdp->sd_args;
+
+	atomic_inc(&sdp->sd_ops_super);
+
+	if (args->ar_lockproto[0])
+		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+	if (args->ar_locktable[0])
+		seq_printf(s, ",locktable=%s", args->ar_locktable);
+	if (args->ar_hostdata[0])
+		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+	if (args->ar_spectator)
+		seq_printf(s, ",spectator");
+	if (args->ar_ignore_local_fs)
+		seq_printf(s, ",ignore_local_fs");
+	if (args->ar_localflocks)
+		seq_printf(s, ",localflocks");
+	if (args->ar_localcaching)
+		seq_printf(s, ",localcaching");
+	if (args->ar_oopses_ok)
+		seq_printf(s, ",oopses_ok");
+	if (args->ar_debug)
+		seq_printf(s, ",debug");
+	if (args->ar_upgrade)
+		seq_printf(s, ",upgrade");
+	if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
+		seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
+	if (args->ar_posix_acl)
+		seq_printf(s, ",acl");
+	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
+		char *state;
+		switch (args->ar_quota) {
+		case GFS2_QUOTA_OFF:
+			state = "off";
+			break;
+		case GFS2_QUOTA_ACCOUNT:
+			state = "account";
+			break;
+		case GFS2_QUOTA_ON:
+			state = "on";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",quota=%s", state);
+	}
+	if (args->ar_suiddir)
+		seq_printf(s, ",suiddir");
+	if (args->ar_data != GFS2_DATA_DEFAULT) {
+		char *state;
+		switch (args->ar_data) {
+		case GFS2_DATA_WRITEBACK:
+			state = "writeback";
+			break;
+		case GFS2_DATA_ORDERED:
+			state = "ordered";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",data=%s", state);
+	}
+
+	return 0;
+}
+
+struct super_operations gfs2_super_ops = {
+	.write_inode = gfs2_write_inode,
+	.put_super = gfs2_put_super,
+	.write_super = gfs2_write_super,
+	.write_super_lockfs = gfs2_write_super_lockfs,
+	.unlockfs = gfs2_unlockfs,
+	.statfs = gfs2_statfs,
+	.remount_fs = gfs2_remount_fs,
+	.clear_inode = gfs2_clear_inode,
+	.show_options = gfs2_show_options,
+};
+
--- a/fs/gfs2/ops_super.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_super.h	2005-09-01 17:36:55.433097536 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_SUPER_DOT_H__
+#define __OPS_SUPER_DOT_H__
+
+extern struct super_operations gfs2_super_ops;
+
+#endif /* __OPS_SUPER_DOT_H__ */
--- a/fs/gfs2/ops_vm.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_vm.c	2005-09-01 17:36:55.433097536 +0800
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "inode.h"
+#include "ops_vm.h"
+#include "page.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+
+static void pfault_be_greedy(struct gfs2_inode *ip)
+{
+	unsigned int time;
+
+	spin_lock(&ip->i_spin);
+	time = ip->i_greedy;
+	ip->i_last_pfault = jiffies;
+	spin_unlock(&ip->i_spin);
+
+	gfs2_inode_hold(ip);
+	if (gfs2_glock_be_greedy(ip->i_gl, time))
+		gfs2_inode_put(ip);
+}
+
+static struct page *gfs2_private_nopage(struct vm_area_struct *area,
+					unsigned long address, int *type)
+{
+	struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
+	struct gfs2_holder i_gh;
+	struct page *result;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_vm);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	if (error)
+		return NULL;
+
+	set_bit(GIF_PAGED, &ip->i_flags);
+
+	result = filemap_nopage(area, address, type);
+
+	if (result && result != NOPAGE_OOM)
+		pfault_be_greedy(ip);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return result;
+}
+
+static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned long index = page->index;
+	uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
+	unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
+	struct gfs2_alloc *al;
+	unsigned int data_blocks, ind_blocks;
+	unsigned int x;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	if (error)
+		goto out_gunlock_q;
+
+	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE,
+			      &data_blocks, &ind_blocks);
+
+	al->al_requested = data_blocks + ind_blocks;
+
+	error = gfs2_inplace_reserve(ip);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_trans_begin(sdp,
+				 al->al_rgd->rd_ri.ri_length +
+				 ind_blocks + RES_DINODE +
+				 RES_STATFS + RES_QUOTA, 0);
+	if (error)
+		goto out_ipres;
+
+	if (gfs2_is_stuffed(ip)) {
+		error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL);
+		if (error)
+			goto out_trans;
+	}
+
+	for (x = 0; x < blocks; ) {
+		uint64_t dblock;
+		unsigned int extlen;
+		int new = TRUE;
+
+		error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
+		if (error)
+			goto out_trans;
+
+		lblock += extlen;
+		x += extlen;
+	}
+
+	gfs2_assert_warn(sdp, al->al_alloced);
+
+ out_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipres:
+	gfs2_inplace_release(ip);
+
+ out_gunlock_q:
+	gfs2_quota_unlock(ip);
+
+ out:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
+					   unsigned long address, int *type)
+{
+	struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
+	struct gfs2_holder i_gh;
+	struct page *result = NULL;
+	unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+	int alloc_required;
+	int error;
+
+	atomic_inc(&ip->i_sbd->sd_ops_vm);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return NULL;
+
+	if (gfs2_is_jdata(ip))
+		goto out;
+
+	set_bit(GIF_PAGED, &ip->i_flags);
+	set_bit(GIF_SW_PAGED, &ip->i_flags);
+
+	error = gfs2_write_alloc_required(ip,
+					  (uint64_t)index << PAGE_CACHE_SHIFT,
+					  PAGE_CACHE_SIZE, &alloc_required);
+	if (error)
+		goto out;
+
+	result = filemap_nopage(area, address, type);
+	if (!result || result == NOPAGE_OOM)
+		goto out;
+
+	if (alloc_required) {
+		error = alloc_page_backing(ip, result);
+		if (error) {
+			page_cache_release(result);
+			result = NULL;
+			goto out;
+		}
+		set_page_dirty(result);
+	}
+
+	pfault_be_greedy(ip);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return result;
+}
+
+struct vm_operations_struct gfs2_vm_ops_private = {
+	.nopage = gfs2_private_nopage,
+};
+
+struct vm_operations_struct gfs2_vm_ops_sharewrite = {
+	.nopage = gfs2_sharewrite_nopage,
+};
+
--- a/fs/gfs2/ops_vm.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ops_vm.h	2005-09-01 17:36:55.434097384 +0800
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __OPS_VM_DOT_H__
+#define __OPS_VM_DOT_H__
+
+extern struct vm_operations_struct gfs2_vm_ops_private;
+extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
+
+#endif /* __OPS_VM_DOT_H__ */
--- a/fs/gfs2/page.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/page.c	2005-09-01 17:36:55.434097384 +0800
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/mm.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "inode.h"
+#include "page.h"
+#include "trans.h"
+
+/**
+ * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
+ * @gl: the glock
+ *
+ */
+
+void gfs2_pte_inval(struct gfs2_glock *gl)
+{
+	struct gfs2_inode *ip;
+	struct inode *inode;
+
+	ip = get_gl2ip(gl);
+	if (!ip || !S_ISREG(ip->i_di.di_mode))
+		return;
+
+	if (!test_bit(GIF_PAGED, &ip->i_flags))
+		return;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (inode) {
+		unmap_shared_mapping_range(inode->i_mapping, 0, 0);
+		iput(inode);
+
+		if (test_bit(GIF_SW_PAGED, &ip->i_flags))
+			set_bit(GLF_DIRTY, &gl->gl_flags);
+	}
+
+	clear_bit(GIF_SW_PAGED, &ip->i_flags);
+}
+
+/**
+ * gfs2_page_inval - Invalidate all pages associated with a glock
+ * @gl: the glock
+ *
+ */
+
+void gfs2_page_inval(struct gfs2_glock *gl)
+{
+	struct gfs2_inode *ip;
+	struct inode *inode;
+
+	ip = get_gl2ip(gl);
+	if (!ip || !S_ISREG(ip->i_di.di_mode))
+		return;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (inode) {
+		struct address_space *mapping = inode->i_mapping;
+
+		truncate_inode_pages(mapping, 0);
+		gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages);
+
+		iput(inode);
+	}
+
+	clear_bit(GIF_PAGED, &ip->i_flags);
+}
+
+/**
+ * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock
+ * @gl: the glock
+ * @flags: DIO_START | DIO_WAIT
+ *
+ * Syncs data (not metadata) for a regular file.
+ * No-op for all other types.
+ */
+
+void gfs2_page_sync(struct gfs2_glock *gl, int flags)
+{
+	struct gfs2_inode *ip;
+	struct inode *inode;
+
+	ip = get_gl2ip(gl);
+	if (!ip || !S_ISREG(ip->i_di.di_mode))
+		return;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (inode) {
+		struct address_space *mapping = inode->i_mapping;
+		int error = 0;
+
+		if (flags & DIO_START)
+			filemap_fdatawrite(mapping);
+		if (!error && (flags & DIO_WAIT))
+			error = filemap_fdatawait(mapping);
+
+		/* Put back any errors cleared by filemap_fdatawait()
+		   so they can be caught by someone who can pass them
+		   up to user space. */
+
+		if (error == -ENOSPC)
+			set_bit(AS_ENOSPC, &mapping->flags);
+		else if (error)
+			set_bit(AS_EIO, &mapping->flags);
+
+		iput(inode);
+	}
+}
+
+/**
+ * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @block: the block number that was allocated
+ * @private: any locked page held by the caller process
+ *
+ * Returns: errno
+ */
+
+int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
+			uint64_t block, void *private)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct inode *inode = ip->i_vnode;
+	struct page *page = (struct page *)private;
+	struct buffer_head *bh;
+	int release = FALSE;
+
+	if (!page || page->index) {
+		page = grab_cache_page(inode->i_mapping, 0);
+		if (!page)
+			return -ENOMEM;
+		release = TRUE;
+	}
+
+	if (!PageUptodate(page)) {
+		void *kaddr = kmap(page);
+
+		memcpy(kaddr,
+		       dibh->b_data + sizeof(struct gfs2_dinode),
+		       ip->i_di.di_size);
+		memset(kaddr + ip->i_di.di_size,
+		       0,
+		       PAGE_CACHE_SIZE - ip->i_di.di_size);
+		kunmap(page);
+
+		SetPageUptodate(page);
+	}
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits,
+				     (1 << BH_Uptodate));
+
+	bh = page_buffers(page);
+
+	if (!buffer_mapped(bh))
+		map_bh(bh, inode->i_sb, block);
+
+	set_buffer_uptodate(bh);
+	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+		gfs2_trans_add_databuf(sdp, bh);
+	mark_buffer_dirty(bh);
+
+	if (release) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_truncator_page - truncate a partial data block in the page cache
+ * @ip: the inode
+ * @size: the size the file should be
+ *
+ * Returns: errno
+ */
+
+int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct inode *inode = ip->i_vnode;
+	struct page *page;
+	struct buffer_head *bh;
+	void *kaddr;
+	uint64_t lbn, dbn;
+	unsigned long index;
+	unsigned int offset;
+	unsigned int bufnum;
+	int new = FALSE;
+	int error;
+
+	lbn = size >> inode->i_blkbits;
+	error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
+	if (error || !dbn)
+		return error;
+
+	index = size >> PAGE_CACHE_SHIFT;
+	offset = size & (PAGE_CACHE_SIZE - 1);
+	bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits));
+
+	page = read_cache_page(inode->i_mapping, index,
+			       (filler_t *)inode->i_mapping->a_ops->readpage,
+			       NULL);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	lock_page(page);
+
+	if (!PageUptodate(page) || PageError(page)) {
+		error = -EIO;
+		goto out;
+	}
+
+	kaddr = kmap(page);
+	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+	kunmap(page);
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits,
+				     (1 << BH_Uptodate));
+
+	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
+		/* Do nothing */;
+
+	if (!buffer_mapped(bh))
+		map_bh(bh, inode->i_sb, dbn);
+
+	set_buffer_uptodate(bh);
+	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+		gfs2_trans_add_databuf(sdp, bh);
+	mark_buffer_dirty(bh);
+
+ out:
+	unlock_page(page);
+	page_cache_release(page);
+
+	return error;
+}
+
+void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page,
+			    unsigned int from, unsigned int to)
+{
+	struct buffer_head *head = page_buffers(page);
+	unsigned int bsize = head->b_size;
+	struct buffer_head *bh;
+	unsigned int start, end;
+
+	for (bh = head, start = 0;
+	     bh != head || !start;
+	     bh = bh->b_this_page, start = end) {
+		end = start + bsize;
+		if (end <= from || start >= to)
+			continue;
+		gfs2_trans_add_databuf(sdp, bh);
+	}
+}
+
--- a/fs/gfs2/page.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/page.h	2005-09-01 17:36:55.434097384 +0800
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __PAGE_DOT_H__
+#define __PAGE_DOT_H__
+
+void gfs2_pte_inval(struct gfs2_glock *gl);
+void gfs2_page_inval(struct gfs2_glock *gl);
+void gfs2_page_sync(struct gfs2_glock *gl, int flags);
+
+int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
+			uint64_t block, void *private);
+int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size);
+void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page,
+			    unsigned int from, unsigned int to);
+
+#endif /* __PAGE_DOT_H__ */
--- a/fs/gfs2/super.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/super.c	2005-09-01 17:36:55.494088264 +0800
@@ -0,0 +1,945 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "dir.h"
+#include "format.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "recovery.h"
+#include "rgrp.h"
+#include "super.h"
+#include "trans.h"
+#include "unlinked.h"
+
+/**
+ * gfs2_tune_init - Fill a gfs2_tune structure with default values
+ * @gt: tune
+ *
+ */
+
+void gfs2_tune_init(struct gfs2_tune *gt)
+{
+	spin_lock_init(&gt->gt_spin);
+
+	gt->gt_ilimit = 100;
+	gt->gt_ilimit_tries = 3;
+	gt->gt_ilimit_min = 1;
+	gt->gt_demote_secs = 300;
+	gt->gt_incore_log_blocks = 1024;
+	gt->gt_log_flush_secs = 60;
+	gt->gt_jindex_refresh_secs = 60;
+	gt->gt_scand_secs = 15;
+	gt->gt_recoverd_secs = 60;
+	gt->gt_logd_secs = 1;
+	gt->gt_quotad_secs = 5;
+	gt->gt_inoded_secs = 15;
+	gt->gt_quota_simul_sync = 64;
+	gt->gt_quota_warn_period = 10;
+	gt->gt_quota_scale_num = 1;
+	gt->gt_quota_scale_den = 1;
+	gt->gt_quota_cache_secs = 300;
+	gt->gt_quota_quantum = 60;
+	gt->gt_atime_quantum = 3600;
+	gt->gt_new_files_jdata = 0;
+	gt->gt_new_files_directio = 0;
+	gt->gt_max_atomic_write = 4 << 20;
+	gt->gt_max_readahead = 1 << 18;
+	gt->gt_lockdump_size = 131072;
+	gt->gt_stall_secs = 600;
+	gt->gt_complain_secs = 10;
+	gt->gt_reclaim_limit = 5000;
+	gt->gt_entries_per_readdir = 32;
+	gt->gt_prefetch_secs = 10;
+	gt->gt_greedy_default = HZ / 10;
+	gt->gt_greedy_quantum = HZ / 40;
+	gt->gt_greedy_max = HZ / 4;
+	gt->gt_statfs_quantum = 30;
+	gt->gt_statfs_slow = 0;
+}
+
+/**
+ * gfs2_check_sb - Check superblock
+ * @sdp: the filesystem
+ * @sb: The superblock
+ * @silent: Don't print a message if the check fails
+ *
+ * Checks the version code of the FS is one that we understand how to
+ * read and that the sizes of the various on-disk structures have not
+ * changed.
+ */
+
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
+{
+	unsigned int x;
+
+	if (sb->sb_header.mh_magic != GFS2_MAGIC ||
+	    sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+		if (!silent)
+			printk("GFS2: not a GFS2 filesystem\n");
+		return -EINVAL;
+	}
+
+	/*  If format numbers match exactly, we're done.  */
+
+	if (sb->sb_fs_format == GFS2_FORMAT_FS &&
+	    sb->sb_multihost_format == GFS2_FORMAT_MULTI)
+		return 0;
+
+	if (sb->sb_fs_format != GFS2_FORMAT_FS) {
+		for (x = 0; gfs2_old_fs_formats[x]; x++)
+			if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
+				break;
+
+		if (!gfs2_old_fs_formats[x]) {
+			printk("GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk("GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+		for (x = 0; gfs2_old_multihost_formats[x]; x++)
+			if (gfs2_old_multihost_formats[x] == sb->sb_multihost_format)
+				break;
+
+		if (!gfs2_old_multihost_formats[x]) {
+			printk("GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk("GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!sdp->sd_args.ar_upgrade) {
+		printk("GFS2: code version (%u, %u) is incompatible "
+		       "with ondisk format (%u, %u)\n",
+		       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+		       sb->sb_fs_format, sb->sb_multihost_format);
+		printk("GFS2: Use the \"upgrade\" mount option to upgrade "
+		       "the FS\n");
+		printk("GFS2: See the manual for more details\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_read_sb - Read super block
+ * @sdp: The GFS2 superblock
+ * @gl: the glock for the superblock (assumed to be held)
+ * @silent: Don't print message if mount fails
+ *
+ */
+
+int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+{
+	struct buffer_head *bh;
+	uint32_t hash_blocks, ind_blocks, leaf_blocks;
+	uint32_t tmp_blocks;
+	unsigned int x;
+	int error;
+
+	error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
+			       DIO_FORCE | DIO_START | DIO_WAIT, &bh);
+	if (error) {
+		if (!silent)
+			fs_err(sdp, "can't read superblock\n");
+		return error;
+	}
+
+	gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size,);
+	gfs2_sb_in(&sdp->sd_sb, bh->b_data);
+	brelse(bh);
+
+	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
+	if (error)
+		return error;
+
+	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
+			       GFS2_BASIC_BLOCK_SHIFT;
+	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
+			  sizeof(struct gfs2_dinode)) / sizeof(uint64_t);
+	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
+			  sizeof(struct gfs2_meta_header)) / sizeof(uint64_t);
+	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
+	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
+	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
+	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
+	sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_meta_header)) /
+			       sizeof(struct gfs2_unlinked_tag);
+	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_meta_header)) /
+			       sizeof(struct gfs2_quota_change);
+
+	/* Compute maximum reservation required to add a entry to a directory */
+
+	hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH),
+			     sdp->sd_jbsize);
+
+	ind_blocks = 0;
+	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
+		tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs);
+		ind_blocks += tmp_blocks;
+	}
+
+	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
+
+	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
+
+	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_dinode);
+	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
+	for (x = 2;; x++) {
+		uint64_t space, d;
+		uint32_t m;
+
+		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
+		d = space;
+		m = do_div(d, sdp->sd_inptrs);
+
+		if (d != sdp->sd_heightsize[x - 1] || m)
+			break;
+		sdp->sd_heightsize[x] = space;
+	}
+	sdp->sd_max_height = x;
+	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT,);
+
+	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
+				 sizeof(struct gfs2_dinode);
+	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
+	for (x = 2;; x++) {
+		uint64_t space, d;
+		uint32_t m;
+
+		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
+		d = space;
+		m = do_div(d, sdp->sd_inptrs);
+
+		if (d != sdp->sd_jheightsize[x - 1] || m)
+			break;
+		sdp->sd_jheightsize[x] = space;
+	}
+	sdp->sd_max_jheight = x;
+	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT,);
+
+	return 0;
+}
+
+int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl)
+{
+	return 0;
+}
+
+/**
+ * gfs2_jindex_hold - Grab a lock on the jindex
+ * @sdp: The GFS2 superblock
+ * @ji_gh: the holder for the jindex glock
+ *
+ * This is very similar to the gfs2_rindex_hold() function, except that
+ * in general we hold the jindex lock for longer periods of time and
+ * we grab it far less frequently (in general) then the rgrp lock.
+ *
+ * Returns: errno
+ */
+
+int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
+{
+	struct gfs2_inode *dip = sdp->sd_jindex;
+	struct qstr name;
+	char buf[20];
+	struct gfs2_jdesc *jd;
+	int error;
+
+	name.name = buf;
+
+	down(&sdp->sd_jindex_mutex);
+
+	for (;;) {
+		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
+					   GL_LOCAL_EXCL, ji_gh);
+		if (error)
+			break;
+
+		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
+
+		error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+		if (error == -ENOENT) {
+			error = 0;
+			break;
+		}
+
+		gfs2_glock_dq_uninit(ji_gh);
+
+		if (error)
+			break;
+
+		error = -ENOMEM;
+		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
+		if (!jd)
+			break;
+
+		error = gfs2_lookupi(dip, &name, TRUE, &jd->jd_inode);
+		if (error) {
+			kfree(jd);
+			break;
+		}
+
+		spin_lock(&sdp->sd_jindex_spin);
+		jd->jd_jid = sdp->sd_journals++;
+		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
+		spin_unlock(&sdp->sd_jindex_spin);
+	}
+
+	up(&sdp->sd_jindex_mutex);
+
+	return error;
+}
+
+/**
+ * gfs2_jindex_free - Clear all the journal index information
+ * @sdp: The GFS2 superblock
+ *
+ */
+
+void gfs2_jindex_free(struct gfs2_sbd *sdp)
+{
+	struct list_head list;
+	struct gfs2_jdesc *jd;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	list_add(&list, &sdp->sd_jindex_list);
+	list_del_init(&sdp->sd_jindex_list);
+	sdp->sd_journals = 0;
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	while (!list_empty(&list)) {
+		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+		list_del(&jd->jd_list);
+		gfs2_inode_put(jd->jd_inode);
+		kfree(jd);
+	}
+}
+
+static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
+{
+	struct gfs2_jdesc *jd;
+	int found = FALSE;
+
+	list_for_each_entry(jd, head, jd_list) {
+		if (jd->jd_jid == jid) {
+			found = TRUE;
+			break;
+		}
+	}
+
+	if (!found)
+		jd = NULL;
+
+	return jd;
+}
+
+struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
+{
+	struct gfs2_jdesc *jd;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	return jd;
+}
+
+void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+{
+	struct gfs2_jdesc *jd;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
+	if (jd)
+		jd->jd_dirty = TRUE;
+	spin_unlock(&sdp->sd_jindex_spin);
+}
+
+struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
+{
+	struct gfs2_jdesc *jd;
+	int found = FALSE;
+
+	spin_lock(&sdp->sd_jindex_spin);
+
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (jd->jd_dirty) {
+			jd->jd_dirty = FALSE;
+			found = TRUE;
+			break;
+		}
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	if (!found)
+		jd = NULL;
+
+	return jd;
+}
+
+int gfs2_jdesc_check(struct gfs2_jdesc *jd)
+{
+	struct gfs2_inode *ip = jd->jd_inode;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int ar;
+	int error;
+
+	if (ip->i_di.di_size < (8 << 20) ||
+	    ip->i_di.di_size > (1 << 30) ||
+	    (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+	jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+
+	error = gfs2_write_alloc_required(ip,
+					  0, ip->i_di.di_size,
+					  &ar);
+	if (!error && ar) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+	}
+
+	return error;
+}
+
+int gfs2_lookup_master_dir(struct gfs2_sbd *sdp)
+{
+	struct gfs2_glock *gl;
+	int error;
+
+	error = gfs2_glock_get(sdp,
+			       sdp->sd_sb.sb_master_dir.no_addr,
+			       &gfs2_inode_glops, CREATE, &gl);
+	if (!error) {
+		error = gfs2_inode_get(gl, &sdp->sd_sb.sb_master_dir, CREATE,
+				       &sdp->sd_master_dir);
+		gfs2_glock_put(gl);
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
+{
+	struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl;
+	struct gfs2_holder t_gh;
+	struct gfs2_log_header head;
+	int error;
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
+				   GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh);
+	if (error)
+		return error;
+
+	gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode);
+	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+
+	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
+	if (error)
+		goto fail;
+
+	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+		gfs2_consist(sdp);
+		error = -EIO;
+		goto fail;
+	}
+
+	/*  Initialize some head of the log stuff  */
+	sdp->sd_log_sequence = head.lh_sequence + 1;
+	gfs2_log_pointers_init(sdp, head.lh_blkno);
+
+	error = gfs2_unlinked_init(sdp);
+	if (error)
+		goto fail;
+	error = gfs2_quota_init(sdp);
+	if (error)
+		goto fail_unlinked;
+
+	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	gfs2_glock_dq_uninit(&t_gh);
+
+	return 0;
+
+ fail_unlinked:
+	gfs2_unlinked_cleanup(sdp);
+
+ fail:
+	t_gh.gh_flags |= GL_NOCACHE;
+	gfs2_glock_dq_uninit(&t_gh);
+
+	return error;
+}
+
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+	struct gfs2_holder t_gh;
+	int error;
+
+	gfs2_unlinked_dealloc(sdp);
+	gfs2_quota_sync(sdp);
+	gfs2_statfs_sync(sdp);
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
+				GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE,
+				&t_gh);
+	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return error;
+
+	gfs2_meta_syncfs(sdp);
+	gfs2_log_shutdown(sdp);
+
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	if (t_gh.gh_gl)
+		gfs2_glock_dq_uninit(&t_gh);
+
+	gfs2_unlinked_cleanup(sdp);
+	gfs2_quota_cleanup(sdp);
+
+	return error;
+}
+
+int gfs2_statfs_init(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inode *m_ip = sdp->sd_statfs_inode;
+	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_inode *l_ip = sdp->sd_sc_inode;
+	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct buffer_head *m_bh, *l_bh;
+	struct gfs2_holder gh;
+	int error;
+
+	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
+				   &gh);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
+	if (error)
+		goto out;
+
+	if (sdp->sd_args.ar_spectator) {
+		spin_lock(&sdp->sd_statfs_spin);
+		gfs2_statfs_change_in(m_sc, m_bh->b_data +
+				      sizeof(struct gfs2_dinode));
+		spin_unlock(&sdp->sd_statfs_spin);
+	} else {
+		error = gfs2_meta_inode_buffer(l_ip, &l_bh);
+		if (error)
+			goto out_m_bh;
+
+		spin_lock(&sdp->sd_statfs_spin);
+		gfs2_statfs_change_in(m_sc, m_bh->b_data +
+				      sizeof(struct gfs2_dinode));
+		gfs2_statfs_change_in(l_sc, l_bh->b_data +
+				      sizeof(struct gfs2_dinode));
+		spin_unlock(&sdp->sd_statfs_spin);
+
+		brelse(l_bh);
+	}
+
+ out_m_bh:
+	brelse(m_bh);
+
+ out:
+	gfs2_glock_dq_uninit(&gh);
+
+	return 0;
+}
+
+void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free,
+			int64_t dinodes)
+{
+	struct gfs2_inode *l_ip = sdp->sd_sc_inode;
+	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct buffer_head *l_bh;
+	int error;
+
+	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
+	if (error)
+		return;
+
+	down(&sdp->sd_statfs_mutex);
+	gfs2_trans_add_bh(l_ip->i_gl, l_bh);
+	up(&sdp->sd_statfs_mutex);
+
+	spin_lock(&sdp->sd_statfs_spin);
+	l_sc->sc_total += total;
+	l_sc->sc_free += free;
+	l_sc->sc_dinodes += dinodes;
+	gfs2_statfs_change_out(l_sc, l_bh->b_data +
+			       sizeof(struct gfs2_dinode));	
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	brelse(l_bh);
+}
+
+int gfs2_statfs_sync(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inode *m_ip = sdp->sd_statfs_inode;
+	struct gfs2_inode *l_ip = sdp->sd_sc_inode;
+	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_holder gh;
+	struct buffer_head *m_bh, *l_bh;
+	int error;
+
+	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
+				   &gh);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
+	if (error)
+		goto out;
+
+	spin_lock(&sdp->sd_statfs_spin);
+	gfs2_statfs_change_in(m_sc, m_bh->b_data +
+			      sizeof(struct gfs2_dinode));	
+	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
+		spin_unlock(&sdp->sd_statfs_spin);
+		goto out_bh;
+	}
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
+	if (error)
+		goto out_bh;
+
+	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
+	if (error)
+		goto out_bh2;
+
+	down(&sdp->sd_statfs_mutex);
+	gfs2_trans_add_bh(l_ip->i_gl, l_bh);
+	up(&sdp->sd_statfs_mutex);
+
+	spin_lock(&sdp->sd_statfs_spin);
+	m_sc->sc_total += l_sc->sc_total;
+	m_sc->sc_free += l_sc->sc_free;
+	m_sc->sc_dinodes += l_sc->sc_dinodes;
+	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
+	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
+	       0, sizeof(struct gfs2_statfs_change));
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	gfs2_trans_add_bh(m_ip->i_gl, m_bh);
+	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
+
+	gfs2_trans_end(sdp);
+
+ out_bh2:
+	brelse(l_bh);
+
+ out_bh:
+	brelse(m_bh);
+
+ out:
+	gfs2_glock_dq_uninit(&gh);
+
+	return error;
+}
+
+/**
+ * gfs2_statfs_i - Do a statfs
+ * @sdp: the filesystem
+ * @sg: the sg structure
+ *
+ * Returns: errno
+ */
+
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+{
+	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+
+	spin_lock(&sdp->sd_statfs_spin);
+
+	*sc = *m_sc;
+	sc->sc_total += l_sc->sc_total;
+	sc->sc_free += l_sc->sc_free;
+	sc->sc_dinodes += l_sc->sc_dinodes;
+
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	if (sc->sc_free < 0)
+		sc->sc_free = 0;
+	if (sc->sc_free > sc->sc_total)
+		sc->sc_free = sc->sc_total;
+	if (sc->sc_dinodes < 0)
+		sc->sc_dinodes = 0;
+
+	return 0;
+}
+
+/**
+ * statfs_fill - fill in the sg for a given RG
+ * @rgd: the RG
+ * @sc: the sc structure
+ *
+ * Returns: 0 on success, -ESTALE if the LVB is invalid
+ */
+
+static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
+			    struct gfs2_statfs_change *sc)
+{
+	gfs2_rgrp_verify(rgd);
+	sc->sc_total += rgd->rd_ri.ri_data;
+	sc->sc_free += rgd->rd_rg.rg_free;
+	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
+	return 0;
+}
+
+/**
+ * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
+ * @sdp: the filesystem
+ * @sc: the sc info that will be returned
+ *
+ * Any error (other than a signal) will cause this routine to fall back
+ * to the synchronous version.
+ *
+ * FIXME: This really shouldn't busy wait like this.
+ *
+ * Returns: errno
+ */
+
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+{
+	struct gfs2_holder ri_gh;
+	struct gfs2_rgrpd *rgd_next;
+	struct gfs2_holder *gha, *gh;
+	unsigned int slots = 64;
+	unsigned int x;
+	int done;
+	int error = 0, err;
+
+	memset(sc, 0, sizeof(struct gfs2_statfs_change));
+	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!gha)
+		return -ENOMEM;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto out;
+
+	rgd_next = gfs2_rgrpd_get_first(sdp);
+
+	for (;;) {
+		done = TRUE;
+
+		for (x = 0; x < slots; x++) {
+			gh = gha + x;
+
+			if (gh->gh_gl && gfs2_glock_poll(gh)) {
+				err = gfs2_glock_wait(gh);
+				if (err) {
+					gfs2_holder_uninit(gh);
+					error = err;
+				} else {
+					if (!error)
+						error = statfs_slow_fill(get_gl2rgd(gh->gh_gl), sc);
+					gfs2_glock_dq_uninit(gh);
+				}
+			}
+
+			if (gh->gh_gl)
+				done = FALSE;
+			else if (rgd_next && !error) {
+				error = gfs2_glock_nq_init(rgd_next->rd_gl,
+							   LM_ST_SHARED,
+							   GL_ASYNC,
+							   gh);
+				rgd_next = gfs2_rgrpd_get_next(rgd_next);
+				done = FALSE;
+			}
+
+			if (signal_pending(current))
+				error = -ERESTARTSYS;
+		}
+
+		if (done)
+			break;
+
+		yield();
+	}
+
+	gfs2_glock_dq_uninit(&ri_gh);
+
+ out:
+	kfree(gha);
+
+	return error;
+}
+
+struct lfcc {
+	struct list_head list;
+	struct gfs2_holder gh;
+};
+
+/**
+ * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
+ *                            journals are clean
+ * @sdp: the file system
+ * @state: the state to put the transaction lock into
+ * @t_gh: the hold on the transaction lock
+ *
+ * Returns: errno
+ */
+
+int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh)
+{
+	struct gfs2_holder ji_gh;
+	struct gfs2_jdesc *jd;
+	struct lfcc *lfcc;
+	LIST_HEAD(list);
+	struct gfs2_log_header lh;
+	int error;
+
+	error = gfs2_jindex_hold(sdp, &ji_gh);
+	if (error)
+		return error;
+
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
+		if (!lfcc) {
+			error = -ENOMEM;
+			goto out;
+		}
+		error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED, 0,
+					   &lfcc->gh);
+		if (error) {
+			kfree(lfcc);
+			goto out;
+		}
+		list_add(&lfcc->list, &list);
+	}
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
+			       LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE,
+			       t_gh);
+
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		error = gfs2_jdesc_check(jd);
+		if (error)
+			break;
+		error = gfs2_find_jhead(jd, &lh);
+		if (error)
+			break;
+		if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+			error = -EBUSY;
+			break;
+		}
+	}
+
+	if (error)
+		gfs2_glock_dq_uninit(t_gh);
+
+ out:
+	while (!list_empty(&list)) {
+		lfcc = list_entry(list.next, struct lfcc, list);
+		list_del(&lfcc->list);
+		gfs2_glock_dq_uninit(&lfcc->gh);
+		kfree(lfcc);
+	}
+	gfs2_glock_dq_uninit(&ji_gh);
+
+	return error;
+}
+
+/**
+ * gfs2_freeze_fs - freezes the file system
+ * @sdp: the file system
+ *
+ * This function flushes data and meta data for all machines by
+ * aquiring the transaction log exclusively.  All journals are
+ * ensured to be in a clean state as well.
+ *
+ * Returns: errno
+ */
+
+int gfs2_freeze_fs(struct gfs2_sbd *sdp)
+{
+	int error = 0;
+
+	down(&sdp->sd_freeze_lock);
+
+	if (!sdp->sd_freeze_count++) {
+		error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
+		if (error)
+			sdp->sd_freeze_count--;
+	}
+
+	up(&sdp->sd_freeze_lock);
+
+	return error;
+}
+
+/**
+ * gfs2_unfreeze_fs - unfreezes the file system
+ * @sdp: the file system
+ *
+ * This function allows the file system to proceed by unlocking
+ * the exclusively held transaction lock.  Other GFS2 nodes are
+ * now free to acquire the lock shared and go on with their lives.
+ *
+ */
+
+void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
+{
+	down(&sdp->sd_freeze_lock);
+
+	if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+
+	up(&sdp->sd_freeze_lock);
+}
+
--- a/fs/gfs2/super.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/super.h	2005-09-01 17:36:55.494088264 +0800
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __SUPER_DOT_H__
+#define __SUPER_DOT_H__
+
+void gfs2_tune_init(struct gfs2_tune *gt);
+
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
+int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
+int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb);
+
+static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
+{
+	unsigned int x;
+	spin_lock(&sdp->sd_jindex_spin);
+	x = sdp->sd_journals;
+	spin_unlock(&sdp->sd_jindex_spin);
+	return x;
+}
+
+int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
+void gfs2_jindex_free(struct gfs2_sbd *sdp);
+
+struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
+void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
+struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
+int gfs2_jdesc_check(struct gfs2_jdesc *jd);
+
+int gfs2_lookup_master_dir(struct gfs2_sbd *sdp);
+int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
+			      struct gfs2_inode **ipp);
+
+int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
+
+int gfs2_statfs_init(struct gfs2_sbd *sdp);
+void gfs2_statfs_change(struct gfs2_sbd *sdp,
+			int64_t total, int64_t free, int64_t dinodes);
+int gfs2_statfs_sync(struct gfs2_sbd *sdp);
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
+
+int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh);
+int gfs2_freeze_fs(struct gfs2_sbd *sdp);
+void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
+
+#endif /* __SUPER_DOT_H__ */
+
--- a/fs/gfs2/trans.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/trans.c	2005-09-01 17:36:55.528083096 +0800
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "log.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "trans.h"
+
+int gfs2_trans_begin_i(struct gfs2_sbd *sdp, unsigned int blocks,
+		       unsigned int revokes, char *file, unsigned int line)
+{
+	struct gfs2_trans *tr;
+	int error;
+
+	if (gfs2_assert_warn(sdp, !get_transaction) ||
+	    gfs2_assert_warn(sdp, blocks || revokes)) {
+		fs_warn(sdp, "(%s, %u)\n", file, line);
+		return -EINVAL;
+	}
+
+	tr = kzalloc(sizeof(struct gfs2_trans), GFP_KERNEL);
+	if (!tr)
+		return -ENOMEM;
+
+	tr->tr_file = file;
+	tr->tr_line = line;
+	tr->tr_blocks = blocks;
+	tr->tr_revokes = revokes;
+	tr->tr_reserved = 1;
+	if (blocks)
+		tr->tr_reserved += 1 + blocks;
+	if (revokes)
+		tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
+						   sizeof(uint64_t));
+	INIT_LIST_HEAD(&tr->tr_list_buf);
+
+	error = -ENOMEM;
+	tr->tr_t_gh = gfs2_holder_get(sdp->sd_trans_gl, LM_ST_SHARED,
+				      GL_NEVER_RECURSE, 0);
+	if (!tr->tr_t_gh)
+		goto fail;
+
+	error = gfs2_glock_nq(tr->tr_t_gh);
+	if (error)
+		goto fail_holder_put;
+
+	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+		tr->tr_t_gh->gh_flags |= GL_NOCACHE;
+		error = -EROFS;
+		goto fail_gunlock;
+	}
+
+	error = gfs2_log_reserve(sdp, tr->tr_reserved);
+	if (error)
+		goto fail_gunlock;
+
+	set_transaction(tr);
+
+	return 0;
+
+ fail_gunlock:
+	gfs2_glock_dq(tr->tr_t_gh);
+
+ fail_holder_put:
+	gfs2_holder_put(tr->tr_t_gh);
+
+ fail:
+	kfree(tr);
+
+	return error;
+}
+
+void gfs2_trans_end(struct gfs2_sbd *sdp)
+{
+	struct gfs2_trans *tr;
+	struct gfs2_holder *t_gh;
+
+	tr = get_transaction;
+	set_transaction(NULL);
+
+	if (gfs2_assert_warn(sdp, tr))
+		return;
+
+	t_gh = tr->tr_t_gh;
+	tr->tr_t_gh = NULL;
+
+	if (!tr->tr_touched) {
+		gfs2_log_release(sdp, tr->tr_reserved);
+		kfree(tr);
+
+		gfs2_glock_dq(t_gh);
+		gfs2_holder_put(t_gh);
+
+		return;
+	}
+
+	if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks))
+		fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u "
+		       "tr_file = %s, tr_line = %u\n",
+		       tr->tr_num_buf, tr->tr_blocks,
+		       tr->tr_file, tr->tr_line);
+	if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
+		fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u "
+		       "tr_file = %s, tr_line = %u\n",
+		       tr->tr_num_revoke, tr->tr_revokes,
+		       tr->tr_file, tr->tr_line);
+
+	gfs2_log_commit(sdp, tr);
+
+	gfs2_glock_dq(t_gh);
+	gfs2_holder_put(t_gh);
+
+	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
+		gfs2_log_flush(sdp);
+}
+
+void gfs2_trans_add_gl(struct gfs2_glock *gl)
+{
+	lops_add(gl->gl_sbd, &gl->gl_le);
+}
+
+/**
+ * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
+ * @gl: the glock the buffer belongs to
+ * @bh: The buffer to add
+ *
+ */
+
+void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_bufdata *bd;
+
+	bd = get_v2bd(bh);
+	if (bd)
+		gfs2_assert(sdp, bd->bd_gl == gl,);
+	else {
+		gfs2_meta_attach_bufdata(gl, bh);
+		bd = get_v2bd(bh);
+	}
+
+	lops_add(sdp, &bd->bd_le);
+}
+
+void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
+{
+	struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
+					 GFP_KERNEL | __GFP_NOFAIL);
+	lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
+	rv->rv_blkno = blkno;
+	lops_add(sdp, &rv->rv_le);
+}
+
+void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno)
+{
+	struct gfs2_revoke *rv;
+	int found = FALSE;
+
+	gfs2_log_lock(sdp);
+
+	list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
+		if (rv->rv_blkno == blkno) {
+			list_del(&rv->rv_le.le_list);
+			gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
+			sdp->sd_log_num_revoke--;
+			found = TRUE;
+			break;
+		}
+	}
+
+	gfs2_log_unlock(sdp);
+
+	if (found) {
+		kfree(rv);
+		get_transaction->tr_num_revoke_rm++;
+	}
+}
+
+void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
+{
+	lops_add(rgd->rd_sbd, &rgd->rd_le);
+}
+
+void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+	struct gfs2_databuf *db;
+
+	db = get_v2db(bh);
+	if (!db) {
+		db = kmalloc(sizeof(struct gfs2_databuf),
+			     GFP_KERNEL | __GFP_NOFAIL);
+		lops_init_le(&db->db_le, &gfs2_databuf_lops);
+		get_bh(bh);
+		db->db_bh = bh;
+		set_v2db(bh, db);
+		lops_add(sdp, &db->db_le);
+	}
+}
+
--- a/fs/gfs2/trans.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/trans.h	2005-09-01 17:36:55.538081576 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __TRANS_DOT_H__
+#define __TRANS_DOT_H__
+
+#define RES_DINODE	1
+#define RES_INDIRECT	1
+#define RES_JDATA	1
+#define RES_DATA	1
+#define RES_LEAF	1
+#define RES_RG_BIT	2
+#define RES_EATTR	1
+#define RES_UNLINKED	1
+#define RES_STATFS	1
+#define RES_QUOTA	2
+
+#define gfs2_trans_begin(sdp, blocks, revokes) \
+gfs2_trans_begin_i((sdp), (blocks), (revokes), __FILE__, __LINE__)
+
+int gfs2_trans_begin_i(struct gfs2_sbd *sdp,
+		      unsigned int blocks, unsigned int revokes,
+		      char *file, unsigned int line);
+
+void gfs2_trans_end(struct gfs2_sbd *sdp);
+
+void gfs2_trans_add_gl(struct gfs2_glock *gl);
+void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
+void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
+void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
+void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
+
+#endif /* __TRANS_DOT_H__ */
--- a/fs/gfs2/unlinked.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/unlinked.c	2005-09-01 17:36:55.557078688 +0800
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/kthread.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "inode.h"
+#include "meta_io.h"
+#include "trans.h"
+#include "unlinked.h"
+
+static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot,
+			struct gfs2_unlinked_tag *ut)
+{
+	struct gfs2_inode *ip = sdp->sd_ut_inode;
+	unsigned int block, offset;
+	uint64_t dblock;
+	int new = FALSE;
+	struct buffer_head *bh;
+	int error;
+
+	block = slot / sdp->sd_ut_per_block;
+	offset = slot % sdp->sd_ut_per_block;
+
+	error = gfs2_block_map(ip, block, &new, &dblock, NULL);
+	if (error)
+		return error;
+	error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
+	if (error)
+		return error;
+	if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
+		error = -EIO;
+		goto out;
+	}
+
+	down(&sdp->sd_unlinked_mutex);
+	gfs2_trans_add_bh(ip->i_gl, bh);
+	gfs2_unlinked_tag_out(ut, bh->b_data +
+				  sizeof(struct gfs2_meta_header) +
+				  offset * sizeof(struct gfs2_unlinked_tag));
+	up(&sdp->sd_unlinked_mutex);
+
+ out:
+	brelse(bh);
+
+	return error;
+}
+
+static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	spin_lock(&sdp->sd_unlinked_spin);
+	list_add(&ul->ul_list, &sdp->sd_unlinked_list);
+	gfs2_assert(sdp, ul->ul_count,);
+	ul->ul_count++;
+	atomic_inc(&sdp->sd_unlinked_count);
+	spin_unlock(&sdp->sd_unlinked_spin);
+}
+
+static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	spin_lock(&sdp->sd_unlinked_spin);
+	list_del_init(&ul->ul_list);
+	gfs2_assert(sdp, ul->ul_count > 1,);
+	ul->ul_count--;
+	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0);
+	atomic_dec(&sdp->sd_unlinked_count);
+	spin_unlock(&sdp->sd_unlinked_spin);
+}
+
+static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp)
+{
+	struct list_head *head;
+	struct gfs2_unlinked *ul;
+	int found = FALSE;
+
+	if (sdp->sd_vfs->s_flags & MS_RDONLY)
+		return NULL;
+
+	spin_lock(&sdp->sd_unlinked_spin);
+
+	head = &sdp->sd_unlinked_list;
+
+	list_for_each_entry(ul, head, ul_list) {
+		if (test_bit(ULF_LOCKED, &ul->ul_flags))
+			continue;
+
+		list_move_tail(&ul->ul_list, head);
+		ul->ul_count++;
+		set_bit(ULF_LOCKED, &ul->ul_flags);
+		found = TRUE;
+
+		break;
+	}
+
+	if (!found)
+		ul = NULL;
+
+	spin_unlock(&sdp->sd_unlinked_spin);
+
+	return ul;
+}
+
+/**
+ * enforce_limit - limit the number of inodes waiting to be deallocated
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static void enforce_limit(struct gfs2_sbd *sdp)
+{
+	unsigned int tries = 0, min = 0;
+	int error;
+
+	if (atomic_read(&sdp->sd_unlinked_count) >=
+	    gfs2_tune_get(sdp, gt_ilimit)) {
+		tries = gfs2_tune_get(sdp, gt_ilimit_tries);
+		min = gfs2_tune_get(sdp, gt_ilimit_min);
+	}
+
+	while (tries--) {
+		struct gfs2_unlinked *ul = ul_fish(sdp);
+		if (!ul)
+			break;
+		error = gfs2_inode_dealloc(sdp, ul);
+		gfs2_unlinked_put(sdp, ul);
+
+		if (!error) {
+			if (!--min)
+				break;
+		} else if (error != 1)
+			break;
+	}
+}
+
+static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp)
+{
+	struct gfs2_unlinked *ul;
+
+	ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL);
+	if (ul) {
+		INIT_LIST_HEAD(&ul->ul_list);
+		ul->ul_count = 1;
+		set_bit(ULF_LOCKED, &ul->ul_flags);
+	}
+
+	return ul;
+}
+
+int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul)
+{
+	unsigned int c, o = 0, b;
+	unsigned char byte = 0;
+
+	enforce_limit(sdp);
+
+	*ul = ul_alloc(sdp);
+	if (!*ul)
+		return -ENOMEM;
+
+	spin_lock(&sdp->sd_unlinked_spin);
+
+	for (c = 0; c < sdp->sd_unlinked_chunks; c++)
+		for (o = 0; o < PAGE_SIZE; o++) {
+			byte = sdp->sd_unlinked_bitmap[c][o];
+			if (byte != 0xFF)
+				goto found;
+		}
+
+	goto fail;
+
+ found:
+	for (b = 0; b < 8; b++)
+		if (!(byte & (1 << b)))
+			break;
+	(*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
+
+	if ((*ul)->ul_slot >= sdp->sd_unlinked_slots)
+		goto fail;
+
+	sdp->sd_unlinked_bitmap[c][o] |= 1 << b;
+
+	spin_unlock(&sdp->sd_unlinked_spin);
+
+	return 0;
+
+ fail:
+	spin_unlock(&sdp->sd_unlinked_spin);
+	kfree(*ul);
+	return -ENOSPC;
+}
+
+void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags));
+
+	spin_lock(&sdp->sd_unlinked_spin);
+	gfs2_assert(sdp, ul->ul_count,);
+	ul->ul_count--;
+	if (!ul->ul_count) {
+		gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0);
+		spin_unlock(&sdp->sd_unlinked_spin);
+		kfree(ul);
+	} else
+		spin_unlock(&sdp->sd_unlinked_spin);
+}
+
+int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	int error;
+
+	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
+	gfs2_assert_warn(sdp, list_empty(&ul->ul_list));
+
+	error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
+	if (!error)
+		ul_hash(sdp, ul);
+
+	return error;
+}
+
+int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	int error;
+
+	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
+	gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
+
+	error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
+
+	return error;
+}
+
+int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
+{
+	struct gfs2_unlinked_tag ut;
+	int error;
+
+	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
+	gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
+
+	memset(&ut, 0, sizeof(struct gfs2_unlinked_tag));
+
+	error = munge_ondisk(sdp, ul->ul_slot, &ut);
+	if (error)
+		return error;
+
+	ul_unhash(sdp, ul);
+
+	return 0;
+}
+
+/**
+ * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp)
+{
+	unsigned int hits, strikes;
+	int error;
+
+	for (;;) {
+		hits = 0;
+		strikes = 0;
+
+		for (;;) {
+			struct gfs2_unlinked *ul = ul_fish(sdp);
+			if (!ul)
+				return 0;
+			error = gfs2_inode_dealloc(sdp, ul);
+			gfs2_unlinked_put(sdp, ul);
+
+			if (!error) {
+				hits++;
+				if (strikes)
+					strikes--;
+			} else if (error == 1) {
+				strikes++;
+				if (strikes >=
+				    atomic_read(&sdp->sd_unlinked_count)) {
+					error = 0;
+					break;
+				}
+			} else
+				return error;
+		}
+
+		if (!hits || kthread_should_stop())
+			break;
+
+		cond_resched();
+	}
+
+	return 0;
+}
+
+int gfs2_unlinked_init(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inode *ip = sdp->sd_ut_inode;
+	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+	unsigned int x, slot = 0;
+	unsigned int found = 0;
+	uint64_t dblock;
+	uint32_t extlen = 0;
+	int error;
+
+	if (!ip->i_di.di_size ||
+	    ip->i_di.di_size > (64 << 20) ||
+	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
+		gfs2_consist_inode(ip);
+		return -EIO;		
+	}
+	sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block;
+	sdp->sd_unlinked_chunks = DIV_RU(sdp->sd_unlinked_slots, 8 * PAGE_SIZE);
+
+	error = -ENOMEM;
+
+	sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks,
+					  sizeof(unsigned char *),
+					  GFP_KERNEL);
+	if (!sdp->sd_unlinked_bitmap)
+		return error;
+
+	for (x = 0; x < sdp->sd_unlinked_chunks; x++) {
+		sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!sdp->sd_unlinked_bitmap[x])
+			goto fail;
+	}
+
+	for (x = 0; x < blocks; x++) {
+		struct buffer_head *bh;
+		unsigned int y;
+
+		if (!extlen) {
+			int new = FALSE;
+			error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
+			if (error)
+				goto fail;
+		}
+		gfs2_meta_ra(ip->i_gl, dblock, extlen);
+		error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
+				       &bh);
+		if (error)
+			goto fail;
+		error = -EIO;
+		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
+			brelse(bh);
+			goto fail;
+		}
+
+		for (y = 0;
+		     y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots;
+		     y++, slot++) {
+			struct gfs2_unlinked_tag ut;
+			struct gfs2_unlinked *ul;
+
+			gfs2_unlinked_tag_in(&ut, bh->b_data +
+					  sizeof(struct gfs2_meta_header) +
+					  y * sizeof(struct gfs2_unlinked_tag));
+			if (!ut.ut_inum.no_addr)
+				continue;
+
+			error = -ENOMEM;
+			ul = ul_alloc(sdp);
+			if (!ul) {
+				brelse(bh);
+				goto fail;
+			}
+			ul->ul_ut = ut;
+			ul->ul_slot = slot;
+
+			spin_lock(&sdp->sd_unlinked_spin);
+			gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1);
+			spin_unlock(&sdp->sd_unlinked_spin);
+			ul_hash(sdp, ul);
+
+			gfs2_unlinked_put(sdp, ul);
+			found++;
+		}
+
+		brelse(bh);
+		dblock++;
+		extlen--;
+	}
+
+	if (found)
+		fs_info(sdp, "found %u unlinked inodes\n", found);
+
+	return 0;
+
+ fail:
+	gfs2_unlinked_cleanup(sdp);
+	return error;
+}
+
+/**
+ * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp)
+{
+	struct list_head *head = &sdp->sd_unlinked_list;
+	struct gfs2_unlinked *ul;
+	unsigned int x;
+
+	spin_lock(&sdp->sd_unlinked_spin);
+	while (!list_empty(head)) {
+		ul = list_entry(head->next, struct gfs2_unlinked, ul_list);
+
+		if (ul->ul_count > 1) {
+			list_move_tail(&ul->ul_list, head);
+			spin_unlock(&sdp->sd_unlinked_spin);
+			schedule();
+			spin_lock(&sdp->sd_unlinked_spin);
+			continue;
+		}
+
+		list_del_init(&ul->ul_list);
+		atomic_dec(&sdp->sd_unlinked_count);
+
+		gfs2_assert_warn(sdp, ul->ul_count == 1);
+		gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags));
+		kfree(ul);
+	}
+	spin_unlock(&sdp->sd_unlinked_spin);
+
+	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count));
+
+	if (sdp->sd_unlinked_bitmap) {
+		for (x = 0; x < sdp->sd_unlinked_chunks; x++)
+			kfree(sdp->sd_unlinked_bitmap[x]);
+		kfree(sdp->sd_unlinked_bitmap);
+	}
+}
+
--- a/fs/gfs2/unlinked.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/unlinked.h	2005-09-01 17:36:55.558078536 +0800
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __UNLINKED_DOT_H__
+#define __UNLINKED_DOT_H__
+
+int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul);
+void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
+
+int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
+int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
+int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
+
+int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp);
+
+int gfs2_unlinked_init(struct gfs2_sbd *sdp);
+void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp);
+
+#endif /* __UNLINKED_DOT_H__ */
--- a/fs/gfs2/util.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/util.c	2005-09-01 17:36:55.566077320 +0800
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/crc32.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "lm.h"
+
+kmem_cache_t *gfs2_glock_cachep;
+kmem_cache_t *gfs2_inode_cachep;
+kmem_cache_t *gfs2_bufdata_cachep;
+
+uint32_t gfs2_disk_hash(const char *data, int len)
+{
+	return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
+}
+
+/**
+ * gfs2_assert_i - Cause the machine to panic if @assertion is false
+ *
+ */
+
+void gfs2_assert_i(struct gfs2_sbd *sdp, char *assertion, const char *function,
+		   char *file, unsigned int line)
+{
+	if (sdp->sd_args.ar_oopses_ok) {
+		printk(KERN_ERR
+		       "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
+		       "GFS2: fsid=%s:   function = %s\n"
+		       "GFS2: fsid=%s:   file = %s, line = %u\n"
+		       "GFS2: fsid=%s:   time = %lu\n",
+		       sdp->sd_fsname, assertion,
+		       sdp->sd_fsname, function,
+		       sdp->sd_fsname, file, line,
+		       sdp->sd_fsname, get_seconds());
+		BUG();
+	}
+	dump_stack();
+	panic("GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
+	      "GFS2: fsid=%s:   function = %s\n"
+	      "GFS2: fsid=%s:   file = %s, line = %u\n"
+	      "GFS2: fsid=%s:   time = %lu\n",
+	      sdp->sd_fsname, assertion,
+	      sdp->sd_fsname, function,
+	      sdp->sd_fsname, file, line,
+	      sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
+ * Returns: -1 if this call withdrew the machine,
+ *          -2 if it was already withdrawn
+ */
+
+int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+			   const char *function, char *file, unsigned int line)
+{
+	int me;
+	me = gfs2_lm_withdraw(sdp,
+			     "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
+			     "GFS2: fsid=%s:   function = %s\n"
+			     "GFS2: fsid=%s:   file = %s, line = %u\n"
+			     "GFS2: fsid=%s:   time = %lu\n",
+			     sdp->sd_fsname, assertion,
+			     sdp->sd_fsname, function,
+			     sdp->sd_fsname, file, line,
+			     sdp->sd_fsname, get_seconds());
+	return (me) ? -1 : -2;
+}
+
+/**
+ * gfs2_assert_warn_i - Print a message to the console if @assertion is false
+ * Returns: -1 if we printed something
+ *          -2 if we didn't
+ */
+
+int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+		       const char *function, char *file, unsigned int line)
+{
+	if (time_before(jiffies,
+			sdp->sd_last_warning +
+			gfs2_tune_get(sdp, gt_complain_secs) * HZ))
+		return -2;
+
+	printk(KERN_WARNING
+	       "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
+	       "GFS2: fsid=%s:   function = %s\n"
+	       "GFS2: fsid=%s:   file = %s, line = %u\n"
+	       "GFS2: fsid=%s:   time = %lu\n",
+	       sdp->sd_fsname, assertion,
+	       sdp->sd_fsname, function,
+	       sdp->sd_fsname, file, line,
+	       sdp->sd_fsname, get_seconds());
+
+	if (sdp->sd_args.ar_debug)
+		BUG();
+
+	sdp->sd_last_warning = jiffies;
+
+	return -1;
+}
+
+/**
+ * gfs2_consist_i - Flag a filesystem consistency error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          0 if it was already withdrawn
+ */
+
+int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
+		   char *file, unsigned int line)
+{
+	return gfs2_lm_withdraw(sdp,
+			"GFS2: fsid=%s: fatal: filesystem consistency error\n"
+			"GFS2: fsid=%s:   function = %s\n"
+			"GFS2: fsid=%s:   file = %s, line = %u\n"
+			"GFS2: fsid=%s:   time = %lu\n",
+			sdp->sd_fsname,
+			sdp->sd_fsname, function,
+			sdp->sd_fsname, file, line,
+			sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          0 if it was already withdrawn
+ */
+
+int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
+			 const char *function, char *file, unsigned int line)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	return gfs2_lm_withdraw(sdp,
+			"GFS2: fsid=%s: fatal: filesystem consistency error\n"
+			"GFS2: fsid=%s:   inode = %"PRIu64"/%"PRIu64"\n"
+			"GFS2: fsid=%s:   function = %s\n"
+			"GFS2: fsid=%s:   file = %s, line = %u\n"
+			"GFS2: fsid=%s:   time = %lu\n",
+			sdp->sd_fsname,
+			sdp->sd_fsname,
+			ip->i_num.no_formal_ino, ip->i_num.no_addr,
+			sdp->sd_fsname, function,
+			sdp->sd_fsname, file, line,
+			sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          0 if it was already withdrawn
+ */
+
+int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
+			 const char *function, char *file, unsigned int line)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	return gfs2_lm_withdraw(sdp,
+			"GFS2: fsid=%s: fatal: filesystem consistency error\n"
+			"GFS2: fsid=%s:   RG = %"PRIu64"\n"
+			"GFS2: fsid=%s:   function = %s\n"
+			"GFS2: fsid=%s:   file = %s, line = %u\n"
+			"GFS2: fsid=%s:   time = %lu\n",
+			sdp->sd_fsname,
+			sdp->sd_fsname, rgd->rd_ri.ri_addr,
+			sdp->sd_fsname, function,
+			sdp->sd_fsname, file, line,
+			sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          -2 if it was already withdrawn
+ */
+
+int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		       const char *type, const char *function, char *file,
+		       unsigned int line)
+{
+	int me;
+	me = gfs2_lm_withdraw(sdp,
+			     "GFS2: fsid=%s: fatal: invalid metadata block\n"
+			     "GFS2: fsid=%s:   bh = %"PRIu64" (%s)\n"
+			     "GFS2: fsid=%s:   function = %s\n"
+			     "GFS2: fsid=%s:   file = %s, line = %u\n"
+			     "GFS2: fsid=%s:   time = %lu\n",
+			     sdp->sd_fsname,
+			     sdp->sd_fsname, (uint64_t)bh->b_blocknr, type,
+			     sdp->sd_fsname, function,
+			     sdp->sd_fsname, file, line,
+			     sdp->sd_fsname, get_seconds());
+	return (me) ? -1 : -2;
+}
+
+/**
+ * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          -2 if it was already withdrawn
+ */
+
+int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			   uint16_t type, uint16_t t, const char *function,
+			   char *file, unsigned int line)
+{
+	int me;
+	me = gfs2_lm_withdraw(sdp,
+		"GFS2: fsid=%s: fatal: invalid metadata block\n"
+		"GFS2: fsid=%s:   bh = %"PRIu64" (type: exp=%u, found=%u)\n"
+		"GFS2: fsid=%s:   function = %s\n"
+		"GFS2: fsid=%s:   file = %s, line = %u\n"
+		"GFS2: fsid=%s:   time = %lu\n",
+		sdp->sd_fsname,
+		sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t,
+		sdp->sd_fsname, function,
+		sdp->sd_fsname, file, line,
+		sdp->sd_fsname, get_seconds());
+	return (me) ? -1 : -2;
+}
+
+/**
+ * gfs2_io_error_i - Flag an I/O error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          0 if it was already withdrawn
+ */
+
+int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
+		    unsigned int line)
+{
+	return gfs2_lm_withdraw(sdp,
+			       "GFS2: fsid=%s: fatal: I/O error\n"
+			       "GFS2: fsid=%s:   function = %s\n"
+			       "GFS2: fsid=%s:   file = %s, line = %u\n"
+			       "GFS2: fsid=%s:   time = %lu\n",
+			       sdp->sd_fsname,
+			       sdp->sd_fsname, function,
+			       sdp->sd_fsname, file, line,
+			       sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
+ * Returns: -1 if this call withdrew the machine,
+ *          0 if it was already withdrawn
+ */
+
+int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		       const char *function, char *file, unsigned int line)
+{
+	return gfs2_lm_withdraw(sdp,
+			       "GFS2: fsid=%s: fatal: I/O error\n"
+			       "GFS2: fsid=%s:   block = %"PRIu64"\n"
+			       "GFS2: fsid=%s:   function = %s\n"
+			       "GFS2: fsid=%s:   file = %s, line = %u\n"
+			       "GFS2: fsid=%s:   time = %lu\n",
+			       sdp->sd_fsname,
+			       sdp->sd_fsname, (uint64_t)bh->b_blocknr,
+			       sdp->sd_fsname, function,
+			       sdp->sd_fsname, file, line,
+			       sdp->sd_fsname, get_seconds());
+}
+
+/**
+ * gfs2_add_bh_to_ub - copy a buffer up to user space
+ * @ub: the structure representing where to copy
+ * @bh: the buffer
+ *
+ * Returns: errno
+ */
+
+int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh)
+{
+	uint64_t blkno = bh->b_blocknr;
+
+	if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size)
+		return -ENOMEM;
+
+	if (copy_to_user(ub->ub_data + ub->ub_count,
+			  &blkno,
+			  sizeof(uint64_t)))
+		return -EFAULT;
+	ub->ub_count += sizeof(uint64_t);
+
+	if (copy_to_user(ub->ub_data + ub->ub_count,
+			  bh->b_data,
+			  bh->b_size))
+		return -EFAULT;
+	ub->ub_count += bh->b_size;
+
+	return 0;
+}
+
+int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count,
+		  char *fmt, ...)
+{
+	va_list args;
+	int left, out;
+
+	if (!buf) {
+		va_start(args, fmt);
+		vprintk(fmt, args);
+		va_end(args);
+		return 0;
+	}
+
+	left = size - *count;
+	if (left <= 0)
+		return 1;
+
+	va_start(args, fmt);
+	out = vsnprintf(buf + *count, left, fmt, args);
+	va_end(args);
+
+	if (out < left)
+		*count += out;
+	else
+		return 1;
+
+	return 0;
+}
+
+void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
+		      unsigned int bit, int new_value)
+{
+	unsigned int c, o, b = bit;
+	int old_value;
+
+	c = b / (8 * PAGE_SIZE);
+	b %= 8 * PAGE_SIZE;
+	o = b / 8;
+	b %= 8;
+
+	old_value = (bitmap[c][o] & (1 << b));
+	gfs2_assert_withdraw(sdp, !old_value != !new_value);
+
+	if (new_value)
+		bitmap[c][o] |= 1 << b;
+	else
+		bitmap[c][o] &= ~(1 << b);
+}
+
--- a/fs/gfs2/util.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/util.h	2005-09-01 17:36:55.566077320 +0800
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __UTIL_DOT_H__
+#define __UTIL_DOT_H__
+
+uint32_t gfs2_disk_hash(const char *data, int len);
+
+
+#define fs_printk(level, fs, fmt, arg...) \
+	printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)
+
+#define fs_info(fs, fmt, arg...) \
+	fs_printk(KERN_INFO , fs , fmt , ## arg)
+
+#define fs_warn(fs, fmt, arg...) \
+	fs_printk(KERN_WARNING , fs , fmt , ## arg)
+
+#define fs_err(fs, fmt, arg...) \
+	fs_printk(KERN_ERR, fs , fmt , ## arg)
+
+
+void gfs2_assert_i(struct gfs2_sbd *sdp, char *assertion, const char *function,
+		   char *file, unsigned int line) __attribute__ ((noreturn));
+
+#define gfs2_assert(sdp, assertion, todo) \
+do { \
+	if (unlikely(!(assertion))) { \
+		{todo} \
+		gfs2_assert_i((sdp), #assertion, \
+			      __FUNCTION__, __FILE__, __LINE__); \
+	} \
+} while (0)
+
+
+int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+			   const char *function, char *file, unsigned int line);
+
+#define gfs2_assert_withdraw(sdp, assertion) \
+((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
+					__FUNCTION__, __FILE__, __LINE__))
+
+
+int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+		       const char *function, char *file, unsigned int line);
+
+#define gfs2_assert_warn(sdp, assertion) \
+((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
+					__FUNCTION__, __FILE__, __LINE__))
+
+
+int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
+		   const char *function, char *file, unsigned int line);
+
+#define gfs2_consist(sdp) \
+gfs2_consist_i((sdp), FALSE, __FUNCTION__, __FILE__, __LINE__)
+
+
+int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
+			 const char *function, char *file, unsigned int line);
+
+#define gfs2_consist_inode(ip) \
+gfs2_consist_inode_i((ip), FALSE, __FUNCTION__, __FILE__, __LINE__)
+
+
+int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
+			 const char *function, char *file, unsigned int line);
+
+#define gfs2_consist_rgrpd(rgd) \
+gfs2_consist_rgrpd_i((rgd), FALSE, __FUNCTION__, __FILE__, __LINE__)
+
+
+int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		       const char *type, const char *function,
+		       char *file, unsigned int line);
+
+static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
+				    struct buffer_head *bh,
+				    const char *function,
+				    char *file, unsigned int line)
+{
+	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
+	uint32_t magic = mh->mh_magic;
+	uint64_t blkno = mh->mh_blkno;
+	magic = gfs2_32_to_cpu(magic);
+	blkno = gfs2_64_to_cpu(blkno);
+	if (unlikely(magic != GFS2_MAGIC))
+		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
+					  file, line);
+	if (unlikely(blkno != bh->b_blocknr))
+		return gfs2_meta_check_ii(sdp, bh, "block number", function,
+					  file, line);
+	return 0;
+}
+
+#define gfs2_meta_check(sdp, bh) \
+gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
+
+
+int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			   uint16_t type, uint16_t t,
+			   const char *function,
+			   char *file, unsigned int line);
+
+static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
+					struct buffer_head *bh,
+					uint16_t type,
+					const char *function,
+					char *file, unsigned int line)
+{
+	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
+	uint32_t magic = mh->mh_magic;
+	uint16_t t = mh->mh_type;
+	uint64_t blkno = mh->mh_blkno;
+	magic = gfs2_32_to_cpu(magic);
+	blkno = gfs2_64_to_cpu(blkno);
+	if (unlikely(magic != GFS2_MAGIC))
+		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
+					  file, line);
+	if (unlikely(blkno != bh->b_blocknr))
+		return gfs2_meta_check_ii(sdp, bh, "block number", function,
+					  file, line);
+	t = gfs2_16_to_cpu(t);
+        if (unlikely(t != type))
+		return gfs2_metatype_check_ii(sdp, bh, type, t, function,
+					      file, line);
+	return 0;
+}
+
+#define gfs2_metatype_check(sdp, bh, type) \
+gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
+
+static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type,
+				     uint16_t format)
+{
+	struct gfs2_meta_header *mh;
+	mh = (struct gfs2_meta_header *)bh->b_data;
+	mh->mh_type = cpu_to_gfs2_16(type);
+	mh->mh_format = cpu_to_gfs2_16(format);
+}
+
+
+int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
+		    char *file, unsigned int line);
+
+#define gfs2_io_error(sdp) \
+gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__);
+
+
+int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		       const char *function, char *file, unsigned int line);
+
+#define gfs2_io_error_bh(sdp, bh) \
+gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
+
+
+extern kmem_cache_t *gfs2_glock_cachep;
+extern kmem_cache_t *gfs2_inode_cachep;
+extern kmem_cache_t *gfs2_bufdata_cachep;
+
+struct gfs2_user_buffer {
+	char __user *ub_data;
+	unsigned int ub_size;
+	unsigned int ub_count;
+};
+
+int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh);
+
+static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
+					   unsigned int *p)
+{
+	unsigned int x;
+	spin_lock(&gt->gt_spin);
+	x = *p;
+	spin_unlock(&gt->gt_spin);
+	return x;
+}
+
+#define gfs2_tune_get(sdp, field) \
+gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
+
+int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count,
+		  char *fmt, ...) __attribute__ ((format(printf, 4, 5)));
+
+#define gfs2_printf(fmt, args...) \
+do { \
+	if (gfs2_printf_i(buf, size, count, fmt, ##args)) \
+		goto out; \
+} while(0)
+
+void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
+		      unsigned int bit, int new_value);
+
+#endif /* __UTIL_DOT_H__ */
+

[PATCH 03/13] GFS: directories

Code that handles directory operations.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/dir.c | 2158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/dir.h |   51 +
 2 files changed, 2209 insertions(+)

--- a/fs/gfs2/dir.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/dir.c	2005-09-01 17:36:55.180135992 +0800
@@ -0,0 +1,2158 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+/*
+* Implements Extendible Hashing as described in:
+*   "Extendible Hashing" by Fagin, et al in
+*     __ACM Trans. on Database Systems__, Sept 1979.
+*
+*
+* Here's the layout of dirents which is essentially the same as that of ext2
+* within a single block. The field de_name_len is the number of bytes
+* actually required for the name (no null terminator). The field de_rec_len
+* is the number of bytes allocated to the dirent. The offset of the next
+* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
+* deleted, the preceding dirent inherits its allocated space, ie
+* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
+* by adding de_rec_len to the current dirent, this essentially causes the
+* deleted dirent to get jumped over when iterating through all the dirents.
+*
+* When deleting the first dirent in a block, there is no previous dirent so
+* the field de_ino is set to zero to designate it as deleted. When allocating
+* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
+* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
+* dirent is allocated. Otherwise it must go through all the 'used' dirents
+* searching for one in which the amount of total space minus the amount of
+* used space will provide enough space for the new dirent.
+*
+* There are two types of blocks in which dirents reside. In a stuffed dinode,
+* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
+* the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
+* beginning of the leaf block. The dirents reside in leaves when
+*
+* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
+*
+* Otherwise, the dirents are "linear", within a single stuffed dinode block.
+*
+* When the dirents are in leaves, the actual contents of the directory file are
+* used as an array of 64-bit block pointers pointing to the leaf blocks. The
+* dirents are NOT in the directory file itself. There can be more than one block
+* pointer in the array that points to the same leaf. In fact, when a directory
+* is first converted from linear to exhash, all of the pointers point to the
+* same leaf.
+*
+* When a leaf is completely full, the size of the hash table can be
+* doubled unless it is already at the maximum size which is hard coded into
+* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
+* but never before the maximum hash table size has been reached.
+*/
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/sort.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "dir.h"
+#include "glock.h"
+#include "inode.h"
+#include "jdata.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+
+#define IS_LEAF     1 /* Hashed (leaf) directory */
+#define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
+
+#if 1
+#define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1)
+#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
+#else
+#define gfs2_disk_hash2offset(h) (((uint64_t)(h)))
+#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
+#endif
+
+typedef int (*leaf_call_t) (struct gfs2_inode *dip,
+			    uint32_t index, uint32_t len, uint64_t leaf_no,
+			    void *data);
+
+/**
+ * int gfs2_filecmp - Compare two filenames
+ * @file1: The first filename
+ * @file2: The second filename
+ * @len_of_file2: The length of the second file
+ *
+ * This routine compares two filenames and returns TRUE if they are equal.
+ *
+ * Returns: TRUE (!=0) if the files are the same, otherwise FALSE (0).
+ */
+
+int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2)
+{
+	if (file1->len != len_of_file2)
+		return FALSE;
+	if (memcmp(file1->name, file2, file1->len))
+		return FALSE;
+	return TRUE;
+}
+
+/**
+ * dirent_first - Return the first dirent
+ * @dip: the directory
+ * @bh: The buffer
+ * @dent: Pointer to list of dirents
+ *
+ * return first dirent whether bh points to leaf or stuffed dinode
+ *
+ * Returns: IS_LEAF, IS_DINODE, or -errno
+ */
+
+static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
+			struct gfs2_dirent **dent)
+{
+	struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
+
+	if (gfs2_16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
+		if (gfs2_meta_check(dip->i_sbd, bh))
+			return -EIO;
+		*dent = (struct gfs2_dirent *)(bh->b_data +
+					       sizeof(struct gfs2_leaf));
+		return IS_LEAF;
+	} else {
+		if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
+			return -EIO;
+		*dent = (struct gfs2_dirent *)(bh->b_data +
+					       sizeof(struct gfs2_dinode));
+		return IS_DINODE;
+	}
+}
+
+/**
+ * dirent_next - Next dirent
+ * @dip: the directory
+ * @bh: The buffer
+ * @dent: Pointer to list of dirents
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
+		       struct gfs2_dirent **dent)
+{
+	struct gfs2_dirent *tmp, *cur;
+	char *bh_end;
+	uint32_t cur_rec_len;
+
+	cur = *dent;
+	bh_end = bh->b_data + bh->b_size;
+	cur_rec_len = gfs2_32_to_cpu(cur->de_rec_len);
+
+	if ((char *)cur + cur_rec_len >= bh_end) {
+		if ((char *)cur + cur_rec_len > bh_end) {
+			gfs2_consist_inode(dip);
+			return -EIO;
+		}
+		return -ENOENT;
+	}
+
+	tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
+
+	if ((char *)tmp + gfs2_32_to_cpu(tmp->de_rec_len) > bh_end) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+        /* Only the first dent could ever have de_inum.no_addr == 0 */
+	if (!tmp->de_inum.no_addr) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	*dent = tmp;
+
+	return 0;
+}
+
+/**
+ * dirent_del - Delete a dirent
+ * @dip: The GFS2 inode
+ * @bh: The buffer
+ * @prev: The previous dirent
+ * @cur: The current dirent
+ *
+ */
+
+static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
+		       struct gfs2_dirent *prev, struct gfs2_dirent *cur)
+{
+	uint32_t cur_rec_len, prev_rec_len;
+
+	if (!cur->de_inum.no_addr) {
+		gfs2_consist_inode(dip);
+		return;
+	}
+
+	gfs2_trans_add_bh(dip->i_gl, bh);
+
+	/* If there is no prev entry, this is the first entry in the block.
+	   The de_rec_len is already as big as it needs to be.  Just zero
+	   out the inode number and return.  */
+
+	if (!prev) {
+		cur->de_inum.no_addr = 0;	/* No endianess worries */
+		return;
+	}
+
+	/*  Combine this dentry with the previous one.  */
+
+	prev_rec_len = gfs2_32_to_cpu(prev->de_rec_len);
+	cur_rec_len = gfs2_32_to_cpu(cur->de_rec_len);
+
+	if ((char *)prev + prev_rec_len != (char *)cur)
+		gfs2_consist_inode(dip);
+	if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
+		gfs2_consist_inode(dip);
+
+	prev_rec_len += cur_rec_len;
+	prev->de_rec_len = cpu_to_gfs2_32(prev_rec_len);
+}
+
+/**
+ * gfs2_dirent_alloc - Allocate a directory entry
+ * @dip: The GFS2 inode
+ * @bh: The buffer
+ * @name_len: The length of the name
+ * @dent_out: Pointer to list of dirents
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
+		      int name_len, struct gfs2_dirent **dent_out)
+{
+	struct gfs2_dirent *dent, *new;
+	unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
+	unsigned int entries = 0, offset = 0;
+	int type;
+
+	type = dirent_first(dip, bh, &dent);
+	if (type < 0)
+		return type;
+
+	if (type == IS_LEAF) {
+		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
+		entries = gfs2_16_to_cpu(leaf->lf_entries);
+		offset = sizeof(struct gfs2_leaf);
+	} else {
+		struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
+		entries = gfs2_32_to_cpu(dinode->di_entries);
+		offset = sizeof(struct gfs2_dinode);
+	}
+
+	if (!entries) {
+		if (dent->de_inum.no_addr) {
+			gfs2_consist_inode(dip);
+			return -EIO;
+		}
+
+		gfs2_trans_add_bh(dip->i_gl, bh);
+
+		dent->de_rec_len = bh->b_size - offset;
+		dent->de_rec_len = cpu_to_gfs2_32(dent->de_rec_len);
+		dent->de_name_len = name_len;
+
+		*dent_out = dent;
+		return 0;
+	}
+
+	do {
+		uint32_t cur_rec_len, cur_name_len;
+
+		cur_rec_len = gfs2_32_to_cpu(dent->de_rec_len);
+		cur_name_len = dent->de_name_len;
+
+		if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
+		    (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {
+			gfs2_trans_add_bh(dip->i_gl, bh);
+
+			if (dent->de_inum.no_addr) {
+				new = (struct gfs2_dirent *)((char *)dent +
+							    GFS2_DIRENT_SIZE(cur_name_len));
+				memset(new, 0, sizeof(struct gfs2_dirent));
+
+				new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len);
+				new->de_rec_len = cpu_to_gfs2_32(new->de_rec_len);
+				new->de_name_len = name_len;
+
+				dent->de_rec_len = cur_rec_len - gfs2_32_to_cpu(new->de_rec_len);
+				dent->de_rec_len = cpu_to_gfs2_32(dent->de_rec_len);
+
+				*dent_out = new;
+				return 0;
+			}
+
+			dent->de_name_len = name_len;
+
+			*dent_out = dent;
+			return 0;
+		}
+	} while (dirent_next(dip, bh, &dent) == 0);
+
+	return -ENOSPC;
+}
+
+/**
+ * dirent_fits - See if we can fit a entry in this buffer
+ * @dip: The GFS2 inode
+ * @bh: The buffer
+ * @name_len: The length of the name
+ *
+ * Returns: TRUE if it can fit, FALSE otherwise
+ */
+
+static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh,
+		       int name_len)
+{
+	struct gfs2_dirent *dent;
+	unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
+	unsigned int entries = 0;
+	int type;
+
+	type = dirent_first(dip, bh, &dent);
+	if (type < 0)
+		return type;
+
+	if (type == IS_LEAF) {
+		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
+		entries = gfs2_16_to_cpu(leaf->lf_entries);
+	} else {
+		struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
+		entries = gfs2_32_to_cpu(dinode->di_entries);
+	}
+
+	if (!entries)
+		return TRUE;
+
+	do {
+		uint32_t cur_rec_len, cur_name_len;
+
+		cur_rec_len = gfs2_32_to_cpu(dent->de_rec_len);
+		cur_name_len = dent->de_name_len;
+
+		if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
+		    (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len))
+			return TRUE;
+	} while (dirent_next(dip, bh, &dent) == 0);
+
+	return FALSE;
+}
+
+static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh,
+		       struct qstr *filename, struct gfs2_dirent **dent_out,
+		       struct gfs2_dirent **dent_prev)
+{
+	uint32_t hash;
+	struct gfs2_dirent *dent, *prev = NULL;
+	unsigned int entries = 0;
+	int type;
+
+	type = dirent_first(dip, bh, &dent);
+	if (type < 0)
+		return type;
+
+	if (type == IS_LEAF) {
+		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
+		entries = gfs2_16_to_cpu(leaf->lf_entries);
+	} else if (type == IS_DINODE) {
+		struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
+		entries = gfs2_32_to_cpu(dinode->di_entries);
+	}
+
+	hash = gfs2_disk_hash(filename->name, filename->len);
+
+	do {
+		if (!dent->de_inum.no_addr) {
+			prev = dent;
+			continue;
+		}
+
+		if (gfs2_32_to_cpu(dent->de_hash) == hash &&
+		    gfs2_filecmp(filename, (char *)(dent + 1),
+				 dent->de_name_len)) {
+			*dent_out = dent;
+			if (dent_prev)
+				*dent_prev = prev;
+
+			return 0;
+		}
+
+		prev = dent;
+	} while (dirent_next(dip, bh, &dent) == 0);
+
+	return -ENOENT;
+}
+
+static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
+		    struct buffer_head **bhp)
+{
+	int error;
+
+	error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
+	if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
+		error = -EIO;
+
+	return error;
+}
+
+/**
+ * get_leaf_nr - Get a leaf number associated with the index
+ * @dip: The GFS2 inode
+ * @index:
+ * @leaf_out:
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
+		       uint64_t *leaf_out)
+{
+	uint64_t leaf_no;
+	int error;
+
+	error = gfs2_jdata_read_mem(dip, (char *)&leaf_no,
+				    index * sizeof(uint64_t),
+				    sizeof(uint64_t));
+	if (error != sizeof(uint64_t))
+		return (error < 0) ? error : -EIO;
+
+	*leaf_out = gfs2_64_to_cpu(leaf_no);
+
+	return 0;
+}
+
+static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
+			  struct buffer_head **bh_out)
+{
+	uint64_t leaf_no;
+	int error;
+
+	error = get_leaf_nr(dip, index, &leaf_no);
+	if (!error)
+		error = get_leaf(dip, leaf_no, bh_out);
+
+	return error;
+}
+
+static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in,
+			 struct buffer_head **bh_out)
+{
+	struct gfs2_leaf *leaf;
+	int error;
+
+	leaf = (struct gfs2_leaf *)bh_in->b_data;
+
+	if (!leaf->lf_next)
+		error = -ENOENT;
+	else
+		error = get_leaf(dip, gfs2_64_to_cpu(leaf->lf_next), bh_out);
+
+	return error;
+}
+
+static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename,
+			      struct gfs2_dirent **dent_out,
+			      struct gfs2_dirent **dent_prev,
+			      struct buffer_head **bh_out)
+{
+	struct buffer_head *bh = NULL, *bh_next;
+	uint32_t hsize, index;
+	uint32_t hash;
+	int error;
+
+	hsize = 1 << dip->i_di.di_depth;
+	if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	/*  Figure out the address of the leaf node.  */
+
+	hash = gfs2_disk_hash(filename->name, filename->len);
+	index = hash >> (32 - dip->i_di.di_depth);
+
+	error = get_first_leaf(dip, index, &bh_next);
+	if (error)
+		return error;
+
+	/*  Find the entry  */
+
+	do {
+		brelse(bh);
+
+		bh = bh_next;
+
+		error = leaf_search(dip, bh, filename, dent_out, dent_prev);
+		switch (error) {
+		case 0:
+			*bh_out = bh;
+			return 0;
+
+		case -ENOENT:
+			break;
+
+		default:
+			brelse(bh);
+			return error;
+		}
+
+		error = get_next_leaf(dip, bh, &bh_next);
+	}
+	while (!error);
+
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * dir_make_exhash - Convert a stuffed directory into an ExHash directory
+ * @dip: The GFS2 inode
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int dir_make_exhash(struct gfs2_inode *dip)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_dirent *dent;
+	struct buffer_head *bh, *dibh;
+	struct gfs2_leaf *leaf;
+	int y;
+	uint32_t x;
+	uint64_t *lp, bn;
+	int error;
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	/*  Allocate a new block for the first leaf node  */
+
+	bn = gfs2_alloc_meta(dip);
+
+	/*  Turn over a new leaf  */
+
+	bh = gfs2_meta_new(dip->i_gl, bn);
+	gfs2_trans_add_bh(dip->i_gl, bh);
+	gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
+	gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
+
+	/*  Fill in the leaf structure  */
+
+	leaf = (struct gfs2_leaf *)bh->b_data;
+
+	gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16),);
+
+	leaf->lf_dirent_format = cpu_to_gfs2_32(GFS2_FORMAT_DE);
+	leaf->lf_entries = cpu_to_gfs2_16(dip->i_di.di_entries);
+
+	/*  Copy dirents  */
+
+	gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
+			     sizeof(struct gfs2_dinode));
+
+	/*  Find last entry  */
+
+	x = 0;
+	dirent_first(dip, bh, &dent);
+
+	do {
+		if (!dent->de_inum.no_addr)
+			continue;
+		if (++x == dip->i_di.di_entries)
+			break;
+	}
+	while (dirent_next(dip, bh, &dent) == 0);
+
+	/*  Adjust the last dirent's record length
+	   (Remember that dent still points to the last entry.)  */
+
+	dent->de_rec_len = gfs2_32_to_cpu(dent->de_rec_len) +
+		sizeof(struct gfs2_dinode) -
+		sizeof(struct gfs2_leaf);
+	dent->de_rec_len = cpu_to_gfs2_32(dent->de_rec_len);
+
+	brelse(bh);
+
+	/*  We're done with the new leaf block, now setup the new
+	    hash table.  */
+
+	gfs2_trans_add_bh(dip->i_gl, dibh);
+	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+
+	lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
+
+	for (x = sdp->sd_hash_ptrs; x--; lp++)
+		*lp = cpu_to_gfs2_64(bn);
+
+	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
+	dip->i_di.di_blocks++;
+	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
+	dip->i_di.di_payload_format = 0;
+
+	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
+	dip->i_di.di_depth = y;
+
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * dir_split_leaf - Split a leaf block into two
+ * @dip: The GFS2 inode
+ * @index:
+ * @leaf_no:
+ *
+ * Returns: 0 on success, error code on failure
+ */
+
+static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index,
+			  uint64_t leaf_no)
+{
+	struct buffer_head *nbh, *obh, *dibh;
+	struct gfs2_leaf *nleaf, *oleaf;
+	struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
+	uint32_t start, len, half_len, divider;
+	uint64_t bn, *lp;
+	uint32_t name_len;
+	int x, moved = FALSE;
+	int error;
+
+	/*  Allocate the new leaf block  */
+
+	bn = gfs2_alloc_meta(dip);
+
+	/*  Get the new leaf block  */
+
+	nbh = gfs2_meta_new(dip->i_gl, bn);
+	gfs2_trans_add_bh(dip->i_gl, nbh);
+	gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
+	gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header));
+
+	nleaf = (struct gfs2_leaf *)nbh->b_data;
+
+	nleaf->lf_dirent_format = cpu_to_gfs2_32(GFS2_FORMAT_DE);
+
+	/*  Get the old leaf block  */
+
+	error = get_leaf(dip, leaf_no, &obh);
+	if (error)
+		goto fail;
+
+	gfs2_trans_add_bh(dip->i_gl, obh);
+
+	oleaf = (struct gfs2_leaf *)obh->b_data;
+
+	/*  Compute the start and len of leaf pointers in the hash table.  */
+
+	len = 1 << (dip->i_di.di_depth - gfs2_16_to_cpu(oleaf->lf_depth));
+	half_len = len >> 1;
+	if (!half_len) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+		goto fail_brelse;
+	}
+
+	start = (index & ~(len - 1));
+
+	/* Change the pointers.
+	   Don't bother distinguishing stuffed from non-stuffed.
+	   This code is complicated enough already. */
+
+	lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL);
+
+	error = gfs2_jdata_read_mem(dip, (char *)lp, start * sizeof(uint64_t),
+				    half_len * sizeof(uint64_t));
+	if (error != half_len * sizeof(uint64_t)) {
+		if (error >= 0)
+			error = -EIO;
+		goto fail_lpfree;
+	}
+
+	/*  Change the pointers  */
+
+	for (x = 0; x < half_len; x++)
+		lp[x] = cpu_to_gfs2_64(bn);
+
+	error = gfs2_jdata_write_mem(dip, (char *)lp, start * sizeof(uint64_t),
+				     half_len * sizeof(uint64_t));
+	if (error != half_len * sizeof(uint64_t)) {
+		if (error >= 0)
+			error = -EIO;
+		goto fail_lpfree;
+	}
+
+	kfree(lp);
+
+	/*  Compute the divider  */
+
+	divider = (start + half_len) << (32 - dip->i_di.di_depth);
+
+	/*  Copy the entries  */
+
+	dirent_first(dip, obh, &dent);
+
+	do {
+		next = dent;
+		if (dirent_next(dip, obh, &next))
+			next = NULL;
+
+		if (dent->de_inum.no_addr &&
+		    gfs2_32_to_cpu(dent->de_hash) < divider) {
+			name_len = dent->de_name_len;
+
+			gfs2_dirent_alloc(dip, nbh, name_len, &new);
+
+			new->de_inum = dent->de_inum; /* No endian worries */
+			new->de_hash = dent->de_hash; /* No endian worries */
+			new->de_type = dent->de_type; /* No endian worries */
+			memcpy((char *)(new + 1), (char *)(dent + 1),
+			       name_len);
+
+			nleaf->lf_entries = gfs2_16_to_cpu(nleaf->lf_entries)+1;
+			nleaf->lf_entries = cpu_to_gfs2_16(nleaf->lf_entries);
+
+			dirent_del(dip, obh, prev, dent);
+
+			if (!oleaf->lf_entries)
+				gfs2_consist_inode(dip);
+			oleaf->lf_entries = gfs2_16_to_cpu(oleaf->lf_entries)-1;
+			oleaf->lf_entries = cpu_to_gfs2_16(oleaf->lf_entries);
+
+			if (!prev)
+				prev = dent;
+
+			moved = TRUE;
+		} else
+			prev = dent;
+
+		dent = next;
+	}
+	while (dent);
+
+	/* If none of the entries got moved into the new leaf,
+	   artificially fill in the first entry. */
+
+	if (!moved) {
+		gfs2_dirent_alloc(dip, nbh, 0, &new);
+		new->de_inum.no_addr = 0;
+	}
+
+	oleaf->lf_depth = gfs2_16_to_cpu(oleaf->lf_depth) + 1;
+	oleaf->lf_depth = cpu_to_gfs2_16(oleaf->lf_depth);
+	nleaf->lf_depth = oleaf->lf_depth;
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
+		dip->i_di.di_blocks++;
+		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	brelse(obh);
+	brelse(nbh);
+
+	return error;
+
+ fail_lpfree:
+	kfree(lp);
+
+ fail_brelse:
+	brelse(obh);
+
+ fail:
+	brelse(nbh);
+	return error;
+}
+
+/**
+ * dir_double_exhash - Double size of ExHash table
+ * @dip: The GFS2 dinode
+ *
+ * Returns: 0 on success, error code on failure
+ */
+
+static int dir_double_exhash(struct gfs2_inode *dip)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct buffer_head *dibh;
+	uint32_t hsize;
+	uint64_t *buf;
+	uint64_t *from, *to;
+	uint64_t block;
+	int x;
+	int error = 0;
+
+	hsize = 1 << dip->i_di.di_depth;
+	if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	/*  Allocate both the "from" and "to" buffers in one big chunk  */
+
+	buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
+
+	for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
+		error = gfs2_jdata_read_mem(dip, (char *)buf,
+					    block * sdp->sd_hash_bsize,
+					    sdp->sd_hash_bsize);
+		if (error != sdp->sd_hash_bsize) {
+			if (error >= 0)
+				error = -EIO;
+			goto fail;
+		}
+
+		from = buf;
+		to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
+
+		for (x = sdp->sd_hash_ptrs; x--; from++) {
+			*to++ = *from;	/*  No endianess worries  */
+			*to++ = *from;
+		}
+
+		error = gfs2_jdata_write_mem(dip,
+					     (char *)buf + sdp->sd_hash_bsize,
+					     block * sdp->sd_sb.sb_bsize,
+					     sdp->sd_sb.sb_bsize);
+		if (error != sdp->sd_sb.sb_bsize) {
+			if (error >= 0)
+				error = -EIO;
+			goto fail;
+		}
+	}
+
+	kfree(buf);
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (!gfs2_assert_withdraw(sdp, !error)) {
+		dip->i_di.di_depth++;
+		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	return error;
+
+ fail:
+	kfree(buf);
+
+	return error;
+}
+
+/**
+ * compare_dents - compare directory entries by hash value
+ * @a: first dent
+ * @b: second dent
+ *
+ * When comparing the hash entries of @a to @b:
+ *   gt: returns 1
+ *   lt: returns -1
+ *   eq: returns 0
+ */
+
+static int compare_dents(const void *a, const void *b)
+{
+	struct gfs2_dirent *dent_a, *dent_b;
+	uint32_t hash_a, hash_b;
+	int ret = 0;
+
+	dent_a = *(struct gfs2_dirent **)a;
+	hash_a = dent_a->de_hash;
+	hash_a = gfs2_32_to_cpu(hash_a);
+
+	dent_b = *(struct gfs2_dirent **)b;
+	hash_b = dent_b->de_hash;
+	hash_b = gfs2_32_to_cpu(hash_b);
+
+	if (hash_a > hash_b)
+		ret = 1;
+	else if (hash_a < hash_b)
+		ret = -1;
+	else {
+		unsigned int len_a = dent_a->de_name_len;
+		unsigned int len_b = dent_b->de_name_len;
+
+		if (len_a > len_b)
+			ret = 1;
+		else if (len_a < len_b)
+			ret = -1;
+		else
+			ret = memcmp((char *)(dent_a + 1),
+				     (char *)(dent_b + 1),
+				     len_a);
+	}
+
+	return ret;
+}
+
+/**
+ * do_filldir_main - read out directory entries
+ * @dip: The GFS2 inode
+ * @offset: The offset in the file to read from
+ * @opaque: opaque data to pass to filldir
+ * @filldir: The function to pass entries to
+ * @darr: an array of struct gfs2_dirent pointers to read
+ * @entries: the number of entries in darr
+ * @copied: pointer to int that's non-zero if a entry has been copied out
+ *
+ * Jump through some hoops to make sure that if there are hash collsions,
+ * they are read out at the beginning of a buffer.  We want to minimize
+ * the possibility that they will fall into different readdir buffers or
+ * that someone will want to seek to that location.
+ *
+ * Returns: errno, >0 on exception from filldir
+ */
+
+static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
+			   void *opaque, gfs2_filldir_t filldir,
+			   struct gfs2_dirent **darr, uint32_t entries,
+			   int *copied)
+{
+	struct gfs2_dirent *dent, *dent_next;
+	struct gfs2_inum inum;
+	uint64_t off, off_next;
+	unsigned int x, y;
+	int run = FALSE;
+	int error = 0;
+
+	sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
+
+	dent_next = darr[0];
+	off_next = gfs2_32_to_cpu(dent_next->de_hash);
+	off_next = gfs2_disk_hash2offset(off_next);
+
+	for (x = 0, y = 1; x < entries; x++, y++) {
+		dent = dent_next;
+		off = off_next;
+
+		if (y < entries) {
+			dent_next = darr[y];
+			off_next = gfs2_32_to_cpu(dent_next->de_hash);
+			off_next = gfs2_disk_hash2offset(off_next);
+
+			if (off < *offset)
+				continue;
+			*offset = off;
+
+			if (off_next == off) {
+				if (*copied && !run)
+					return 1;
+				run = TRUE;
+			} else
+				run = FALSE;
+		} else {
+			if (off < *offset)
+				continue;
+			*offset = off;
+		}
+
+		gfs2_inum_in(&inum, (char *)&dent->de_inum);
+
+		error = filldir(opaque, (char *)(dent + 1),
+				dent->de_name_len,
+				off, &inum,
+				dent->de_type);
+		if (error)
+			return 1;
+
+		*copied = TRUE;
+	}
+
+	/* Increment the *offset by one, so the next time we come into the
+	   do_filldir fxn, we get the next entry instead of the last one in the
+	   current leaf */
+
+	(*offset)++;
+
+	return 0;
+}
+
+/**
+ * do_filldir_single - Read directory entries out of a single block
+ * @dip: The GFS2 inode
+ * @offset: The offset in the file to read from
+ * @opaque: opaque data to pass to filldir
+ * @filldir: The function to pass entries to
+ * @bh: the block
+ * @entries: the number of entries in the block
+ * @copied: pointer to int that's non-zero if a entry has been copied out
+ *
+ * Returns: errno, >0 on exception from filldir
+ */
+
+static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
+			     void *opaque, gfs2_filldir_t filldir,
+			     struct buffer_head *bh, uint32_t entries,
+			     int *copied)
+{
+	struct gfs2_dirent **darr;
+	struct gfs2_dirent *de;
+	unsigned int e = 0;
+	int error;
+
+	if (!entries)
+		return 0;
+
+	darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
+	if (!darr)
+		return -ENOMEM;
+
+	dirent_first(dip, bh, &de);
+	do {
+		if (!de->de_inum.no_addr)
+			continue;
+		if (e >= entries) {
+			gfs2_consist_inode(dip);
+			error = -EIO;
+			goto out;
+		}
+		darr[e++] = de;
+	}
+	while (dirent_next(dip, bh, &de) == 0);
+
+	if (e != entries) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = do_filldir_main(dip, offset, opaque, filldir, darr,
+				entries, copied);
+
+ out:
+	kfree(darr);
+
+	return error;
+}
+
+/**
+ * do_filldir_multi - Read directory entries out of a linked leaf list
+ * @dip: The GFS2 inode
+ * @offset: The offset in the file to read from
+ * @opaque: opaque data to pass to filldir
+ * @filldir: The function to pass entries to
+ * @bh: the first leaf in the list
+ * @copied: pointer to int that's non-zero if a entry has been copied out
+ *
+ * Returns: errno, >0 on exception from filldir
+ */
+
+static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
+			    void *opaque, gfs2_filldir_t filldir,
+			    struct buffer_head *bh, int *copied)
+{
+	struct buffer_head **larr = NULL;
+	struct gfs2_dirent **darr;
+	struct gfs2_leaf *leaf;
+	struct buffer_head *tmp_bh;
+	struct gfs2_dirent *de;
+	unsigned int entries, e = 0;
+	unsigned int leaves = 0, l = 0;
+	unsigned int x;
+	uint64_t ln;
+	int error = 0;
+
+	/*  Count leaves and entries  */
+
+	leaf = (struct gfs2_leaf *)bh->b_data;
+	entries = gfs2_16_to_cpu(leaf->lf_entries);
+	ln = leaf->lf_next;
+
+	while (ln) {
+		ln = gfs2_64_to_cpu(ln);
+
+		error = get_leaf(dip, ln, &tmp_bh);
+		if (error)
+			return error;
+
+		leaf = (struct gfs2_leaf *)tmp_bh->b_data;
+		if (leaf->lf_entries) {
+			entries += gfs2_16_to_cpu(leaf->lf_entries);
+			leaves++;
+		}
+		ln = leaf->lf_next;
+
+		brelse(tmp_bh);
+	}
+
+	if (!entries)
+		return 0;
+
+	if (leaves) {
+		larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
+		if (!larr)
+			return -ENOMEM;
+	}
+
+	darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
+	if (!darr) {
+		kfree(larr);
+		return -ENOMEM;
+	}
+
+	leaf = (struct gfs2_leaf *)bh->b_data;
+	if (leaf->lf_entries) {
+		dirent_first(dip, bh, &de);
+		do {
+			if (!de->de_inum.no_addr)
+				continue;
+			if (e >= entries) {
+				gfs2_consist_inode(dip);
+				error = -EIO;
+				goto out;
+			}
+			darr[e++] = de;
+		}
+		while (dirent_next(dip, bh, &de) == 0);
+	}
+	ln = leaf->lf_next;
+
+	while (ln) {
+		ln = gfs2_64_to_cpu(ln);
+
+		error = get_leaf(dip, ln, &tmp_bh);
+		if (error)
+			goto out;
+
+		leaf = (struct gfs2_leaf *)tmp_bh->b_data;
+		if (leaf->lf_entries) {
+			dirent_first(dip, tmp_bh, &de);
+			do {
+				if (!de->de_inum.no_addr)
+					continue;
+				if (e >= entries) {
+					gfs2_consist_inode(dip);
+					error = -EIO;
+					goto out;
+				}
+				darr[e++] = de;
+			}
+			while (dirent_next(dip, tmp_bh, &de) == 0);
+
+			larr[l++] = tmp_bh;
+
+			ln = leaf->lf_next;
+		} else {
+			ln = leaf->lf_next;
+			brelse(tmp_bh);
+		}
+	}
+
+	if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
+		error = -EIO;
+		goto out;
+	}
+	if (e != entries) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = do_filldir_main(dip, offset, opaque, filldir, darr,
+				entries, copied);
+
+ out:
+	kfree(darr);
+	for (x = 0; x < l; x++)
+		brelse(larr[x]);
+	kfree(larr);
+
+	return error;
+}
+
+/**
+ * dir_e_search - Search exhash (leaf) dir for inode matching name
+ * @dip: The GFS2 inode
+ * @filename: Filename string
+ * @inode: If non-NULL, function fills with formal inode # and block address
+ * @type: If non-NULL, function fills with DT_... dinode type
+ *
+ * Returns:
+ */
+
+static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename,
+			struct gfs2_inum *inum, unsigned int *type)
+{
+	struct buffer_head *bh;
+	struct gfs2_dirent *dent;
+	int error;
+
+	error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
+	if (error)
+		return error;
+
+	if (inum)
+		gfs2_inum_in(inum, (char *)&dent->de_inum);
+	if (type)
+		*type = dent->de_type;
+
+	brelse(bh);
+
+	return 0;
+}
+
+static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename,
+		     struct gfs2_inum *inum, unsigned int type)
+{
+	struct buffer_head *bh, *nbh, *dibh;
+	struct gfs2_leaf *leaf, *nleaf;
+	struct gfs2_dirent *dent;
+	uint32_t hsize, index;
+	uint32_t hash;
+	uint64_t leaf_no, bn;
+	int error;
+
+ restart:
+	hsize = 1 << dip->i_di.di_depth;
+	if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	/*  Figure out the address of the leaf node.  */
+
+	hash = gfs2_disk_hash(filename->name, filename->len);
+	index = hash >> (32 - dip->i_di.di_depth);
+
+	error = get_leaf_nr(dip, index, &leaf_no);
+	if (error)
+		return error;
+
+	/*  Add entry to the leaf  */
+
+	for (;;) {
+		error = get_leaf(dip, leaf_no, &bh);
+		if (error)
+			return error;
+
+		leaf = (struct gfs2_leaf *)bh->b_data;
+
+		if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) {
+
+			if (gfs2_16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
+				/* Can we split the leaf? */
+
+				brelse(bh);
+
+				error = dir_split_leaf(dip, index, leaf_no);
+				if (error)
+					return error;
+
+				goto restart;
+
+			} else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
+				/* Can we double the hash table? */
+
+				brelse(bh);
+
+				error = dir_double_exhash(dip);
+				if (error)
+					return error;
+
+				goto restart;
+
+			} else if (leaf->lf_next) {
+				/* Can we try the next leaf in the list? */
+				leaf_no = gfs2_64_to_cpu(leaf->lf_next);
+				brelse(bh);
+				continue;
+
+			} else {
+				/* Create a new leaf and add it to the list. */
+
+				bn = gfs2_alloc_meta(dip);
+
+				nbh = gfs2_meta_new(dip->i_gl, bn);
+				gfs2_trans_add_bh(dip->i_gl, nbh);
+				gfs2_metatype_set(nbh,
+						 GFS2_METATYPE_LF,
+						 GFS2_FORMAT_LF);
+				gfs2_buffer_clear_tail(nbh,
+					sizeof(struct gfs2_meta_header));
+
+				gfs2_trans_add_bh(dip->i_gl, bh);
+				leaf->lf_next = cpu_to_gfs2_64(bn);
+
+				nleaf = (struct gfs2_leaf *)nbh->b_data;
+				nleaf->lf_depth = leaf->lf_depth;
+				nleaf->lf_dirent_format = cpu_to_gfs2_32(GFS2_FORMAT_DE);
+
+				gfs2_dirent_alloc(dip, nbh, filename->len,
+						  &dent);
+
+				dip->i_di.di_blocks++;
+
+				brelse(bh);
+
+				bh = nbh;
+				leaf = nleaf;
+			}
+		}
+
+		/* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */
+
+		gfs2_inum_out(inum, (char *)&dent->de_inum);
+		dent->de_hash = cpu_to_gfs2_32(hash);
+		dent->de_type = type;
+		memcpy((char *)(dent + 1), filename->name, filename->len);
+
+		leaf->lf_entries = gfs2_16_to_cpu(leaf->lf_entries) + 1;
+		leaf->lf_entries = cpu_to_gfs2_16(leaf->lf_entries);
+
+		brelse(bh);
+
+		error = gfs2_meta_inode_buffer(dip, &dibh);
+		if (error)
+			return error;
+
+		dip->i_di.di_entries++;
+		dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+		gfs2_trans_add_bh(dip->i_gl, dibh);
+		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		brelse(dibh);
+
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename)
+{
+	struct buffer_head *bh, *dibh;
+	struct gfs2_dirent *dent, *prev;
+	struct gfs2_leaf *leaf;
+	unsigned int entries;
+	int error;
+
+	error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
+	if (error == -ENOENT) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+	if (error)
+		return error;
+
+	dirent_del(dip, bh, prev, dent); /* Pins bh */
+
+	leaf = (struct gfs2_leaf *)bh->b_data;
+	entries = gfs2_16_to_cpu(leaf->lf_entries);
+	if (!entries)
+		gfs2_consist_inode(dip);
+	entries--;
+	leaf->lf_entries = cpu_to_gfs2_16(entries);
+
+	brelse(bh);
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	if (!dip->i_di.di_entries)
+		gfs2_consist_inode(dip);
+	dip->i_di.di_entries--;
+	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+	gfs2_trans_add_bh(dip->i_gl, dibh);
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * dir_e_read - Reads the entries from a directory into a filldir buffer
+ * @dip: dinode pointer
+ * @offset: the hash of the last entry read shifted to the right once
+ * @opaque: buffer for the filldir function to fill
+ * @filldir: points to the filldir function to use
+ *
+ * Returns: errno
+ */
+
+static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
+		      gfs2_filldir_t filldir)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct buffer_head *bh;
+	struct gfs2_leaf leaf;
+	uint32_t hsize, len;
+	uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
+	uint32_t hash, index;
+	uint64_t *lp;
+	int copied = FALSE;
+	int error = 0;
+
+	hsize = 1 << dip->i_di.di_depth;
+	if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	hash = gfs2_dir_offset2hash(*offset);
+	index = hash >> (32 - dip->i_di.di_depth);
+
+	lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
+	if (!lp)
+		return -ENOMEM;
+
+	while (index < hsize) {
+		lp_offset = index & (sdp->sd_hash_ptrs - 1);
+		ht_offset = index - lp_offset;
+
+		if (ht_offset_cur != ht_offset) {
+			error = gfs2_jdata_read_mem(dip, (char *)lp,
+						ht_offset * sizeof(uint64_t),
+						sdp->sd_hash_bsize);
+			if (error != sdp->sd_hash_bsize) {
+				if (error >= 0)
+					error = -EIO;
+				goto out;
+			}
+			ht_offset_cur = ht_offset;
+		}
+
+		error = get_leaf(dip, gfs2_64_to_cpu(lp[lp_offset]), &bh);
+		if (error)
+			goto out;
+
+		gfs2_leaf_in(&leaf, bh->b_data);
+
+		if (leaf.lf_next)
+			error = do_filldir_multi(dip, offset, opaque, filldir,
+						 bh, &copied);
+		else
+			error = do_filldir_single(dip, offset, opaque, filldir,
+						  bh, leaf.lf_entries, &copied);
+
+		brelse(bh);
+
+		if (error) {
+			if (error > 0)
+				error = 0;
+			goto out;
+		}
+
+		len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
+		index = (index & ~(len - 1)) + len;
+	}
+
+ out:
+	kfree(lp);
+
+	return error;
+}
+
+static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename,
+		       struct gfs2_inum *inum, unsigned int new_type)
+{
+	struct buffer_head *bh, *dibh;
+	struct gfs2_dirent *dent;
+	int error;
+
+	error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
+	if (error == -ENOENT) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+	if (error)
+		return error;
+
+	gfs2_trans_add_bh(dip->i_gl, bh);
+
+	gfs2_inum_out(inum, (char *)&dent->de_inum);
+	dent->de_type = new_type;
+
+	brelse(bh);
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+	gfs2_trans_add_bh(dip->i_gl, dibh);
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return 0;
+}
+
+/**
+ * dir_l_search - Search linear (stuffed dinode) dir for inode matching name
+ * @dip: The GFS2 inode
+ * @filename: Filename string
+ * @inode: If non-NULL, function fills with formal inode # and block address
+ * @type: If non-NULL, function fills with DT_... dinode type
+ *
+ * Returns:
+ */
+
+static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename,
+			struct gfs2_inum *inum, unsigned int *type)
+{
+	struct buffer_head *dibh;
+	struct gfs2_dirent *dent;
+	int error;
+
+	if (!gfs2_is_stuffed(dip)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	error = leaf_search(dip, dibh, filename, &dent, NULL);
+	if (!error) {
+		if (inum)
+			gfs2_inum_in(inum, (char *)&dent->de_inum);
+		if (type)
+			*type = dent->de_type;
+	}
+
+	brelse(dibh);
+
+	return error;
+}
+
+static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename,
+		     struct gfs2_inum *inum, unsigned int type)
+{
+	struct buffer_head *dibh;
+	struct gfs2_dirent *dent;
+	int error;
+
+	if (!gfs2_is_stuffed(dip)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) {
+		brelse(dibh);
+
+		error = dir_make_exhash(dip);
+		if (!error)
+			error = dir_e_add(dip, filename, inum, type);
+
+		return error;
+	}
+
+	/*  gfs2_dirent_alloc() pins  */
+
+	gfs2_inum_out(inum, (char *)&dent->de_inum);
+	dent->de_hash = gfs2_disk_hash(filename->name, filename->len);
+	dent->de_hash = cpu_to_gfs2_32(dent->de_hash);
+	dent->de_type = type;
+	memcpy((char *)(dent + 1), filename->name, filename->len);
+
+	dip->i_di.di_entries++;
+	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	return 0;
+}
+
+static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename)
+{
+	struct buffer_head *dibh;
+	struct gfs2_dirent *dent, *prev;
+	int error;
+
+	if (!gfs2_is_stuffed(dip)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	error = leaf_search(dip, dibh, filename, &dent, &prev);
+	if (error == -ENOENT) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+		goto out;
+	}
+	if (error)
+		goto out;
+
+	dirent_del(dip, dibh, prev, dent);
+
+	/*  dirent_del() pins  */
+
+	if (!dip->i_di.di_entries)
+		gfs2_consist_inode(dip);
+	dip->i_di.di_entries--;
+
+	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+
+ out:
+	brelse(dibh);
+
+	return error;
+}
+
+static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
+		      gfs2_filldir_t filldir)
+{
+	struct buffer_head *dibh;
+	int copied = FALSE;
+	int error;
+
+	if (!gfs2_is_stuffed(dip)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	if (!dip->i_di.di_entries)
+		return 0;
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	error = do_filldir_single(dip, offset,
+				  opaque, filldir,
+				  dibh, dip->i_di.di_entries,
+				  &copied);
+	if (error > 0)
+		error = 0;
+
+	brelse(dibh);
+
+	return error;
+}
+
+static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename,
+		       struct gfs2_inum *inum, unsigned int new_type)
+{
+	struct buffer_head *dibh;
+	struct gfs2_dirent *dent;
+	int error;
+
+	if (!gfs2_is_stuffed(dip)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		return error;
+
+	error = leaf_search(dip, dibh, filename, &dent, NULL);
+	if (error == -ENOENT) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+		goto out;
+	}
+	if (error)
+		goto out;
+
+	gfs2_trans_add_bh(dip->i_gl, dibh);
+
+	gfs2_inum_out(inum, (char *)&dent->de_inum);
+	dent->de_type = new_type;
+
+	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+
+ out:
+	brelse(dibh);
+
+	return error;
+}
+
+/**
+ * gfs2_dir_search - Search a directory
+ * @dip: The GFS2 inode
+ * @filename:
+ * @inode:
+ *
+ * This routine searches a directory for a file or another directory.
+ * Assumes a glock is held on dip.
+ *
+ * Returns: errno
+ */
+
+int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
+		    struct gfs2_inum *inum, unsigned int *type)
+{
+	int error;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = dir_e_search(dip, filename, inum, type);
+	else
+		error = dir_l_search(dip, filename, inum, type);
+
+	return error;
+}
+
+/**
+ * gfs2_dir_add - Add new filename into directory
+ * @dip: The GFS2 inode
+ * @filename: The new name
+ * @inode: The inode number of the entry
+ * @type: The type of the entry
+ *
+ * Returns: 0 on success, error code on failure
+ */
+
+int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
+		 struct gfs2_inum *inum, unsigned int type)
+{
+	int error;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = dir_e_add(dip, filename, inum, type);
+	else
+		error = dir_l_add(dip, filename, inum, type);
+
+	return error;
+}
+
+/**
+ * gfs2_dir_del - Delete a directory entry
+ * @dip: The GFS2 inode
+ * @filename: The filename
+ *
+ * Returns: 0 on success, error code on failure
+ */
+
+int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename)
+{
+	int error;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = dir_e_del(dip, filename);
+	else
+		error = dir_l_del(dip, filename);
+
+	return error;
+}
+
+int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
+		  gfs2_filldir_t filldir)
+{
+	int error;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = dir_e_read(dip, offset, opaque, filldir);
+	else
+		error = dir_l_read(dip, offset, opaque, filldir);
+
+	return error;
+}
+
+/**
+ * gfs2_dir_mvino - Change inode number of directory entry
+ * @dip: The GFS2 inode
+ * @filename:
+ * @new_inode:
+ *
+ * This routine changes the inode number of a directory entry.  It's used
+ * by rename to change ".." when a directory is moved.
+ * Assumes a glock is held on dvp.
+ *
+ * Returns: errno
+ */
+
+int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
+		   struct gfs2_inum *inum, unsigned int new_type)
+{
+	int error;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = dir_e_mvino(dip, filename, inum, new_type);
+	else
+		error = dir_l_mvino(dip, filename, inum, new_type);
+
+	return error;
+}
+
+/**
+ * foreach_leaf - call a function for each leaf in a directory
+ * @dip: the directory
+ * @lc: the function to call for each each
+ * @data: private data to pass to it
+ *
+ * Returns: errno
+ */
+
+static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct buffer_head *bh;
+	struct gfs2_leaf leaf;
+	uint32_t hsize, len;
+	uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
+	uint32_t index = 0;
+	uint64_t *lp;
+	uint64_t leaf_no;
+	int error = 0;
+
+	hsize = 1 << dip->i_di.di_depth;
+	if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
+
+	lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
+	if (!lp)
+		return -ENOMEM;
+
+	while (index < hsize) {
+		lp_offset = index & (sdp->sd_hash_ptrs - 1);
+		ht_offset = index - lp_offset;
+
+		if (ht_offset_cur != ht_offset) {
+			error = gfs2_jdata_read_mem(dip, (char *)lp,
+						ht_offset * sizeof(uint64_t),
+						sdp->sd_hash_bsize);
+			if (error != sdp->sd_hash_bsize) {
+				if (error >= 0)
+					error = -EIO;
+				goto out;
+			}
+			ht_offset_cur = ht_offset;
+		}
+
+		leaf_no = gfs2_64_to_cpu(lp[lp_offset]);
+		if (leaf_no) {
+			error = get_leaf(dip, leaf_no, &bh);
+			if (error)
+				goto out;
+			gfs2_leaf_in(&leaf, bh->b_data);
+			brelse(bh);
+
+			len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
+
+			error = lc(dip, index, len, leaf_no, data);
+			if (error)
+				goto out;
+
+			index = (index & ~(len - 1)) + len;
+		} else
+			index++;
+	}
+
+	if (index != hsize) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+	}
+
+ out:
+	kfree(lp);
+
+	return error;
+}
+
+/**
+ * leaf_dealloc - Deallocate a directory leaf
+ * @dip: the directory
+ * @index: the hash table offset in the directory
+ * @len: the number of pointers to this leaf
+ * @leaf_no: the leaf number
+ * @data: not used
+ *
+ * Returns: errno
+ */
+
+static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
+			uint64_t leaf_no, void *data)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_leaf tmp_leaf;
+	struct gfs2_rgrp_list rlist;
+	struct buffer_head *bh, *dibh;
+	uint64_t blk;
+	unsigned int rg_blocks = 0, l_blocks = 0;
+	char *ht;
+	unsigned int x, size = len * sizeof(uint64_t);
+	int error;
+
+	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
+
+	ht = kzalloc(size, GFP_KERNEL);
+	if (!ht)
+		return -ENOMEM;
+
+	gfs2_alloc_get(dip);
+
+	error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
+	if (error)
+		goto out_qs;
+
+	/*  Count the number of leaves  */
+
+	for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
+		error = get_leaf(dip, blk, &bh);
+		if (error)
+			goto out_rlist;
+		gfs2_leaf_in(&tmp_leaf, (bh)->b_data);
+		brelse(bh);
+
+		gfs2_rlist_add(sdp, &rlist, blk);
+		l_blocks++;
+	}
+
+	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
+
+	for (x = 0; x < rlist.rl_rgrps; x++) {
+		struct gfs2_rgrpd *rgd;
+		rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
+		rg_blocks += rgd->rd_ri.ri_length;
+	}
+
+	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
+	if (error)
+		goto out_rlist;
+
+	error = gfs2_trans_begin(sdp,
+			rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) +
+			RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
+	if (error)
+		goto out_rg_gunlock;
+
+	for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
+		error = get_leaf(dip, blk, &bh);
+		if (error)
+			goto out_end_trans;
+		gfs2_leaf_in(&tmp_leaf, bh->b_data);
+		brelse(bh);
+
+		gfs2_free_meta(dip, blk, 1);
+
+		if (!dip->i_di.di_blocks)
+			gfs2_consist_inode(dip);
+		dip->i_di.di_blocks--;
+	}
+
+	error = gfs2_jdata_write_mem(dip, ht, index * sizeof(uint64_t), size);
+	if (error != size) {
+		if (error >= 0)
+			error = -EIO;
+		goto out_end_trans;
+	}
+
+	error = gfs2_meta_inode_buffer(dip, &dibh);
+	if (error)
+		goto out_end_trans;
+
+	gfs2_trans_add_bh(dip->i_gl, dibh);
+	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	brelse(dibh);
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_rg_gunlock:
+	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
+
+ out_rlist:
+	gfs2_rlist_free(&rlist);
+	gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
+
+ out_qs:
+	gfs2_quota_unhold(dip);
+
+ out:
+	gfs2_alloc_put(dip);
+	kfree(ht);
+
+	return error;
+}
+
+/**
+ * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
+ * @dip: the directory
+ *
+ * Dealloc all on-disk directory leaves to FREEMETA state
+ * Change on-disk inode type to "regular file"
+ *
+ * Returns: errno
+ */
+
+int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct buffer_head *bh;
+	int error;
+
+	/* Dealloc on-disk leaves to FREEMETA state */
+	error = foreach_leaf(dip, leaf_dealloc, NULL);
+	if (error)
+		return error;
+
+	/* Make this a regular file in case we crash.
+	   (We don't want to free these blocks a second time.)  */
+
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(dip, &bh);
+	if (!error) {
+		gfs2_trans_add_bh(dip->i_gl, bh);
+		((struct gfs2_dinode *)bh->b_data)->di_mode = cpu_to_gfs2_32(S_IFREG);
+		brelse(bh);
+	}
+
+	gfs2_trans_end(sdp);
+
+	return error;
+}
+
+/**
+ * gfs2_diradd_alloc_required - find if adding entry will require an allocation
+ * @ip: the file being written to
+ * @filname: the filename that's going to be added
+ * @alloc_required: set to TRUE if an alloc is required, FALSE otherwise
+ *
+ * Returns: errno
+ */
+
+int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
+			       int *alloc_required)
+{
+	struct buffer_head *bh = NULL, *bh_next;
+	uint32_t hsize, hash, index;
+	int error = 0;
+
+	*alloc_required = FALSE;
+
+	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
+		hsize = 1 << dip->i_di.di_depth;
+		if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
+			gfs2_consist_inode(dip);
+			return -EIO;
+		}
+
+		hash = gfs2_disk_hash(filename->name, filename->len);
+		index = hash >> (32 - dip->i_di.di_depth);
+
+		error = get_first_leaf(dip, index, &bh_next);
+		if (error)
+			return error;
+
+		do {
+			brelse(bh);
+
+			bh = bh_next;
+
+			if (dirent_fits(dip, bh, filename->len))
+				break;
+
+			error = get_next_leaf(dip, bh, &bh_next);
+			if (error == -ENOENT) {
+				*alloc_required = TRUE;
+				error = 0;
+				break;
+			}
+		}
+		while (!error);
+
+		brelse(bh);
+	} else {
+		error = gfs2_meta_inode_buffer(dip, &bh);
+		if (error)
+			return error;
+
+		if (!dirent_fits(dip, bh, filename->len))
+			*alloc_required = TRUE;
+
+		brelse(bh);
+	}
+
+	return error;
+}
+
+/**
+ * do_gdm - copy out one leaf (or list of leaves)
+ * @dip: the directory
+ * @index: the hash table offset in the directory
+ * @len: the number of pointers to this leaf
+ * @leaf_no: the leaf number
+ * @data: a pointer to a struct gfs2_user_buffer structure
+ *
+ * Returns: errno
+ */
+
+static int do_gdm(struct gfs2_inode *dip, uint32_t index, uint32_t len,
+		  uint64_t leaf_no, void *data)
+{
+	struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data;
+	struct gfs2_leaf leaf;
+	struct buffer_head *bh;
+	uint64_t blk;
+	int error = 0;
+
+	for (blk = leaf_no; blk; blk = leaf.lf_next) {
+		error = get_leaf(dip, blk, &bh);
+		if (error)
+			break;
+
+		gfs2_leaf_in(&leaf, bh->b_data);
+
+		error = gfs2_add_bh_to_ub(ub, bh);
+
+		brelse(bh);
+
+		if (error)
+			break;
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_get_dir_meta - return all the leaf blocks of a directory
+ * @dip: the directory
+ * @ub: the structure representing the meta
+ *
+ * Returns: errno
+ */
+
+int gfs2_get_dir_meta(struct gfs2_inode *dip, struct gfs2_user_buffer *ub)
+{
+	return foreach_leaf(dip, do_gdm, ub);
+}
+
--- a/fs/gfs2/dir.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/dir.h	2005-09-01 17:36:55.180135992 +0800
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __DIR_DOT_H__
+#define __DIR_DOT_H__
+
+/**
+ * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
+ * @opaque: opaque data used by the function
+ * @name: the name of the directory entry
+ * @length: the length of the name
+ * @offset: the entry's offset in the directory
+ * @inum: the inode number the entry points to
+ * @type: the type of inode the entry points to
+ *
+ * Returns: 0 on success, 1 if buffer full
+ */
+
+typedef int (*gfs2_filldir_t) (void *opaque,
+			      const char *name, unsigned int length,
+			      uint64_t offset,
+			      struct gfs2_inum *inum, unsigned int type);
+
+int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2);
+int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
+		     int name_len, struct gfs2_dirent **dent_out);
+
+int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
+		   struct gfs2_inum *inum, unsigned int *type);
+int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
+		struct gfs2_inum *inum, unsigned int type);
+int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename);
+int gfs2_dir_read(struct gfs2_inode *dip, uint64_t * offset, void *opaque,
+		 gfs2_filldir_t filldir);
+int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
+		  struct gfs2_inum *new_inum, unsigned int new_type);
+
+int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
+
+int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
+			      int *alloc_required);
+
+int gfs2_get_dir_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);
+
+#endif /* __DIR_DOT_H__ */

[PATCH 04/13] GFS: allocation

Code that manages block allocation.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/bits.c |  179 +++++++
 fs/gfs2/bits.h |   28 +
 fs/gfs2/rgrp.c | 1374 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/rgrp.h |   62 ++
 4 files changed, 1643 insertions(+)

--- a/fs/gfs2/rgrp.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/rgrp.c	2005-09-01 17:36:55.478090696 +0800
@@ -0,0 +1,1374 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bits.h"
+#include "glock.h"
+#include "glops.h"
+#include "jdata.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "super.h"
+#include "trans.h"
+
+/**
+ * gfs2_rgrp_verify - Verify that a resource group is consistent
+ * @sdp: the filesystem
+ * @rgd: the rgrp
+ *
+ */
+
+void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	struct gfs2_bitmap *bi = NULL;
+	uint32_t length = rgd->rd_ri.ri_length;
+	uint32_t count[4], tmp;
+	int buf, x;
+
+	memset(count, 0, 4 * sizeof(uint32_t));
+
+	/* Count # blocks in each of 4 possible allocation states */
+	for (buf = 0; buf < length; buf++) {
+		bi = rgd->rd_bits + buf;
+		for (x = 0; x < 4; x++)
+			count[x] += gfs2_bitcount(rgd,
+						  bi->bi_bh->b_data +
+						  bi->bi_offset,
+						  bi->bi_len, x);
+	}
+
+	if (count[0] != rgd->rd_rg.rg_free) {
+		if (gfs2_consist_rgrpd(rgd))
+			fs_err(sdp, "free data mismatch:  %u != %u\n",
+			       count[0], rgd->rd_rg.rg_free);
+		return;
+	}
+
+	tmp = rgd->rd_ri.ri_data -
+		rgd->rd_rg.rg_free -
+		rgd->rd_rg.rg_dinodes;
+	if (count[1] != tmp) {
+		if (gfs2_consist_rgrpd(rgd))
+			fs_err(sdp, "used data mismatch:  %u != %u\n",
+			       count[1], tmp);
+		return;
+	}
+
+	if (count[2]) {
+		if (gfs2_consist_rgrpd(rgd))
+			fs_err(sdp, "free metadata mismatch:  %u != 0\n",
+			       count[2]);
+		return;
+	}
+
+	if (count[3] != rgd->rd_rg.rg_dinodes) {
+		if (gfs2_consist_rgrpd(rgd))
+			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
+			       count[3], rgd->rd_rg.rg_dinodes);
+		return;
+	}
+}
+
+static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block)
+{
+	uint64_t first = ri->ri_data0;
+	uint64_t last = first + ri->ri_data;
+	return !!(first <= block && block < last);
+}
+
+/**
+ * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
+ * @sdp: The GFS2 superblock
+ * @n: The data block number
+ *
+ * Returns: The resource group, or NULL if not found
+ */
+
+struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
+{
+	struct gfs2_rgrpd *rgd;
+
+	spin_lock(&sdp->sd_rindex_spin);
+
+	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
+		if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+			spin_unlock(&sdp->sd_rindex_spin);
+			return rgd;
+		}
+	}
+
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return NULL;
+}
+
+/**
+ * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: The first rgrp in the filesystem
+ */
+
+struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
+{
+	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list),);
+	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
+}
+
+/**
+ * gfs2_rgrpd_get_next - get the next RG
+ * @rgd: A RG
+ *
+ * Returns: The next rgrp
+ */
+
+struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
+{
+	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
+		return NULL;
+	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
+}
+
+static void clear_rgrpdi(struct gfs2_sbd *sdp)
+{
+	struct list_head *head;
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_glock *gl;
+
+	spin_lock(&sdp->sd_rindex_spin);
+	sdp->sd_rindex_forward = NULL;
+	head = &sdp->sd_rindex_recent_list;
+	while (!list_empty(head)) {
+		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
+		list_del(&rgd->rd_recent);
+	}
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	head = &sdp->sd_rindex_list;
+	while (!list_empty(head)) {
+		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
+		gl = rgd->rd_gl;
+
+		list_del(&rgd->rd_list);
+		list_del(&rgd->rd_list_mru);
+
+		if (gl) {
+			set_gl2rgd(gl, NULL);
+			gfs2_glock_put(gl);
+		}
+
+		kfree(rgd->rd_bits);
+		kfree(rgd);
+	}
+}
+
+void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
+{
+	down(&sdp->sd_rindex_mutex);
+	clear_rgrpdi(sdp);
+	up(&sdp->sd_rindex_mutex);
+}
+
+/**
+ * gfs2_compute_bitstructs - Compute the bitmap sizes
+ * @rgd: The resource group descriptor
+ *
+ * Calculates bitmap descriptors, one for each block that contains bitmap data
+ *
+ * Returns: errno
+ */
+
+static int compute_bitstructs(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	struct gfs2_bitmap *bi;
+	uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+	uint32_t bytes_left, bytes;
+	int x;
+
+	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL);
+	if (!rgd->rd_bits)
+		return -ENOMEM;
+
+	bytes_left = rgd->rd_ri.ri_bitbytes;
+
+	for (x = 0; x < length; x++) {
+		bi = rgd->rd_bits + x;
+
+		/* small rgrp; bitmap stored completely in header block */
+		if (length == 1) {
+			bytes = bytes_left;
+			bi->bi_offset = sizeof(struct gfs2_rgrp);
+			bi->bi_start = 0;
+			bi->bi_len = bytes;
+		/* header block */
+		} else if (x == 0) {
+			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
+			bi->bi_offset = sizeof(struct gfs2_rgrp);
+			bi->bi_start = 0;
+			bi->bi_len = bytes;
+		/* last block */
+		} else if (x + 1 == length) {
+			bytes = bytes_left;
+			bi->bi_offset = sizeof(struct gfs2_meta_header);
+			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_len = bytes;
+		/* other blocks */
+		} else {
+			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
+			bi->bi_offset = sizeof(struct gfs2_meta_header);
+			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_len = bytes;
+		}
+
+		bytes_left -= bytes;
+	}
+
+	if (bytes_left) {
+		gfs2_consist_rgrpd(rgd);
+		return -EIO;
+	}
+	bi = rgd->rd_bits + (length - 1);
+	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+		if (gfs2_consist_rgrpd(rgd)) {
+			gfs2_rindex_print(&rgd->rd_ri);
+			fs_err(sdp, "start=%u len=%u offset=%u\n",
+			       bi->bi_start, bi->bi_len, bi->bi_offset);
+		}
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @gl: The glock covering the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+
+static int gfs2_ri_update(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrpd *rgd;
+	char buf[sizeof(struct gfs2_rindex)];
+	uint64_t junk = ip->i_di.di_size;
+	int error;
+
+	if (do_div(junk, sizeof(struct gfs2_rindex))) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+
+	clear_rgrpdi(sdp);
+
+	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+		error = gfs2_jdata_read_mem(ip, buf,
+					    sdp->sd_rgrps *
+					    sizeof(struct gfs2_rindex),
+					    sizeof(struct gfs2_rindex));
+		if (!error)
+			break;
+		if (error != sizeof(struct gfs2_rindex)) {
+			if (error > 0)
+				error = -EIO;
+			goto fail;
+		}
+
+		rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL);
+		error = -ENOMEM;
+		if (!rgd)
+			goto fail;
+
+		init_MUTEX(&rgd->rd_mutex);
+		lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+		rgd->rd_sbd = sdp;
+
+		list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+		list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+
+		gfs2_rindex_in(&rgd->rd_ri, buf);
+
+		error = compute_bitstructs(rgd);
+		if (error)
+			goto fail;
+
+		error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
+				       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+		if (error)
+			goto fail;
+
+		set_gl2rgd(rgd->rd_gl, rgd);
+		rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+	}
+
+	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
+
+	return 0;
+
+ fail:
+	clear_rgrpdi(sdp);
+
+	return error;
+}
+
+/**
+ * gfs2_rindex_hold - Grab a lock on the rindex
+ * @sdp: The GFS2 superblock
+ * @ri_gh: the glock holder
+ *
+ * We grab a lock on the rindex inode to make sure that it doesn't
+ * change whilst we are performing an operation. We keep this lock
+ * for quite long periods of time compared to other locks. This
+ * doesn't matter, since it is shared and it is very, very rarely
+ * accessed in the exclusive mode (i.e. only when expanding the filesystem).
+ *
+ * This makes sure that we're using the latest copy of the resource index
+ * special file, which might have been updated if someone expanded the
+ * filesystem (via gfs2_grow utility), which adds new resource groups.
+ *
+ * Returns: 0 on success, error code otherwise
+ */
+
+int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
+{
+	struct gfs2_inode *ip = sdp->sd_rindex;
+	struct gfs2_glock *gl = ip->i_gl;
+	int error;
+
+	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
+	if (error)
+		return error;
+
+	/* Read new copy from disk if we don't have the latest */
+	if (sdp->sd_rindex_vn != gl->gl_vn) {
+		down(&sdp->sd_rindex_mutex);
+		if (sdp->sd_rindex_vn != gl->gl_vn) {
+			error = gfs2_ri_update(ip);
+			if (error)
+				gfs2_glock_dq_uninit(ri_gh);
+		}
+		up(&sdp->sd_rindex_mutex);
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
+ *
+ * Read in all of a Resource Group's header and bitmap blocks.
+ * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ *
+ * Returns: errno
+ */
+
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	struct gfs2_glock *gl = rgd->rd_gl;
+	unsigned int length = rgd->rd_ri.ri_length;
+	struct gfs2_bitmap *bi;
+	unsigned int x, y;
+	int error;
+
+	down(&rgd->rd_mutex);
+
+	spin_lock(&sdp->sd_rindex_spin);
+	if (rgd->rd_bh_count) {
+		rgd->rd_bh_count++;
+		spin_unlock(&sdp->sd_rindex_spin);
+		up(&rgd->rd_mutex);
+		return 0;
+	}
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	for (x = 0; x < length; x++) {
+		bi = rgd->rd_bits + x;
+		error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START,
+				       &bi->bi_bh);
+		if (error)
+			goto fail;
+	}
+
+	for (y = length; y--;) {
+		bi = rgd->rd_bits + y;
+		error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT);
+		if (error)
+			goto fail;
+		if (gfs2_metatype_check(sdp, bi->bi_bh,
+					(y) ? GFS2_METATYPE_RB :
+					      GFS2_METATYPE_RG)) {
+			error = -EIO;
+			goto fail;
+		}
+	}
+
+	if (rgd->rd_rg_vn != gl->gl_vn) {
+		gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
+		rgd->rd_rg_vn = gl->gl_vn;
+	}
+
+	spin_lock(&sdp->sd_rindex_spin);
+	rgd->rd_free_clone = rgd->rd_rg.rg_free;
+	rgd->rd_bh_count++;
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	up(&rgd->rd_mutex);
+
+	return 0;
+
+ fail:
+	while (x--) {
+		bi = rgd->rd_bits + x;
+		brelse(bi->bi_bh);
+		bi->bi_bh = NULL;
+		gfs2_assert_warn(sdp, !bi->bi_clone);
+	}
+	up(&rgd->rd_mutex);
+
+	return error;
+}
+
+void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+
+	spin_lock(&sdp->sd_rindex_spin);
+	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
+	rgd->rd_bh_count++;
+	spin_unlock(&sdp->sd_rindex_spin);
+}
+
+/**
+ * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
+ *
+ */
+
+void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	int x, length = rgd->rd_ri.ri_length;
+
+	spin_lock(&sdp->sd_rindex_spin);
+	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
+	if (--rgd->rd_bh_count) {
+		spin_unlock(&sdp->sd_rindex_spin);
+		return;
+	}
+
+	for (x = 0; x < length; x++) {
+		struct gfs2_bitmap *bi = rgd->rd_bits + x;
+		kfree(bi->bi_clone);
+		bi->bi_clone = NULL;
+		brelse(bi->bi_bh);
+		bi->bi_bh = NULL;
+	}
+
+	spin_unlock(&sdp->sd_rindex_spin);
+}
+
+void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	unsigned int length = rgd->rd_ri.ri_length;
+	unsigned int x;
+
+	for (x = 0; x < length; x++) {
+		struct gfs2_bitmap *bi = rgd->rd_bits + x;
+		if (!bi->bi_clone)
+			continue;
+		memcpy(bi->bi_clone + bi->bi_offset,
+		       bi->bi_bh->b_data + bi->bi_offset,
+		       bi->bi_len);
+	}
+
+	spin_lock(&sdp->sd_rindex_spin);
+	rgd->rd_free_clone = rgd->rd_rg.rg_free;
+	spin_unlock(&sdp->sd_rindex_spin);
+}
+
+/**
+ * gfs2_alloc_get - allocate a struct gfs2_alloc structure for an inode
+ * @ip: the incore GFS2 inode structure
+ *
+ * FIXME: Don't use NOFAIL
+ *
+ * Returns: the struct gfs2_alloc
+ */
+
+struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
+{
+	struct gfs2_alloc *al = ip->i_alloc;
+
+	gfs2_assert_warn(ip->i_sbd, !al);
+
+	al = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL | __GFP_NOFAIL);
+
+	ip->i_alloc = al;
+
+	return al;
+}
+
+/**
+ * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode
+ * @ip: the inode
+ *
+ */
+
+void gfs2_alloc_put(struct gfs2_inode *ip)
+{
+	struct gfs2_alloc *al = ip->i_alloc;
+
+	if (gfs2_assert_warn(ip->i_sbd, al))
+		return;
+
+	ip->i_alloc = NULL;
+	kfree(al);
+}
+
+/**
+ * try_rgrp_fit - See if a given reservation will fit in a given RG
+ * @rgd: the RG data
+ * @al: the struct gfs2_alloc structure describing the reservation
+ *
+ * If there's room for the requested blocks to be allocated from the RG:
+ *   Sets the $al_reserved_data field in @al.
+ *   Sets the $al_reserved_meta field in @al.
+ *   Sets the $al_rgd field in @al.
+ *
+ * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
+ */
+
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	int ret = 0;
+
+	spin_lock(&sdp->sd_rindex_spin);
+	if (rgd->rd_free_clone >= al->al_requested) {
+		al->al_rgd = rgd;
+		ret = 1;
+	}
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return ret;
+}
+
+/**
+ * recent_rgrp_first - get first RG from "recent" list
+ * @sdp: The GFS2 superblock
+ * @rglast: address of the rgrp used last
+ *
+ * Returns: The first rgrp in the recent list
+ */
+
+static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
+					    uint64_t rglast)
+{
+	struct gfs2_rgrpd *rgd = NULL;
+
+	spin_lock(&sdp->sd_rindex_spin);
+
+	if (list_empty(&sdp->sd_rindex_recent_list))
+		goto out;
+
+	if (!rglast)
+		goto first;
+
+	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
+		if (rgd->rd_ri.ri_addr == rglast)
+			goto out;
+	}
+
+ first:
+	rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
+			 rd_recent);
+
+ out:
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return rgd;
+}
+
+/**
+ * recent_rgrp_next - get next RG from "recent" list
+ * @cur_rgd: current rgrp
+ * @remove:
+ *
+ * Returns: The next rgrp in the recent list
+ */
+
+static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
+					   int remove)
+{
+	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
+	struct list_head *head;
+	struct gfs2_rgrpd *rgd;
+
+	spin_lock(&sdp->sd_rindex_spin);
+
+	head = &sdp->sd_rindex_recent_list;
+
+	list_for_each_entry(rgd, head, rd_recent) {
+		if (rgd == cur_rgd) {
+			if (cur_rgd->rd_recent.next != head)
+				rgd = list_entry(cur_rgd->rd_recent.next,
+						 struct gfs2_rgrpd, rd_recent);
+			else
+				rgd = NULL;
+
+			if (remove)
+				list_del(&cur_rgd->rd_recent);
+
+			goto out;
+		}
+	}
+
+	rgd = NULL;
+	if (!list_empty(head))
+		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
+
+ out:
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return rgd;
+}
+
+/**
+ * recent_rgrp_add - add an RG to tail of "recent" list
+ * @new_rgd: The rgrp to add
+ *
+ */
+
+static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
+{
+	struct gfs2_sbd *sdp = new_rgd->rd_sbd;
+	struct gfs2_rgrpd *rgd;
+	unsigned int count = 0;
+	unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
+
+	spin_lock(&sdp->sd_rindex_spin);
+
+	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
+		if (rgd == new_rgd)
+			goto out;
+
+		if (++count >= max)
+			goto out;
+	}
+	list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
+
+ out:
+	spin_unlock(&sdp->sd_rindex_spin);
+}
+
+/**
+ * forward_rgrp_get - get an rgrp to try next from full list
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: The rgrp to try next
+ */
+
+static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
+{
+	struct gfs2_rgrpd *rgd;
+	unsigned int journals = gfs2_jindex_size(sdp);
+	unsigned int rg = 0, x;
+
+	spin_lock(&sdp->sd_rindex_spin);
+
+	rgd = sdp->sd_rindex_forward;
+	if (!rgd) {
+		if (sdp->sd_rgrps >= journals)
+			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
+
+		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp);
+		     x < rg;
+		     x++, rgd = gfs2_rgrpd_get_next(rgd))
+			/* Do Nothing */;
+
+		sdp->sd_rindex_forward = rgd;
+	}
+
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return rgd;
+}
+
+/**
+ * forward_rgrp_set - set the forward rgrp pointer
+ * @sdp: the filesystem
+ * @rgd: The new forward rgrp
+ *
+ */
+
+static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
+{
+	spin_lock(&sdp->sd_rindex_spin);
+	sdp->sd_rindex_forward = rgd;
+	spin_unlock(&sdp->sd_rindex_spin);
+}
+
+/**
+ * get_local_rgrp - Choose and lock a rgrp for allocation
+ * @ip: the inode to reserve space for
+ * @rgp: the chosen and locked rgrp
+ *
+ * Try to acquire rgrp in way which avoids contending with others.
+ *
+ * Returns: errno
+ */
+
+static int get_local_rgrp(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrpd *rgd, *begin = NULL;
+	struct gfs2_alloc *al = ip->i_alloc;
+	int flags = LM_FLAG_TRY;
+	int skipped = 0;
+	int loops = 0;
+	int error;
+
+	/* Try recently successful rgrps */
+
+	rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
+
+	while (rgd) {
+		error = gfs2_glock_nq_init(rgd->rd_gl,
+					  LM_ST_EXCLUSIVE, LM_FLAG_TRY,
+					  &al->al_rgd_gh);
+		switch (error) {
+		case 0:
+			if (try_rgrp_fit(rgd, al))
+				goto out;
+			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			rgd = recent_rgrp_next(rgd, TRUE);
+			break;
+
+		case GLR_TRYFAILED:
+			rgd = recent_rgrp_next(rgd, FALSE);
+			break;
+
+		default:
+			return error;
+		}
+	}
+
+	/* Go through full list of rgrps */
+
+	begin = rgd = forward_rgrp_get(sdp);
+
+	for (;;) {
+		error = gfs2_glock_nq_init(rgd->rd_gl,
+					  LM_ST_EXCLUSIVE, flags,
+					  &al->al_rgd_gh);
+		switch (error) {
+		case 0:
+			if (try_rgrp_fit(rgd, al))
+				goto out;
+			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			break;
+
+		case GLR_TRYFAILED:
+			skipped++;
+			break;
+
+		default:
+			return error;
+		}
+
+		rgd = gfs2_rgrpd_get_next(rgd);
+		if (!rgd)
+			rgd = gfs2_rgrpd_get_first(sdp);
+
+		if (rgd == begin) {
+			if (++loops >= 2 || !skipped)
+				return -ENOSPC;
+			flags = 0;
+		}
+	}
+
+ out:
+	ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+
+	if (begin) {
+		recent_rgrp_add(rgd);
+		rgd = gfs2_rgrpd_get_next(rgd);
+		if (!rgd)
+			rgd = gfs2_rgrpd_get_first(sdp);
+		forward_rgrp_set(sdp, rgd);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_inplace_reserve_i - Reserve space in the filesystem
+ * @ip: the inode to reserve space for
+ *
+ * Returns: errno
+ */
+
+int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	int error;
+
+	if (gfs2_assert_warn(sdp, al->al_requested))
+		return -EINVAL;
+
+	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	if (error)
+		return error;
+
+	error = get_local_rgrp(ip);
+	if (error) {
+		gfs2_glock_dq_uninit(&al->al_ri_gh);
+		return error;
+	}
+
+	al->al_file = file;
+	al->al_line = line;
+
+	return 0;
+}
+
+/**
+ * gfs2_inplace_release - release an inplace reservation
+ * @ip: the inode the reservation was taken out on
+ *
+ * Release a reservation made by gfs2_inplace_reserve().
+ */
+
+void gfs2_inplace_release(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+
+	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
+		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
+			     "al_file = %s, al_line = %u\n",
+		             al->al_alloced, al->al_requested, al->al_file,
+			     al->al_line);
+
+	al->al_rgd = NULL;
+	gfs2_glock_dq_uninit(&al->al_rgd_gh);
+	gfs2_glock_dq_uninit(&al->al_ri_gh);
+}
+
+/**
+ * gfs2_get_block_type - Check a block in a RG is of given type
+ * @rgd: the resource group holding the block
+ * @block: the block number
+ *
+ * Returns: The block type (GFS2_BLKST_*)
+ */
+
+unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block)
+{
+	struct gfs2_bitmap *bi = NULL;
+	uint32_t length, rgrp_block, buf_block;
+	unsigned int buf;
+	unsigned char type;
+
+	length = rgd->rd_ri.ri_length;
+	rgrp_block = block - rgd->rd_ri.ri_data0;
+
+	for (buf = 0; buf < length; buf++) {
+		bi = rgd->rd_bits + buf;
+		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
+			break;
+	}
+
+	gfs2_assert(rgd->rd_sbd, buf < length,);
+	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
+
+	type = gfs2_testbit(rgd,
+			   bi->bi_bh->b_data + bi->bi_offset,
+			   bi->bi_len, buf_block);
+
+	return type;
+}
+
+/**
+ * rgblk_search - find a block in @old_state, change allocation
+ *           state to @new_state
+ * @rgd: the resource group descriptor
+ * @goal: the goal block within the RG (start here to search for avail block)
+ * @old_state: GFS2_BLKST_XXX the before-allocation state to find
+ * @new_state: GFS2_BLKST_XXX the after-allocation block state
+ *
+ * Walk rgrp's bitmap to find bits that represent a block in @old_state.
+ * Add the found bitmap buffer to the transaction.
+ * Set the found bits to @new_state to change block's allocation state.
+ *
+ * This function never fails, because we wouldn't call it unless we
+ * know (from reservation results, etc.) that a block is available.
+ *
+ * Scope of @goal and returned block is just within rgrp, not the whole
+ * filesystem.
+ *
+ * Returns:  the block number allocated
+ */
+
+static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal,
+			     unsigned char old_state, unsigned char new_state)
+{
+	struct gfs2_bitmap *bi = NULL;
+	uint32_t length = rgd->rd_ri.ri_length;
+	uint32_t blk = 0;
+	unsigned int buf, x;
+
+	/* Find bitmap block that contains bits for goal block */
+	for (buf = 0; buf < length; buf++) {
+		bi = rgd->rd_bits + buf;
+		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
+			break;
+	}
+
+	gfs2_assert(rgd->rd_sbd, buf < length,);
+
+	/* Convert scope of "goal" from rgrp-wide to within found bit block */
+	goal -= bi->bi_start * GFS2_NBBY;
+
+	/* Search (up to entire) bitmap in this rgrp for allocatable block.
+	   "x <= length", instead of "x < length", because we typically start
+	   the search in the middle of a bit block, but if we can't find an
+	   allocatable block anywhere else, we want to be able wrap around and
+	   search in the first part of our first-searched bit block.  */
+	for (x = 0; x <= length; x++) {
+		if (bi->bi_clone)
+			blk = gfs2_bitfit(rgd,
+					  bi->bi_clone + bi->bi_offset,
+					  bi->bi_len, goal, old_state);
+		else
+			blk = gfs2_bitfit(rgd,
+					  bi->bi_bh->b_data + bi->bi_offset,
+					  bi->bi_len, goal, old_state);
+		if (blk != BFITNOENT)
+			break;
+
+		/* Try next bitmap block (wrap back to rgrp header if at end) */
+		buf = (buf + 1) % length;
+		bi = rgd->rd_bits + buf;
+		goal = 0;
+	}
+
+	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
+		blk = 0;
+
+	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh);
+	gfs2_setbit(rgd,
+		    bi->bi_bh->b_data + bi->bi_offset,
+		    bi->bi_len, blk, new_state);
+	if (bi->bi_clone)
+		gfs2_setbit(rgd,
+			    bi->bi_clone + bi->bi_offset,
+			    bi->bi_len, blk, new_state);
+
+	return bi->bi_start * GFS2_NBBY + blk;
+}
+
+/**
+ * rgblk_free - Change alloc state of given block(s)
+ * @sdp: the filesystem
+ * @bstart: the start of a run of blocks to free
+ * @blen: the length of the block run (all must lie within ONE RG!)
+ * @new_state: GFS2_BLKST_XXX the after-allocation block state
+ *
+ * Returns:  Resource group containing the block(s)
+ */
+
+static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart,
+				     uint32_t blen, unsigned char new_state)
+{
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_bitmap *bi = NULL;
+	uint32_t length, rgrp_blk, buf_blk;
+	unsigned int buf;
+
+	rgd = gfs2_blk2rgrpd(sdp, bstart);
+	if (!rgd) {
+		if (gfs2_consist(sdp))
+			fs_err(sdp, "block = %"PRIu64"\n", bstart);
+		return NULL;
+	}
+
+	length = rgd->rd_ri.ri_length;
+
+	rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+
+	while (blen--) {
+		for (buf = 0; buf < length; buf++) {
+			bi = rgd->rd_bits + buf;
+			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
+				break;
+		}
+
+		gfs2_assert(rgd->rd_sbd, buf < length,);
+
+		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
+		rgrp_blk++;
+
+		if (!bi->bi_clone) {
+			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
+					       GFP_KERNEL | __GFP_NOFAIL);
+			memcpy(bi->bi_clone + bi->bi_offset,
+			       bi->bi_bh->b_data + bi->bi_offset,
+			       bi->bi_len);
+		}
+		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh);
+		gfs2_setbit(rgd,
+			    bi->bi_bh->b_data + bi->bi_offset,
+			    bi->bi_len, buf_blk, new_state);
+	}
+
+	return rgd;
+}
+
+/**
+ * gfs2_alloc_data - Allocate a data block
+ * @ip: the inode to allocate the data block for
+ *
+ * Returns: the allocated block
+ */
+
+uint64_t gfs2_alloc_data(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_rgrpd *rgd = al->al_rgd;
+	uint32_t goal, blk;
+	uint64_t block;
+
+	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
+		goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+	else
+		goal = rgd->rd_last_alloc_data;
+
+	blk = rgblk_search(rgd, goal,
+			   GFS2_BLKST_FREE, GFS2_BLKST_USED);
+	rgd->rd_last_alloc_data = blk;
+
+	block = rgd->rd_ri.ri_data0 + blk;
+	ip->i_di.di_goal_data = block;
+
+	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
+	rgd->rd_rg.rg_free--;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	al->al_alloced++;
+
+	gfs2_statfs_change(sdp, 0, -1, 0);
+	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+
+	spin_lock(&sdp->sd_rindex_spin);
+	rgd->rd_free_clone--;
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return block;
+}
+
+/**
+ * gfs2_alloc_meta - Allocate a metadata block
+ * @ip: the inode to allocate the metadata block for
+ *
+ * Returns: the allocated block
+ */
+
+uint64_t gfs2_alloc_meta(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_rgrpd *rgd = al->al_rgd;
+	uint32_t goal, blk;
+	uint64_t block;
+
+	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
+		goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+	else
+		goal = rgd->rd_last_alloc_meta;
+
+	blk = rgblk_search(rgd, goal,
+			   GFS2_BLKST_FREE, GFS2_BLKST_USED);
+	rgd->rd_last_alloc_meta = blk;
+
+	block = rgd->rd_ri.ri_data0 + blk;
+	ip->i_di.di_goal_meta = block;
+
+	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
+	rgd->rd_rg.rg_free--;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	al->al_alloced++;
+
+	gfs2_statfs_change(sdp, 0, -1, 0);
+	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_trans_add_unrevoke(sdp, block);
+
+	spin_lock(&sdp->sd_rindex_spin);
+	rgd->rd_free_clone--;
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return block;
+}
+
+/**
+ * gfs2_alloc_di - Allocate a dinode
+ * @dip: the directory that the inode is going in
+ *
+ * Returns: the block allocated
+ */
+
+uint64_t gfs2_alloc_di(struct gfs2_inode *dip)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct gfs2_alloc *al = dip->i_alloc;
+	struct gfs2_rgrpd *rgd = al->al_rgd;
+	uint32_t blk;
+	uint64_t block;
+
+	blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
+			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
+
+	rgd->rd_last_alloc_meta = blk;
+
+	block = rgd->rd_ri.ri_data0 + blk;
+
+	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
+	rgd->rd_rg.rg_free--;
+	rgd->rd_rg.rg_dinodes++;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	al->al_alloced++;
+
+	gfs2_statfs_change(sdp, 0, -1, +1);
+	gfs2_trans_add_unrevoke(sdp, block);
+
+	spin_lock(&sdp->sd_rindex_spin);
+	rgd->rd_free_clone--;
+	spin_unlock(&sdp->sd_rindex_spin);
+
+	return block;
+}
+
+/**
+ * gfs2_free_data - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrpd *rgd;
+
+	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
+	if (!rgd)
+		return;
+
+	rgd->rd_rg.rg_free += blen;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	gfs2_trans_add_rg(rgd);
+
+	gfs2_statfs_change(sdp, 0, +blen, 0);
+	gfs2_quota_change(ip, -(int64_t)blen,
+			 ip->i_di.di_uid, ip->i_di.di_gid);
+}
+
+/**
+ * gfs2_free_meta - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrpd *rgd;
+
+	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
+	if (!rgd)
+		return;
+
+	rgd->rd_rg.rg_free += blen;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	gfs2_trans_add_rg(rgd);
+
+	gfs2_statfs_change(sdp, 0, +blen, 0);
+	gfs2_quota_change(ip, -(int64_t)blen,
+			 ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_meta_wipe(ip, bstart, blen);
+}
+
+void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno)
+{
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+	struct gfs2_rgrpd *tmp_rgd;
+
+	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
+	if (!tmp_rgd)
+		return;
+	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
+
+	if (!rgd->rd_rg.rg_dinodes)
+		gfs2_consist_rgrpd(rgd);
+	rgd->rd_rg.rg_dinodes--;
+	rgd->rd_rg.rg_free++;
+
+	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
+	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
+
+	gfs2_statfs_change(sdp, 0, +1, -1);
+	gfs2_trans_add_rg(rgd);
+}
+
+/**
+ * gfs2_free_uninit_di - free a dinode block
+ * @rgd: the resource group that contains the dinode
+ * @ip: the inode
+ *
+ */
+
+void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+{
+	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+	gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+}
+
+/**
+ * gfs2_rlist_add - add a RG to a list of RGs
+ * @sdp: the filesystem
+ * @rlist: the list of resource groups
+ * @block: the block
+ *
+ * Figure out what RG a block belongs to and add that RG to the list
+ *
+ * FIXME: Don't use NOFAIL
+ *
+ */
+
+void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+		    uint64_t block)
+{
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_rgrpd **tmp;
+	unsigned int new_space;
+	unsigned int x;
+
+	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
+		return;
+
+	rgd = gfs2_blk2rgrpd(sdp, block);
+	if (!rgd) {
+		if (gfs2_consist(sdp))
+			fs_err(sdp, "block = %"PRIu64"\n", block);
+		return;
+	}
+
+	for (x = 0; x < rlist->rl_rgrps; x++)
+		if (rlist->rl_rgd[x] == rgd)
+			return;
+
+	if (rlist->rl_rgrps == rlist->rl_space) {
+		new_space = rlist->rl_space + 10;
+
+		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
+			      GFP_KERNEL | __GFP_NOFAIL);
+
+		if (rlist->rl_rgd) {
+			memcpy(tmp, rlist->rl_rgd,
+			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
+			kfree(rlist->rl_rgd);
+		}
+
+		rlist->rl_space = new_space;
+		rlist->rl_rgd = tmp;
+	}
+
+	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
+}
+
+/**
+ * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
+ *      and initialize an array of glock holders for them
+ * @rlist: the list of resource groups
+ * @state: the lock state to acquire the RG lock in
+ * @flags: the modifier flags for the holder structures
+ *
+ * FIXME: Don't use NOFAIL
+ *
+ */
+
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
+		      int flags)
+{
+	unsigned int x;
+
+	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
+				GFP_KERNEL | __GFP_NOFAIL);
+	for (x = 0; x < rlist->rl_rgrps; x++)
+		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
+				state, flags,
+				&rlist->rl_ghs[x]);
+}
+
+/**
+ * gfs2_rlist_free - free a resource group list
+ * @list: the list of resource groups
+ *
+ */
+
+void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
+{
+	unsigned int x;
+
+	kfree(rlist->rl_rgd);
+
+	if (rlist->rl_ghs) {
+		for (x = 0; x < rlist->rl_rgrps; x++)
+			gfs2_holder_uninit(&rlist->rl_ghs[x]);
+		kfree(rlist->rl_ghs);
+	}
+}
+
--- a/fs/gfs2/rgrp.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/rgrp.h	2005-09-01 17:36:55.479090544 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __RGRP_DOT_H__
+#define __RGRP_DOT_H__
+
+void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
+
+struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
+struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
+struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
+
+void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
+int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
+
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
+void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
+void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
+
+void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
+
+struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+void gfs2_alloc_put(struct gfs2_inode *ip);
+
+int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
+			 char *file, unsigned int line);
+#define gfs2_inplace_reserve(ip) \
+gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
+
+void gfs2_inplace_release(struct gfs2_inode *ip);
+
+unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block);
+
+uint64_t gfs2_alloc_data(struct gfs2_inode *ip);
+uint64_t gfs2_alloc_meta(struct gfs2_inode *ip);
+uint64_t gfs2_alloc_di(struct gfs2_inode *ip);
+
+void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
+void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
+void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno);
+void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
+
+struct gfs2_rgrp_list {
+	unsigned int rl_rgrps;
+	unsigned int rl_space;
+	struct gfs2_rgrpd **rl_rgd;
+	struct gfs2_holder *rl_ghs;
+};
+
+void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+		    uint64_t block);
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
+		      int flags);
+void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+
+#endif /* __RGRP_DOT_H__ */
--- a/fs/gfs2/bits.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/bits.c	2005-09-01 17:36:55.149140704 +0800
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+/*
+ * These routines are used by the resource group routines (rgrp.c)
+ * to keep track of block allocation.  Each block is represented by two
+ * bits.  One bit indicates whether or not the block is used.  (1=used,
+ * 0=free)  The other bit indicates whether or not the block contains a
+ * dinode or not.  (1=dinode, 0=not-dinode) So, each byte represents
+ * GFS2_NBBY (i.e. 4) blocks.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bits.h"
+
+static const char valid_change[16] = {
+	        /* current */
+	/* n */ 0, 1, 0, 1,
+	/* e */ 1, 0, 0, 0,
+	/* w */ 0, 0, 0, 0,
+	        1, 0, 0, 0
+};
+
+/**
+ * gfs2_setbit - Set a bit in the bitmaps
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @block: the block to set
+ * @new_state: the new state of the block
+ *
+ */
+
+void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
+		 unsigned int buflen, uint32_t block, unsigned char new_state)
+{
+	unsigned char *byte, *end, cur_state;
+	unsigned int bit;
+
+	byte = buffer + (block / GFS2_NBBY);
+	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
+	end = buffer + buflen;
+
+	gfs2_assert(rgd->rd_sbd, byte < end,);
+
+	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
+
+	if (valid_change[new_state * 4 + cur_state]) {
+		*byte ^= cur_state << bit;
+		*byte |= new_state << bit;
+	} else
+		gfs2_consist_rgrpd(rgd);
+}
+
+/**
+ * gfs2_testbit - test a bit in the bitmaps
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @block: the block to read
+ *
+ */
+
+unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
+			   unsigned int buflen, uint32_t block)
+{
+	unsigned char *byte, *end, cur_state;
+	unsigned int bit;
+
+	byte = buffer + (block / GFS2_NBBY);
+	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
+	end = buffer + buflen;
+
+	gfs2_assert(rgd->rd_sbd, byte < end,);
+
+	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
+
+	return cur_state;
+}
+
+/**
+ * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
+ *       a block in a given allocation state.
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @goal: start search at this block's bit-pair (within @buffer)
+ * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
+ *       bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
+ *
+ * Scope of @goal and returned block number is only within this bitmap buffer,
+ * not entire rgrp or filesystem.  @buffer will be offset from the actual
+ * beginning of a bitmap block buffer, skipping any header structures.
+ *
+ * Return: the block number (bitmap buffer scope) that was found
+ */
+
+uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
+		     unsigned int buflen, uint32_t goal,
+		     unsigned char old_state)
+{
+	unsigned char *byte, *end, alloc;
+	uint32_t blk = goal;
+	unsigned int bit;
+
+	byte = buffer + (goal / GFS2_NBBY);
+	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+	end = buffer + buflen;
+	alloc = (old_state & 1) ? 0 : 0x55;
+
+	while (byte < end) {
+		if ((*byte & 0x55) == alloc) {
+			blk += (8 - bit) >> 1;
+
+			bit = 0;
+			byte++;
+
+			continue;
+		}
+
+		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
+			return blk;
+
+		bit += GFS2_BIT_SIZE;
+		if (bit >= 8) {
+			bit = 0;
+			byte++;
+		}
+
+		blk++;
+	}
+
+	return BFITNOENT;
+}
+
+/**
+ * gfs2_bitcount - count the number of bits in a certain state
+ * @buffer: the buffer that holds the bitmaps
+ * @buflen: the length (in bytes) of the buffer
+ * @state: the state of the block we're looking for
+ *
+ * Returns: The number of bits
+ */
+
+uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
+		       unsigned int buflen, unsigned char state)
+{
+	unsigned char *byte = buffer;
+	unsigned char *end = buffer + buflen;
+	unsigned char state1 = state << 2;
+	unsigned char state2 = state << 4;
+	unsigned char state3 = state << 6;
+	uint32_t count = 0;
+
+	for (; byte < end; byte++) {
+		if (((*byte) & 0x03) == state)
+			count++;
+		if (((*byte) & 0x0C) == state1)
+			count++;
+		if (((*byte) & 0x30) == state2)
+			count++;
+		if (((*byte) & 0xC0) == state3)
+			count++;
+	}
+
+	return count;
+}
+
--- a/fs/gfs2/bits.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/bits.h	2005-09-01 17:36:55.149140704 +0800
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __BITS_DOT_H__
+#define __BITS_DOT_H__
+
+#define BFITNOENT 0xFFFFFFFF
+
+void gfs2_setbit(struct gfs2_rgrpd *rgd,
+		unsigned char *buffer, unsigned int buflen,
+		uint32_t block, unsigned char new_state);
+unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
+			  unsigned char *buffer, unsigned int buflen,
+			  uint32_t block);
+uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd,
+		    unsigned char *buffer, unsigned int buflen,
+		    uint32_t goal, unsigned char old_state);
+uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd,
+		      unsigned char *buffer, unsigned int buflen,
+		      unsigned char state);
+
+#endif /* __BITS_DOT_H__ */

[PATCH 05/13] GFS: ea and acl

Code that handles extended attributes and ACL's.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/acl.c   |  313 ++++++++++
 fs/gfs2/acl.h   |   37 +
 fs/gfs2/eaops.c |  179 ++++++
 fs/gfs2/eaops.h |   30 +
 fs/gfs2/eattr.c | 1621 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/eattr.h |   91 +++
 6 files changed, 2271 insertions(+)

--- a/fs/gfs2/acl.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/acl.c	2005-09-01 17:36:55.135142832 +0800
@@ -0,0 +1,313 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "acl.h"
+#include "eaops.h"
+#include "eattr.h"
+#include "glock.h"
+#include "inode.h"
+#include "meta_io.h"
+#include "trans.h"
+
+#define ACL_ACCESS 1
+#define ACL_DEFAULT 0
+
+int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
+		      struct gfs2_ea_request *er,
+		      int *remove, mode_t *mode)
+{
+	struct posix_acl *acl;
+	int error;
+
+	error = gfs2_acl_validate_remove(ip, access);
+	if (error)
+		return error;
+
+	if (!er->er_data)
+		return -EINVAL;
+
+	acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl) {
+		*remove = TRUE;
+		return 0;
+	}
+
+	error = posix_acl_valid(acl);
+	if (error)
+		goto out;
+
+	if (access) {
+		error = posix_acl_equiv_mode(acl, mode);
+		if (!error)
+			*remove = TRUE;
+		else if (error > 0)
+			error = 0;
+	}
+
+ out:
+	posix_acl_release(acl);
+
+	return error;
+}
+
+int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
+{
+	if (!ip->i_sbd->sd_args.ar_posix_acl)
+		return -EOPNOTSUPP;
+	if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
+		return -EPERM;
+	if (S_ISLNK(ip->i_di.di_mode))
+		return -EOPNOTSUPP;
+	if (!access && !S_ISDIR(ip->i_di.di_mode))
+		return -EACCES;
+
+	return 0;
+}
+
+static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
+		   struct gfs2_ea_location *el, char **data, unsigned int *len)
+{
+	struct gfs2_ea_request er;
+	struct gfs2_ea_location el_this;
+	int error;
+
+	if (!ip->i_di.di_eattr)
+		return 0;
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	if (access) {
+		er.er_name = GFS2_POSIX_ACL_ACCESS;
+		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
+	} else {
+		er.er_name = GFS2_POSIX_ACL_DEFAULT;
+		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
+	}
+	er.er_type = GFS2_EATYPE_SYS;
+
+	if (!el)
+		el = &el_this;
+
+	error = gfs2_ea_find(ip, &er, el);
+	if (error)
+		return error;
+	if (!el->el_ea)
+		return 0;
+	if (!GFS2_EA_DATA_LEN(el->el_ea))
+		goto out;
+
+	er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
+	er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
+	error = -ENOMEM;
+	if (!er.er_data)
+		goto out;
+
+	error = gfs2_ea_get_copy(ip, el, er.er_data);
+	if (error)
+		goto out_kfree;
+
+	if (acl) {
+		*acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
+		if (IS_ERR(*acl))
+			error = PTR_ERR(*acl);
+	}
+
+ out_kfree:
+	if (error || !data)
+		kfree(er.er_data);
+	else {
+		*data = er.er_data;
+		*len = er.er_data_len;
+	}
+
+ out:
+	if (error || el == &el_this)
+		brelse(el->el_bh);
+
+	return error;
+}
+
+/**
+ * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
+ * @inode: the file we want to do something to
+ * @mask: what we want to do
+ *
+ * Returns: errno
+ */
+
+int gfs2_check_acl_locked(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = NULL;
+	int error;
+
+	error = acl_get(get_v2ip(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
+	if (error)
+		return error;
+
+	if (acl) {
+		error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+
+	return -EAGAIN;
+}
+
+int gfs2_check_acl(struct inode *inode, int mask)
+{
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder i_gh;
+	int error;
+
+	error = gfs2_glock_nq_init(ip->i_gl,
+				   LM_ST_SHARED, LM_FLAG_ANY,
+				   &i_gh);
+	if (!error) {
+		error = gfs2_check_acl_locked(inode, mask);
+		gfs2_glock_dq_uninit(&i_gh);
+	}
+	
+	return error;
+}
+
+static int munge_mode(struct gfs2_inode *ip, mode_t mode)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		gfs2_assert_withdraw(sdp,
+				(ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
+		ip->i_di.di_mode = mode;
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+
+	return 0;
+}
+
+int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = dip->i_sbd;
+	struct posix_acl *acl = NULL, *clone;
+	struct gfs2_ea_request er;
+	mode_t mode = ip->i_di.di_mode;
+	int error;
+
+	if (!sdp->sd_args.ar_posix_acl)
+		return 0;
+	if (S_ISLNK(ip->i_di.di_mode))
+		return 0;
+
+	memset(&er, 0, sizeof(struct gfs2_ea_request));
+	er.er_type = GFS2_EATYPE_SYS;
+
+	error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
+			&er.er_data, &er.er_data_len);
+	if (error)
+		return error;
+	if (!acl) {
+		mode &= ~current->fs->umask;
+		if (mode != ip->i_di.di_mode)
+			error = munge_mode(ip, mode);
+		return error;
+	}
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	error = -ENOMEM;
+	if (!clone)
+		goto out;
+	posix_acl_release(acl);
+	acl = clone;
+
+	if (S_ISDIR(ip->i_di.di_mode)) {
+		er.er_name = GFS2_POSIX_ACL_DEFAULT;
+		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
+		error = gfs2_system_eaops.eo_set(ip, &er);
+		if (error)
+			goto out;
+	}
+
+	error = posix_acl_create_masq(acl, &mode);
+	if (error < 0)
+		goto out;
+	if (error > 0) {
+		er.er_name = GFS2_POSIX_ACL_ACCESS;
+		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
+		posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
+		er.er_mode = mode;
+		er.er_flags = GFS2_ERF_MODE;
+		error = gfs2_system_eaops.eo_set(ip, &er);
+		if (error)
+			goto out;
+	} else
+		munge_mode(ip, mode);
+
+ out:
+	posix_acl_release(acl);
+	kfree(er.er_data);
+	return error;
+}
+
+int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
+{
+	struct posix_acl *acl = NULL, *clone;
+	struct gfs2_ea_location el;
+	char *data;
+	unsigned int len;
+	int error;
+
+	error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
+	if (error)
+		return error;
+	if (!acl)
+		return gfs2_setattr_simple(ip, attr);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	error = -ENOMEM;
+	if (!clone)
+		goto out;
+	posix_acl_release(acl);
+	acl = clone;
+
+	error = posix_acl_chmod_masq(acl, attr->ia_mode);
+	if (!error) {
+		posix_acl_to_xattr(acl, data, len);
+		error = gfs2_ea_acl_chmod(ip, &el, attr, data);
+	}
+
+ out:
+	posix_acl_release(acl);
+	brelse(el.el_bh);
+	kfree(data);
+
+	return error;
+}
+
--- a/fs/gfs2/acl.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/acl.h	2005-09-01 17:36:55.140142072 +0800
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __ACL_DOT_H__
+#define __ACL_DOT_H__
+
+#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
+#define GFS2_POSIX_ACL_ACCESS_LEN	16
+#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
+#define GFS2_POSIX_ACL_DEFAULT_LEN	17
+
+#define GFS2_ACL_IS_ACCESS(name, len) \
+         ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
+         !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))
+
+#define GFS2_ACL_IS_DEFAULT(name, len) \
+         ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
+         !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
+
+struct gfs2_ea_request;
+
+int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
+			  struct gfs2_ea_request *er,
+			  int *remove, mode_t *mode);
+int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
+int gfs2_check_acl_locked(struct inode *inode, int mask);
+int gfs2_check_acl(struct inode *inode, int mask);
+int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
+int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
+
+#endif /* __ACL_DOT_H__ */
--- a/fs/gfs2/eattr.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/eattr.c	2005-09-01 17:36:55.202132648 +0800
@@ -0,0 +1,1621 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/xattr.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "acl.h"
+#include "eaops.h"
+#include "eattr.h"
+#include "glock.h"
+#include "inode.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+
+/**
+ * ea_calc_size - returns the acutal number of bytes the request will take up
+ *                (not counting any unstuffed data blocks)
+ * @sdp:
+ * @er:
+ * @size:
+ *
+ * Returns: TRUE if the EA should be stuffed
+ */
+
+static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
+			unsigned int *size)
+{
+	*size = GFS2_EAREQ_SIZE_STUFFED(er);
+	if (*size <= sdp->sd_jbsize)
+		return TRUE;
+
+	*size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
+
+	return FALSE;
+}
+
+static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
+{
+	unsigned int size;
+
+	if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
+		return -ERANGE;
+
+	ea_calc_size(sdp, er, &size);
+
+	/* This can only happen with 512 byte blocks */
+	if (size > sdp->sd_jbsize)
+		return -ERANGE;
+
+	return 0;
+}
+
+typedef int (*ea_call_t) (struct gfs2_inode *ip,
+			  struct buffer_head *bh,
+			  struct gfs2_ea_header *ea,
+			  struct gfs2_ea_header *prev,
+			  void *private);
+
+static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
+			ea_call_t ea_call, void *data)
+{
+	struct gfs2_ea_header *ea, *prev = NULL;
+	int error = 0;
+
+	if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA))
+		return -EIO;
+
+	for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
+		if (!GFS2_EA_REC_LEN(ea))
+			goto fail;
+		if (!(bh->b_data <= (char *)ea &&
+		      (char *)GFS2_EA2NEXT(ea) <=
+		      bh->b_data + bh->b_size))
+			goto fail;
+		if (!GFS2_EATYPE_VALID(ea->ea_type))
+			goto fail;
+
+		error = ea_call(ip, bh, ea, prev, data);
+		if (error)
+			return error;
+
+		if (GFS2_EA_IS_LAST(ea)) {
+			if ((char *)GFS2_EA2NEXT(ea) !=
+			    bh->b_data + bh->b_size)
+				goto fail;
+			break;
+		}
+	}
+
+	return error;
+
+ fail:
+	gfs2_consist_inode(ip);
+	return -EIO;
+}
+
+static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
+{
+	struct buffer_head *bh, *eabh;
+	uint64_t *eablk, *end;
+	int error;
+
+	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
+			       DIO_START | DIO_WAIT, &bh);
+	if (error)
+		return error;
+
+	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
+		error = ea_foreach_i(ip, bh, ea_call, data);
+		goto out;
+	}
+
+	if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
+		error = -EIO;
+		goto out;
+	}
+
+	eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header));
+	end = eablk + ip->i_sbd->sd_inptrs;
+
+	for (; eablk < end; eablk++) {
+		uint64_t bn;
+
+		if (!*eablk)
+			break;
+		bn = gfs2_64_to_cpu(*eablk);
+
+		error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT,
+				       &eabh);
+		if (error)
+			break;
+		error = ea_foreach_i(ip, eabh, ea_call, data);
+		brelse(eabh);
+		if (error)
+			break;
+	}
+ out:
+	brelse(bh);
+
+	return error;
+}
+
+struct ea_find {
+	struct gfs2_ea_request *ef_er;
+	struct gfs2_ea_location *ef_el;
+};
+
+static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
+		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
+		     void *private)
+{
+	struct ea_find *ef = private;
+	struct gfs2_ea_request *er = ef->ef_er;
+
+	if (ea->ea_type == GFS2_EATYPE_UNUSED)
+		return 0;
+
+	if (ea->ea_type == er->er_type) {
+		if (ea->ea_name_len == er->er_name_len &&
+		    !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
+			struct gfs2_ea_location *el = ef->ef_el;
+			get_bh(bh);
+			el->el_bh = bh;
+			el->el_ea = ea;
+			el->el_prev = prev;
+			return 1;
+		}
+	}
+
+#if 0
+	else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) &&
+		 er->er_type == GFS2_EATYPE_SYS)
+		return 1;
+#endif
+
+	return 0;
+}
+
+int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+		 struct gfs2_ea_location *el)
+{
+	struct ea_find ef;
+	int error;
+
+	ef.ef_er = er;
+	ef.ef_el = el;
+
+	memset(el, 0, sizeof(struct gfs2_ea_location));
+
+	error = ea_foreach(ip, ea_find_i, &ef);
+	if (error > 0)
+		return 0;
+
+	return error;
+}
+
+/**
+ * ea_dealloc_unstuffed -
+ * @ip:
+ * @bh:
+ * @ea:
+ * @prev:
+ * @private:
+ *
+ * Take advantage of the fact that all unstuffed blocks are
+ * allocated from the same RG.  But watch, this may not always
+ * be true.
+ *
+ * Returns: errno
+ */
+
+static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
+				struct gfs2_ea_header *ea,
+				struct gfs2_ea_header *prev, void *private)
+{
+	int *leave = private;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_holder rg_gh;
+	struct buffer_head *dibh;
+	uint64_t *dataptrs, bn = 0;
+	uint64_t bstart = 0;
+	unsigned int blen = 0;
+	unsigned int blks = 0;
+	unsigned int x;
+	int error;
+
+	if (GFS2_EA_IS_STUFFED(ea))
+		return 0;
+
+	dataptrs = GFS2_EA2DATAPTRS(ea);
+	for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
+		if (*dataptrs) {
+			blks++;
+			bn = gfs2_64_to_cpu(*dataptrs);
+		}
+	if (!blks)
+		return 0;
+
+	rgd = gfs2_blk2rgrpd(sdp, bn);
+	if (!rgd) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
+	if (error)
+		return error;
+
+	error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length +
+				 RES_DINODE + RES_EATTR + RES_STATFS +
+				 RES_QUOTA, blks);
+	if (error)
+		goto out_gunlock;
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+
+	dataptrs = GFS2_EA2DATAPTRS(ea);
+	for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
+		if (!*dataptrs)
+			break;
+		bn = gfs2_64_to_cpu(*dataptrs);
+
+		if (bstart + blen == bn)
+			blen++;
+		else {
+			if (bstart)
+				gfs2_free_meta(ip, bstart, blen);
+			bstart = bn;
+			blen = 1;
+		}
+
+		*dataptrs = 0;
+		if (!ip->i_di.di_blocks)
+			gfs2_consist_inode(ip);
+		ip->i_di.di_blocks--;
+	}
+	if (bstart)
+		gfs2_free_meta(ip, bstart, blen);
+
+	if (prev && !leave) {
+		uint32_t len;
+
+		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
+		prev->ea_rec_len = cpu_to_gfs2_32(len);
+
+		if (GFS2_EA_IS_LAST(ea))
+			prev->ea_flags |= GFS2_EAFLAG_LAST;
+	} else {
+		ea->ea_type = GFS2_EATYPE_UNUSED;
+		ea->ea_num_ptrs = 0;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		ip->i_di.di_ctime = get_seconds();
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_uninit(&rg_gh);
+
+	return error;
+}
+
+static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
+			       struct gfs2_ea_header *ea,
+			       struct gfs2_ea_header *prev, int leave)
+{
+	struct gfs2_alloc *al;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out_alloc;
+
+	error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
+	if (error)
+		goto out_quota;
+
+	error = ea_dealloc_unstuffed(ip,
+				     bh, ea, prev,
+				     (leave) ? &error : NULL);
+
+	gfs2_glock_dq_uninit(&al->al_ri_gh);
+
+ out_quota:
+	gfs2_quota_unhold(ip);
+
+ out_alloc:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/******************************************************************************/
+
+static int gfs2_ea_repack_i(struct gfs2_inode *ip)
+{
+	return -EOPNOTSUPP;
+}
+
+int gfs2_ea_repack(struct gfs2_inode *ip)
+{
+	struct gfs2_holder gh;
+	int error;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (error)
+		return error;
+
+	/* Some sort of permissions checking would be nice */
+
+	error = gfs2_ea_repack_i(ip);
+
+	gfs2_glock_dq_uninit(&gh);
+
+	return error;
+}
+
+struct ea_list {
+	struct gfs2_ea_request *ei_er;
+	unsigned int ei_size;
+};
+
+static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
+		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
+		     void *private)
+{
+	struct ea_list *ei = private;
+	struct gfs2_ea_request *er = ei->ei_er;
+	unsigned int ea_size = GFS2_EA_STRLEN(ea);
+
+	if (ea->ea_type == GFS2_EATYPE_UNUSED)
+		return 0;
+
+	if (er->er_data_len) {
+		char *prefix;
+		unsigned int l;
+		char c = 0;
+
+		if (ei->ei_size + ea_size > er->er_data_len)
+			return -ERANGE;
+
+		if (ea->ea_type == GFS2_EATYPE_USR) {
+			prefix = "user.";
+			l = 5;
+		} else {
+			prefix = "system.";
+			l = 7;
+		}
+
+		memcpy(er->er_data + ei->ei_size,
+		       prefix, l);
+		memcpy(er->er_data + ei->ei_size + l,
+		       GFS2_EA2NAME(ea),
+		       ea->ea_name_len);
+		memcpy(er->er_data + ei->ei_size +
+		       ea_size - 1,
+		       &c, 1);
+	}
+
+	ei->ei_size += ea_size;
+
+	return 0;
+}
+
+/**
+ * gfs2_ea_list -
+ * @ip:
+ * @er:
+ *
+ * Returns: actual size of data on success, -errno on error
+ */
+
+int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!er->er_data || !er->er_data_len) {
+		er->er_data = NULL;
+		er->er_data_len = 0;
+	}
+
+	error = gfs2_glock_nq_init(ip->i_gl,
+				  LM_ST_SHARED, LM_FLAG_ANY,
+				  &i_gh);
+	if (error)
+		return error;
+
+	if (ip->i_di.di_eattr) {
+		struct ea_list ei = { .ei_er = er, .ei_size = 0 };
+
+		error = ea_foreach(ip, ea_list_i, &ei);
+		if (!error)
+			error = ei.ei_size;
+	}
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * ea_get_unstuffed - actually copies the unstuffed data into the
+ *                    request buffer
+ * @ip:
+ * @ea:
+ * @data:
+ *
+ * Returns: errno
+ */
+
+static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
+			    char *data)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head **bh;
+	unsigned int amount = GFS2_EA_DATA_LEN(ea);
+	unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
+	uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
+	unsigned int x;
+	int error = 0;
+
+	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
+	if (!bh)
+		return -ENOMEM;
+
+	for (x = 0; x < nptrs; x++) {
+		error = gfs2_meta_read(ip->i_gl, gfs2_64_to_cpu(*dataptrs),
+				       DIO_START, bh + x);
+		if (error) {
+			while (x--)
+				brelse(bh[x]);
+			goto out;
+		}
+		dataptrs++;
+	}
+
+	for (x = 0; x < nptrs; x++) {
+		error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
+		if (error) {
+			for (; x < nptrs; x++)
+				brelse(bh[x]);
+			goto out;
+		}
+		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
+			for (; x < nptrs; x++)
+				brelse(bh[x]);
+			error = -EIO;
+			goto out;
+		}
+
+		memcpy(data,
+		       bh[x]->b_data + sizeof(struct gfs2_meta_header),
+		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
+
+		amount -= sdp->sd_jbsize;
+		data += sdp->sd_jbsize;
+
+		brelse(bh[x]);
+	}
+
+ out:
+	kfree(bh);
+
+	return error;
+}
+
+int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
+		     char *data)
+{
+	if (GFS2_EA_IS_STUFFED(el->el_ea)) {
+		memcpy(data,
+		       GFS2_EA2DATA(el->el_ea),
+		       GFS2_EA_DATA_LEN(el->el_ea));
+		return 0;
+	} else
+		return ea_get_unstuffed(ip, el->el_ea, data);
+}
+
+/**
+ * gfs2_ea_get_i -
+ * @ip:
+ * @er:
+ *
+ * Returns: actual size of data on success, -errno on error
+ */
+
+int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_ea_location el;
+	int error;
+
+	if (!ip->i_di.di_eattr)
+		return -ENODATA;
+
+	error = gfs2_ea_find(ip, er, &el);
+	if (error)
+		return error;
+	if (!el.el_ea)
+		return -ENODATA;
+
+	if (er->er_data_len) {
+		if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
+			error =  -ERANGE;
+		else
+			error = gfs2_ea_get_copy(ip, &el, er->er_data);
+	}
+	if (!error)
+		error = GFS2_EA_DATA_LEN(el.el_ea);
+
+	brelse(el.el_bh);
+
+	return error;
+}
+
+/**
+ * gfs2_ea_get -
+ * @ip:
+ * @er:
+ *
+ * Returns: actual size of data on success, -errno on error
+ */
+
+int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!er->er_name_len ||
+	    er->er_name_len > GFS2_EA_MAX_NAME_LEN)
+		return -EINVAL;
+	if (!er->er_data || !er->er_data_len) {
+		er->er_data = NULL;
+		er->er_data_len = 0;
+	}
+
+	error = gfs2_glock_nq_init(ip->i_gl,
+				  LM_ST_SHARED, LM_FLAG_ANY,
+				  &i_gh);
+	if (error)
+		return error;
+
+	error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * ea_alloc_blk - allocates a new block for extended attributes.
+ * @ip: A pointer to the inode that's getting extended attributes
+ * @bhp:
+ *
+ * Returns: errno
+ */
+
+static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_ea_header *ea;
+	uint64_t block;
+
+	block = gfs2_alloc_meta(ip);
+
+	*bhp = gfs2_meta_new(ip->i_gl, block);
+	gfs2_trans_add_bh(ip->i_gl, *bhp);
+	gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
+	gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
+
+	ea = GFS2_EA_BH2FIRST(*bhp);
+	ea->ea_rec_len = cpu_to_gfs2_32(sdp->sd_jbsize);
+	ea->ea_type = GFS2_EATYPE_UNUSED;
+	ea->ea_flags = GFS2_EAFLAG_LAST;
+	ea->ea_num_ptrs = 0;
+
+	ip->i_di.di_blocks++;
+
+	return 0;
+}
+
+/**
+ * ea_write - writes the request info to an ea, creating new blocks if
+ *            necessary
+ * @ip:  inode that is being modified
+ * @ea:  the location of the new ea in a block
+ * @er: the write request
+ *
+ * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
+ *
+ * returns : errno
+ */
+
+static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
+		    struct gfs2_ea_request *er)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+
+	ea->ea_data_len = cpu_to_gfs2_32(er->er_data_len);
+	ea->ea_name_len = er->er_name_len;
+	ea->ea_type = er->er_type;
+	ea->ea_pad = 0;
+
+	memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
+
+	if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
+		ea->ea_num_ptrs = 0;
+		memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
+	} else {
+		uint64_t *dataptr = GFS2_EA2DATAPTRS(ea);
+		const char *data = er->er_data;
+		unsigned int data_len = er->er_data_len;
+		unsigned int copy;
+		unsigned int x;
+
+		ea->ea_num_ptrs = DIV_RU(er->er_data_len, sdp->sd_jbsize);
+		for (x = 0; x < ea->ea_num_ptrs; x++) {
+			struct buffer_head *bh;
+			uint64_t block;
+			int mh_size = sizeof(struct gfs2_meta_header);
+
+			block = gfs2_alloc_meta(ip);
+
+			bh = gfs2_meta_new(ip->i_gl, block);
+			gfs2_trans_add_bh(ip->i_gl, bh);
+			gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
+
+			ip->i_di.di_blocks++;
+
+			copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize :
+							     data_len;
+			memcpy(bh->b_data + mh_size, data, copy);
+			if (copy < sdp->sd_jbsize)
+				memset(bh->b_data + mh_size + copy, 0,
+				       sdp->sd_jbsize - copy);
+
+			*dataptr++ = cpu_to_gfs2_64((uint64_t)bh->b_blocknr);
+			data += copy;
+			data_len -= copy;
+
+			brelse(bh);
+		}
+
+		gfs2_assert_withdraw(sdp, !data_len);
+	}
+
+	return 0;
+}
+
+typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
+				   struct gfs2_ea_request *er,
+				   void *private);
+
+static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+			     unsigned int blks,
+			     ea_skeleton_call_t skeleton_call,
+			     void *private)
+{
+	struct gfs2_alloc *al;
+	struct buffer_head *dibh;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	if (error)
+		goto out_gunlock_q;
+
+	al->al_requested = blks;
+
+	error = gfs2_inplace_reserve(ip);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_trans_begin(ip->i_sbd,
+				 blks + al->al_rgd->rd_ri.ri_length +
+				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
+	if (error)
+		goto out_ipres;
+
+	error = skeleton_call(ip, er, private);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		if (er->er_flags & GFS2_ERF_MODE) {
+			gfs2_assert_withdraw(ip->i_sbd,
+					    (ip->i_di.di_mode & S_IFMT) ==
+					    (er->er_mode & S_IFMT));
+			ip->i_di.di_mode = er->er_mode;
+		}
+		ip->i_di.di_ctime = get_seconds();
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+ out_end_trans:
+	gfs2_trans_end(ip->i_sbd);
+
+ out_ipres:
+	gfs2_inplace_release(ip);
+
+ out_gunlock_q:
+	gfs2_quota_unlock(ip);
+
+ out:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+		     void *private)
+{
+	struct buffer_head *bh;
+	int error;
+
+	error = ea_alloc_blk(ip, &bh);
+	if (error)
+		return error;
+
+	ip->i_di.di_eattr = bh->b_blocknr;
+	error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
+
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * ea_init - initializes a new eattr block
+ * @ip:
+ * @er:
+ *
+ * Returns: errno
+ */
+
+static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	unsigned int jbsize = ip->i_sbd->sd_jbsize;
+	unsigned int blks = 1;
+
+	if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
+		blks += DIV_RU(er->er_data_len, jbsize);
+
+	return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
+}
+
+static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
+{
+	uint32_t ea_size = GFS2_EA_SIZE(ea);
+	struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea + ea_size);
+	uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size;
+	int last = ea->ea_flags & GFS2_EAFLAG_LAST;
+
+	ea->ea_rec_len = cpu_to_gfs2_32(ea_size);
+	ea->ea_flags ^= last;
+
+	new->ea_rec_len = cpu_to_gfs2_32(new_size);
+	new->ea_flags = last;
+
+	return new;
+}
+
+static void ea_set_remove_stuffed(struct gfs2_inode *ip,
+				  struct gfs2_ea_location *el)
+{
+	struct gfs2_ea_header *ea = el->el_ea;
+	struct gfs2_ea_header *prev = el->el_prev;
+	uint32_t len;
+
+	gfs2_trans_add_bh(ip->i_gl, el->el_bh);
+
+	if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
+		ea->ea_type = GFS2_EATYPE_UNUSED;
+		return;
+	} else if (GFS2_EA2NEXT(prev) != ea) {
+		prev = GFS2_EA2NEXT(prev);
+		gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea);
+	}
+
+	len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
+	prev->ea_rec_len = cpu_to_gfs2_32(len);
+
+	if (GFS2_EA_IS_LAST(ea))
+		prev->ea_flags |= GFS2_EAFLAG_LAST;
+}
+
+struct ea_set {
+	int ea_split;
+
+	struct gfs2_ea_request *es_er;
+	struct gfs2_ea_location *es_el;
+
+	struct buffer_head *es_bh;
+	struct gfs2_ea_header *es_ea;
+};
+
+static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
+				 struct gfs2_ea_header *ea, struct ea_set *es)
+{
+	struct gfs2_ea_request *er = es->es_er;
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0);
+	if (error)
+		return error;
+
+	gfs2_trans_add_bh(ip->i_gl, bh);
+
+	if (es->ea_split)
+		ea = ea_split_ea(ea);
+
+	ea_write(ip, ea, er);
+
+	if (es->es_el)
+		ea_set_remove_stuffed(ip, es->es_el);
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	if (er->er_flags & GFS2_ERF_MODE) {
+		gfs2_assert_withdraw(ip->i_sbd,
+			(ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
+		ip->i_di.di_mode = er->er_mode;
+	}
+	ip->i_di.di_ctime = get_seconds();
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+ out:
+	gfs2_trans_end(ip->i_sbd);
+
+	return error;
+}
+
+static int ea_set_simple_alloc(struct gfs2_inode *ip,
+			       struct gfs2_ea_request *er, void *private)
+{
+	struct ea_set *es = private;
+	struct gfs2_ea_header *ea = es->es_ea;
+	int error;
+
+	gfs2_trans_add_bh(ip->i_gl, es->es_bh);
+
+	if (es->ea_split)
+		ea = ea_split_ea(ea);
+
+	error = ea_write(ip, ea, er);
+	if (error)
+		return error;
+
+	if (es->es_el)
+		ea_set_remove_stuffed(ip, es->es_el);
+
+	return 0;
+}
+
+static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
+			 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
+			 void *private)
+{
+	struct ea_set *es = private;
+	unsigned int size;
+	int stuffed;
+	int error;
+
+	stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);
+
+	if (ea->ea_type == GFS2_EATYPE_UNUSED) {
+		if (GFS2_EA_REC_LEN(ea) < size)
+			return 0;
+		if (!GFS2_EA_IS_STUFFED(ea)) {
+			error = ea_remove_unstuffed(ip, bh, ea, prev, TRUE);
+			if (error)
+				return error;
+		}
+		es->ea_split = FALSE;
+	} else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
+		es->ea_split = TRUE;
+	else
+		return 0;
+
+	if (stuffed) {
+		error = ea_set_simple_noalloc(ip, bh, ea, es);
+		if (error)
+			return error;
+	} else {
+		unsigned int blks;
+
+		es->es_bh = bh;
+		es->es_ea = ea;
+		blks = 2 + DIV_RU(es->es_er->er_data_len, ip->i_sbd->sd_jbsize);
+
+		error = ea_alloc_skeleton(ip, es->es_er, blks,
+					  ea_set_simple_alloc, es);
+		if (error)
+			return error;
+	}
+
+	return 1;
+}
+
+static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+			void *private)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head *indbh, *newbh;
+	uint64_t *eablk;
+	int error;
+	int mh_size = sizeof(struct gfs2_meta_header);
+
+	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+		uint64_t *end;
+
+		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
+				       DIO_START | DIO_WAIT, &indbh);
+		if (error)
+			return error;
+
+		if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
+			error = -EIO;
+			goto out;
+		}
+
+		eablk = (uint64_t *)(indbh->b_data + mh_size);
+		end = eablk + sdp->sd_inptrs;
+
+		for (; eablk < end; eablk++)
+			if (!*eablk)
+				break;
+
+		if (eablk == end) {
+			error = -ENOSPC;
+			goto out;
+		}
+
+		gfs2_trans_add_bh(ip->i_gl, indbh);
+	} else {
+		uint64_t blk;
+
+		blk = gfs2_alloc_meta(ip);
+
+		indbh = gfs2_meta_new(ip->i_gl, blk);
+		gfs2_trans_add_bh(ip->i_gl, indbh);
+		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
+		gfs2_buffer_clear_tail(indbh, mh_size);
+
+		eablk = (uint64_t *)(indbh->b_data + mh_size);
+		*eablk = cpu_to_gfs2_64(ip->i_di.di_eattr);
+		ip->i_di.di_eattr = blk;
+		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
+		ip->i_di.di_blocks++;
+
+		eablk++;
+	}
+
+	error = ea_alloc_blk(ip, &newbh);
+	if (error)
+		goto out;
+
+	*eablk = cpu_to_gfs2_64((uint64_t)newbh->b_blocknr);
+	error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
+	brelse(newbh);
+	if (error)
+		goto out;
+
+	if (private)
+		ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private);
+
+ out:
+	brelse(indbh);
+
+	return error;
+}
+
+static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+		    struct gfs2_ea_location *el)
+{
+	struct ea_set es;
+	unsigned int blks = 2;
+	int error;
+
+	memset(&es, 0, sizeof(struct ea_set));
+	es.es_er = er;
+	es.es_el = el;
+
+	error = ea_foreach(ip, ea_set_simple, &es);
+	if (error > 0)
+		return 0;
+	if (error)
+		return error;
+
+	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
+		blks++;
+	if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
+		blks += DIV_RU(er->er_data_len, ip->i_sbd->sd_jbsize);
+
+	return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
+}
+
+static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
+				   struct gfs2_ea_location *el)
+{
+	if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
+		el->el_prev = GFS2_EA2NEXT(el->el_prev);
+		gfs2_assert_withdraw(ip->i_sbd,
+				     GFS2_EA2NEXT(el->el_prev) == el->el_ea);
+	}
+
+	return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,FALSE);
+}
+
+int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_ea_location el;
+	int error;
+
+	if (!ip->i_di.di_eattr) {
+		if (er->er_flags & XATTR_REPLACE)
+			return -ENODATA;
+		return ea_init(ip, er);
+	}
+
+	error = gfs2_ea_find(ip, er, &el);
+	if (error)
+		return error;
+
+	if (el.el_ea) {
+		if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
+			brelse(el.el_bh);
+			return -EPERM;
+		}
+
+		error = -EEXIST;
+		if (!(er->er_flags & XATTR_CREATE)) {
+			int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
+			error = ea_set_i(ip, er, &el);
+			if (!error && unstuffed)
+				ea_set_remove_unstuffed(ip, &el);
+		}
+
+		brelse(el.el_bh);
+	} else {
+		error = -ENODATA;
+		if (!(er->er_flags & XATTR_REPLACE))
+			error = ea_set_i(ip, er, NULL);
+	}
+
+	return error;
+}
+
+int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!er->er_name_len ||
+	    er->er_name_len > GFS2_EA_MAX_NAME_LEN)
+		return -EINVAL;
+	if (!er->er_data || !er->er_data_len) {
+		er->er_data = NULL;
+		er->er_data_len = 0;
+	}
+	error = ea_check_size(ip->i_sbd, er);
+	if (error)
+		return error;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	if (IS_IMMUTABLE(ip->i_vnode))
+		error = -EPERM;
+	else
+		error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
+{
+	struct gfs2_ea_header *ea = el->el_ea;
+	struct gfs2_ea_header *prev = el->el_prev;
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
+	if (error)
+		return error;
+
+	gfs2_trans_add_bh(ip->i_gl, el->el_bh);
+
+	if (prev) {
+		uint32_t len;
+
+		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
+		prev->ea_rec_len = cpu_to_gfs2_32(len);
+
+		if (GFS2_EA_IS_LAST(ea))
+			prev->ea_flags |= GFS2_EAFLAG_LAST;
+	} else
+		ea->ea_type = GFS2_EATYPE_UNUSED;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		ip->i_di.di_ctime = get_seconds();
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}	
+
+	gfs2_trans_end(ip->i_sbd);
+
+	return error;
+}
+
+int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_ea_location el;
+	int error;
+
+	if (!ip->i_di.di_eattr)
+		return -ENODATA;
+
+	error = gfs2_ea_find(ip, er, &el);
+	if (error)
+		return error;
+	if (!el.el_ea)
+		return -ENODATA;
+
+	if (GFS2_EA_IS_STUFFED(el.el_ea))
+		error = ea_remove_stuffed(ip, &el);
+	else
+		error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
+					    FALSE);
+
+	brelse(el.el_bh);
+
+	return error;
+}
+
+/**
+ * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
+ * @ip: pointer to the inode of the target file
+ * @er: request information
+ *
+ * Returns: errno
+ */
+
+int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
+		return -EINVAL;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
+		error = -EPERM;
+	else
+		error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
+				  struct gfs2_ea_header *ea, char *data)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct buffer_head **bh;
+	unsigned int amount = GFS2_EA_DATA_LEN(ea);
+	unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
+	uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
+	unsigned int x;
+	int error;
+
+	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
+	if (!bh)
+		return -ENOMEM;
+
+	error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
+	if (error)
+		goto out;
+
+	for (x = 0; x < nptrs; x++) {
+		error = gfs2_meta_read(ip->i_gl, gfs2_64_to_cpu(*dataptrs),
+				       DIO_START, bh + x);
+		if (error) {
+			while (x--)
+				brelse(bh[x]);
+			goto fail;
+		}
+		dataptrs++;
+	}
+
+	for (x = 0; x < nptrs; x++) {
+		error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
+		if (error) {
+			for (; x < nptrs; x++)
+				brelse(bh[x]);
+			goto fail;
+		}
+		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
+			for (; x < nptrs; x++)
+				brelse(bh[x]);
+			error = -EIO;
+			goto fail;
+		}
+
+		gfs2_trans_add_bh(ip->i_gl, bh[x]);
+
+		memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header),
+		       data,
+		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
+
+		amount -= sdp->sd_jbsize;
+		data += sdp->sd_jbsize;
+
+		brelse(bh[x]);
+	}
+
+ out:
+	kfree(bh);
+
+	return error;
+
+ fail:
+	gfs2_trans_end(sdp);
+	kfree(bh);
+
+	return error;
+}
+
+int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
+		      struct iattr *attr, char *data)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	if (GFS2_EA_IS_STUFFED(el->el_ea)) {
+		error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
+		if (error)
+			return error;
+
+		gfs2_trans_add_bh(ip->i_gl, el->el_bh);
+		memcpy(GFS2_EA2DATA(el->el_ea),
+		       data,
+		       GFS2_EA_DATA_LEN(el->el_ea));
+	} else
+		error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
+
+	if (error)
+		return error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		error = inode_setattr(ip->i_vnode, attr);
+		gfs2_assert_warn(ip->i_sbd, !error);
+		gfs2_inode_attr_out(ip);
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(ip->i_sbd);
+
+	return error;
+}
+
+static int ea_dealloc_indirect(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_rgrp_list rlist;
+	struct buffer_head *indbh, *dibh;
+	uint64_t *eablk, *end;
+	unsigned int rg_blocks = 0;
+	uint64_t bstart = 0;
+	unsigned int blen = 0;
+	unsigned int blks = 0;
+	unsigned int x;
+	int error;
+
+	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
+
+	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
+			       DIO_START | DIO_WAIT, &indbh);
+	if (error)
+		return error;
+
+	if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
+		error = -EIO;
+		goto out;
+	}
+
+	eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	end = eablk + sdp->sd_inptrs;
+
+	for (; eablk < end; eablk++) {
+		uint64_t bn;
+
+		if (!*eablk)
+			break;
+		bn = gfs2_64_to_cpu(*eablk);
+
+		if (bstart + blen == bn)
+			blen++;
+		else {
+			if (bstart)
+				gfs2_rlist_add(sdp, &rlist, bstart);
+			bstart = bn;
+			blen = 1;
+		}
+		blks++;
+	}
+	if (bstart)
+		gfs2_rlist_add(sdp, &rlist, bstart);
+	else
+		goto out;
+
+	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
+
+	for (x = 0; x < rlist.rl_rgrps; x++) {
+		struct gfs2_rgrpd *rgd;
+		rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
+		rg_blocks += rgd->rd_ri.ri_length;
+	}
+
+	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
+	if (error)
+		goto out_rlist_free;
+
+	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
+				 RES_INDIRECT + RES_STATFS +
+				 RES_QUOTA, blks);
+	if (error)
+		goto out_gunlock;
+
+	gfs2_trans_add_bh(ip->i_gl, indbh);
+
+	eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	bstart = 0;
+	blen = 0;
+
+	for (; eablk < end; eablk++) {
+		uint64_t bn;
+
+		if (!*eablk)
+			break;
+		bn = gfs2_64_to_cpu(*eablk);
+
+		if (bstart + blen == bn)
+			blen++;
+		else {
+			if (bstart)
+				gfs2_free_meta(ip, bstart, blen);
+			bstart = bn;
+			blen = 1;
+		}
+
+		*eablk = 0;
+		if (!ip->i_di.di_blocks)
+			gfs2_consist_inode(ip);
+		ip->i_di.di_blocks--;
+	}
+	if (bstart)
+		gfs2_free_meta(ip, bstart, blen);
+
+	ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
+
+ out_rlist_free:
+	gfs2_rlist_free(&rlist);
+
+ out:
+	brelse(indbh);
+
+	return error;
+}
+
+static int ea_dealloc_block(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_rgrpd *rgd;
+	struct buffer_head *dibh;
+	int error;
+
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
+	if (!rgd) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+				   &al->al_rgd_gh);
+	if (error)
+		return error;
+
+	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE +
+				 RES_STATFS + RES_QUOTA, 1);
+	if (error)
+		goto out_gunlock;
+
+	gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
+
+	ip->i_di.di_eattr = 0;
+	if (!ip->i_di.di_blocks)
+		gfs2_consist_inode(ip);
+	ip->i_di.di_blocks--;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (!error) {
+		gfs2_trans_add_bh(ip->i_gl, dibh);
+		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		brelse(dibh);
+	}
+
+	gfs2_trans_end(sdp);
+
+ out_gunlock:
+	gfs2_glock_dq_uninit(&al->al_rgd_gh);
+
+	return error;
+}
+
+/**
+ * gfs2_ea_dealloc - deallocate the extended attribute fork
+ * @ip: the inode
+ *
+ * Returns: errno
+ */
+
+int gfs2_ea_dealloc(struct gfs2_inode *ip)
+{
+	struct gfs2_alloc *al;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out_alloc;
+
+	error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
+	if (error)
+		goto out_quota;
+
+	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
+	if (error)
+		goto out_rindex;
+
+	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+		error = ea_dealloc_indirect(ip);
+		if (error)
+			goto out_rindex;
+	}
+
+	error = ea_dealloc_block(ip);
+
+ out_rindex:
+	gfs2_glock_dq_uninit(&al->al_ri_gh);
+
+ out_quota:
+	gfs2_quota_unhold(ip);
+
+ out_alloc:
+	gfs2_alloc_put(ip);
+
+	return error;
+}
+
+/**
+ * gfs2_get_eattr_meta - return all the eattr blocks of a file
+ * @dip: the directory
+ * @ub: the structure representing the user buffer to copy to
+ *
+ * Returns: errno
+ */
+
+int gfs2_get_eattr_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub)
+{
+	struct buffer_head *bh;
+	int error;
+
+	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
+			       DIO_START | DIO_WAIT, &bh);
+	if (error)
+		return error;
+
+	gfs2_add_bh_to_ub(ub, bh);
+
+	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+		struct buffer_head *eabh;
+		uint64_t *eablk, *end;
+
+		if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
+			error = -EIO;
+			goto out;
+		}
+
+		eablk = (uint64_t *)(bh->b_data +
+				     sizeof(struct gfs2_meta_header));
+		end = eablk + ip->i_sbd->sd_inptrs;
+
+		for (; eablk < end; eablk++) {
+			uint64_t bn;
+
+			if (!*eablk)
+				break;
+			bn = gfs2_64_to_cpu(*eablk);
+
+			error = gfs2_meta_read(ip->i_gl, bn,
+					       DIO_START | DIO_WAIT, &eabh);
+			if (error)
+				break;
+			gfs2_add_bh_to_ub(ub, eabh);
+			brelse(eabh);
+			if (error)
+				break;
+		}
+	}
+
+ out:
+	brelse(bh);
+
+	return error;
+}
+
--- a/fs/gfs2/eattr.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/eattr.h	2005-09-01 17:36:55.202132648 +0800
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __EATTR_DOT_H__
+#define __EATTR_DOT_H__
+
+#define GFS2_EA_REC_LEN(ea) gfs2_32_to_cpu((ea)->ea_rec_len)
+#define GFS2_EA_DATA_LEN(ea) gfs2_32_to_cpu((ea)->ea_data_len)
+
+#define GFS2_EA_SIZE(ea) \
+MAKE_MULT8(sizeof(struct gfs2_ea_header) + \
+	   (ea)->ea_name_len + \
+	   ((GFS2_EA_IS_STUFFED(ea)) ? \
+	    GFS2_EA_DATA_LEN(ea) : \
+	    (sizeof(uint64_t) * (ea)->ea_num_ptrs)))
+#define GFS2_EA_STRLEN(ea) \
+((((ea)->ea_type == GFS2_EATYPE_USR) ? 5 : 7) + \
+ (ea)->ea_name_len + 1)
+
+#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
+#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
+
+#define GFS2_EAREQ_SIZE_STUFFED(er) \
+MAKE_MULT8(sizeof(struct gfs2_ea_header) + \
+	   (er)->er_name_len + (er)->er_data_len)
+#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
+MAKE_MULT8(sizeof(struct gfs2_ea_header) + \
+	   (er)->er_name_len + \
+	   sizeof(uint64_t) * DIV_RU((er)->er_data_len, (sdp)->sd_jbsize))
+
+#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
+#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
+#define GFS2_EA2DATAPTRS(ea) \
+((uint64_t *)(GFS2_EA2NAME(ea) + MAKE_MULT8((ea)->ea_name_len)))
+#define GFS2_EA2NEXT(ea) \
+((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
+#define GFS2_EA_BH2FIRST(bh) \
+((struct gfs2_ea_header *)((bh)->b_data + \
+			  sizeof(struct gfs2_meta_header)))
+
+#define GFS2_ERF_MODE 0x80000000
+
+struct gfs2_ea_request {
+	char *er_name;
+	char *er_data;
+	unsigned int er_name_len;
+	unsigned int er_data_len;
+	unsigned int er_type; /* GFS2_EATYPE_... */
+	int er_flags;
+	mode_t er_mode;
+};
+
+struct gfs2_ea_location {
+	struct buffer_head *el_bh;
+	struct gfs2_ea_header *el_ea;
+	struct gfs2_ea_header *el_prev;
+};
+
+int gfs2_ea_repack(struct gfs2_inode *ip);
+
+int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+
+int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
+
+int gfs2_ea_dealloc(struct gfs2_inode *ip);
+
+int gfs2_get_eattr_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);
+
+/* Exported to acl.c */
+
+int gfs2_ea_find(struct gfs2_inode *ip,
+		 struct gfs2_ea_request *er,
+		 struct gfs2_ea_location *el);
+int gfs2_ea_get_copy(struct gfs2_inode *ip,
+		     struct gfs2_ea_location *el,
+		     char *data);
+int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
+		      struct iattr *attr, char *data);
+
+#endif /* __EATTR_DOT_H__ */
--- a/fs/gfs2/eaops.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/eaops.c	2005-09-01 17:36:55.190134472 +0800
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/xattr.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "acl.h"
+#include "eaops.h"
+#include "eattr.h"
+
+/**
+ * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
+ * @namep: ea name, possibly with type appended
+ *
+ * Returns: GFS2_EATYPE_XXX
+ */
+
+unsigned int gfs2_ea_name2type(const char *name, char **truncated_name)
+{
+	unsigned int type;
+
+	if (strncmp(name, "system.", 7) == 0) {
+		type = GFS2_EATYPE_SYS;
+		if (truncated_name)
+			*truncated_name = strchr(name, '.') + 1;
+	} else if (strncmp(name, "user.", 5) == 0) {
+		type = GFS2_EATYPE_USR;
+		if (truncated_name)
+			*truncated_name = strchr(name, '.') + 1;
+	} else {
+		type = GFS2_EATYPE_UNUSED;
+		if (truncated_name)
+			*truncated_name = NULL;
+	}
+
+	return type;
+}
+
+static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct inode *inode = ip->i_vnode;
+	int error = permission(inode, MAY_READ, NULL);
+	if (error)
+		return error;
+
+	return gfs2_ea_get_i(ip, er);
+}
+
+static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct inode *inode = ip->i_vnode;
+
+	if (S_ISREG(inode->i_mode) ||
+	    (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
+		int error = permission(inode, MAY_WRITE, NULL);
+		if (error)
+			return error;
+	} else
+		return -EPERM;
+
+	return gfs2_ea_set_i(ip, er);
+}
+
+static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	struct inode *inode = ip->i_vnode;
+
+	if (S_ISREG(inode->i_mode) ||
+	    (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
+		int error = permission(inode, MAY_WRITE, NULL);
+		if (error)
+			return error;
+	} else
+		return -EPERM;
+
+	return gfs2_ea_remove_i(ip, er);
+}
+
+static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
+	    !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return gfs2_ea_get_i(ip, er);
+}
+
+static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	int remove = FALSE;
+	int error;
+
+	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
+		if (!(er->er_flags & GFS2_ERF_MODE)) {
+			er->er_mode = ip->i_di.di_mode;
+			er->er_flags |= GFS2_ERF_MODE;
+		}
+		error = gfs2_acl_validate_set(ip, TRUE, er,
+					      &remove, &er->er_mode);
+		if (error)
+			return error;
+		error = gfs2_ea_set_i(ip, er);
+		if (error)
+			return error;
+		if (remove)
+			gfs2_ea_remove_i(ip, er);
+		return 0;
+
+	} else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
+		error = gfs2_acl_validate_set(ip, FALSE, er,
+					      &remove, NULL);
+		if (error)
+			return error;
+		if (!remove)
+			error = gfs2_ea_set_i(ip, er);
+		else {
+			error = gfs2_ea_remove_i(ip, er);
+			if (error == -ENODATA)
+				error = 0;
+		}
+		return error;	
+	}
+
+	return -EPERM;
+}
+
+static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+{
+	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
+		int error = gfs2_acl_validate_remove(ip, TRUE);
+		if (error)
+			return error;
+
+	} else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
+		int error = gfs2_acl_validate_remove(ip, FALSE);
+		if (error)
+			return error;
+
+	} else
+		return -EPERM;
+
+	return gfs2_ea_remove_i(ip, er);
+}
+
+struct gfs2_eattr_operations gfs2_user_eaops = {
+	.eo_get = user_eo_get,
+	.eo_set = user_eo_set,
+	.eo_remove = user_eo_remove,
+	.eo_name = "user",
+};
+
+struct gfs2_eattr_operations gfs2_system_eaops = {
+	.eo_get = system_eo_get,
+	.eo_set = system_eo_set,
+	.eo_remove = system_eo_remove,
+	.eo_name = "system",
+};
+
+struct gfs2_eattr_operations *gfs2_ea_ops[] = {
+	NULL,
+	&gfs2_user_eaops,
+	&gfs2_system_eaops,
+};
+
--- a/fs/gfs2/eaops.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/eaops.h	2005-09-01 17:36:55.190134472 +0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __EAOPS_DOT_H__
+#define __EAOPS_DOT_H__
+
+struct gfs2_ea_request;
+
+struct gfs2_eattr_operations {
+	int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
+	int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
+	int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
+	char *eo_name;
+};
+
+unsigned int gfs2_ea_name2type(const char *name, char **truncated_name);
+
+extern struct gfs2_eattr_operations gfs2_user_eaops;
+extern struct gfs2_eattr_operations gfs2_system_eaops;
+
+extern struct gfs2_eattr_operations *gfs2_ea_ops[];
+
+#endif /* __EAOPS_DOT_H__ */
+

[PATCH 06/13] GFS: logging and recovery

A per-node on-disk log is used for recovery.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/log.c      |  670 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/log.h      |   68 +++++
 fs/gfs2/recovery.c |  561 ++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/recovery.h |   32 ++
 4 files changed, 1331 insertions(+)

--- a/fs/gfs2/log.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/log.c	2005-09-01 17:36:55.338111976 +0800
@@ -0,0 +1,670 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "log.h"
+#include "lops.h"
+#include "meta_io.h"
+
+#define PULL 1
+
+static inline int is_done(struct gfs2_sbd *sdp, atomic_t *a)
+{
+	int done;
+	gfs2_log_lock(sdp);
+	done = atomic_read(a) ? FALSE : TRUE;
+	gfs2_log_unlock(sdp);
+	return done;
+}
+
+static void do_lock_wait(struct gfs2_sbd *sdp, wait_queue_head_t *wq,
+			 atomic_t *a)
+{
+	gfs2_log_unlock(sdp);
+	wait_event(*wq, is_done(sdp, a));
+	gfs2_log_lock(sdp);
+}
+
+static void lock_for_trans(struct gfs2_sbd *sdp)
+{
+	gfs2_log_lock(sdp);
+	do_lock_wait(sdp, &sdp->sd_log_trans_wq, &sdp->sd_log_flush_count);
+	atomic_inc(&sdp->sd_log_trans_count);
+	gfs2_log_unlock(sdp);
+}
+
+static void unlock_from_trans(struct gfs2_sbd *sdp)
+{
+	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_trans_count));
+	if (atomic_dec_and_test(&sdp->sd_log_trans_count))
+		wake_up(&sdp->sd_log_flush_wq);
+}
+
+void gfs2_lock_for_flush(struct gfs2_sbd *sdp)
+{
+	gfs2_log_lock(sdp);
+	atomic_inc(&sdp->sd_log_flush_count);
+	do_lock_wait(sdp, &sdp->sd_log_flush_wq, &sdp->sd_log_trans_count);
+	gfs2_log_unlock(sdp);
+}
+
+void gfs2_unlock_from_flush(struct gfs2_sbd *sdp)
+{
+	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_flush_count));
+	if (atomic_dec_and_test(&sdp->sd_log_flush_count))
+		wake_up(&sdp->sd_log_trans_wq);
+}
+
+/**
+ * gfs2_struct2blk - compute stuff
+ * @sdp: the filesystem
+ * @nstruct: the number of structures
+ * @ssize: the size of the structures
+ *
+ * Compute the number of log descriptor blocks needed to hold a certain number
+ * of structures of a certain size.
+ *
+ * Returns: the number of blocks needed (minimum is always 1)
+ */
+
+unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
+			     unsigned int ssize)
+{
+	unsigned int blks;
+	unsigned int first, second;
+
+	blks = 1;
+	first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
+
+	if (nstruct > first) {
+		second = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / ssize;
+		blks += DIV_RU(nstruct - first, second);
+	}
+
+	return blks;
+}
+
+void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+{
+	struct list_head *head = &sdp->sd_ail1_list;
+	uint64_t sync_gen;
+	struct list_head *first, *tmp;
+	struct gfs2_ail *first_ai, *ai;
+
+	gfs2_log_lock(sdp);
+	if (list_empty(head)) {
+		gfs2_log_unlock(sdp);
+		return;
+	}
+	sync_gen = sdp->sd_ail_sync_gen++;
+
+	first = head->prev;
+	first_ai = list_entry(first, struct gfs2_ail, ai_list);
+	first_ai->ai_sync_gen = sync_gen;
+	gfs2_ail1_start_one(sdp, first_ai);
+
+	if (flags & DIO_ALL)
+		first = NULL;
+
+	for (;;) {
+		if (first &&
+		    (head->prev != first ||
+		     gfs2_ail1_empty_one(sdp, first_ai, 0)))
+			break;
+
+		for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
+			ai = list_entry(tmp, struct gfs2_ail, ai_list);
+			if (ai->ai_sync_gen >= sync_gen)
+				continue;
+			ai->ai_sync_gen = sync_gen;
+			gfs2_ail1_start_one(sdp, ai);
+			break;
+		}
+
+		if (tmp == head)
+			break;
+	}
+
+	gfs2_log_unlock(sdp);
+}
+
+int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
+{
+	struct list_head *head, *tmp, *prev;
+	struct gfs2_ail *ai;
+	int ret;
+
+	gfs2_log_lock(sdp);
+
+	for (head = &sdp->sd_ail1_list, tmp = head->prev, prev = tmp->prev;
+	     tmp != head;
+	     tmp = prev, prev = tmp->prev) {
+		ai = list_entry(tmp, struct gfs2_ail, ai_list);
+		if (gfs2_ail1_empty_one(sdp, ai, flags))
+			list_move(&ai->ai_list, &sdp->sd_ail2_list);
+		else if (!(flags & DIO_ALL))
+			break;
+	}
+
+	ret = list_empty(head);
+
+	gfs2_log_unlock(sdp);
+
+	return ret;
+}
+
+static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
+{
+	struct list_head *head, *tmp, *next;
+	struct gfs2_ail *ai;
+	unsigned int old_tail = sdp->sd_log_tail;
+	int wrap = (new_tail < old_tail);
+	int a, b, rm;
+
+	gfs2_log_lock(sdp);
+
+	for (head = &sdp->sd_ail2_list, tmp = head->next, next = tmp->next;
+	     tmp != head;
+	     tmp = next, next = tmp->next) {
+		ai = list_entry(tmp, struct gfs2_ail, ai_list);
+
+		a = (old_tail <= ai->ai_first);
+		b = (ai->ai_first < new_tail);
+		rm = (wrap) ? (a || b) : (a && b);
+		if (!rm)
+			continue;
+
+		gfs2_ail2_empty_one(sdp, ai);
+		list_del(&ai->ai_list);
+		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
+		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
+		kfree(ai);
+	}
+
+	gfs2_log_unlock(sdp);
+}
+
+/**
+ * gfs2_log_reserve - Make a log reservation
+ * @sdp: The GFS2 superblock
+ * @blks: The number of blocks to reserve
+ *
+ * Returns: errno
+ */
+
+int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
+{
+	LIST_HEAD(list);
+	unsigned int try = 0;
+
+	if (gfs2_assert_warn(sdp, blks) ||
+	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
+		return -EINVAL;
+
+	for (;;) {
+		gfs2_log_lock(sdp);
+
+		if (list_empty(&list)) {
+			list_add_tail(&list, &sdp->sd_log_blks_list);
+			while (sdp->sd_log_blks_list.next != &list) {
+				DECLARE_WAITQUEUE(__wait_chan, current);
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				add_wait_queue(&sdp->sd_log_blks_wait,
+					       &__wait_chan);
+				gfs2_log_unlock(sdp);
+				schedule();
+				gfs2_log_lock(sdp);
+				remove_wait_queue(&sdp->sd_log_blks_wait,
+						  &__wait_chan);
+				set_current_state(TASK_RUNNING);
+			}
+		}
+
+		/* Never give away the last block so we can
+		   always pull the tail if we need to. */
+		if (sdp->sd_log_blks_free > blks) {
+			sdp->sd_log_blks_free -= blks;
+			list_del(&list);
+			gfs2_log_unlock(sdp);
+			wake_up(&sdp->sd_log_blks_wait);
+			break;
+		}
+
+		gfs2_log_unlock(sdp);
+
+		gfs2_ail1_empty(sdp, 0);
+		gfs2_log_flush(sdp);
+
+		if (try++)
+			gfs2_ail1_start(sdp, 0);
+	}
+
+	lock_for_trans(sdp);
+
+	return 0;
+}
+
+/**
+ * gfs2_log_release - Release a given number of log blocks
+ * @sdp: The GFS2 superblock
+ * @blks: The number of blocks
+ *
+ */
+
+void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
+{
+	unlock_from_trans(sdp);
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_blks_free += blks;
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
+	gfs2_log_unlock(sdp);
+}
+
+static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
+{
+	int new = FALSE;
+	uint64_t dbn;
+	int error;
+
+	error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, &new, &dbn, NULL);
+	gfs2_assert_withdraw(sdp, !error && dbn);
+
+	return dbn;
+}
+
+/**
+ * log_distance - Compute distance between two journal blocks
+ * @sdp: The GFS2 superblock
+ * @newer: The most recent journal block of the pair
+ * @older: The older journal block of the pair
+ *
+ *   Compute the distance (in the journal direction) between two
+ *   blocks in the journal
+ *
+ * Returns: the distance in blocks
+ */
+
+static inline unsigned int log_distance(struct gfs2_sbd *sdp,
+					unsigned int newer,
+					unsigned int older)
+{
+	int dist;
+
+	dist = newer - older;
+	if (dist < 0)
+		dist += sdp->sd_jdesc->jd_blocks;
+
+	return dist;
+}
+
+static unsigned int current_tail(struct gfs2_sbd *sdp)
+{
+	struct gfs2_ail *ai;
+	unsigned int tail;
+
+	gfs2_log_lock(sdp);
+
+	if (list_empty(&sdp->sd_ail1_list))
+		tail = sdp->sd_log_head;
+	else {
+		ai = list_entry(sdp->sd_ail1_list.prev,
+				struct gfs2_ail, ai_list);
+		tail = ai->ai_first;
+	}
+
+	gfs2_log_unlock(sdp);
+
+	return tail;
+}
+
+static inline void log_incr_head(struct gfs2_sbd *sdp)
+{
+	if (sdp->sd_log_flush_head == sdp->sd_log_tail)
+		gfs2_assert_withdraw(sdp,
+				sdp->sd_log_flush_head == sdp->sd_log_head);
+
+	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
+		sdp->sd_log_flush_head = 0;
+		sdp->sd_log_flush_wrapped = TRUE;
+	}
+}
+
+/**
+ * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: the buffer_head
+ */
+
+struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
+{
+	uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
+	struct gfs2_log_buf *lb;
+	struct buffer_head *bh;
+
+	lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_KERNEL | __GFP_NOFAIL);
+	list_add(&lb->lb_list, &sdp->sd_log_flush_list);
+
+	bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, bh->b_size);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+	unlock_buffer(bh);
+
+	log_incr_head(sdp);
+
+	return bh;
+}
+
+/**
+ * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
+ * @sdp: the filesystem
+ * @data: the data the buffer_head should point to
+ *
+ * Returns: the log buffer descriptor
+ */
+
+struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
+				      struct buffer_head *real)
+{
+	uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
+	struct gfs2_log_buf *lb;
+	struct buffer_head *bh;
+
+	lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_KERNEL | __GFP_NOFAIL);
+	list_add(&lb->lb_list, &sdp->sd_log_flush_list);
+	lb->lb_real = real;
+
+	bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
+	atomic_set(&bh->b_count, 1);
+	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
+	set_bh_page(bh, virt_to_page(real->b_data),
+		    ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
+	bh->b_blocknr = blkno;
+	bh->b_size = sdp->sd_sb.sb_bsize;
+	bh->b_bdev = sdp->sd_vfs->s_bdev;
+
+	log_incr_head(sdp);
+
+	return bh;
+}
+
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+{
+	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
+
+	ail2_empty(sdp, new_tail);
+
+	gfs2_log_lock(sdp);
+	sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
+	gfs2_log_unlock(sdp);
+
+	sdp->sd_log_tail = new_tail;
+}
+
+/**
+ * log_write_header - Get and initialize a journal header buffer
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: the initialized log buffer descriptor
+ */
+
+static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
+{
+	uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
+	struct buffer_head *bh;
+	struct gfs2_log_header lh;
+	unsigned int tail;
+	uint32_t hash;
+
+	atomic_inc(&sdp->sd_log_flush_ondisk);
+
+	bh = sb_getblk(sdp->sd_vfs, blkno);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, bh->b_size);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+	unlock_buffer(bh);
+
+	gfs2_ail1_empty(sdp, 0);
+	tail = current_tail(sdp);
+
+	memset(&lh, 0, sizeof(struct gfs2_log_header));
+	lh.lh_header.mh_magic = GFS2_MAGIC;
+	lh.lh_header.mh_type = GFS2_METATYPE_LH;
+	lh.lh_header.mh_format = GFS2_FORMAT_LH;
+	lh.lh_header.mh_blkno = blkno;
+	lh.lh_sequence = sdp->sd_log_sequence++;
+	lh.lh_flags = flags;
+	lh.lh_tail = tail;
+	lh.lh_blkno = sdp->sd_log_flush_head;
+	gfs2_log_header_out(&lh, bh->b_data);
+	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+	((struct gfs2_log_header *)bh->b_data)->lh_hash = cpu_to_gfs2_32(hash);
+
+	set_buffer_dirty(bh);
+	if (sync_dirty_buffer(bh))
+		gfs2_io_error_bh(sdp, bh);
+	brelse(bh);
+
+	if (sdp->sd_log_tail != tail)
+		log_pull_tail(sdp, tail, pull);
+	else
+		gfs2_assert_withdraw(sdp, !pull);
+
+	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
+	log_incr_head(sdp);
+}
+
+static void log_flush_commit(struct gfs2_sbd *sdp)
+{
+	struct list_head *head = &sdp->sd_log_flush_list;
+	struct gfs2_log_buf *lb;
+	struct buffer_head *bh;
+	unsigned int d;
+
+	d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);
+
+	gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);
+
+	while (!list_empty(head)) {
+		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
+		list_del(&lb->lb_list);
+		bh = lb->lb_bh;
+
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+			gfs2_io_error_bh(sdp, bh);
+		if (lb->lb_real) {
+			while (atomic_read(&bh->b_count) != 1)  /* Grrrr... */
+				schedule();
+			free_buffer_head(bh);
+		} else
+			brelse(bh);
+		kfree(lb);
+	}
+
+	log_write_header(sdp, 0, 0);
+}
+
+/**
+ * gfs2_log_flush_i - flush incore transaction(s)
+ * @sdp: the filesystem
+ * @gl: The glock structure to flush.  If NULL, flush the whole incore log
+ *
+ */
+
+void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
+{
+	struct gfs2_ail *ai;
+
+	atomic_inc(&sdp->sd_log_flush_incore);
+
+	ai = kzalloc(sizeof(struct gfs2_ail), GFP_KERNEL | __GFP_NOFAIL);
+	INIT_LIST_HEAD(&ai->ai_ail1_list);
+	INIT_LIST_HEAD(&ai->ai_ail2_list);
+
+	gfs2_lock_for_flush(sdp);
+	down(&sdp->sd_log_flush_lock);
+
+	gfs2_assert_withdraw(sdp,
+			sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+	gfs2_assert_withdraw(sdp,
+			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
+
+	if (gl && list_empty(&gl->gl_le.le_list)) {
+		up(&sdp->sd_log_flush_lock);
+		gfs2_unlock_from_flush(sdp);
+		kfree(ai);
+		return;
+	}
+
+	sdp->sd_log_flush_head = sdp->sd_log_head;
+	sdp->sd_log_flush_wrapped = FALSE;
+	ai->ai_first = sdp->sd_log_flush_head;
+
+	lops_before_commit(sdp);
+	if (!list_empty(&sdp->sd_log_flush_list))
+		log_flush_commit(sdp);
+	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+		log_write_header(sdp, 0, PULL);
+	lops_after_commit(sdp, ai);
+
+	sdp->sd_log_head = sdp->sd_log_flush_head;
+	if (sdp->sd_log_flush_wrapped)
+		sdp->sd_log_wraps++;
+
+	sdp->sd_log_blks_reserved =
+		sdp->sd_log_commited_buf =
+		sdp->sd_log_commited_revoke = 0;
+
+	gfs2_log_lock(sdp);
+	if (!list_empty(&ai->ai_ail1_list)) {
+		list_add(&ai->ai_list, &sdp->sd_ail1_list);
+		ai = NULL;
+	}
+	gfs2_log_unlock(sdp);
+
+	up(&sdp->sd_log_flush_lock);
+	sdp->sd_vfs->s_dirt = FALSE;
+	gfs2_unlock_from_flush(sdp);
+
+	kfree(ai);
+}
+
+static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+	unsigned int reserved = 1;
+	unsigned int old;
+
+	gfs2_log_lock(sdp);
+
+	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
+	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
+	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
+
+	if (sdp->sd_log_commited_buf)
+		reserved += 1 + sdp->sd_log_commited_buf;
+	if (sdp->sd_log_commited_revoke)
+		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+					    sizeof(uint64_t));
+
+	old = sdp->sd_log_blks_free;
+	sdp->sd_log_blks_free += tr->tr_reserved -
+				 (reserved - sdp->sd_log_blks_reserved);
+
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_blks_free >= old);
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
+
+	sdp->sd_log_blks_reserved = reserved;
+
+	gfs2_log_unlock(sdp);
+}
+
+/**
+ * gfs2_log_commit - Commit a transaction to the log
+ * @sdp: the filesystem
+ * @tr: the transaction
+ *
+ * Returns: errno
+ */
+
+void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+	log_refund(sdp, tr);
+	lops_incore_commit(sdp, tr);
+
+	sdp->sd_vfs->s_dirt = TRUE;
+	unlock_from_trans(sdp);
+
+	kfree(tr);
+
+	gfs2_log_lock(sdp);
+	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
+		gfs2_log_unlock(sdp);
+		gfs2_log_flush(sdp);
+	} else
+		gfs2_log_unlock(sdp);
+}
+
+/**
+ * gfs2_log_shutdown - write a shutdown header into a journal
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_log_shutdown(struct gfs2_sbd *sdp)
+{
+	down(&sdp->sd_log_flush_lock);
+
+	gfs2_assert_withdraw(sdp, !atomic_read(&sdp->sd_log_trans_count));
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
+	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
+	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
+
+	sdp->sd_log_flush_head = sdp->sd_log_head;
+	sdp->sd_log_flush_wrapped = FALSE;
+
+	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+
+	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free ==
+			     sdp->sd_jdesc->jd_blocks);
+	gfs2_assert_withdraw(sdp, sdp->sd_log_head == sdp->sd_log_tail);
+	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail2_list));
+
+	sdp->sd_log_head = sdp->sd_log_flush_head;
+	if (sdp->sd_log_flush_wrapped)
+		sdp->sd_log_wraps++;
+	sdp->sd_log_tail = sdp->sd_log_head;
+
+	up(&sdp->sd_log_flush_lock);
+}
+
--- a/fs/gfs2/log.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/log.h	2005-09-01 17:36:55.338111976 +0800
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LOG_DOT_H__
+#define __LOG_DOT_H__
+
+/**
+ * gfs2_log_lock - acquire the right to mess with the log manager
+ * @sdp: the filesystem
+ *
+ */
+
+static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
+{
+	spin_lock(&sdp->sd_log_lock);
+}
+
+/**
+ * gfs2_log_unlock - release the right to mess with the log manager
+ * @sdp: the filesystem
+ *
+ */
+
+static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
+{
+	spin_unlock(&sdp->sd_log_lock);
+}
+
+static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
+					  unsigned int value)
+{
+	if (++value == sdp->sd_jdesc->jd_blocks) {
+		value = 0;
+		sdp->sd_log_wraps++;
+	}
+	sdp->sd_log_head = sdp->sd_log_tail = value;
+}
+
+void gfs2_lock_for_flush(struct gfs2_sbd *sdp);
+void gfs2_unlock_from_flush(struct gfs2_sbd *sdp);
+
+unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
+			    unsigned int ssize);
+
+void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
+int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
+
+int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
+void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
+
+struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
+struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
+				      struct buffer_head *real);
+
+#define gfs2_log_flush(sdp) gfs2_log_flush_i((sdp), NULL)
+#define gfs2_log_flush_glock(gl) gfs2_log_flush_i((gl)->gl_sbd, (gl))
+void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
+void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
+
+void gfs2_log_shutdown(struct gfs2_sbd *sdp);
+
+#endif /* __LOG_DOT_H__ */
--- a/fs/gfs2/recovery.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/recovery.c	2005-09-01 17:36:55.451094800 +0800
@@ -0,0 +1,561 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "glops.h"
+#include "lm.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "recovery.h"
+#include "super.h"
+
+int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+			   struct buffer_head **bh)
+{
+	struct gfs2_glock *gl = jd->jd_inode->i_gl;
+	int new = FALSE;
+	uint64_t dblock;
+	uint32_t extlen;
+	int error;
+
+	error = gfs2_block_map(jd->jd_inode, blk, &new, &dblock, &extlen);
+	if (error)
+		return error;
+	if (!dblock) {
+		gfs2_consist_inode(jd->jd_inode);
+		return -EIO;
+	}
+
+	gfs2_meta_ra(gl, dblock, extlen);
+	error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
+
+	return error;
+}
+
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	struct list_head *head = &sdp->sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+	int found = FALSE;
+
+	list_for_each_entry(rr, head, rr_list) {
+		if (rr->rr_blkno == blkno) {
+			found = TRUE;
+			break;
+		}
+	}
+
+	if (found) {
+		rr->rr_where = where;
+		return 0;
+	}
+
+	rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
+	if (!rr)
+		return -ENOMEM;
+
+	rr->rr_blkno = blkno;
+	rr->rr_where = where;
+	list_add(&rr->rr_list, head);
+
+	return 1;
+}
+
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	struct gfs2_revoke_replay *rr;
+	int wrap, a, b, revoke;
+	int found = FALSE;
+
+	list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
+		if (rr->rr_blkno == blkno) {
+			found = TRUE;
+			break;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	wrap = (rr->rr_where < sdp->sd_replay_tail);
+	a = (sdp->sd_replay_tail < where);
+	b = (where < rr->rr_where);
+	revoke = (wrap) ? (a || b) : (a && b);
+
+	return revoke;
+}
+
+void gfs2_revoke_clean(struct gfs2_sbd *sdp)
+{
+	struct list_head *head = &sdp->sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+
+	while (!list_empty(head)) {
+		rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
+		list_del(&rr->rr_list);
+		kfree(rr);
+	}
+}
+
+/**
+ * get_log_header - read the log header for a given segment
+ * @jd: the journal
+ * @blk: the block to look at
+ * @lh: the log header to return
+ *
+ * Read the log header for a given segement in a given journal.  Do a few
+ * sanity checks on it.
+ *
+ * Returns: 0 on success,
+ *          1 if the header was invalid or incomplete,
+ *          errno on error
+ */
+
+static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
+			  struct gfs2_log_header *head)
+{
+	struct buffer_head *bh;
+	struct gfs2_log_header lh;
+	uint32_t hash;
+	int error;
+
+	error = gfs2_replay_read_block(jd, blk, &bh);
+	if (error)
+		return error;
+
+	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
+	lh.lh_hash = 0;
+	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
+	gfs2_log_header_in(&lh, bh->b_data);
+
+	brelse(bh);
+
+	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
+	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
+	    lh.lh_header.mh_blkno != bh->b_blocknr ||
+	    lh.lh_blkno != blk ||
+	    lh.lh_hash != hash)
+		return 1;
+
+	*head = lh;
+
+	return 0;
+}
+
+/**
+ * find_good_lh - find a good log header
+ * @jd: the journal
+ * @blk: the segment to start searching from
+ * @lh: the log header to fill in
+ * @forward: if true search forward in the log, else search backward
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, either scan forward or backward until we find a good one.
+ *
+ * Returns: errno
+ */
+
+static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
+			struct gfs2_log_header *head)
+{
+	unsigned int orig_blk = *blk;
+	int error;
+
+	for (;;) {
+		error = get_log_header(jd, *blk, head);
+		if (error <= 0)
+			return error;
+
+		if (++*blk == jd->jd_blocks)
+			*blk = 0;
+
+		if (*blk == orig_blk) {
+			gfs2_consist_inode(jd->jd_inode);
+			return -EIO;
+		}
+	}
+}
+
+/**
+ * jhead_scan - make sure we've found the head of the log
+ * @jd: the journal
+ * @head: this is filled in with the log descriptor of the head
+ *
+ * At this point, seg and lh should be either the head of the log or just
+ * before.  Scan forward until we find the head.
+ *
+ * Returns: errno
+ */
+
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+{
+	unsigned int blk = head->lh_blkno;
+	struct gfs2_log_header lh;
+	int error;
+
+	for (;;) {
+		if (++blk == jd->jd_blocks)
+			blk = 0;
+
+		error = get_log_header(jd, blk, &lh);
+		if (error < 0)
+			return error;
+		if (error == 1)
+			continue;
+
+		if (lh.lh_sequence == head->lh_sequence) {
+			gfs2_consist_inode(jd->jd_inode);
+			return -EIO;
+		}
+		if (lh.lh_sequence < head->lh_sequence)
+			break;
+
+		*head = lh;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @jd: the journal
+ * @head: the log descriptor for the head of the log is returned here
+ *
+ * Do a binary search of a journal and find the valid log entry with the
+ * highest sequence number.  (i.e. the log head)
+ *
+ * Returns: errno
+ */
+
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+{
+	struct gfs2_log_header lh_1, lh_m;
+	uint32_t blk_1, blk_2, blk_m;
+	int error;
+
+	blk_1 = 0;
+	blk_2 = jd->jd_blocks - 1;
+
+	for (;;) {
+		blk_m = (blk_1 + blk_2) / 2;
+
+		error = find_good_lh(jd, &blk_1, &lh_1);
+		if (error)
+			return error;
+
+		error = find_good_lh(jd, &blk_m, &lh_m);
+		if (error)
+			return error;
+
+		if (blk_1 == blk_m || blk_m == blk_2)
+			break;
+
+		if (lh_1.lh_sequence <= lh_m.lh_sequence)
+			blk_1 = blk_m;
+		else
+			blk_2 = blk_m;
+	}
+
+	error = jhead_scan(jd, &lh_1);
+	if (error)
+		return error;
+
+	*head = lh_1;
+
+	return error;
+}
+
+/**
+ * foreach_descriptor - go through the active part of the log
+ * @jd: the journal
+ * @start: the first log header in the active region
+ * @end: the last log header (don't process the contents of this entry))
+ *
+ * Call a given function once for every log descriptor in the active
+ * portion of the log.
+ *
+ * Returns: errno
+ */
+
+static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
+			      unsigned int end, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+	struct buffer_head *bh;
+	struct gfs2_log_descriptor ld;
+	int error = 0;
+
+	while (start != end) {
+		error = gfs2_replay_read_block(jd, start, &bh);
+		if (error)
+			return error;
+		if (gfs2_meta_check(sdp, bh)) {
+			brelse(bh);
+			return -EIO;
+		}
+		gfs2_log_descriptor_in(&ld, bh->b_data);
+		brelse(bh);
+
+		if (ld.ld_header.mh_type == GFS2_METATYPE_LH) {
+			struct gfs2_log_header lh;
+			error = get_log_header(jd, start, &lh);
+			if (!error) {
+				gfs2_replay_incr_blk(sdp, &start);
+				continue;
+			}
+			if (error == 1) {
+				gfs2_consist_inode(jd->jd_inode);
+				error = -EIO;
+			}
+			return error;
+		} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD))
+			return -EIO;
+
+		error = lops_scan_elements(jd, start, &ld, pass);
+		if (error)
+			return error;
+
+		while (ld.ld_length--)
+			gfs2_replay_incr_blk(sdp, &start);
+	}
+
+	return 0;
+}
+
+/**
+ * clean_journal - mark a dirty journal as being clean
+ * @sdp: the filesystem
+ * @jd: the journal
+ * @gl: the journal's glock
+ * @head: the head journal to start from
+ *
+ * Returns: errno
+ */
+
+static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+{
+	struct gfs2_inode *ip = jd->jd_inode;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int lblock;
+	int new = FALSE;
+	uint64_t dblock;
+	struct gfs2_log_header lh;
+	uint32_t hash;
+	struct buffer_head *bh;
+	int error;
+	
+	lblock = head->lh_blkno;
+	gfs2_replay_incr_blk(sdp, &lblock);
+	error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
+	if (error)
+		return error;
+	if (!dblock) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+
+	bh = sb_getblk(sdp->sd_vfs, dblock);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, bh->b_size);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+	unlock_buffer(bh);
+
+	memset(&lh, 0, sizeof(struct gfs2_log_header));
+	lh.lh_header.mh_magic = GFS2_MAGIC;
+	lh.lh_header.mh_type = GFS2_METATYPE_LH;
+	lh.lh_header.mh_format = GFS2_FORMAT_LH;
+	lh.lh_header.mh_blkno = dblock;
+	lh.lh_sequence = head->lh_sequence + 1;
+	lh.lh_flags = GFS2_LOG_HEAD_UNMOUNT;
+	lh.lh_blkno = lblock;
+	gfs2_log_header_out(&lh, bh->b_data);
+	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+	((struct gfs2_log_header *)bh->b_data)->lh_hash = cpu_to_gfs2_32(hash);
+
+	set_buffer_dirty(bh);
+	if (sync_dirty_buffer(bh))
+		gfs2_io_error_bh(sdp, bh);
+	brelse(bh);
+
+	return error;
+}
+
+/**
+ * gfs2_recover_journal - recovery a given journal
+ * @jd: the struct gfs2_jdesc describing the journal
+ * @wait: Don't return until the journal is clean (or an error is encountered)
+ *
+ * Acquire the journal's lock, check to see if the journal is clean, and
+ * do recovery if necessary.
+ *
+ * Returns: errno
+ */
+
+int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+	struct gfs2_log_header head;
+	struct gfs2_holder j_gh, ji_gh, t_gh;
+	unsigned long t;
+	int ro = FALSE;
+	unsigned int pass;
+	int error;
+
+	fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid);
+
+	/* Aquire the journal lock so we can do recovery */
+
+	error = gfs2_glock_nq_num(sdp,
+				  jd->jd_jid, &gfs2_journal_glops,
+				  LM_ST_EXCLUSIVE,
+				  LM_FLAG_NOEXP |
+				  ((wait) ? 0 : LM_FLAG_TRY) |
+				  GL_NOCACHE, &j_gh);
+	switch (error) {
+	case 0:
+		break;
+
+	case GLR_TRYFAILED:
+		fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
+		error = 0;
+
+	default:
+		goto fail;
+	};
+
+	error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED,
+				   LM_FLAG_NOEXP, &ji_gh);
+	if (error)
+		goto fail_gunlock_j;
+
+	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
+
+	error = gfs2_jdesc_check(jd);
+	if (error)
+		goto fail_gunlock_ji;
+
+	error = gfs2_find_jhead(jd, &head);
+	if (error)
+		goto fail_gunlock_ji;
+
+	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
+			jd->jd_jid);
+
+		t = jiffies;
+
+		/* Acquire a shared hold on the transaction lock */
+
+		error = gfs2_glock_nq_init(sdp->sd_trans_gl,
+					   LM_ST_SHARED,
+					   LM_FLAG_NOEXP |
+					   LM_FLAG_PRIORITY |
+					   GL_NEVER_RECURSE |
+					   GL_NOCANCEL |
+					   GL_NOCACHE,
+					   &t_gh);
+		if (error)
+			goto fail_gunlock_ji;
+
+		if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
+			if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+				ro = TRUE;
+		} else {
+			if (sdp->sd_vfs->s_flags & MS_RDONLY)
+				ro = TRUE;
+		}
+
+		if (ro) {
+			fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
+				jd->jd_jid);
+			error = -EROFS;
+			goto fail_gunlock_tr;
+		}
+
+		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
+
+		for (pass = 0; pass < 2; pass++) {
+			lops_before_scan(jd, &head, pass);
+			error = foreach_descriptor(jd, head.lh_tail,
+						   head.lh_blkno, pass);
+			lops_after_scan(jd, error, pass);
+			if (error)
+				goto fail_gunlock_tr;
+		}
+
+		error = clean_journal(jd, &head);
+		if (error)
+			goto fail_gunlock_tr;
+
+		gfs2_glock_dq_uninit(&t_gh);
+
+		t = DIV_RU(jiffies - t, HZ);
+		
+		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
+			jd->jd_jid, t);
+	}
+
+	gfs2_glock_dq_uninit(&ji_gh);
+
+	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
+
+	gfs2_glock_dq_uninit(&j_gh);
+
+	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
+
+	return 0;
+
+ fail_gunlock_tr:
+	gfs2_glock_dq_uninit(&t_gh);
+
+ fail_gunlock_ji:
+	gfs2_glock_dq_uninit(&ji_gh);
+
+ fail_gunlock_j:
+	gfs2_glock_dq_uninit(&j_gh);
+
+	fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
+
+ fail:
+	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
+
+	return error;
+}
+
+/**
+ * gfs2_check_journals - Recover any dirty journals
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_check_journals(struct gfs2_sbd *sdp)
+{
+	struct gfs2_jdesc *jd;
+
+	for (;;) {
+		jd = gfs2_jdesc_find_dirty(sdp);
+		if (!jd)
+			break;
+
+		if (jd != sdp->sd_jdesc)
+			gfs2_recover_journal(jd, NO_WAIT);
+	}
+}
+
--- a/fs/gfs2/recovery.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/recovery.h	2005-09-01 17:36:55.451094800 +0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __RECOVERY_DOT_H__
+#define __RECOVERY_DOT_H__
+
+static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
+{
+	if (++*blk == sdp->sd_jdesc->jd_blocks)
+	        *blk = 0;
+}
+
+int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+			   struct buffer_head **bh);
+
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
+void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+
+int gfs2_find_jhead(struct gfs2_jdesc *jd,
+		    struct gfs2_log_header *head);
+int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, int wait);
+void gfs2_check_journals(struct gfs2_sbd *sdp);
+
+#endif /* __RECOVERY_DOT_H__ */
+

[PATCH 07/13] GFS: quotas

Code that deals with quotas.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/lvb.c   |   61 ++
 fs/gfs2/lvb.h   |   28 +
 fs/gfs2/quota.c | 1209 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/quota.h |   34 +
 4 files changed, 1332 insertions(+)

--- a/fs/gfs2/quota.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/quota.c	2005-09-01 17:36:55.443096016 +0800
@@ -0,0 +1,1209 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/tty.h>
+#include <linux/sort.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "glock.h"
+#include "glops.h"
+#include "jdata.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "super.h"
+#include "trans.h"
+
+#define QUOTA_USER 1
+#define QUOTA_GROUP 0
+
+static uint64_t qd2offset(struct gfs2_quota_data *qd)
+{
+	uint64_t offset;
+
+	offset = 2 * (uint64_t)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
+	offset *= sizeof(struct gfs2_quota);
+
+	return offset;
+}
+
+static int qd_alloc(struct gfs2_sbd *sdp, int user, uint32_t id,
+		    struct gfs2_quota_data **qdp)
+{
+	struct gfs2_quota_data *qd;
+	int error;
+
+	qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
+	if (!qd)
+		return -ENOMEM;
+
+	qd->qd_count = 1;
+	qd->qd_id = id;
+	if (user)
+		set_bit(QDF_USER, &qd->qd_flags);
+	qd->qd_slot = -1;
+
+	error = gfs2_glock_get(sdp, 2 * (uint64_t)id + !user,
+			      &gfs2_quota_glops, CREATE, &qd->qd_gl);
+	if (error)
+		goto fail;
+
+	error = gfs2_lvb_hold(qd->qd_gl);
+	gfs2_glock_put(qd->qd_gl);
+	if (error)
+		goto fail;
+
+	*qdp = qd;
+
+	return 0;
+
+ fail:
+	kfree(qd);
+	return error;
+}
+
+static int qd_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
+		  struct gfs2_quota_data **qdp)
+{
+	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
+	int error, found;
+
+	*qdp = NULL;
+
+	for (;;) {
+		found = FALSE;
+		spin_lock(&sdp->sd_quota_spin);
+		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+			if (qd->qd_id == id &&
+			    !test_bit(QDF_USER, &qd->qd_flags) == !user) {
+				qd->qd_count++;
+				found = TRUE;
+				break;
+			}
+		}
+
+		if (!found)
+			qd = NULL;
+
+		if (!qd && new_qd) {
+			qd = new_qd;
+			list_add(&qd->qd_list, &sdp->sd_quota_list);
+			atomic_inc(&sdp->sd_quota_count);
+			new_qd = NULL;
+		}
+
+		spin_unlock(&sdp->sd_quota_spin);
+
+		if (qd || !create) {
+			if (new_qd) {
+				gfs2_lvb_unhold(new_qd->qd_gl);
+				kfree(new_qd);
+			}
+			*qdp = qd;
+			return 0;
+		}
+
+		error = qd_alloc(sdp, user, id, &new_qd);
+		if (error)
+			return error;
+	}
+}
+
+static void qd_hold(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+	spin_lock(&sdp->sd_quota_spin);
+	gfs2_assert(sdp, qd->qd_count,);
+	qd->qd_count++;
+	spin_unlock(&sdp->sd_quota_spin);
+}
+
+static void qd_put(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	spin_lock(&sdp->sd_quota_spin);
+	gfs2_assert(sdp, qd->qd_count,);
+	if (!--qd->qd_count)
+		qd->qd_last_touched = jiffies;
+	spin_unlock(&sdp->sd_quota_spin);
+}
+
+static int slot_get(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	unsigned int c, o = 0, b;
+	unsigned char byte = 0;
+
+	spin_lock(&sdp->sd_quota_spin);
+
+	if (qd->qd_slot_count++) {
+		spin_unlock(&sdp->sd_quota_spin);
+		return 0;
+	}
+
+	for (c = 0; c < sdp->sd_quota_chunks; c++)
+		for (o = 0; o < PAGE_SIZE; o++) {
+			byte = sdp->sd_quota_bitmap[c][o];
+			if (byte != 0xFF)
+				goto found;
+		}
+
+	goto fail;
+
+ found:
+	for (b = 0; b < 8; b++)
+		if (!(byte & (1 << b)))
+			break;
+	qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
+
+	if (qd->qd_slot >= sdp->sd_quota_slots)
+		goto fail;
+
+	sdp->sd_quota_bitmap[c][o] |= 1 << b;
+
+	spin_unlock(&sdp->sd_quota_spin);
+
+	return 0;
+
+ fail:
+	qd->qd_slot_count--;
+	spin_unlock(&sdp->sd_quota_spin);
+	return -ENOSPC;
+}
+
+static void slot_hold(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+	spin_lock(&sdp->sd_quota_spin);
+	gfs2_assert(sdp, qd->qd_slot_count,);
+	qd->qd_slot_count++;
+	spin_unlock(&sdp->sd_quota_spin);
+}
+
+static void slot_put(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+	spin_lock(&sdp->sd_quota_spin);
+	gfs2_assert(sdp, qd->qd_slot_count,);
+	if (!--qd->qd_slot_count) {
+		gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
+		qd->qd_slot = -1;
+	}
+	spin_unlock(&sdp->sd_quota_spin);
+}
+
+static int bh_get(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	struct gfs2_inode *ip = sdp->sd_qc_inode;
+	unsigned int block, offset;
+	uint64_t dblock;
+	int new = FALSE;
+	struct buffer_head *bh;
+	int error;
+
+	down(&sdp->sd_quota_mutex);
+
+	if (qd->qd_bh_count++) {
+		up(&sdp->sd_quota_mutex);
+		return 0;
+	}
+
+	block = qd->qd_slot / sdp->sd_qc_per_block;
+	offset = qd->qd_slot % sdp->sd_qc_per_block;;
+
+	error = gfs2_block_map(ip, block, &new, &dblock, NULL);
+	if (error)
+		goto fail;
+	error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
+	if (error)
+		goto fail;
+	error = -EIO;
+	if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
+		goto fail_brelse;
+
+	qd->qd_bh = bh;
+	qd->qd_bh_qc = (struct gfs2_quota_change *)
+		(bh->b_data + sizeof(struct gfs2_meta_header) +
+		 offset * sizeof(struct gfs2_quota_change));
+
+	up(&sdp->sd_quota_mutex);
+
+	return 0;
+
+ fail_brelse:
+	brelse(bh);
+
+ fail:
+	qd->qd_bh_count--;
+	up(&sdp->sd_quota_mutex);
+	return error;
+}
+
+static void bh_put(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+	down(&sdp->sd_quota_mutex);
+	gfs2_assert(sdp, qd->qd_bh_count,);
+	if (!--qd->qd_bh_count) {
+		brelse(qd->qd_bh);
+		qd->qd_bh = NULL;
+		qd->qd_bh_qc = NULL;
+	}
+	up(&sdp->sd_quota_mutex);
+}
+
+static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
+{
+	struct gfs2_quota_data *qd = NULL;
+	int error;
+	int found = FALSE;
+
+	*qdp = NULL;
+
+	if (sdp->sd_vfs->s_flags & MS_RDONLY)
+		return 0;
+
+	spin_lock(&sdp->sd_quota_spin);
+
+	list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+		if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+		    !test_bit(QDF_CHANGE, &qd->qd_flags) ||
+		    qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
+			continue;
+
+		list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
+
+		set_bit(QDF_LOCKED, &qd->qd_flags);
+		gfs2_assert_warn(sdp, qd->qd_count);
+		qd->qd_count++;
+		qd->qd_change_sync = qd->qd_change;
+		gfs2_assert_warn(sdp, qd->qd_slot_count);
+		qd->qd_slot_count++;
+		found = TRUE;
+
+		break;
+	}
+
+	if (!found)
+		qd = NULL;
+
+	spin_unlock(&sdp->sd_quota_spin);
+
+	if (qd) {
+		gfs2_assert_warn(sdp, qd->qd_change_sync);
+		error = bh_get(qd);
+		if (error) {
+			clear_bit(QDF_LOCKED, &qd->qd_flags);
+			slot_put(qd);
+			qd_put(qd);
+			return error;
+		}
+	}
+
+	*qdp = qd;
+
+	return 0;
+}
+
+static int qd_trylock(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+	if (sdp->sd_vfs->s_flags & MS_RDONLY)
+		return FALSE;
+
+	spin_lock(&sdp->sd_quota_spin);
+
+	if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+	    !test_bit(QDF_CHANGE, &qd->qd_flags)) {
+		spin_unlock(&sdp->sd_quota_spin);
+		return FALSE;
+	}
+
+	list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
+
+	set_bit(QDF_LOCKED, &qd->qd_flags);
+	gfs2_assert_warn(sdp, qd->qd_count);
+	qd->qd_count++;
+	qd->qd_change_sync = qd->qd_change;
+	gfs2_assert_warn(sdp, qd->qd_slot_count);
+	qd->qd_slot_count++;
+
+	spin_unlock(&sdp->sd_quota_spin);
+
+	gfs2_assert_warn(sdp, qd->qd_change_sync);
+	if (bh_get(qd)) {
+		clear_bit(QDF_LOCKED, &qd->qd_flags);
+		slot_put(qd);
+		qd_put(qd);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+static void qd_unlock(struct gfs2_quota_data *qd)
+{
+	gfs2_assert_warn(qd->qd_gl->gl_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
+	clear_bit(QDF_LOCKED, &qd->qd_flags);
+	bh_put(qd);
+	slot_put(qd);
+	qd_put(qd);
+}
+
+static int qdsb_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
+		    struct gfs2_quota_data **qdp)
+{
+	int error;
+
+	error = qd_get(sdp, user, id, create, qdp);
+	if (error)
+		return error;
+
+	error = slot_get(*qdp);
+	if (error)
+		goto fail;
+
+	error = bh_get(*qdp);
+	if (error)
+		goto fail_slot;
+
+	return 0;
+
+ fail_slot:
+	slot_put(*qdp);
+
+ fail:
+	qd_put(*qdp);
+	return error;
+}
+
+static void qdsb_put(struct gfs2_quota_data *qd)
+{
+	bh_put(qd);
+	slot_put(qd);
+	qd_put(qd);
+}
+
+int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_quota_data **qd = al->al_qd;
+	int error;
+
+	if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
+	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
+		return -EIO;
+
+	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+		return 0;
+
+	error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
+	if (error)
+		goto out;
+	al->al_qd_num++;
+	qd++;
+
+	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
+	if (error)
+		goto out;
+	al->al_qd_num++;
+	qd++;
+
+	if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
+		error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
+		if (error)
+			goto out;
+		al->al_qd_num++;
+		qd++;
+	}
+
+	if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
+		error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
+		if (error)
+			goto out;
+		al->al_qd_num++;
+		qd++;
+	}
+
+ out:
+	if (error)
+		gfs2_quota_unhold(ip);
+
+	return error;
+}
+
+void gfs2_quota_unhold(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	unsigned int x;
+
+	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		qdsb_put(al->al_qd[x]);
+		al->al_qd[x] = NULL;
+	}
+	al->al_qd_num = 0;
+}
+
+static int sort_qd(const void *a, const void *b)
+{
+	struct gfs2_quota_data *qd_a = *(struct gfs2_quota_data **)a;
+	struct gfs2_quota_data *qd_b = *(struct gfs2_quota_data **)b;
+	int ret = 0;
+
+	if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
+	    !test_bit(QDF_USER, &qd_b->qd_flags)) {
+		if (test_bit(QDF_USER, &qd_a->qd_flags))
+			ret = -1;
+		else
+			ret = 1;
+	} else {
+		if (qd_a->qd_id < qd_b->qd_id)
+			ret = -1;
+		else if (qd_a->qd_id > qd_b->qd_id)
+			ret = 1;
+	}
+
+	return ret;
+}
+
+static void do_qc(struct gfs2_quota_data *qd, int64_t change)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	struct gfs2_inode *ip = sdp->sd_qc_inode;
+	struct gfs2_quota_change *qc = qd->qd_bh_qc;
+	int64_t x;
+
+	down(&sdp->sd_quota_mutex);
+	gfs2_trans_add_bh(ip->i_gl, qd->qd_bh);
+
+	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
+		qc->qc_change = 0;
+		qc->qc_flags = 0;
+		if (test_bit(QDF_USER, &qd->qd_flags))
+			qc->qc_flags = cpu_to_gfs2_32(GFS2_QCF_USER);
+		qc->qc_id = cpu_to_gfs2_32(qd->qd_id);
+	}
+
+	x = qc->qc_change;
+	x = gfs2_64_to_cpu(x) + change;
+	qc->qc_change = cpu_to_gfs2_64(x);
+
+	spin_lock(&sdp->sd_quota_spin);
+	qd->qd_change = x;
+	spin_unlock(&sdp->sd_quota_spin);
+
+	if (!x) {
+		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
+		clear_bit(QDF_CHANGE, &qd->qd_flags);
+		qc->qc_flags = 0;
+		qc->qc_id = 0;
+		slot_put(qd);
+		qd_put(qd);
+	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
+		qd_hold(qd);
+		slot_hold(qd);
+	}
+			
+	up(&sdp->sd_quota_mutex);
+}
+
+static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
+{
+	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
+	struct gfs2_inode *ip = sdp->sd_quota_inode;
+	unsigned int data_blocks, ind_blocks;
+	struct gfs2_holder *ghs, i_gh;
+	unsigned int qx, x;
+	struct gfs2_quota_data *qd;
+	uint64_t offset;
+	unsigned int nalloc = 0;
+	struct gfs2_alloc *al = NULL;
+	int error;
+
+	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
+			      &data_blocks, &ind_blocks);
+
+	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!ghs)
+		return -ENOMEM;
+
+	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
+	for (qx = 0; qx < num_qd; qx++) {
+		error = gfs2_glock_nq_init(qda[qx]->qd_gl,
+					   LM_ST_EXCLUSIVE,
+					   GL_NOCACHE, &ghs[qx]);
+		if (error)
+			goto out;
+	}
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		goto out;
+
+	for (x = 0; x < num_qd; x++) {
+		int alloc_required;
+
+		offset = qd2offset(qda[x]);
+		error = gfs2_write_alloc_required(ip, offset,
+						  sizeof(struct gfs2_quota),
+						  &alloc_required);
+		if (error)
+			goto out_gunlock;
+		if (alloc_required)
+			nalloc++;
+	}
+
+	if (nalloc) {
+		al = gfs2_alloc_get(ip);
+
+		al->al_requested = nalloc * (data_blocks + ind_blocks);
+
+		error = gfs2_inplace_reserve(ip);
+		if (error)
+			goto out_alloc;
+
+		error = gfs2_trans_begin(sdp,
+					 al->al_rgd->rd_ri.ri_length +
+					 num_qd * data_blocks +
+					 nalloc * ind_blocks +
+					 RES_DINODE + num_qd +
+					 RES_STATFS, 0);
+		if (error)
+			goto out_ipres;
+	} else {
+		error = gfs2_trans_begin(sdp,
+					 num_qd * data_blocks +
+					 RES_DINODE + num_qd, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	for (x = 0; x < num_qd; x++) {
+		char buf[sizeof(struct gfs2_quota)];
+		struct gfs2_quota q;
+
+		qd = qda[x];
+		offset = qd2offset(qd);
+
+		/* The quota file may not be a multiple of
+		   sizeof(struct gfs2_quota) bytes. */
+		memset(buf, 0, sizeof(struct gfs2_quota));
+
+		error = gfs2_jdata_read_mem(ip, buf, offset,
+					    sizeof(struct gfs2_quota));
+		if (error < 0)
+			goto out_end_trans;
+
+		gfs2_quota_in(&q, buf);
+		q.qu_value += qda[x]->qd_change_sync;
+		gfs2_quota_out(&q, buf);
+
+		error = gfs2_jdata_write_mem(ip, buf, offset,
+					     sizeof(struct gfs2_quota));
+		if (error < 0)
+			goto out_end_trans;
+		else if (error != sizeof(struct gfs2_quota)) {
+			error = -EIO;
+			goto out_end_trans;
+		}
+
+		do_qc(qd, -qd->qd_change_sync);
+
+		memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
+		qd->qd_qb.qb_magic = GFS2_MAGIC;
+		qd->qd_qb.qb_limit = q.qu_limit;
+		qd->qd_qb.qb_warn = q.qu_warn;
+		qd->qd_qb.qb_value = q.qu_value;
+
+		gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
+	}
+
+	error = 0;
+
+ out_end_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipres:
+	if (nalloc)
+		gfs2_inplace_release(ip);
+
+ out_alloc:
+	if (nalloc)
+		gfs2_alloc_put(ip);
+
+ out_gunlock:
+	gfs2_glock_dq_uninit(&i_gh);
+
+ out:
+	while (qx--)
+		gfs2_glock_dq_uninit(&ghs[qx]);
+	kfree(ghs);
+	gfs2_log_flush_glock(ip->i_gl);
+
+	return error;
+}
+
+static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
+		    struct gfs2_holder *q_gh)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	struct gfs2_holder i_gh;
+	struct gfs2_quota q;
+	char buf[sizeof(struct gfs2_quota)];
+	int error;
+
+ restart:
+	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
+	if (error)
+		return error;
+
+	gfs2_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
+
+	if (force_refresh || qd->qd_qb.qb_magic != GFS2_MAGIC) {
+		gfs2_glock_dq_uninit(q_gh);
+		error = gfs2_glock_nq_init(qd->qd_gl,
+					  LM_ST_EXCLUSIVE, GL_NOCACHE,
+					  q_gh);
+		if (error)
+			return error;
+
+		error = gfs2_glock_nq_init(sdp->sd_quota_inode->i_gl,
+					  LM_ST_SHARED, 0,
+					  &i_gh);
+		if (error)
+			goto fail;
+
+		memset(buf, 0, sizeof(struct gfs2_quota));
+
+		error = gfs2_jdata_read_mem(sdp->sd_quota_inode, buf,
+					    qd2offset(qd),
+					    sizeof(struct gfs2_quota));
+		if (error < 0)
+			goto fail_gunlock;
+
+		gfs2_glock_dq_uninit(&i_gh);
+
+		gfs2_quota_in(&q, buf);
+
+		memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
+		qd->qd_qb.qb_magic = GFS2_MAGIC;
+		qd->qd_qb.qb_limit = q.qu_limit;
+		qd->qd_qb.qb_warn = q.qu_warn;
+		qd->qd_qb.qb_value = q.qu_value;
+
+		gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
+
+		if (gfs2_glock_is_blocking(qd->qd_gl)) {
+			gfs2_glock_dq_uninit(q_gh);
+			force_refresh = FALSE;
+			goto restart;
+		}
+	}
+
+	return 0;
+
+ fail_gunlock:
+	gfs2_glock_dq_uninit(&i_gh);
+
+ fail:
+	gfs2_glock_dq_uninit(q_gh);
+
+	return error;
+}
+
+int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	unsigned int x;
+	int error = 0;
+
+	gfs2_quota_hold(ip, uid, gid);
+
+	if (capable(CAP_SYS_RESOURCE) ||
+	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+		return 0;
+
+	sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
+	     sort_qd, NULL);
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
+		if (error)
+			break;
+	}
+
+	if (!error)
+		set_bit(GIF_QD_LOCKED, &ip->i_flags);
+	else {
+		while (x--)
+			gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
+		gfs2_quota_unhold(ip);
+	}
+
+	return error;
+}
+
+static int need_sync(struct gfs2_quota_data *qd)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	struct gfs2_tune *gt = &sdp->sd_tune;
+	int64_t value;
+	unsigned int num, den;
+	int do_sync = TRUE;
+
+	if (!qd->qd_qb.qb_limit)
+		return FALSE;
+
+	spin_lock(&sdp->sd_quota_spin);
+	value = qd->qd_change;
+	spin_unlock(&sdp->sd_quota_spin);
+
+	spin_lock(&gt->gt_spin);
+	num = gt->gt_quota_scale_num;
+	den = gt->gt_quota_scale_den;
+	spin_unlock(&gt->gt_spin);
+
+	if (value < 0)
+		do_sync = FALSE;
+	else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
+		do_sync = FALSE;
+	else {
+		value *= gfs2_jindex_size(sdp) * num;
+		do_div(value, den);
+		value += qd->qd_qb.qb_value;
+		if (value < (int64_t)qd->qd_qb.qb_limit)
+			do_sync = FALSE;
+	}
+
+	return do_sync;
+}
+
+void gfs2_quota_unlock(struct gfs2_inode *ip)
+{
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_quota_data *qda[4];
+	unsigned int count = 0;
+	unsigned int x;
+
+	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
+		goto out;
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		struct gfs2_quota_data *qd;
+		int sync;
+
+		qd = al->al_qd[x];
+		sync = need_sync(qd);
+
+		gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
+
+		if (sync && qd_trylock(qd))
+			qda[count++] = qd;
+	}
+
+	if (count) {
+		do_sync(count, qda);
+		for (x = 0; x < count; x++)
+			qd_unlock(qda[x]);
+	}
+
+ out:
+	gfs2_quota_unhold(ip);
+}
+
+#define MAX_LINE 256
+
+static int print_message(struct gfs2_quota_data *qd, char *type)
+{
+	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+	struct tty_struct *tty;
+	char *line;
+	int len;
+
+	line = kmalloc(MAX_LINE, GFP_KERNEL);
+	if (!line)
+		return -ENOMEM;
+
+	len = snprintf(line, MAX_LINE, "GFS2: fsid=%s: quota %s for %s %u\r\n",
+		       sdp->sd_fsname, type,
+		       (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
+		       qd->qd_id);
+
+	if (current->signal) {
+		tty = current->signal->tty;
+		if (tty && tty->driver->write)
+			tty->driver->write(tty, line, len);
+	}
+
+	kfree(line);
+
+	return 0;
+}
+
+int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_quota_data *qd;
+	int64_t value;
+	unsigned int x;
+	int error = 0;
+
+	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
+		return 0;
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		qd = al->al_qd[x];
+
+		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
+		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
+			continue;
+
+		value = qd->qd_qb.qb_value;
+		spin_lock(&sdp->sd_quota_spin);
+		value += qd->qd_change;
+		spin_unlock(&sdp->sd_quota_spin);
+
+		if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
+			print_message(qd, "exceeded");
+			error = -EDQUOT;
+			break;
+		} else if (qd->qd_qb.qb_warn &&
+			   (int64_t)qd->qd_qb.qb_warn < value &&
+			   time_after_eq(jiffies, qd->qd_last_warn +
+					 gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) {
+			error = print_message(qd, "warning");
+			qd->qd_last_warn = jiffies;
+		}
+	}
+
+	return error;
+}
+
+void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
+		       uint32_t uid, uint32_t gid)
+{
+	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_quota_data *qd;
+	unsigned int x;
+	unsigned int found = 0;
+
+	if (gfs2_assert_warn(ip->i_sbd, change))
+		return;
+	if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
+		return;
+
+	for (x = 0; x < al->al_qd_num; x++) {
+		qd = al->al_qd[x];
+
+		if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
+		    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
+			do_qc(qd, change);
+			found++;
+		}
+	}
+}
+
+int gfs2_quota_sync(struct gfs2_sbd *sdp)
+{
+	struct gfs2_quota_data **qda;
+	unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
+	unsigned int num_qd;
+	unsigned int x;
+	int error = 0;
+
+	sdp->sd_quota_sync_gen++;
+
+	qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
+	if (!qda)
+		return -ENOMEM;
+
+	do {
+		num_qd = 0;
+
+		for (;;) {
+			error = qd_fish(sdp, qda + num_qd);
+			if (error || !qda[num_qd])
+				break;
+			if (++num_qd == max_qd)
+				break;
+		}
+
+		if (num_qd) {
+			if (!error)
+				error = do_sync(num_qd, qda);
+			if (!error)
+				for (x = 0; x < num_qd; x++)
+					qda[x]->qd_sync_gen =
+						sdp->sd_quota_sync_gen;
+
+			for (x = 0; x < num_qd; x++)
+				qd_unlock(qda[x]);
+		}
+	} while (!error && num_qd == max_qd);
+
+	kfree(qda);
+
+	return error;
+}
+
+int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id)
+{
+	struct gfs2_quota_data *qd;
+	struct gfs2_holder q_gh;
+	int error;
+
+	error = qd_get(sdp, user, id, CREATE, &qd);
+	if (error)
+		return error;
+
+	error = do_glock(qd, FORCE, &q_gh);
+	if (!error)
+		gfs2_glock_dq_uninit(&q_gh);
+
+	qd_put(qd);
+
+	return error;
+}
+
+int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
+		    struct gfs2_quota *q)
+{
+	struct gfs2_quota_data *qd;
+	struct gfs2_holder q_gh;
+	int error;
+
+	if (((user) ? (id != current->fsuid) : (!in_group_p(id))) &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	error = qd_get(sdp, user, id, CREATE, &qd);
+	if (error)
+		return error;
+
+	error = do_glock(qd, NO_FORCE, &q_gh);
+	if (error)
+		goto out;
+
+	memset(q, 0, sizeof(struct gfs2_quota));
+	q->qu_limit = qd->qd_qb.qb_limit;
+	q->qu_warn = qd->qd_qb.qb_warn;
+	q->qu_value = qd->qd_qb.qb_value;
+
+	spin_lock(&sdp->sd_quota_spin);
+	q->qu_value += qd->qd_change;
+	spin_unlock(&sdp->sd_quota_spin);
+
+	gfs2_glock_dq_uninit(&q_gh);
+
+ out:
+	qd_put(qd);
+
+	return error;
+}
+
+int gfs2_quota_init(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inode *ip = sdp->sd_qc_inode;
+	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+	unsigned int x, slot = 0;
+	unsigned int found = 0;
+	uint64_t dblock;
+	uint32_t extlen = 0;
+	int error;
+
+	if (!ip->i_di.di_size ||
+	    ip->i_di.di_size > (64 << 20) ||
+	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
+		gfs2_consist_inode(ip);
+		return -EIO;		
+	}
+	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
+	sdp->sd_quota_chunks = DIV_RU(sdp->sd_quota_slots, 8 * PAGE_SIZE);
+
+	error = -ENOMEM;
+
+	sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
+				       sizeof(unsigned char *), GFP_KERNEL);
+	if (!sdp->sd_quota_bitmap)
+		return error;
+
+	for (x = 0; x < sdp->sd_quota_chunks; x++) {
+		sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!sdp->sd_quota_bitmap[x])
+			goto fail;
+	}
+
+	for (x = 0; x < blocks; x++) {
+		struct buffer_head *bh;
+		unsigned int y;
+
+		if (!extlen) {
+			int new = FALSE;
+			error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
+			if (error)
+				goto fail;
+		}
+		gfs2_meta_ra(ip->i_gl,  dblock, extlen);
+		error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
+				       &bh);
+		if (error)
+			goto fail;
+		error = -EIO;
+		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
+			brelse(bh);
+			goto fail;
+		}
+
+		for (y = 0;
+		     y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
+		     y++, slot++) {
+			struct gfs2_quota_change qc;
+			struct gfs2_quota_data *qd;
+
+			gfs2_quota_change_in(&qc, bh->b_data +
+					  sizeof(struct gfs2_meta_header) +
+					  y * sizeof(struct gfs2_quota_change));
+			if (!qc.qc_change)
+				continue;
+
+			error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
+					 qc.qc_id, &qd);
+			if (error) {
+				brelse(bh);
+				goto fail;
+			}
+
+			set_bit(QDF_CHANGE, &qd->qd_flags);
+			qd->qd_change = qc.qc_change;
+			qd->qd_slot = slot;
+			qd->qd_slot_count = 1;
+			qd->qd_last_touched = jiffies;
+
+			spin_lock(&sdp->sd_quota_spin);
+			gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
+			list_add(&qd->qd_list, &sdp->sd_quota_list);
+			atomic_inc(&sdp->sd_quota_count);
+			spin_unlock(&sdp->sd_quota_spin);
+
+			found++;
+		}
+
+		brelse(bh);
+		dblock++;
+		extlen--;
+	}
+
+	if (found)
+		fs_info(sdp, "found %u quota changes\n", found);
+
+	return 0;
+
+ fail:
+	gfs2_quota_cleanup(sdp);
+	return error;
+}
+
+void gfs2_quota_scan(struct gfs2_sbd *sdp)
+{
+	struct gfs2_quota_data *qd, *safe;
+	LIST_HEAD(dead);
+
+	spin_lock(&sdp->sd_quota_spin);
+	list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
+		if (!qd->qd_count &&
+		    time_after_eq(jiffies, qd->qd_last_touched +
+			        gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
+			list_move(&qd->qd_list, &dead);
+			gfs2_assert_warn(sdp,
+					 atomic_read(&sdp->sd_quota_count) > 0);
+			atomic_dec(&sdp->sd_quota_count);
+		}
+	}
+	spin_unlock(&sdp->sd_quota_spin);
+
+	while (!list_empty(&dead)) {
+		qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
+		list_del(&qd->qd_list);
+
+		gfs2_assert_warn(sdp, !qd->qd_change);
+		gfs2_assert_warn(sdp, !qd->qd_slot_count);
+		gfs2_assert_warn(sdp, !qd->qd_bh_count);
+
+		gfs2_lvb_unhold(qd->qd_gl);
+		kfree(qd);
+	}
+}
+
+void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
+{
+	struct list_head *head = &sdp->sd_quota_list;
+	struct gfs2_quota_data *qd;
+	unsigned int x;
+
+	spin_lock(&sdp->sd_quota_spin);
+	while (!list_empty(head)) {
+		qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
+
+		if (qd->qd_count > 1 ||
+		    (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
+			list_move(&qd->qd_list, head);
+			spin_unlock(&sdp->sd_quota_spin);
+			schedule();
+			spin_lock(&sdp->sd_quota_spin);
+			continue;
+		}
+
+		list_del(&qd->qd_list);
+		atomic_dec(&sdp->sd_quota_count);
+		spin_unlock(&sdp->sd_quota_spin);
+
+		if (!qd->qd_count) {
+			gfs2_assert_warn(sdp, !qd->qd_change);
+			gfs2_assert_warn(sdp, !qd->qd_slot_count);
+		} else
+			gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
+		gfs2_assert_warn(sdp, !qd->qd_bh_count);
+
+		gfs2_lvb_unhold(qd->qd_gl);
+		kfree(qd);
+
+		spin_lock(&sdp->sd_quota_spin);
+	}
+	spin_unlock(&sdp->sd_quota_spin);
+
+	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
+
+	if (sdp->sd_quota_bitmap) {
+		for (x = 0; x < sdp->sd_quota_chunks; x++)
+			kfree(sdp->sd_quota_bitmap[x]);
+		kfree(sdp->sd_quota_bitmap);
+	}
+}
+
--- a/fs/gfs2/quota.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/quota.h	2005-09-01 17:36:55.443096016 +0800
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __QUOTA_DOT_H__
+#define __QUOTA_DOT_H__
+
+#define NO_QUOTA_CHANGE ((uint32_t)-1)
+
+int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
+void gfs2_quota_unhold(struct gfs2_inode *ip);
+
+int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
+void gfs2_quota_unlock(struct gfs2_inode *ip);
+
+int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
+void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
+		       uint32_t uid, uint32_t gid);
+
+int gfs2_quota_sync(struct gfs2_sbd *sdp);
+int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id);
+int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
+		    struct gfs2_quota *q);
+
+int gfs2_quota_init(struct gfs2_sbd *sdp);
+void gfs2_quota_scan(struct gfs2_sbd *sdp);
+void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+
+#endif /* __QUOTA_DOT_H__ */
--- a/fs/gfs2/lvb.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lvb.c	2005-09-01 17:36:55.360108632 +0800
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+
+#define pv(struct, member, fmt) printk("  "#member" = "fmt"\n", struct->member);
+
+#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
+#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
+#define CPIN_16(s1, s2, member) {(s1->member) = gfs2_16_to_cpu((s2->member));}
+#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs2_16((s1->member));}
+#define CPIN_32(s1, s2, member) {(s1->member) = gfs2_32_to_cpu((s2->member));}
+#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs2_32((s1->member));}
+#define CPIN_64(s1, s2, member) {(s1->member) = gfs2_64_to_cpu((s2->member));}
+#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs2_64((s1->member));}
+
+void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb)
+{
+	struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
+
+	CPIN_32(qb, str, qb_magic);
+	CPIN_32(qb, str, qb_pad);
+	CPIN_64(qb, str, qb_limit);
+	CPIN_64(qb, str, qb_warn);
+	CPIN_64(qb, str, qb_value);
+}
+
+void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb)
+{
+	struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
+
+	CPOUT_32(qb, str, qb_magic);
+	CPOUT_32(qb, str, qb_pad);
+	CPOUT_64(qb, str, qb_limit);
+	CPOUT_64(qb, str, qb_warn);
+	CPOUT_64(qb, str, qb_value);
+}
+
+void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb)
+{
+	pv(qb, qb_magic, "%u");
+	pv(qb, qb_pad, "%u");
+	pv(qb, qb_limit, "%"PRIu64);
+	pv(qb, qb_warn, "%"PRIu64);
+	pv(qb, qb_value, "%"PRId64);
+}
+
--- a/fs/gfs2/lvb.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lvb.h	2005-09-01 17:36:55.360108632 +0800
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LVB_DOT_H__
+#define __LVB_DOT_H__
+
+#define GFS2_MIN_LVB_SIZE 32
+
+struct gfs2_quota_lvb {
+	uint32_t qb_magic;
+	uint32_t qb_pad;
+	uint64_t qb_limit;      /* Hard limit of # blocks to alloc */
+	uint64_t qb_warn;       /* Warn user when alloc is above this # */
+	int64_t qb_value;       /* Current # blocks allocated */
+};
+
+void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb);
+void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb);
+void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb);
+
+#endif /* __LVB_DOT_H__ */
+

[PATCH 08/13] GFS: mount and tuning options

There are a variety of mount options, tunable parameters, internal
statistics, and methods of online file system manipulation.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/ioctl.c  | 1485 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/ioctl.h  |   15 
 fs/gfs2/mount.c  |  209 +++++++
 fs/gfs2/mount.h  |   15 
 fs/gfs2/resize.c |  285 ++++++++++
 fs/gfs2/resize.h |   19 
 fs/gfs2/sys.c    |  201 +++++++
 fs/gfs2/sys.h    |   24 
 8 files changed, 2253 insertions(+)

--- a/fs/gfs2/ioctl.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ioctl.c	2005-09-01 17:36:55.321114560 +0800
@@ -0,0 +1,1485 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/gfs2_ioctl.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "dir.h"
+#include "eattr.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "ioctl.h"
+#include "jdata.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "resize.h"
+#include "rgrp.h"
+#include "super.h"
+#include "trans.h"
+
+typedef int (*gi_filler_t) (struct gfs2_inode *ip,
+			    struct gfs2_ioctl *gi,
+			    char *buf,
+			    unsigned int size,
+			    unsigned int *count);
+
+#define ARG_SIZE 32
+
+/**
+ * gi_skeleton - Setup a buffer that functions can print into
+ * @ip:
+ * @gi:
+ * @filler:
+ *
+ * Returns: -errno or count of bytes copied to userspace
+ */
+
+static int gi_skeleton(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+		       gi_filler_t filler)
+{
+	unsigned int size = gfs2_tune_get(ip->i_sbd, gt_lockdump_size);
+	char *buf;
+	unsigned int count = 0;
+	int error;
+
+	if (size > gi->gi_size)
+		size = gi->gi_size;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	error = filler(ip, gi, buf, size, &count);
+	if (error)
+		goto out;
+
+	if (copy_to_user(gi->gi_data, buf, count + 1))
+		error = -EFAULT;
+	else
+		error = count + 1;
+
+ out:
+	kfree(buf);
+
+	return error;
+}
+
+/**
+ * gi_get_cookie - Return the "cookie" (identifying string) for a
+ *		 filesystem mount
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_cookie(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+			 char *buf, unsigned int size, unsigned int *count)
+{
+	int error = -ENOBUFS;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("%lu", (unsigned long)ip->i_sbd);
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * gi_get_super - Return the "struct gfs2_sb" for a filesystem
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_super(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_holder sb_gh;
+	struct buffer_head *bh;
+	struct gfs2_sb *sb;
+	int error;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	if (gi->gi_size != sizeof(struct gfs2_sb))
+		return -EINVAL;
+
+	sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
+	if (!sb)
+		return -ENOMEM;
+
+	error = gfs2_glock_nq_num(sdp,
+				 GFS2_SB_LOCK, &gfs2_meta_glops,
+				 LM_ST_SHARED, 0, &sb_gh);
+	if (error)
+		goto out;
+
+	error = gfs2_meta_read(sb_gh.gh_gl,
+			       GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
+			       DIO_START | DIO_WAIT,
+			       &bh);
+	if (error) {
+		gfs2_glock_dq_uninit(&sb_gh);
+		goto out;
+	}
+	gfs2_sb_in(sb, bh->b_data);
+	brelse(bh);
+
+	gfs2_glock_dq_uninit(&sb_gh);
+
+	if (copy_to_user(gi->gi_data, sb,
+			 sizeof(struct gfs2_sb)))
+		error = -EFAULT;
+	else
+		error = sizeof(struct gfs2_sb);
+
+ out:
+	kfree(sb);
+
+	return error;
+}
+
+/**
+ * gi_get_args - Return the mount arguments
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_args(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+		       char *buf, unsigned int size, unsigned int *count)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_args *args = &sdp->sd_args;
+	int error = -ENOBUFS;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("lockproto %s\n", args->ar_lockproto);
+	gfs2_printf("locktable %s\n", args->ar_locktable);
+	gfs2_printf("hostdata %s\n", args->ar_hostdata);
+	gfs2_printf("spectator %d\n", args->ar_spectator);
+	gfs2_printf("ignore_local_fs %d\n", args->ar_ignore_local_fs);
+	gfs2_printf("localcaching %d\n", args->ar_localcaching);
+	gfs2_printf("localflocks %d\n", args->ar_localflocks);
+	gfs2_printf("oopses_ok %d\n", args->ar_oopses_ok);
+	gfs2_printf("debug %d\n", args->ar_debug);
+	gfs2_printf("upgrade %d\n", args->ar_upgrade);
+	gfs2_printf("num_glockd %u\n", args->ar_num_glockd);
+	gfs2_printf("posix_acl %d\n", args->ar_posix_acl);
+	gfs2_printf("quota %u\n", args->ar_quota);
+	gfs2_printf("suiddir %d\n", args->ar_suiddir);
+	gfs2_printf("data %d\n", args->ar_data);
+	gfs2_printf("noatime %d\n", !!test_bit(SDF_NOATIME, &sdp->sd_flags));
+
+	error = 0;
+	
+ out:
+	return error;
+}
+
+/**
+ * gi_get_lockstruct - Return the information in the FS' lockstruct
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_lockstruct(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+			     char *buf, unsigned int size, unsigned int *count)
+{
+	struct lm_lockstruct *ls = &ip->i_sbd->sd_lockstruct;
+	int error = -ENOBUFS;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("jid %u\n", ls->ls_jid);
+	gfs2_printf("first %u\n", ls->ls_first);
+	gfs2_printf("lvb_size %u\n", ls->ls_lvb_size);
+	gfs2_printf("flags %d\n", ls->ls_flags);
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * gi_get_statfs - Return a filesystem's space usage information
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_statfs(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+			 char *buf, unsigned int size, unsigned int *count)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_statfs_change sc;
+	int error;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	if (gfs2_tune_get(sdp, gt_statfs_slow))
+		error = gfs2_statfs_slow(sdp, &sc);
+	else
+		error = gfs2_statfs_i(sdp, &sc);
+
+	if (error)
+		return error;
+
+	error = -ENOBUFS;
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("bsize %u\n", sdp->sd_sb.sb_bsize);
+	gfs2_printf("total %"PRIu64"\n", sc.sc_total);
+	gfs2_printf("free %"PRIu64"\n", sc.sc_free);
+	gfs2_printf("dinodes %"PRIu64"\n", sc.sc_dinodes);
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * handle_roll - Read a atomic_t as an unsigned int
+ * @a: a counter
+ *
+ * if @a is negative, reset it to zero
+ *
+ * Returns: the value of the counter
+ */
+
+static unsigned int handle_roll(atomic_t *a)
+{
+	int x = atomic_read(a);
+	if (x < 0) {
+		atomic_set(a, 0);
+		return 0;
+	}
+	return (unsigned int)x;
+}
+
+/**
+ * gi_get_counters - Return usage counters
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_counters(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+			   char *buf, unsigned int size, unsigned int *count)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	int error = -ENOBUFS;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("sd_glock_count:locks::%d\n",
+		    atomic_read(&sdp->sd_glock_count));
+	gfs2_printf("sd_glock_held_count:locks held::%d\n",
+		    atomic_read(&sdp->sd_glock_held_count));
+	gfs2_printf("sd_inode_count:incore inodes::%d\n",
+		    atomic_read(&sdp->sd_inode_count));
+	gfs2_printf("sd_bufdata_count:metadata buffers::%d\n",
+		    atomic_read(&sdp->sd_bufdata_count));
+	gfs2_printf("sd_unlinked_count:unlinked inodes::%d\n",
+		    atomic_read(&sdp->sd_unlinked_count));
+	gfs2_printf("sd_quota_count:quota IDs::%d\n",
+		    atomic_read(&sdp->sd_quota_count));
+	gfs2_printf("sd_log_num_gl:Glocks in current transaction::%u\n",
+		    sdp->sd_log_num_gl);
+	gfs2_printf("sd_log_num_buf:Blocks in current transaction::%u\n",
+		    sdp->sd_log_num_buf);
+	gfs2_printf("sd_log_num_revoke:Revokes in current transaction::%u\n",
+		    sdp->sd_log_num_revoke);
+	gfs2_printf("sd_log_num_rg:RGs in current transaction::%u\n",
+		    sdp->sd_log_num_rg);
+	gfs2_printf("sd_log_num_databuf:Databufs in current transaction::%u\n",
+		    sdp->sd_log_num_databuf);
+	gfs2_printf("sd_log_blks_free:log blks free::%u\n",
+		    sdp->sd_log_blks_free);
+	gfs2_printf("jd_blocks:log blocks total::%u\n",
+		    sdp->sd_jdesc->jd_blocks);
+	gfs2_printf("sd_reclaim_count:glocks on reclaim list::%d\n",
+		    atomic_read(&sdp->sd_reclaim_count));
+	gfs2_printf("sd_log_wraps:log wraps::%"PRIu64"\n",
+		    sdp->sd_log_wraps);
+	gfs2_printf("sd_bio_outstanding:outstanding BIO calls::%u\n",
+		    atomic_read(&sdp->sd_bio_outstanding));
+	gfs2_printf("sd_fh2dentry_misses:fh2dentry misses:diff:%u\n",
+		    handle_roll(&sdp->sd_fh2dentry_misses));
+	gfs2_printf("sd_reclaimed:glocks reclaimed:diff:%u\n",
+		    handle_roll(&sdp->sd_reclaimed));
+	gfs2_printf("sd_log_flush_incore:log incore flushes:diff:%u\n",
+		    handle_roll(&sdp->sd_log_flush_incore));
+	gfs2_printf("sd_log_flush_ondisk:log ondisk flushes:diff:%u\n",
+		    handle_roll(&sdp->sd_log_flush_ondisk));
+	gfs2_printf("sd_glock_nq_calls:glock nq calls:diff:%u\n",
+		    handle_roll(&sdp->sd_glock_nq_calls));
+	gfs2_printf("sd_glock_dq_calls:glock dq calls:diff:%u\n",
+		    handle_roll(&sdp->sd_glock_dq_calls));
+	gfs2_printf("sd_glock_prefetch_calls:glock prefetch calls:diff:%u\n",
+		    handle_roll(&sdp->sd_glock_prefetch_calls));
+	gfs2_printf("sd_lm_lock_calls:lm_lock calls:diff:%u\n",
+		    handle_roll(&sdp->sd_lm_lock_calls));
+	gfs2_printf("sd_lm_unlock_calls:lm_unlock calls:diff:%u\n",
+		    handle_roll(&sdp->sd_lm_unlock_calls));
+	gfs2_printf("sd_lm_callbacks:lm callbacks:diff:%u\n",
+		    handle_roll(&sdp->sd_lm_callbacks));
+	gfs2_printf("sd_ops_address:address operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_address));
+	gfs2_printf("sd_ops_dentry:dentry operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_dentry));
+	gfs2_printf("sd_ops_export:export operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_export));
+	gfs2_printf("sd_ops_file:file operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_file));
+	gfs2_printf("sd_ops_inode:inode operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_inode));
+	gfs2_printf("sd_ops_super:super operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_super));
+	gfs2_printf("sd_ops_vm:vm operations:diff:%u\n",
+		    handle_roll(&sdp->sd_ops_vm));
+	gfs2_printf("sd_bio_reads:block I/O reads:diff:%u\n",
+		    handle_roll(&sdp->sd_bio_reads) >>
+		    (sdp->sd_sb.sb_bsize_shift - 9));
+	gfs2_printf("sd_bio_writes:block I/O writes:diff:%u\n",
+		    handle_roll(&sdp->sd_bio_writes) >>
+		    (sdp->sd_sb.sb_bsize_shift - 9));
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * gi_get_tune - Return current values of the tuneable parameters
+ * @ip:
+ * @gi:
+ * @buf:
+ * @size:
+ * @count:
+ *
+ * Returns: errno
+ */
+
+static int gi_get_tune(struct gfs2_inode *ip, struct gfs2_ioctl *gi,
+		       char *buf, unsigned int size, unsigned int *count)
+{
+	struct gfs2_tune *gt = &ip->i_sbd->sd_tune;
+	int error = -ENOBUFS;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	spin_lock(&gt->gt_spin);
+
+	gfs2_printf("version 0\n");
+	gfs2_printf("ilimit %u\n", gt->gt_ilimit);
+	gfs2_printf("ilimit_tries %u\n", gt->gt_ilimit_tries);
+	gfs2_printf("ilimit_min %u\n", gt->gt_ilimit_min);
+	gfs2_printf("demote_secs %u\n", gt->gt_demote_secs);
+	gfs2_printf("incore_log_blocks %u\n", gt->gt_incore_log_blocks);
+	gfs2_printf("log_flush_secs %u\n", gt->gt_log_flush_secs);
+	gfs2_printf("jindex_refresh_secs %u\n", gt->gt_jindex_refresh_secs);
+	gfs2_printf("scand_secs %u\n", gt->gt_scand_secs);
+	gfs2_printf("recoverd_secs %u\n", gt->gt_recoverd_secs);
+	gfs2_printf("logd_secs %u\n", gt->gt_logd_secs);
+	gfs2_printf("quotad_secs %u\n", gt->gt_quotad_secs);
+	gfs2_printf("inoded_secs %u\n", gt->gt_inoded_secs);
+	gfs2_printf("quota_simul_sync %u\n", gt->gt_quota_simul_sync);
+	gfs2_printf("quota_warn_period %u\n", gt->gt_quota_warn_period);
+	gfs2_printf("quota_scale_num %u\n", gt->gt_quota_scale_num);
+	gfs2_printf("quota_scale_den %u\n", gt->gt_quota_scale_den);
+	gfs2_printf("quota_cache_secs %u\n", gt->gt_quota_cache_secs);
+	gfs2_printf("quota_quantum %u\n", gt->gt_quota_quantum);
+	gfs2_printf("atime_quantum %u\n", gt->gt_atime_quantum);
+	gfs2_printf("new_files_jdata %u\n", gt->gt_new_files_jdata);
+	gfs2_printf("new_files_directio %u\n", gt->gt_new_files_directio);
+	gfs2_printf("max_atomic_write %u\n", gt->gt_max_atomic_write);
+	gfs2_printf("max_readahead %u\n", gt->gt_max_readahead);
+	gfs2_printf("lockdump_size %u\n", gt->gt_lockdump_size);
+	gfs2_printf("stall_secs %u\n", gt->gt_stall_secs);
+	gfs2_printf("complain_secs %u\n", gt->gt_complain_secs);
+	gfs2_printf("reclaim_limit %u\n", gt->gt_reclaim_limit);
+	gfs2_printf("entries_per_readdir %u\n", gt->gt_entries_per_readdir);
+	gfs2_printf("prefetch_secs %u\n", gt->gt_prefetch_secs);
+	gfs2_printf("greedy_default %u\n", gt->gt_greedy_default);
+	gfs2_printf("greedy_quantum %u\n", gt->gt_greedy_quantum);
+	gfs2_printf("greedy_max %u\n", gt->gt_greedy_max);
+	gfs2_printf("statfs_quantum %u\n", gt->gt_statfs_quantum);
+	gfs2_printf("statfs_slow %u\n", gt->gt_statfs_slow);
+
+	error = 0;
+
+ out:
+	spin_unlock(&gt->gt_spin);
+
+	return error;
+}
+
+#define tune_set(f, v) \
+do { \
+	spin_lock(&gt->gt_spin); \
+	gt->f = (v); \
+	spin_unlock(&gt->gt_spin); \
+} while (0)
+
+/**
+ * gi_set_tune - Set a tuneable parameter
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_set_tune(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_tune *gt = &sdp->sd_tune;
+	char param[ARG_SIZE], value[ARG_SIZE];
+	unsigned int x;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 3)
+		return -EINVAL;
+
+	if (strncpy_from_user(param, gi->gi_argv[1], ARG_SIZE) < 0)
+		return -EFAULT;
+	param[ARG_SIZE - 1] = 0;
+
+	if (strncpy_from_user(value, gi->gi_argv[2], ARG_SIZE) < 0)
+		return -EFAULT;
+	value[ARG_SIZE - 1] = 0;
+
+	if (strcmp(param, "ilimit") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_ilimit, x);
+
+	} else if (strcmp(param, "ilimit_tries") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_ilimit_tries, x);
+
+	} else if (strcmp(param, "ilimit_min") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_ilimit_min, x);
+
+	} else if (strcmp(param, "demote_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_demote_secs, x);
+
+	} else if (strcmp(param, "incore_log_blocks") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_incore_log_blocks, x);
+
+	} else if (strcmp(param, "log_flush_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_log_flush_secs, x);
+
+	} else if (strcmp(param, "jindex_refresh_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_jindex_refresh_secs, x);
+
+	} else if (strcmp(param, "scand_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_scand_secs, x);
+		wake_up_process(sdp->sd_scand_process);
+
+	} else if (strcmp(param, "recoverd_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_recoverd_secs, x);
+		wake_up_process(sdp->sd_recoverd_process);
+
+	} else if (strcmp(param, "logd_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_logd_secs, x);
+		wake_up_process(sdp->sd_logd_process);
+
+	} else if (strcmp(param, "quotad_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_quotad_secs, x);
+		wake_up_process(sdp->sd_quotad_process);
+
+	} else if (strcmp(param, "inoded_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_inoded_secs, x);
+		wake_up_process(sdp->sd_inoded_process);
+
+	} else if (strcmp(param, "quota_simul_sync") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_quota_simul_sync, x);
+
+	} else if (strcmp(param, "quota_warn_period") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_quota_warn_period, x);
+
+	} else if (strcmp(param, "quota_scale") == 0) {
+		unsigned int y;
+		if (sscanf(value, "%u %u", &x, &y) != 2 || !y)
+			return -EINVAL;
+		spin_lock(&gt->gt_spin);
+		gt->gt_quota_scale_num = x;
+		gt->gt_quota_scale_den = y;
+		spin_unlock(&gt->gt_spin);
+
+	} else if (strcmp(param, "quota_cache_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_quota_cache_secs, x);
+
+	} else if (strcmp(param, "quota_quantum") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_quota_quantum, x);
+
+	} else if (strcmp(param, "atime_quantum") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_atime_quantum, x);
+
+	} else if (strcmp(param, "new_files_jdata") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		x = !!x;
+		tune_set(gt_new_files_jdata, x);
+
+	} else if (strcmp(param, "new_files_directio") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		x = !!x;
+		tune_set(gt_new_files_directio, x);
+
+	} else if (strcmp(param, "max_atomic_write") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_max_atomic_write, x);
+
+	} else if (strcmp(param, "max_readahead") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_max_readahead, x);
+
+	} else if (strcmp(param, "lockdump_size") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_lockdump_size, x);
+
+	} else if (strcmp(param, "stall_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_stall_secs, x);
+
+	} else if (strcmp(param, "complain_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_complain_secs, x);
+
+	} else if (strcmp(param, "reclaim_limit") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_reclaim_limit, x);
+
+	} else if (strcmp(param, "entries_per_readdir") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_entries_per_readdir, x);
+
+	} else if (strcmp(param, "prefetch_secs") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_prefetch_secs, x);
+
+	} else if (strcmp(param, "greedy_default") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_greedy_default, x);
+
+	} else if (strcmp(param, "greedy_quantum") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_greedy_quantum, x);
+
+	} else if (strcmp(param, "greedy_max") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_greedy_max, x);
+
+	} else if (strcmp(param, "statfs_quantum") == 0) {
+		if (sscanf(value, "%u", &x) != 1 || !x)
+			return -EINVAL;
+		tune_set(gt_statfs_quantum, x);
+
+	} else if (strcmp(param, "statfs_slow") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_statfs_slow, x);
+
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * gi_do_shrink - throw out unused glocks
+ * @sdp:
+ * @gi:
+ *
+ * Returns: 0
+ */
+
+static int gi_do_shrink(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	gfs2_gl_hash_clear(sdp, NO_WAIT);
+	return 0;
+}
+
+/**
+ * gi_get_file_stat -
+ * @ip:
+ * @gi:
+ *
+ * Returns: the number of bytes copied, or -errno
+ */
+
+static int gi_get_file_stat(struct gfs2_inode *ip, struct gfs2_ioctl *gi)
+{
+	struct gfs2_holder i_gh;
+	struct gfs2_dinode *di;
+	int error;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	if (gi->gi_size != sizeof(struct gfs2_dinode))
+		return -EINVAL;
+
+	di = kmalloc(sizeof(struct gfs2_dinode), GFP_KERNEL);
+	if (!di)
+		return -ENOMEM;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		goto out;
+	memcpy(di, &ip->i_di, sizeof(struct gfs2_dinode));
+	gfs2_glock_dq_uninit(&i_gh);
+
+	if (copy_to_user(gi->gi_data, di,
+			 sizeof(struct gfs2_dinode)))
+		error = -EFAULT;
+	else
+		error = sizeof(struct gfs2_dinode);
+
+ out:
+	kfree(di);
+
+	return error;
+}
+
+/**
+ * gi_set_file_flag - set or clear a flag on a file
+ * @ip:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_set_file_flag(struct gfs2_inode *ip, struct gfs2_ioctl *gi)
+{
+	char buf[ARG_SIZE];
+	int set;
+	uint32_t flag;
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	int error;
+
+	if (gi->gi_argc != 3)
+		return -EINVAL;
+
+	if (strncpy_from_user(buf, gi->gi_argv[1], ARG_SIZE) < 0)
+		return -EFAULT;
+	buf[ARG_SIZE - 1] = 0;
+
+	if (strcmp(buf, "set") == 0)
+		set = TRUE;
+	else if (strcmp(buf, "clear") == 0)
+		set = FALSE;
+	else
+		return -EINVAL;
+
+	if (strncpy_from_user(buf, gi->gi_argv[2], ARG_SIZE) < 0)
+		return -EFAULT;
+	buf[ARG_SIZE - 1] = 0;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	error = -EACCES;
+	if (ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
+		goto out;
+
+	error = -EINVAL;
+
+	if (strcmp(buf, "jdata") == 0) {
+		if (!S_ISREG(ip->i_di.di_mode) || ip->i_di.di_size)
+			goto out;
+		flag = GFS2_DIF_JDATA;
+	} else if (strcmp(buf, "directio") == 0) {
+		if (!S_ISREG(ip->i_di.di_mode))
+			goto out;
+		flag = GFS2_DIF_DIRECTIO;
+	} else if (strcmp(buf, "immutable") == 0) {
+		/* The IMMUTABLE flag can only be changed by
+		   the relevant capability. */
+		error = -EPERM;
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto out;
+		flag = GFS2_DIF_IMMUTABLE;
+	} else if (strcmp(buf, "appendonly") == 0) {
+		/* The APPENDONLY flag can only be changed by
+		   the relevant capability. */
+		error = -EPERM;
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto out;
+		flag = GFS2_DIF_APPENDONLY;
+	} else if (strcmp(buf, "inherit_jdata") == 0) {
+		if (!S_ISDIR(ip->i_di.di_mode))
+			goto out;
+		flag = GFS2_DIF_INHERIT_JDATA;
+	} else if (strcmp(buf, "inherit_directio") == 0) {
+		if (S_ISDIR(ip->i_di.di_mode))
+			goto out;
+		flag = GFS2_DIF_INHERIT_DIRECTIO;
+	} else
+		goto out;
+
+	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
+	if (error)
+		goto out;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_trans_end;
+
+	if (set)
+		ip->i_di.di_flags |= flag;
+	else
+		ip->i_di.di_flags &= ~flag;
+
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+
+	brelse(dibh);
+
+ out_trans_end:
+	gfs2_trans_end(ip->i_sbd);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+
+}
+
+static int gi_get_bmap(struct gfs2_inode *ip, struct gfs2_ioctl *gi)
+{
+	struct gfs2_holder gh;
+	uint64_t lblock, dblock = 0;
+	int new = FALSE;
+	int error;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	if (gi->gi_size != sizeof(uint64_t))
+		return -EINVAL;
+
+	error = copy_from_user(&lblock, gi->gi_data, sizeof(uint64_t));
+	if (error)
+		return -EFAULT;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+	if (error)
+		return error;
+
+	error = -EACCES;
+	if (ip->i_di.di_uid == current->fsuid || capable(CAP_FOWNER)) {
+		error = 0;
+		if (!gfs2_is_stuffed(ip))
+			error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
+	}
+
+	gfs2_glock_dq_uninit(&gh);
+
+	if (!error) {
+		error = copy_to_user(gi->gi_data, &dblock, sizeof(uint64_t));
+		if (error)
+			error = -EFAULT;
+	}
+
+	return error;
+}
+
+/**
+ * gi_get_file_meta - Return all the metadata for a file
+ * @ip:
+ * @gi:
+ *
+ * Returns: the number of bytes copied, or -errno
+ */
+
+static int gi_get_file_meta(struct gfs2_inode *ip, struct gfs2_ioctl *gi)
+{
+	struct gfs2_holder i_gh;
+	struct gfs2_user_buffer ub;
+	int error;
+
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+
+	ub.ub_data = gi->gi_data;
+	ub.ub_size = gi->gi_size;
+	ub.ub_count = 0;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return error;
+
+	error = -EACCES;
+	if (ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
+		goto out;
+
+	error = gfs2_get_file_meta(ip, &ub);
+	if (error)
+		goto out;
+
+	if (S_ISDIR(ip->i_di.di_mode) &&
+	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+		error = gfs2_get_dir_meta(ip, &ub);
+		if (error)
+			goto out;
+	}
+
+	if (ip->i_di.di_eattr) {
+		error = gfs2_get_eattr_meta(ip, &ub);
+		if (error)
+			goto out;
+	}
+
+	error = ub.ub_count;
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gi_do_file_flush - sync out all dirty data and
+ *		    drop the cache (and lock) for a file.
+ * @ip:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_do_file_flush(struct gfs2_inode *ip, struct gfs2_ioctl *gi)
+{
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	gfs2_glock_force_drop(ip->i_gl);
+	return 0;
+}
+
+/**
+ * gi2hip - return the "struct gfs2_inode" for a hidden file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: the "struct gfs2_inode"
+ */
+
+static struct gfs2_inode *gi2hip(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	char buf[ARG_SIZE];
+
+	if (gi->gi_argc != 2)
+		return ERR_PTR(-EINVAL);
+
+	if (strncpy_from_user(buf, gi->gi_argv[1], ARG_SIZE) < 0)
+		return ERR_PTR(-EFAULT);
+	buf[ARG_SIZE - 1] = 0;
+
+	if (strcmp(buf, "jindex") == 0)
+		return sdp->sd_jindex;
+	if (strcmp(buf, "rindex") == 0)
+		return sdp->sd_rindex;
+	if (strcmp(buf, "quota") == 0)
+		return sdp->sd_quota_inode;
+
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * gi_get_hfile_stat - get stat info on a hidden file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: the number of bytes copied, or -errno
+ */
+
+static int gi_get_hfile_stat(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_dinode *di;
+	struct gfs2_holder i_gh;
+	int error;
+
+	ip = gi2hip(sdp, gi);
+	if (IS_ERR(ip))
+		return PTR_ERR(ip);
+
+	if (gi->gi_size != sizeof(struct gfs2_dinode))
+		return -EINVAL;
+
+	di = kmalloc(sizeof(struct gfs2_dinode), GFP_KERNEL);
+	if (!di)
+		return -ENOMEM;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		goto out;
+	memcpy(di, &ip->i_di, sizeof(struct gfs2_dinode));
+	gfs2_glock_dq_uninit(&i_gh);
+
+	if (copy_to_user(gi->gi_data, di,
+			 sizeof(struct gfs2_dinode)))
+		error = -EFAULT;
+	else
+		error = sizeof(struct gfs2_dinode);
+
+ out:
+	kfree(di);
+
+	return error;
+}
+
+/**
+ * gi_do_hfile_read - Read data from a hidden file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: the number of bytes read, or -errno
+ */
+
+static int gi_do_hfile_read(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	ip = gi2hip(sdp, gi);
+	if (IS_ERR(ip))
+		return PTR_ERR(ip);
+
+	if (!S_ISREG(ip->i_di.di_mode))
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, gi->gi_data, gi->gi_size))
+		return -EFAULT;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	if (error)
+		return error;
+
+	error = gfs2_jdata_read(ip, gi->gi_data, gi->gi_offset, gi->gi_size,
+				gfs2_copy2user);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gi_do_hfile_write - Write data to a hidden file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: the number of bytes written, or -errno
+ */
+
+static int gi_do_hfile_write(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_alloc *al = NULL;
+	struct gfs2_holder i_gh;
+	unsigned int data_blocks, ind_blocks;
+	int alloc_required;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	ip = gi2hip(sdp, gi);
+	if (IS_ERR(ip))
+		return PTR_ERR(ip);
+
+	if (!S_ISREG(ip->i_di.di_mode))
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, gi->gi_data, gi->gi_size))
+		return -EFAULT;
+
+	gfs2_write_calc_reserv(ip, gi->gi_size, &data_blocks, &ind_blocks);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
+				   LM_FLAG_PRIORITY, &i_gh);
+	if (error)
+		return error;
+
+	if (!gfs2_is_jdata(ip)) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_write_alloc_required(ip, gi->gi_offset, gi->gi_size,
+					  &alloc_required);
+	if (error)
+		goto out;
+
+	if (alloc_required) {
+		al = gfs2_alloc_get(ip);
+
+		al->al_requested = data_blocks + ind_blocks;
+
+		error = gfs2_inplace_reserve(ip);
+		if (error)
+			goto out_alloc;
+
+		error = gfs2_trans_begin(sdp,
+					 al->al_rgd->rd_ri.ri_length +
+					 data_blocks + ind_blocks +
+					 RES_DINODE + RES_STATFS, 0);
+		if (error)
+			goto out_relse;
+	} else {
+		error = gfs2_trans_begin(sdp, data_blocks + RES_DINODE, 0);
+		if (error)
+			goto out;
+	}
+
+	error = gfs2_jdata_write(ip, gi->gi_data, gi->gi_offset, gi->gi_size,
+				 gfs2_copy_from_user);
+
+	gfs2_trans_end(sdp);
+
+ out_relse:
+	if (alloc_required)
+		gfs2_inplace_release(ip);
+
+ out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(ip);
+
+ out:
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gi_do_hfile_trunc - truncate a hidden file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: the number of bytes copied, or -errno
+ */
+
+static int gi_do_hfile_trunc(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_holder i_gh;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	ip = gi2hip(sdp, gi);
+	if (IS_ERR(ip))
+		return PTR_ERR(ip);
+
+	if (!S_ISREG(ip->i_di.di_mode))
+		return -EINVAL;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	error = gfs2_truncatei(ip, gi->gi_offset, NULL);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+/**
+ * gi_do_quota_sync - sync the outstanding quota changes for a FS
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_do_quota_sync(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	return gfs2_quota_sync(sdp);
+}
+
+/**
+ * gi_do_quota_refresh - Refresh the a quota LVB from the quota file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_do_quota_refresh(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	char buf[ARG_SIZE];
+	int user;
+	uint32_t id;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 2)
+		return -EINVAL;
+
+	if (strncpy_from_user(buf, gi->gi_argv[1], ARG_SIZE) < 0)
+		return -EFAULT;
+	buf[ARG_SIZE - 1] = 0;
+
+	switch (buf[0]) {
+	case 'u':
+		user = TRUE;
+		break;
+	case 'g':
+		user = FALSE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (buf[1] != ':')
+		return -EINVAL;
+
+	if (sscanf(buf + 2, "%u", &id) != 1)
+		return -EINVAL;
+
+	return gfs2_quota_refresh(sdp, user, id);
+}
+
+/**
+ * gi_do_quota_read - read quota values from the quota file
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_do_quota_read(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	char buf[ARG_SIZE];
+	int user;
+	uint32_t id;
+	struct gfs2_quota q;
+	int error;
+
+	if (gi->gi_argc != 2)
+		return -EINVAL;
+	if (gi->gi_size != sizeof(struct gfs2_quota))
+		return -EINVAL;
+
+	if (strncpy_from_user(buf, gi->gi_argv[1], ARG_SIZE) < 0)
+		return -EFAULT;
+	buf[ARG_SIZE - 1] = 0;
+
+	switch (buf[0]) {
+	case 'u':
+		user = TRUE;
+		break;
+	case 'g':
+		user = FALSE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (buf[1] != ':')
+		return -EINVAL;
+
+	if (sscanf(buf + 2, "%u", &id) != 1)
+		return -EINVAL;
+
+	error = gfs2_quota_read(sdp, user, id, &q);
+	if (error)
+		return error;
+
+	if (copy_to_user(gi->gi_data, &q, sizeof(struct gfs2_quota)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * gi_do_statfs_sync - sync the outstanding statfs changes for a FS
+ * @sdp:
+ * @gi:
+ *
+ * Returns: errno
+ */
+
+static int gi_do_statfs_sync(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	return gfs2_statfs_sync(sdp);
+}
+
+static int gi_resize_add_rgrps(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 1)
+		return -EINVAL;
+	if (gi->gi_size % sizeof(struct gfs2_rindex))
+		return -EINVAL;
+
+	return gfs2_resize_add_rgrps(sdp, gi->gi_data, gi->gi_size);
+}
+
+static int gi_rename2system(struct gfs2_sbd *sdp, struct gfs2_ioctl *gi)
+{
+	char new_dir[ARG_SIZE], new_name[ARG_SIZE];
+	struct gfs2_inode *old_dip, *ip, *new_dip;
+	int put_new_dip = FALSE;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (gi->gi_argc != 3)
+		return -EINVAL;
+
+	if (strncpy_from_user(new_dir, gi->gi_argv[1], ARG_SIZE) < 0)
+		return -EFAULT;
+	new_dir[ARG_SIZE - 1] = 0;
+	if (strncpy_from_user(new_name, gi->gi_argv[2], ARG_SIZE) < 0)
+		return -EFAULT;
+	new_name[ARG_SIZE - 1] = 0;
+
+	error = gfs2_lookup_simple(sdp->sd_root_dir, ".gfs2_admin", &old_dip);
+	if (error)
+		return error;
+
+	error = -ENOTDIR;
+	if (!S_ISDIR(old_dip->i_di.di_mode))
+		goto out;
+
+	error = gfs2_lookup_simple(old_dip, "new_inode", &ip);
+	if (error)
+		goto out;
+
+	if (!strcmp(new_dir, "per_node")) {
+		error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node",
+					   &new_dip);
+		if (error)
+			goto out2;
+		put_new_dip = TRUE;
+	} else if (!strcmp(new_dir, "jindex"))
+		new_dip = sdp->sd_jindex;
+	else {
+		error = -EINVAL;
+		goto out2;
+	}
+
+	error = gfs2_rename2system(ip, old_dip, "new_inode", new_dip, new_name);
+
+	if (put_new_dip)
+		gfs2_inode_put(new_dip);
+
+ out2:
+	gfs2_inode_put(ip);
+	
+ out:
+	gfs2_inode_put(old_dip);
+
+	return error;
+}
+
+/**
+ * gfs2_ioctl_i -
+ * @ip:
+ * @arg:
+ *
+ * Returns: -errno or positive byte count
+ */
+
+int gfs2_ioctl_i(struct gfs2_inode *ip, void *arg)
+{
+	struct gfs2_ioctl *gi_user = (struct gfs2_ioctl *)arg;
+	struct gfs2_ioctl gi;
+	char **argv;
+	char arg0[ARG_SIZE];
+	int error = -EFAULT;
+
+	if (copy_from_user(&gi, gi_user, sizeof(struct gfs2_ioctl)))
+		return -EFAULT;
+	if (!gi.gi_argc)
+		return -EINVAL;
+	argv = kcalloc(gi.gi_argc, sizeof(char *), GFP_KERNEL);
+	if (!argv)
+		return -ENOMEM;
+	if (copy_from_user(argv, gi.gi_argv,
+			   gi.gi_argc * sizeof(char *)))
+		goto out;
+	gi.gi_argv = argv;
+
+	if (strncpy_from_user(arg0, argv[0], ARG_SIZE) < 0)
+		goto out;
+	arg0[ARG_SIZE - 1] = 0;
+
+	if (strcmp(arg0, "get_cookie") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_cookie);
+	else if (strcmp(arg0, "get_super") == 0)
+		error = gi_get_super(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "get_args") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_args);
+	else if (strcmp(arg0, "get_lockstruct") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_lockstruct);
+	else if (strcmp(arg0, "get_statfs") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_statfs);
+	else if (strcmp(arg0, "get_counters") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_counters);
+	else if (strcmp(arg0, "get_tune") == 0)
+		error = gi_skeleton(ip, &gi, gi_get_tune);
+	else if (strcmp(arg0, "set_tune") == 0)
+		error = gi_set_tune(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_shrink") == 0)
+		error = gi_do_shrink(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "get_file_stat") == 0)
+		error = gi_get_file_stat(ip, &gi);
+	else if (strcmp(arg0, "set_file_flag") == 0)
+		error = gi_set_file_flag(ip, &gi);
+	else if (strcmp(arg0, "get_bmap") == 0)
+		error = gi_get_bmap(ip, &gi);
+	else if (strcmp(arg0, "get_file_meta") == 0)
+		error = gi_get_file_meta(ip, &gi);
+	else if (strcmp(arg0, "do_file_flush") == 0)
+		error = gi_do_file_flush(ip, &gi);
+	else if (strcmp(arg0, "get_hfile_stat") == 0)
+		error = gi_get_hfile_stat(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_hfile_read") == 0)
+		error = gi_do_hfile_read(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_hfile_write") == 0)
+		error = gi_do_hfile_write(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_hfile_trunc") == 0)
+		error = gi_do_hfile_trunc(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_quota_sync") == 0)
+		error = gi_do_quota_sync(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_quota_refresh") == 0)
+		error = gi_do_quota_refresh(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_quota_read") == 0)
+		error = gi_do_quota_read(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "do_statfs_sync") == 0)
+		error = gi_do_statfs_sync(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "resize_add_rgrps") == 0)
+		error = gi_resize_add_rgrps(ip->i_sbd, &gi);
+	else if (strcmp(arg0, "rename2system") == 0)
+		error = gi_rename2system(ip->i_sbd, &gi);
+	else
+		error = -ENOTTY;
+
+ out:
+	kfree(argv);
+
+	return error;
+}
+
--- a/fs/gfs2/ioctl.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/ioctl.h	2005-09-01 17:36:55.324114104 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __IOCTL_DOT_H__
+#define __IOCTL_DOT_H__
+
+int gfs2_ioctl_i(struct gfs2_inode *ip, void *arg);
+
+#endif /* __IOCTL_DOT_H__ */
--- a/fs/gfs2/sys.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/sys.c	2005-09-01 17:36:55.507086288 +0800
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "lm.h"
+#include "sys.h"
+#include "super.h"
+
+char *gfs2_sys_margs;
+spinlock_t gfs2_sys_margs_lock;
+
+static ssize_t gfs2_id_show(struct gfs2_sbd *sdp, char *buf)
+{
+	return sprintf(buf, "%s\n", sdp->sd_vfs->s_id);
+}
+
+static ssize_t gfs2_fsname_show(struct gfs2_sbd *sdp, char *buf)
+{
+	return sprintf(buf, "%s\n", sdp->sd_fsname);
+}
+
+static ssize_t gfs2_freeze_show(struct gfs2_sbd *sdp, char *buf)
+{
+	unsigned int count;
+
+	down(&sdp->sd_freeze_lock);
+	count = sdp->sd_freeze_count;
+	up(&sdp->sd_freeze_lock);
+
+	return sprintf(buf, "%u\n", count);
+}
+
+static ssize_t gfs2_freeze_store(struct gfs2_sbd *sdp, const char *buf,
+				 size_t len)
+{
+	ssize_t ret = len;
+	int error = 0;
+	int n = simple_strtol(buf, NULL, 0);
+
+	switch (n) {
+	case 0:
+		gfs2_unfreeze_fs(sdp);
+		break;
+	case 1:
+		error = gfs2_freeze_fs(sdp);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (error)
+		fs_warn(sdp, "freeze %d error %d", n, error);
+
+	return ret;
+}
+
+static ssize_t gfs2_withdraw_show(struct gfs2_sbd *sdp, char *buf)
+{
+	unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
+	return sprintf(buf, "%u\n", b);
+}
+
+static ssize_t gfs2_withdraw_store(struct gfs2_sbd *sdp, const char *buf,
+				   size_t len)
+{
+	ssize_t ret = len;
+	int n = simple_strtol(buf, NULL, 0);
+
+	if (n != 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	gfs2_lm_withdraw(sdp,
+		"GFS2: fsid=%s: withdrawing from cluster at user's request\n",
+		sdp->sd_fsname);
+ out:
+	return ret;
+}
+
+struct gfs2_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gfs2_sbd*, char *);
+	ssize_t (*store)(struct gfs2_sbd*, const char *, size_t);
+};
+
+static struct gfs2_attr gfs2_attr_id = {
+	.attr = {.name = "id", .mode = S_IRUSR},
+	.show = gfs2_id_show
+};
+
+static struct gfs2_attr gfs2_attr_fsname = {
+	.attr = {.name = "fsname", .mode = S_IRUSR},
+	.show = gfs2_fsname_show
+};
+
+static struct gfs2_attr gfs2_attr_freeze = {
+	.attr  = {.name = "freeze", .mode = S_IRUSR | S_IWUSR},
+	.show  = gfs2_freeze_show,
+	.store = gfs2_freeze_store
+};
+
+static struct gfs2_attr gfs2_attr_withdraw = {
+	.attr  = {.name = "withdraw", .mode = S_IRUSR | S_IWUSR},
+	.show  = gfs2_withdraw_show,
+	.store = gfs2_withdraw_store
+};
+
+static struct attribute *gfs2_attrs[] = {
+	&gfs2_attr_id.attr,
+	&gfs2_attr_fsname.attr,
+	&gfs2_attr_freeze.attr,
+	&gfs2_attr_withdraw.attr,
+	NULL,
+};
+
+static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->show ? a->show(sdp, buf) : 0;
+}
+
+static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->store ? a->store(sdp, buf, len) : len;
+}
+
+static struct sysfs_ops gfs2_attr_ops = {
+	.show  = gfs2_attr_show,
+	.store = gfs2_attr_store,
+};
+
+static struct kobj_type gfs2_ktype = {
+	.default_attrs = gfs2_attrs,
+	.sysfs_ops     = &gfs2_attr_ops,
+};
+
+/* FIXME: should this go under /sys/fs/ ? */
+
+static struct kset gfs2_kset = {
+	.subsys = &kernel_subsys,
+	.kobj   = {.name = "gfs2",},
+	.ktype  = &gfs2_ktype,
+};
+
+int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
+{
+	int error;
+
+	error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_fsname);
+	if (error)
+		goto out;
+
+	sdp->sd_kobj.kset = &gfs2_kset;
+	sdp->sd_kobj.ktype = &gfs2_ktype;
+
+	error = kobject_register(&sdp->sd_kobj);
+ out:
+	return error;
+}
+
+void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
+{
+	kobject_unregister(&sdp->sd_kobj);
+}
+
+int gfs2_sys_init(void)
+{
+	gfs2_sys_margs = NULL;
+	spin_lock_init(&gfs2_sys_margs_lock);
+	return kset_register(&gfs2_kset);
+}
+
+void gfs2_sys_uninit(void)
+{
+	kfree(gfs2_sys_margs);
+	kset_unregister(&gfs2_kset);
+}
+
--- a/fs/gfs2/sys.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/sys.h	2005-09-01 17:36:55.517084768 +0800
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __SYS_DOT_H__
+#define __SYS_DOT_H__
+
+/* Allow args to be passed to GFS2 when using an initial ram disk */
+extern char *gfs2_sys_margs;
+extern spinlock_t gfs2_sys_margs_lock;
+
+int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
+void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
+
+int gfs2_sys_init(void);
+void gfs2_sys_uninit(void);
+
+#endif /* __SYS_DOT_H__ */
+
--- a/fs/gfs2/resize.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/resize.c	2005-09-01 17:36:55.452094648 +0800
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "inode.h"
+#include "jdata.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "resize.h"
+#include "rgrp.h"
+#include "super.h"
+#include "trans.h"
+
+int gfs2_resize_add_rgrps(struct gfs2_sbd *sdp, char __user *buf,
+			  unsigned int size)
+{
+	unsigned int num = size / sizeof(struct gfs2_rindex);
+	struct gfs2_inode *ip = sdp->sd_rindex;
+	struct gfs2_alloc *al = NULL;
+	struct gfs2_holder i_gh;
+	unsigned int data_blocks, ind_blocks;
+	int alloc_required;
+	unsigned int x;
+	int error;
+
+	gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
+				   LM_FLAG_PRIORITY | GL_SYNC, &i_gh);
+	if (error)
+		return error;
+
+	if (!gfs2_is_jdata(ip)) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_write_alloc_required(ip, ip->i_di.di_size, size,
+					  &alloc_required);
+	if (error)
+		goto out;
+
+	if (alloc_required) {
+		al = gfs2_alloc_get(ip);
+
+		al->al_requested = data_blocks + ind_blocks;
+
+		error = gfs2_inplace_reserve(ip);
+		if (error)
+			goto out_alloc;
+
+		error = gfs2_trans_begin(sdp,
+					 al->al_rgd->rd_ri.ri_length +
+					 data_blocks + ind_blocks +
+					 RES_DINODE + RES_STATFS, 0);
+		if (error)
+			goto out_relse;
+	} else {
+		error = gfs2_trans_begin(sdp, data_blocks +
+					 RES_DINODE + RES_STATFS, 0);
+		if (error)
+			goto out;
+	}
+
+	for (x = 0; x < num; x++) {
+		struct gfs2_rindex ri;
+		char ri_buf[sizeof(struct gfs2_rindex)];
+
+		error = copy_from_user(&ri, buf, sizeof(struct gfs2_rindex));
+		if (error) {
+			error = -EFAULT;
+			goto out_trans;
+		}
+		gfs2_rindex_out(&ri, ri_buf);
+
+		error = gfs2_jdata_write_mem(ip, ri_buf, ip->i_di.di_size,
+					     sizeof(struct gfs2_rindex));
+		if (error < 0)
+			goto out_trans;
+		gfs2_assert_withdraw(sdp, error == sizeof(struct gfs2_rindex));
+		error = 0;
+
+		gfs2_statfs_change(sdp, ri.ri_data, ri.ri_data, 0);
+
+		buf += sizeof(struct gfs2_rindex);
+	}
+
+ out_trans:
+	gfs2_trans_end(sdp);
+
+ out_relse:
+	if (alloc_required)
+		gfs2_inplace_release(ip);
+
+ out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(ip);
+
+ out:
+	ip->i_gl->gl_vn++;
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
+}
+
+static void drop_dentries(struct gfs2_inode *ip)
+{
+	struct inode *inode;
+	struct dentry *d;
+
+	inode = gfs2_ip2v_lookup(ip);
+	if (!inode)
+		return;
+
+ restart:
+	spin_lock(&dcache_lock);
+	list_for_each_entry(d, &inode->i_dentry, d_alias) {
+		if (d_unhashed(d))
+			continue;
+		dget_locked(d);
+		__d_drop(d);
+		spin_unlock(&dcache_lock);
+		dput(d);
+		goto restart;
+	}
+	spin_unlock(&dcache_lock);
+
+	iput(inode);
+}
+
+int gfs2_rename2system(struct gfs2_inode *ip,
+		       struct gfs2_inode *old_dip, char *old_name,
+		       struct gfs2_inode *new_dip, char *new_name)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_holder ghs[3];
+	struct qstr old_qstr, new_qstr;
+	struct gfs2_inum inum;
+	int alloc_required;
+	struct buffer_head *dibh;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, ghs);
+	gfs2_holder_init(old_dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+	gfs2_holder_init(new_dip->i_gl, LM_ST_EXCLUSIVE, GL_SYNC, ghs + 2);
+
+	error = gfs2_glock_nq_m(3, ghs);
+	if (error)
+		goto out;	
+
+	error = -EMLINK;
+	if (ip->i_di.di_nlink != 1)
+		goto out_gunlock;
+	error = -EINVAL;
+	if (!S_ISREG(ip->i_di.di_mode))
+		goto out_gunlock;
+
+	old_qstr.name = old_name;
+	old_qstr.len = strlen(old_name);
+	error = gfs2_dir_search(old_dip, &old_qstr, &inum, NULL);
+	switch (error) {
+	case 0:
+		break;
+	default:
+		goto out_gunlock;
+	}
+
+	error = -EINVAL;
+	if (!gfs2_inum_equal(&inum, &ip->i_num))
+		goto out_gunlock;
+
+	new_qstr.name = new_name;
+	new_qstr.len = strlen(new_name);
+	error = gfs2_dir_search(new_dip, &new_qstr, NULL, NULL);
+	switch (error) {
+	case -ENOENT:
+		break;
+	case 0:
+		error = -EEXIST;
+	default:
+		goto out_gunlock;
+	}
+
+	gfs2_alloc_get(ip);
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out_alloc;
+
+	error = gfs2_diradd_alloc_required(new_dip, &new_qstr, &alloc_required);
+	if (error)
+		goto out_unhold;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(new_dip);
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve(new_dip);
+		if (error)
+			goto out_alloc2;
+
+		error = gfs2_trans_begin(sdp,
+					 sdp->sd_max_dirres +
+					 al->al_rgd->rd_ri.ri_length +
+					 3 * RES_DINODE + RES_LEAF +
+					 RES_STATFS + RES_QUOTA, 0);
+		if (error)
+			goto out_ipreserv;
+	} else {
+		error = gfs2_trans_begin(sdp,
+					 3 * RES_DINODE + 2 * RES_LEAF +
+					 RES_QUOTA, 0);
+		if (error)
+			goto out_unhold;
+	}
+	
+	error = gfs2_dir_del(old_dip, &old_qstr);
+	if (error)
+		goto out_trans;
+
+	error = gfs2_dir_add(new_dip, &new_qstr, &ip->i_num,
+			     IF2DT(ip->i_di.di_mode));
+	if (error)
+		goto out_trans;
+
+	gfs2_quota_change(ip, -ip->i_di.di_blocks, ip->i_di.di_uid,
+			  ip->i_di.di_gid);
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_trans;
+	ip->i_di.di_flags |= GFS2_DIF_SYSTEM;
+	gfs2_trans_add_bh(ip->i_gl, dibh);
+	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	brelse(dibh);
+
+	drop_dentries(ip);
+
+ out_trans:
+	gfs2_trans_end(sdp);
+
+ out_ipreserv:
+	if (alloc_required)
+		gfs2_inplace_release(new_dip);
+
+ out_alloc2:
+	if (alloc_required)
+		gfs2_alloc_put(new_dip);
+
+ out_unhold:
+	gfs2_quota_unhold(ip);
+
+ out_alloc:
+	gfs2_alloc_put(ip);
+
+ out_gunlock:
+	gfs2_glock_dq_m(3, ghs);
+
+ out:
+	gfs2_holder_uninit(ghs);
+	gfs2_holder_uninit(ghs + 1);
+	gfs2_holder_uninit(ghs + 2);
+
+	return error;
+}
+
--- a/fs/gfs2/resize.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/resize.h	2005-09-01 17:36:55.461093280 +0800
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __RESIZE_DOT_H__
+#define __RESIZE_DOT_H__
+
+int gfs2_resize_add_rgrps(struct gfs2_sbd *sdp, char __user *buf,
+			  unsigned int size);
+int gfs2_rename2system(struct gfs2_inode *ip,
+		       struct gfs2_inode *old_dip, char *old_name,
+		       struct gfs2_inode *new_dip, char *new_name);
+
+#endif /* __RESIZE_DOT_H__ */
--- a/fs/gfs2/mount.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/mount.c	2005-09-01 17:36:55.391103920 +0800
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "mount.h"
+#include "sys.h"
+
+/**
+ * gfs2_mount_args - Parse mount options
+ * @sdp:
+ * @data:
+ *
+ * Return: errno
+ */
+
+int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
+{
+	struct gfs2_args *args = &sdp->sd_args;
+	char *data = data_arg;
+	char *options, *o, *v;
+	int error = 0;
+
+	if (!remount) {
+		/*  If someone preloaded options, use those instead  */
+		spin_lock(&gfs2_sys_margs_lock);
+		if (gfs2_sys_margs) {
+			data = gfs2_sys_margs;
+			gfs2_sys_margs = NULL;
+		}
+		spin_unlock(&gfs2_sys_margs_lock);
+
+		/*  Set some defaults  */
+		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+		args->ar_quota = GFS2_QUOTA_DEFAULT;
+		args->ar_data = GFS2_DATA_DEFAULT;
+	}
+
+	/* Split the options into tokens with the "," character and
+	   process them */
+
+	for (options = data; (o = strsep(&options, ",")); ) {
+		if (!*o)
+			continue;
+
+		v = strchr(o, '=');
+		if (v)
+			*v++ = 0;
+
+		if (!strcmp(o, "lockproto")) {
+			if (!v)
+				goto need_value;
+			if (remount && strcmp(v, args->ar_lockproto))
+				goto cant_remount;
+			strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
+			args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
+		}
+
+		else if (!strcmp(o, "locktable")) {
+			if (!v)
+				goto need_value;
+			if (remount && strcmp(v, args->ar_locktable))
+				goto cant_remount;
+			strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
+			args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
+		}
+
+		else if (!strcmp(o, "hostdata")) {
+			if (!v)
+				goto need_value;
+			if (remount && strcmp(v, args->ar_hostdata))
+				goto cant_remount;
+			strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
+			args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
+		}
+
+		else if (!strcmp(o, "spectator")) {
+			if (remount && !args->ar_spectator)
+				goto cant_remount;
+			args->ar_spectator = TRUE;
+			sdp->sd_vfs->s_flags |= MS_RDONLY;
+
+		} else if (!strcmp(o, "ignore_local_fs")) {
+			if (remount && !args->ar_ignore_local_fs)
+				goto cant_remount;
+			args->ar_ignore_local_fs = TRUE;
+
+		} else if (!strcmp(o, "localflocks")) {
+			if (remount && !args->ar_localflocks)
+				goto cant_remount;
+			args->ar_localflocks = TRUE;
+
+		} else if (!strcmp(o, "localcaching")) {
+			if (remount && !args->ar_localcaching)
+				goto cant_remount;
+			args->ar_localcaching = TRUE;
+
+		} else if (!strcmp(o, "oopses_ok"))
+			args->ar_oopses_ok = TRUE;
+
+		else if (!strcmp(o, "nooopses_ok"))
+			args->ar_oopses_ok = FALSE;
+
+		else if (!strcmp(o, "debug")) {
+			args->ar_debug = TRUE;
+
+		} else if (!strcmp(o, "nodebug"))
+			args->ar_debug = FALSE;
+
+		else if (!strcmp(o, "upgrade")) {
+			if (remount && !args->ar_upgrade)
+				goto cant_remount;
+			args->ar_upgrade = TRUE;
+
+		} else if (!strcmp(o, "num_glockd")) {
+			unsigned int x;
+			if (!v)
+				goto need_value;
+			sscanf(v, "%u", &x);
+			if (remount && x != args->ar_num_glockd)
+				goto cant_remount;
+			if (!x || x > GFS2_GLOCKD_MAX) {
+				fs_info(sdp, "0 < num_glockd <= %u  (not %u)\n",
+				        GFS2_GLOCKD_MAX, x);
+				error = -EINVAL;
+				break;
+			}
+			args->ar_num_glockd = x;
+		}
+
+		else if (!strcmp(o, "acl")) {
+			args->ar_posix_acl = TRUE;
+			sdp->sd_vfs->s_flags |= MS_POSIXACL;
+
+		} else if (!strcmp(o, "noacl")) {
+			args->ar_posix_acl = FALSE;
+			sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
+
+		} else if (!strcmp(o, "quota")) {
+			if (!v)
+				goto need_value;
+			if (!strcmp(v, "off"))
+				args->ar_quota = GFS2_QUOTA_OFF;
+			else if (!strcmp(v, "account"))
+				args->ar_quota = GFS2_QUOTA_ACCOUNT;
+			else if (!strcmp(v, "on"))
+				args->ar_quota = GFS2_QUOTA_ON;
+			else {
+				fs_info(sdp, "invalid value for quota\n");
+				error = -EINVAL;
+				break;
+			}
+
+		} else if (!strcmp(o, "suiddir"))
+			args->ar_suiddir = TRUE;
+
+		else if (!strcmp(o, "nosuiddir"))
+			args->ar_suiddir = FALSE;
+
+		else if (!strcmp(o, "data")) {
+			if (!v)
+				goto need_value;
+			if (!strcmp(v, "writeback"))
+				args->ar_data = GFS2_DATA_WRITEBACK;
+			else if (!strcmp(v, "ordered"))
+				args->ar_data = GFS2_DATA_ORDERED;
+			else {
+				fs_info(sdp, "invalid value for data\n");
+				error = -EINVAL;
+				break;
+			}
+
+		} else {
+			fs_info(sdp, "unknown option: %s\n", o);
+			error = -EINVAL;
+			break;
+		}
+	}
+
+	if (error)
+		fs_info(sdp, "invalid mount option(s)\n");
+
+	if (data != data_arg)
+		kfree(data);
+
+	return error;
+
+ need_value:
+	fs_info(sdp, "need value for option %s\n", o);
+	return -EINVAL;
+
+ cant_remount:
+	fs_info(sdp, "can't remount with option %s\n", o);
+	return -EINVAL;
+}
+
--- a/fs/gfs2/mount.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/mount.h	2005-09-01 17:36:55.391103920 +0800
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __MOUNT_DOT_H__
+#define __MOUNT_DOT_H__
+
+int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
+
+#endif /* __MOUNT_DOT_H__ */

[PATCH 09/13] GFS: locking

The internal glock layer and code that calls into the lock module to do
inter-node locking.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/glock.c | 2540 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/glock.h |  147 +++
 fs/gfs2/lm.c    |  284 ++++++
 fs/gfs2/lm.h    |   42 
 4 files changed, 3013 insertions(+)

--- a/fs/gfs2/glock.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/glock.c	2005-09-01 17:36:55.252125048 +0800
@@ -0,0 +1,2540 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/delay.h>
+#include <linux/sort.h>
+#include <linux/jhash.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "lm.h"
+#include "lops.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "super.h"
+
+/*  Must be kept in sync with the beginning of struct gfs2_glock  */
+struct glock_plug {
+	struct list_head gl_list;
+	unsigned long gl_flags;
+};
+
+struct greedy {
+	struct gfs2_holder gr_gh;
+	struct work_struct gr_work;
+};
+
+typedef void (*glock_examiner) (struct gfs2_glock * gl);
+
+/**
+ * relaxed_state_ok - is a requested lock compatible with the current lock mode?
+ * @actual: the current state of the lock
+ * @requested: the lock state that was requested by the caller
+ * @flags: the modifier flags passed in by the caller
+ *
+ * Returns: TRUE if the locks are compatible, FALSE otherwise
+ */
+
+static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
+				   int flags)
+{
+	if (actual == requested)
+		return TRUE;
+
+	if (flags & GL_EXACT)
+		return FALSE;
+
+	if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
+		return TRUE;
+
+	if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
+		return TRUE;
+
+	return FALSE;
+}
+
+/**
+ * gl_hash() - Turn glock number into hash bucket number
+ * @lock: The glock number
+ *
+ * Returns: The number of the corresponding hash bucket
+ */
+
+static unsigned int gl_hash(struct lm_lockname *name)
+{
+	unsigned int h;
+
+	h = jhash(&name->ln_number, sizeof(uint64_t), 0);
+	h = jhash(&name->ln_type, sizeof(unsigned int), h);
+	h &= GFS2_GL_HASH_MASK;
+
+	return h;
+}
+
+/**
+ * glock_hold() - increment reference count on glock
+ * @gl: The glock to hold
+ *
+ */
+
+static inline void glock_hold(struct gfs2_glock *gl)
+{
+	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_count) > 0,);
+	atomic_inc(&gl->gl_count);
+}
+
+/**
+ * glock_put() - Decrement reference count on glock
+ * @gl: The glock to put
+ *
+ */
+
+static inline void glock_put(struct gfs2_glock *gl)
+{
+	if (atomic_read(&gl->gl_count) == 1)
+		gfs2_glock_schedule_for_reclaim(gl);
+	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_count) > 0,);
+	atomic_dec(&gl->gl_count);
+}
+
+/**
+ * queue_empty - check to see if a glock's queue is empty
+ * @gl: the glock
+ * @head: the head of the queue to check
+ *
+ * Returns: TRUE if the queue is empty
+ */
+
+static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
+{
+	int empty;
+	spin_lock(&gl->gl_spin);
+	empty = list_empty(head);
+	spin_unlock(&gl->gl_spin);
+	return empty;
+}
+
+/**
+ * search_bucket() - Find struct gfs2_glock by lock number
+ * @bucket: the bucket to search
+ * @name: The lock name
+ *
+ * Returns: NULL, or the struct gfs2_glock with the requested number
+ */
+
+static struct gfs2_glock *search_bucket(struct gfs2_gl_hash_bucket *bucket,
+					struct lm_lockname *name)
+{
+	struct gfs2_glock *gl;
+
+	list_for_each_entry(gl, &bucket->hb_list, gl_list) {
+		if (test_bit(GLF_PLUG, &gl->gl_flags))
+			continue;
+		if (!lm_name_equal(&gl->gl_name, name))
+			continue;
+
+		atomic_inc(&gl->gl_count);
+
+		return gl;
+	}
+
+	return NULL;
+}
+
+/**
+ * gfs2_glock_find() - Find glock by lock number
+ * @sdp: The GFS2 superblock
+ * @name: The lock name
+ *
+ * Returns: NULL, or the struct gfs2_glock with the requested number
+ */
+
+struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
+				   struct lm_lockname *name)
+{
+	struct gfs2_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
+	struct gfs2_glock *gl;
+
+	read_lock(&bucket->hb_lock);
+	gl = search_bucket(bucket, name);
+	read_unlock(&bucket->hb_lock);
+
+	return gl;
+}
+
+/**
+ * glock_free() - Perform a few checks and then release struct gfs2_glock
+ * @gl: The glock to release
+ *
+ * Also calls lock module to release its internal structure for this glock.
+ *
+ */
+
+static void glock_free(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct inode *aspace = gl->gl_aspace;
+
+	gfs2_lm_put_lock(sdp, gl->gl_lock);
+
+	if (aspace)
+		gfs2_aspace_put(aspace);
+
+	kmem_cache_free(gfs2_glock_cachep, gl);
+
+	atomic_dec(&sdp->sd_glock_count);
+}
+
+/**
+ * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
+ * @sdp: The GFS2 superblock
+ * @number: the lock number
+ * @glops: The glock_operations to use
+ * @create: If FALSE, don't create the glock if it doesn't exist
+ * @glp: the glock is returned here
+ *
+ * This does not lock a glock, just finds/creates structures for one.
+ *
+ * Returns: errno
+ */
+
+int gfs2_glock_get(struct gfs2_sbd *sdp, uint64_t number,
+		   struct gfs2_glock_operations *glops, int create,
+		   struct gfs2_glock **glp)
+{
+	struct lm_lockname name;
+	struct gfs2_glock *gl, *tmp;
+	struct gfs2_gl_hash_bucket *bucket;
+	int error;
+
+	name.ln_number = number;
+	name.ln_type = glops->go_type;
+	bucket = &sdp->sd_gl_hash[gl_hash(&name)];
+
+	read_lock(&bucket->hb_lock);
+	gl = search_bucket(bucket, &name);
+	read_unlock(&bucket->hb_lock);
+
+	if (gl || !create) {
+		*glp = gl;
+		return 0;
+	}
+
+	gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
+	if (!gl)
+		return -ENOMEM;
+
+	memset(gl, 0, sizeof(struct gfs2_glock));
+
+	INIT_LIST_HEAD(&gl->gl_list);
+	gl->gl_name = name;
+	atomic_set(&gl->gl_count, 1);
+
+	spin_lock_init(&gl->gl_spin);
+
+	gl->gl_state = LM_ST_UNLOCKED;
+	INIT_LIST_HEAD(&gl->gl_holders);
+	INIT_LIST_HEAD(&gl->gl_waiters1);
+	INIT_LIST_HEAD(&gl->gl_waiters2);
+	INIT_LIST_HEAD(&gl->gl_waiters3);
+
+	gl->gl_ops = glops;
+
+	gl->gl_bucket = bucket;
+	INIT_LIST_HEAD(&gl->gl_reclaim);
+
+	gl->gl_sbd = sdp;
+
+	lops_init_le(&gl->gl_le, &gfs2_glock_lops);
+	INIT_LIST_HEAD(&gl->gl_ail_list);
+
+	/* If this glock protects actual on-disk data or metadata blocks,
+	   create a VFS inode to manage the pages/buffers holding them. */
+	if (glops == &gfs2_inode_glops ||
+	    glops == &gfs2_rgrp_glops ||
+	    glops == &gfs2_meta_glops) {
+		gl->gl_aspace = gfs2_aspace_get(sdp);
+		if (!gl->gl_aspace) {
+			error = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
+	if (error)
+		goto fail_aspace;
+
+	atomic_inc(&sdp->sd_glock_count);
+
+	write_lock(&bucket->hb_lock);
+	tmp = search_bucket(bucket, &name);
+	if (tmp) {
+		write_unlock(&bucket->hb_lock);
+		glock_free(gl);
+		gl = tmp;
+	} else {
+		list_add_tail(&gl->gl_list, &bucket->hb_list);
+		write_unlock(&bucket->hb_lock);
+	}
+
+	*glp = gl;
+
+	return 0;
+
+ fail_aspace:
+	if (gl->gl_aspace)
+		gfs2_aspace_put(gl->gl_aspace);
+
+ fail:
+	kmem_cache_free(gfs2_glock_cachep, gl);	
+
+	return error;
+}
+
+/**
+ * gfs2_glock_hold() - As glock_hold(), but suitable for exporting
+ * @gl: The glock to hold
+ *
+ */
+
+void gfs2_glock_hold(struct gfs2_glock *gl)
+{
+	glock_hold(gl);
+}
+
+/**
+ * gfs2_glock_put() - As glock_put(), but suitable for exporting
+ * @gl: The glock to put
+ *
+ */
+
+void gfs2_glock_put(struct gfs2_glock *gl)
+{
+	glock_put(gl);
+}
+
+/**
+ * gfs2_holder_init - initialize a struct gfs2_holder in the default way
+ * @gl: the glock
+ * @state: the state we're requesting
+ * @flags: the modifier flags
+ * @gh: the holder structure
+ *
+ */
+
+void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
+		      struct gfs2_holder *gh)
+{
+	INIT_LIST_HEAD(&gh->gh_list);
+	gh->gh_gl = gl;
+	gh->gh_owner = (flags & GL_NEVER_RECURSE) ? NULL : current;
+	gh->gh_state = state;
+	gh->gh_flags = flags;
+	gh->gh_error = 0;
+	gh->gh_iflags = 0;
+	init_completion(&gh->gh_wait);
+
+	if (gh->gh_state == LM_ST_EXCLUSIVE)
+		gh->gh_flags |= GL_LOCAL_EXCL;
+
+	glock_hold(gl);
+}
+
+/**
+ * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
+ * @state: the state we're requesting
+ * @flags: the modifier flags
+ * @gh: the holder structure
+ *
+ * Don't mess with the glock.
+ *
+ */
+
+void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh)
+{
+	gh->gh_state = state;
+	gh->gh_flags = flags;
+	if (gh->gh_state == LM_ST_EXCLUSIVE)
+		gh->gh_flags |= GL_LOCAL_EXCL;
+
+	gh->gh_iflags &= 1 << HIF_ALLOCED;
+}
+
+/**
+ * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
+ * @gh: the holder structure
+ *
+ */
+
+void gfs2_holder_uninit(struct gfs2_holder *gh)
+{
+	glock_put(gh->gh_gl);
+	gh->gh_gl = NULL;
+}
+
+/**
+ * gfs2_holder_get - get a struct gfs2_holder structure
+ * @gl: the glock
+ * @state: the state we're requesting
+ * @flags: the modifier flags
+ * @gfp_flags: __GFP_NOFAIL
+ *
+ * Figure out how big an impact this function has.  Either:
+ * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
+ * 2) Leave it like it is
+ *
+ * Returns: the holder structure, NULL on ENOMEM
+ */
+
+struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
+				    int flags, int gfp_flags)
+{
+	struct gfs2_holder *gh;
+
+	gh = kmalloc(sizeof(struct gfs2_holder), GFP_KERNEL | gfp_flags);
+	if (!gh)
+		return NULL;
+
+	gfs2_holder_init(gl, state, flags, gh);
+	set_bit(HIF_ALLOCED, &gh->gh_iflags);
+
+	return gh;
+}
+
+/**
+ * gfs2_holder_put - get rid of a struct gfs2_holder structure
+ * @gh: the holder structure
+ *
+ */
+
+void gfs2_holder_put(struct gfs2_holder *gh)
+{
+	gfs2_holder_uninit(gh);
+	kfree(gh);
+}
+
+/**
+ * handle_recurse - put other holder structures (marked recursive)
+ *                  into the holders list
+ * @gh: the holder structure
+ *
+ */
+
+static void handle_recurse(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_holder *tmp_gh, *safe;
+	int found = FALSE;
+
+	if (gfs2_assert_warn(sdp, gh->gh_owner))
+		return;
+
+	list_for_each_entry_safe(tmp_gh, safe, &gl->gl_waiters3, gh_list) {
+		if (tmp_gh->gh_owner != gh->gh_owner)
+			continue;
+
+		gfs2_assert_warn(sdp,
+				 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
+
+		list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
+		tmp_gh->gh_error = 0;
+		set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
+
+		complete(&tmp_gh->gh_wait);
+
+		found = TRUE;
+	}
+
+	gfs2_assert_warn(sdp, found);
+}
+
+/**
+ * do_unrecurse - a recursive holder was just dropped of the waiters3 list
+ * @gh: the holder
+ *
+ * If there is only one other recursive holder, clear its HIF_RECURSE bit.
+ * If there is more than one, leave them alone.
+ *
+ */
+
+static void do_unrecurse(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_holder *tmp_gh, *last_gh = NULL;
+	int found = FALSE;
+
+	if (gfs2_assert_warn(sdp, gh->gh_owner))
+		return;
+
+	list_for_each_entry(tmp_gh, &gl->gl_waiters3, gh_list) {
+		if (tmp_gh->gh_owner != gh->gh_owner)
+			continue;
+
+		gfs2_assert_warn(sdp,
+				 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
+
+		if (found)
+			return;
+
+		found = TRUE;
+		last_gh = tmp_gh;
+	}
+
+	if (!gfs2_assert_warn(sdp, found))
+		clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
+}
+
+/**
+ * rq_mutex - process a mutex request in the queue
+ * @gh: the glock holder
+ *
+ * Returns: TRUE if the queue is blocked
+ */
+
+static int rq_mutex(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+
+	list_del_init(&gh->gh_list);
+	/*  gh->gh_error never examined.  */
+	set_bit(GLF_LOCK, &gl->gl_flags);
+	complete(&gh->gh_wait);
+
+	return TRUE;
+}
+
+/**
+ * rq_promote - process a promote request in the queue
+ * @gh: the glock holder
+ *
+ * Acquire a new inter-node lock, or change a lock state to more restrictive.
+ *
+ * Returns: TRUE if the queue is blocked
+ */
+
+static int rq_promote(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	int recurse;
+
+	if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
+		if (list_empty(&gl->gl_holders)) {
+			gl->gl_req_gh = gh;
+			set_bit(GLF_LOCK, &gl->gl_flags);
+			spin_unlock(&gl->gl_spin);
+
+			if (atomic_read(&sdp->sd_reclaim_count) >
+			    gfs2_tune_get(sdp, gt_reclaim_limit) &&
+			    !(gh->gh_flags & LM_FLAG_PRIORITY)) {
+				gfs2_reclaim_glock(sdp);
+				gfs2_reclaim_glock(sdp);
+			}
+
+			glops->go_xmote_th(gl, gh->gh_state,
+					   gh->gh_flags);
+
+			spin_lock(&gl->gl_spin);
+		}
+		return TRUE;
+	}
+
+	if (list_empty(&gl->gl_holders)) {
+		set_bit(HIF_FIRST, &gh->gh_iflags);
+		set_bit(GLF_LOCK, &gl->gl_flags);
+		recurse = FALSE;
+	} else {
+		struct gfs2_holder *next_gh;
+		if (gh->gh_flags & GL_LOCAL_EXCL)
+			return TRUE;
+		next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
+				     gh_list);
+		if (next_gh->gh_flags & GL_LOCAL_EXCL)
+			 return TRUE;
+		recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
+	}
+
+	list_move_tail(&gh->gh_list, &gl->gl_holders);
+	gh->gh_error = 0;
+	set_bit(HIF_HOLDER, &gh->gh_iflags);
+
+	if (recurse)
+		handle_recurse(gh);
+
+	complete(&gh->gh_wait);
+
+	return FALSE;
+}
+
+/**
+ * rq_demote - process a demote request in the queue
+ * @gh: the glock holder
+ *
+ * Returns: TRUE if the queue is blocked
+ */
+
+static int rq_demote(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+
+	if (!list_empty(&gl->gl_holders))
+		return TRUE;
+
+	if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
+		list_del_init(&gh->gh_list);
+		gh->gh_error = 0;
+		spin_unlock(&gl->gl_spin);
+		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
+			gfs2_holder_put(gh);
+		else
+			complete(&gh->gh_wait);
+		spin_lock(&gl->gl_spin);
+	} else {
+		gl->gl_req_gh = gh;
+		set_bit(GLF_LOCK, &gl->gl_flags);
+		spin_unlock(&gl->gl_spin);
+
+		if (gh->gh_state == LM_ST_UNLOCKED ||
+		    gl->gl_state != LM_ST_EXCLUSIVE)
+			glops->go_drop_th(gl);
+		else
+			glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+
+		spin_lock(&gl->gl_spin);
+	}
+
+	return FALSE;
+}
+
+/**
+ * rq_greedy - process a queued request to drop greedy status
+ * @gh: the glock holder
+ *
+ * Returns: TRUE if the queue is blocked
+ */
+
+static int rq_greedy(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+
+	list_del_init(&gh->gh_list);
+	/*  gh->gh_error never examined.  */
+	clear_bit(GLF_GREEDY, &gl->gl_flags);
+	spin_unlock(&gl->gl_spin);
+
+	gfs2_holder_uninit(gh);
+	kfree(container_of(gh, struct greedy, gr_gh));
+
+	spin_lock(&gl->gl_spin);		
+
+	return FALSE;
+}
+
+/**
+ * run_queue - process holder structures on a glock
+ * @gl: the glock
+ *
+ */
+
+static void run_queue(struct gfs2_glock *gl)
+{
+	struct gfs2_holder *gh;
+	int blocked = TRUE;
+
+	for (;;) {
+		if (test_bit(GLF_LOCK, &gl->gl_flags))
+			break;
+
+		if (!list_empty(&gl->gl_waiters1)) {
+			gh = list_entry(gl->gl_waiters1.next,
+					struct gfs2_holder, gh_list);
+
+			if (test_bit(HIF_MUTEX, &gh->gh_iflags))
+				blocked = rq_mutex(gh);
+			else
+				gfs2_assert_warn(gl->gl_sbd, FALSE);
+
+		} else if (!list_empty(&gl->gl_waiters2) &&
+			   !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
+			gh = list_entry(gl->gl_waiters2.next,
+					struct gfs2_holder, gh_list);
+
+			if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
+				blocked = rq_demote(gh);
+			else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
+				blocked = rq_greedy(gh);
+			else
+				gfs2_assert_warn(gl->gl_sbd, FALSE);
+
+		} else if (!list_empty(&gl->gl_waiters3)) {
+			gh = list_entry(gl->gl_waiters3.next,
+					struct gfs2_holder, gh_list);
+
+			if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
+				blocked = rq_promote(gh);
+			else
+				gfs2_assert_warn(gl->gl_sbd, FALSE);
+
+		} else
+			break;
+
+		if (blocked)
+			break;
+	}
+}
+
+/**
+ * gfs2_glmutex_lock - acquire a local lock on a glock
+ * @gl: the glock
+ *
+ * Gives caller exclusive access to manipulate a glock structure.
+ */
+
+void gfs2_glmutex_lock(struct gfs2_glock *gl)
+{
+	struct gfs2_holder gh;
+
+	gfs2_holder_init(gl, 0, 0, &gh);
+	set_bit(HIF_MUTEX, &gh.gh_iflags);
+
+	spin_lock(&gl->gl_spin);
+	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
+	else
+		complete(&gh.gh_wait);
+	spin_unlock(&gl->gl_spin);
+
+	wait_for_completion(&gh.gh_wait);
+	gfs2_holder_uninit(&gh);
+}
+
+/**
+ * gfs2_glmutex_trylock - try to acquire a local lock on a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if the glock is acquired
+ */
+
+int gfs2_glmutex_trylock(struct gfs2_glock *gl)
+{
+	int acquired = TRUE;
+
+	spin_lock(&gl->gl_spin);
+	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+		acquired = FALSE;
+	spin_unlock(&gl->gl_spin);
+
+	return acquired;
+}
+
+/**
+ * gfs2_glmutex_unlock - release a local lock on a glock
+ * @gl: the glock
+ *
+ */
+
+void gfs2_glmutex_unlock(struct gfs2_glock *gl)
+{
+	spin_lock(&gl->gl_spin);
+	clear_bit(GLF_LOCK, &gl->gl_flags);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+}
+
+/**
+ * handle_callback - add a demote request to a lock's queue
+ * @gl: the glock
+ * @state: the state the caller wants us to change to
+ *
+ */
+
+static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+{
+	struct gfs2_holder *gh, *new_gh = NULL;
+
+ restart:
+	spin_lock(&gl->gl_spin);
+
+	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
+		if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
+		    gl->gl_req_gh != gh) {
+			if (gh->gh_state != state)
+				gh->gh_state = LM_ST_UNLOCKED;
+			goto out;
+		}
+	}
+
+	if (new_gh) {
+		list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
+		new_gh = NULL;
+	} else {
+		spin_unlock(&gl->gl_spin);
+
+		new_gh = gfs2_holder_get(gl, state,
+					 LM_FLAG_TRY | GL_NEVER_RECURSE,
+					 __GFP_NOFAIL),
+		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
+		set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
+
+		goto restart;
+	}
+
+ out:
+	spin_unlock(&gl->gl_spin);
+
+	if (new_gh)
+		gfs2_holder_put(new_gh);
+}
+
+/**
+ * state_change - record that the glock is now in a different state
+ * @gl: the glock
+ * @new_state the new state
+ *
+ */
+
+static void state_change(struct gfs2_glock *gl, unsigned int new_state)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	int held1, held2;
+
+	held1 = (gl->gl_state != LM_ST_UNLOCKED);
+	held2 = (new_state != LM_ST_UNLOCKED);
+
+	if (held1 != held2) {
+		if (held2) {
+			atomic_inc(&sdp->sd_glock_held_count);
+			glock_hold(gl);
+		} else {
+			atomic_dec(&sdp->sd_glock_held_count);
+			glock_put(gl);
+		}
+	}
+
+	gl->gl_state = new_state;
+}
+
+/**
+ * xmote_bh - Called after the lock module is done acquiring a lock
+ * @gl: The glock in question
+ * @ret: the int returned from the lock module
+ *
+ */
+
+static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	struct gfs2_holder *gh = gl->gl_req_gh;
+	int prev_state = gl->gl_state;
+	int op_done = TRUE;
+
+	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
+	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+	gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
+
+	state_change(gl, ret & LM_OUT_ST_MASK);
+
+	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
+		if (glops->go_inval)
+			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+	} else if (gl->gl_state == LM_ST_DEFERRED) {
+		/* We might not want to do this here.
+		   Look at moving to the inode glops. */
+		if (glops->go_inval)
+			glops->go_inval(gl, DIO_DATA);
+	}
+
+	/*  Deal with each possible exit condition  */
+
+	if (!gh)
+		gl->gl_stamp = jiffies;
+
+	else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = -EIO;
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			do_unrecurse(gh);
+		spin_unlock(&gl->gl_spin);
+
+	} else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		if (gl->gl_state == gh->gh_state ||
+		    gl->gl_state == LM_ST_UNLOCKED)
+			gh->gh_error = 0;
+		else {
+			if (gfs2_assert_warn(sdp, gh->gh_flags &
+					(LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
+				fs_warn(sdp, "ret = 0x%.8X\n", ret);
+			gh->gh_error = GLR_TRYFAILED;
+		}
+		spin_unlock(&gl->gl_spin);
+
+		if (ret & LM_OUT_CANCELED)
+			handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
+
+	} else if (ret & LM_OUT_CANCELED) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = GLR_CANCELED;
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			do_unrecurse(gh);
+		spin_unlock(&gl->gl_spin);
+
+	} else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
+		spin_lock(&gl->gl_spin);
+		list_move_tail(&gh->gh_list, &gl->gl_holders);
+		gh->gh_error = 0;
+		set_bit(HIF_HOLDER, &gh->gh_iflags);
+		spin_unlock(&gl->gl_spin);
+
+		set_bit(HIF_FIRST, &gh->gh_iflags);
+
+		op_done = FALSE;
+
+	} else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = GLR_TRYFAILED;
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			do_unrecurse(gh);
+		spin_unlock(&gl->gl_spin);
+
+	} else {
+		if (gfs2_assert_withdraw(sdp, FALSE) == -1)
+			fs_err(sdp, "ret = 0x%.8X\n", ret);
+	}
+
+	if (glops->go_xmote_bh)
+		glops->go_xmote_bh(gl);
+
+	if (op_done) {
+		spin_lock(&gl->gl_spin);
+		gl->gl_req_gh = NULL;
+		gl->gl_req_bh = NULL;
+		clear_bit(GLF_LOCK, &gl->gl_flags);
+		run_queue(gl);
+		spin_unlock(&gl->gl_spin);
+	}
+
+	glock_put(gl);
+
+	if (gh) {
+		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
+			gfs2_holder_put(gh);
+		else
+			complete(&gh->gh_wait);
+	}
+}
+
+/**
+ * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
+ * @gl: The glock in question
+ * @state: the requested state
+ * @flags: modifier flags to the lock call
+ *
+ */
+
+void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
+				 LM_FLAG_NOEXP | LM_FLAG_ANY |
+				 LM_FLAG_PRIORITY);
+	unsigned int lck_ret;
+
+	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
+	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+	gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
+	gfs2_assert_warn(sdp, state != gl->gl_state);
+
+	if (gl->gl_state == LM_ST_EXCLUSIVE) {
+		if (glops->go_sync)
+			glops->go_sync(gl,
+				       DIO_METADATA | DIO_DATA | DIO_RELEASE);
+	}
+
+	glock_hold(gl);
+	gl->gl_req_bh = xmote_bh;
+
+	atomic_inc(&sdp->sd_lm_lock_calls);
+
+	lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state,
+			       lck_flags);
+
+	if (lck_ret & LM_OUT_ASYNC)
+		gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
+	else
+		xmote_bh(gl, lck_ret);
+}
+
+/**
+ * drop_bh - Called after a lock module unlock completes
+ * @gl: the glock
+ * @ret: the return status
+ *
+ * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
+ * Doesn't drop the reference on the glock the top half took out
+ *
+ */
+
+static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	struct gfs2_holder *gh = gl->gl_req_gh;
+
+	clear_bit(GLF_PREFETCH, &gl->gl_flags);
+
+	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
+	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+	gfs2_assert_warn(sdp, !ret);
+
+	state_change(gl, LM_ST_UNLOCKED);
+
+	if (glops->go_inval)
+		glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+
+	if (gh) {
+		spin_lock(&gl->gl_spin);
+		list_del_init(&gh->gh_list);
+		gh->gh_error = 0;
+		spin_unlock(&gl->gl_spin);
+	}
+
+	if (glops->go_drop_bh)
+		glops->go_drop_bh(gl);
+
+	spin_lock(&gl->gl_spin);
+	gl->gl_req_gh = NULL;
+	gl->gl_req_bh = NULL;
+	clear_bit(GLF_LOCK, &gl->gl_flags);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+
+	glock_put(gl);
+
+	if (gh) {
+		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
+			gfs2_holder_put(gh);
+		else
+			complete(&gh->gh_wait);
+	}
+}
+
+/**
+ * gfs2_glock_drop_th - call into the lock module to unlock a lock
+ * @gl: the glock
+ *
+ */
+
+void gfs2_glock_drop_th(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	unsigned int ret;
+
+	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
+	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+	gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
+
+	if (gl->gl_state == LM_ST_EXCLUSIVE) {
+		if (glops->go_sync)
+			glops->go_sync(gl,
+				       DIO_METADATA | DIO_DATA | DIO_RELEASE);
+	}
+
+	glock_hold(gl);
+	gl->gl_req_bh = drop_bh;
+
+	atomic_inc(&sdp->sd_lm_unlock_calls);
+
+	ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
+
+	if (!ret)
+		drop_bh(gl, ret);
+	else
+		gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
+}
+
+/**
+ * do_cancels - cancel requests for locks stuck waiting on an expire flag
+ * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
+ *
+ * Don't cancel GL_NOCANCEL requests.
+ */
+
+static void do_cancels(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+
+	spin_lock(&gl->gl_spin);
+
+	while (gl->gl_req_gh != gh &&
+	       !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
+	       !list_empty(&gh->gh_list)) {
+		if (gl->gl_req_bh &&
+		    !(gl->gl_req_gh &&
+		      (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
+			spin_unlock(&gl->gl_spin);
+			gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
+			msleep(100);
+			spin_lock(&gl->gl_spin);
+		} else {
+			spin_unlock(&gl->gl_spin);
+			msleep(100);
+			spin_lock(&gl->gl_spin);
+		}
+	}
+
+	spin_unlock(&gl->gl_spin);
+}
+
+/**
+ * glock_wait_internal - wait on a glock acquisition
+ * @gh: the glock holder
+ *
+ * Returns: 0 on success
+ */
+
+static int glock_wait_internal(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	int error = 0;
+
+	if (test_bit(HIF_ABORTED, &gh->gh_iflags))
+		return -EIO;
+
+	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
+		spin_lock(&gl->gl_spin);
+		if (gl->gl_req_gh != gh &&
+		    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
+		    !list_empty(&gh->gh_list)) {
+			list_del_init(&gh->gh_list);
+			gh->gh_error = GLR_TRYFAILED;
+			if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+				do_unrecurse(gh);
+			run_queue(gl);
+			spin_unlock(&gl->gl_spin);
+			return GLR_TRYFAILED;
+		}
+		spin_unlock(&gl->gl_spin);
+	}
+
+	if (gh->gh_flags & LM_FLAG_PRIORITY)
+		do_cancels(gh);
+
+	wait_for_completion(&gh->gh_wait);
+
+	if (gh->gh_error)
+		return gh->gh_error;
+
+	gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
+	gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state,
+						   gh->gh_state,
+						   gh->gh_flags));
+
+	if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
+		gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
+
+		if (glops->go_lock) {
+			error = glops->go_lock(gh);
+			if (error) {
+				spin_lock(&gl->gl_spin);
+				list_del_init(&gh->gh_list);
+				gh->gh_error = error;
+				if (test_and_clear_bit(HIF_RECURSE,
+						       &gh->gh_iflags))
+					do_unrecurse(gh);
+				spin_unlock(&gl->gl_spin);
+			}
+		}
+
+		spin_lock(&gl->gl_spin);
+		gl->gl_req_gh = NULL;
+		gl->gl_req_bh = NULL;
+		clear_bit(GLF_LOCK, &gl->gl_flags);
+		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
+			handle_recurse(gh);
+		run_queue(gl);
+		spin_unlock(&gl->gl_spin);
+	}
+
+	return error;
+}
+
+static inline struct gfs2_holder *
+find_holder_by_owner(struct list_head *head, struct task_struct *owner)
+{
+	struct gfs2_holder *gh;
+
+	list_for_each_entry(gh, head, gh_list) {
+		if (gh->gh_owner == owner)
+			return gh;
+	}
+
+	return NULL;
+}
+
+/**
+ * recurse_check -
+ *
+ * Make sure the new holder is compatible
+ * with the pre-existing one.
+ *
+ */
+
+static int recurse_check(struct gfs2_holder *existing, struct gfs2_holder *new,
+			 unsigned int state)
+{
+	struct gfs2_sbd *sdp = existing->gh_gl->gl_sbd;
+	int error = 0;
+
+	if (gfs2_assert_warn(sdp, (new->gh_flags & LM_FLAG_ANY) ||
+			     !(existing->gh_flags & LM_FLAG_ANY)) ||
+	    gfs2_assert_warn(sdp, (existing->gh_flags & GL_LOCAL_EXCL) ||
+			     !(new->gh_flags & GL_LOCAL_EXCL)) ||
+	    gfs2_assert_warn(sdp, relaxed_state_ok(state,
+						   new->gh_state,
+						   new->gh_flags))) {
+		set_bit(HIF_ABORTED, &new->gh_iflags);
+		error = -EINVAL;
+	}
+
+	return error;
+}
+
+/**
+ * add_to_queue - Add a holder to the wait queue (but look for recursion)
+ * @gh: the holder structure to add
+ *
+ */
+
+static void add_to_queue(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_holder *existing;
+
+	if (!gh->gh_owner)
+		goto out;
+
+	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
+	if (existing) {
+		if (recurse_check(existing, gh, gl->gl_state))
+			return;
+
+		list_add_tail(&gh->gh_list, &gl->gl_holders);
+		set_bit(HIF_HOLDER, &gh->gh_iflags);
+
+		gh->gh_error = 0;
+		complete(&gh->gh_wait);
+
+		return;
+	}
+
+	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
+	if (existing) {
+		if (recurse_check(existing, gh, existing->gh_state))
+			return;
+
+		set_bit(HIF_RECURSE, &gh->gh_iflags);
+		set_bit(HIF_RECURSE, &existing->gh_iflags);
+
+		list_add_tail(&gh->gh_list, &gl->gl_waiters3);
+
+		return;
+	}
+
+ out:
+	if (gh->gh_flags & LM_FLAG_PRIORITY)
+		list_add(&gh->gh_list, &gl->gl_waiters3);
+	else
+		list_add_tail(&gh->gh_list, &gl->gl_waiters3);	
+}
+
+/**
+ * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
+ * @gh: the holder structure
+ *
+ * if (gh->gh_flags & GL_ASYNC), this never returns an error
+ *
+ * Returns: 0, GLR_TRYFAILED, or errno on failure
+ */
+
+int gfs2_glock_nq(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	int error = 0;
+
+	atomic_inc(&sdp->sd_glock_nq_calls);
+
+ restart:
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+		set_bit(HIF_ABORTED, &gh->gh_iflags);
+		return -EIO;
+	}
+
+	set_bit(HIF_PROMOTE, &gh->gh_iflags);
+
+	spin_lock(&gl->gl_spin);
+	add_to_queue(gh);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+
+	if (!(gh->gh_flags & GL_ASYNC)) {
+		error = glock_wait_internal(gh);
+		if (error == GLR_CANCELED) {
+			msleep(1000);
+			goto restart;
+		}
+	}
+
+	clear_bit(GLF_PREFETCH, &gl->gl_flags);
+
+	return error;
+}
+
+/**
+ * gfs2_glock_poll - poll to see if an async request has been completed
+ * @gh: the holder
+ *
+ * Returns: TRUE if the request is ready to be gfs2_glock_wait()ed on
+ */
+
+int gfs2_glock_poll(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	int ready = FALSE;
+
+	spin_lock(&gl->gl_spin);
+
+	if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+		ready = TRUE;
+	else if (list_empty(&gh->gh_list)) {
+		if (gh->gh_error == GLR_CANCELED) {
+			spin_unlock(&gl->gl_spin);
+			msleep(1000);
+			if (gfs2_glock_nq(gh))
+				return TRUE;
+			return FALSE;
+		} else
+			ready = TRUE;
+	}
+
+	spin_unlock(&gl->gl_spin);
+
+	return ready;
+}
+
+/**
+ * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
+ * @gh: the holder structure
+ *
+ * Returns: 0, GLR_TRYFAILED, or errno on failure
+ */
+
+int gfs2_glock_wait(struct gfs2_holder *gh)
+{
+	int error;
+
+	error = glock_wait_internal(gh);
+	if (error == GLR_CANCELED) {
+		msleep(1000);
+		gh->gh_flags &= ~GL_ASYNC;
+		error = gfs2_glock_nq(gh);
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
+ * @gh: the glock holder
+ *
+ */
+
+void gfs2_glock_dq(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+
+	atomic_inc(&sdp->sd_glock_dq_calls);
+
+	if (gh->gh_flags & GL_SYNC)
+		set_bit(GLF_SYNC, &gl->gl_flags);
+
+	if (gh->gh_flags & GL_NOCACHE)
+		handle_callback(gl, LM_ST_UNLOCKED);
+
+	gfs2_glmutex_lock(gl);
+
+	spin_lock(&gl->gl_spin);
+	list_del_init(&gh->gh_list);
+
+	if (list_empty(&gl->gl_holders)) {
+		spin_unlock(&gl->gl_spin);
+
+		if (glops->go_unlock)
+			glops->go_unlock(gh);
+
+		if (test_bit(GLF_SYNC, &gl->gl_flags)) {
+			if (glops->go_sync)
+				glops->go_sync(gl, DIO_METADATA | DIO_DATA);
+		}
+
+		gl->gl_stamp = jiffies;
+
+		spin_lock(&gl->gl_spin);
+	}
+
+	clear_bit(GLF_LOCK, &gl->gl_flags);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+}
+
+/**
+ * gfs2_glock_prefetch - Try to prefetch a glock
+ * @gl: the glock
+ * @state: the state to prefetch in
+ * @flags: flags passed to go_xmote_th()
+ *
+ */
+
+void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags)
+{
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+
+	spin_lock(&gl->gl_spin);
+
+	if (test_bit(GLF_LOCK, &gl->gl_flags) ||
+	    !list_empty(&gl->gl_holders) ||
+	    !list_empty(&gl->gl_waiters1) ||
+	    !list_empty(&gl->gl_waiters2) ||
+	    !list_empty(&gl->gl_waiters3) ||
+	    relaxed_state_ok(gl->gl_state, state, flags)) {
+		spin_unlock(&gl->gl_spin);
+		return;
+	}
+
+	set_bit(GLF_PREFETCH, &gl->gl_flags);
+	set_bit(GLF_LOCK, &gl->gl_flags);
+	spin_unlock(&gl->gl_spin);
+
+	glops->go_xmote_th(gl, state, flags);
+
+	atomic_inc(&gl->gl_sbd->sd_glock_prefetch_calls);
+}
+
+/**
+ * gfs2_glock_force_drop - Force a glock to be uncached
+ * @gl: the glock
+ *
+ */
+
+void gfs2_glock_force_drop(struct gfs2_glock *gl)
+{
+	struct gfs2_holder gh;
+
+	gfs2_holder_init(gl, LM_ST_UNLOCKED, GL_NEVER_RECURSE, &gh);
+	set_bit(HIF_DEMOTE, &gh.gh_iflags);
+
+	spin_lock(&gl->gl_spin);
+	list_add_tail(&gh.gh_list, &gl->gl_waiters2);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+
+	wait_for_completion(&gh.gh_wait);
+	gfs2_holder_uninit(&gh);
+}
+
+static void greedy_work(void *data)
+{
+	struct greedy *gr = (struct greedy *)data;
+	struct gfs2_holder *gh = &gr->gr_gh;
+	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+
+	clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
+
+	if (glops->go_greedy)
+		glops->go_greedy(gl);
+
+	spin_lock(&gl->gl_spin);
+
+	if (list_empty(&gl->gl_waiters2)) {
+		clear_bit(GLF_GREEDY, &gl->gl_flags);
+		spin_unlock(&gl->gl_spin);
+		gfs2_holder_uninit(gh);
+		kfree(gr);
+	} else {
+		glock_hold(gl);
+		list_add_tail(&gh->gh_list, &gl->gl_waiters2);
+		run_queue(gl);
+		spin_unlock(&gl->gl_spin);
+		glock_put(gl);
+	}
+}
+
+/**
+ * gfs2_glock_be_greedy -
+ * @gl:
+ * @time:
+ *
+ * Returns: 0 if go_greedy will be called, 1 otherwise
+ */
+
+int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
+{
+	struct greedy *gr;
+	struct gfs2_holder *gh;
+
+	if (!time ||
+	    gl->gl_sbd->sd_args.ar_localcaching ||
+	    test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
+		return 1;
+
+	gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
+	if (!gr) {
+		clear_bit(GLF_GREEDY, &gl->gl_flags);
+		return 1;
+	}
+	gh = &gr->gr_gh;
+
+	gfs2_holder_init(gl, 0, GL_NEVER_RECURSE, gh);
+	set_bit(HIF_GREEDY, &gh->gh_iflags);
+	INIT_WORK(&gr->gr_work, greedy_work, gr);
+
+	set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
+	schedule_delayed_work(&gr->gr_work, time);
+
+	return 0;
+}
+
+/**
+ * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
+ * @gl: the glock
+ * @state: the state we're requesting
+ * @flags: the modifier flags
+ * @gh: the holder structure
+ *
+ * Returns: 0, GLR_*, or errno
+ */
+
+int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
+		       struct gfs2_holder *gh)
+{
+	int error;
+
+	gfs2_holder_init(gl, state, flags, gh);
+
+	error = gfs2_glock_nq(gh);
+	if (error)
+		gfs2_holder_uninit(gh);
+
+	return error;
+}
+
+/**
+ * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
+ * @gh: the holder structure
+ *
+ */
+
+void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
+{
+	gfs2_glock_dq(gh);
+	gfs2_holder_uninit(gh);
+}
+
+/**
+ * gfs2_glock_nq_num - acquire a glock based on lock number
+ * @sdp: the filesystem
+ * @number: the lock number
+ * @glops: the glock operations for the type of glock
+ * @state: the state to acquire the glock in
+ * @flags: modifier flags for the aquisition
+ * @gh: the struct gfs2_holder
+ *
+ * Returns: errno
+ */
+
+int gfs2_glock_nq_num(struct gfs2_sbd *sdp, uint64_t number,
+		      struct gfs2_glock_operations *glops, unsigned int state,
+		      int flags, struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl;
+	int error;
+
+	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
+	if (!error) {
+		error = gfs2_glock_nq_init(gl, state, flags, gh);
+		glock_put(gl);
+	}
+
+	return error;
+}
+
+/**
+ * glock_compare - Compare two struct gfs2_glock structures for sorting
+ * @arg_a: the first structure
+ * @arg_b: the second structure
+ *
+ */
+
+static int glock_compare(const void *arg_a, const void *arg_b)
+{
+	struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
+	struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
+	struct lm_lockname *a = &gh_a->gh_gl->gl_name;
+	struct lm_lockname *b = &gh_b->gh_gl->gl_name;
+	int ret = 0;
+
+	if (a->ln_number > b->ln_number)
+		ret = 1;
+	else if (a->ln_number < b->ln_number)
+		ret = -1;
+	else {
+		if (gh_a->gh_state == LM_ST_SHARED &&
+		    gh_b->gh_state == LM_ST_EXCLUSIVE)
+			ret = 1;
+		else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
+			 (gh_b->gh_flags & GL_LOCAL_EXCL))
+			ret = 1;
+	}
+
+	return ret;
+}
+
+/**
+ * nq_m_sync - synchonously acquire more than one glock in deadlock free order
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs2_holder structures
+ *
+ * Returns: 0 on success (all glocks acquired),
+ *          errno on failure (no glocks acquired)
+ */
+
+static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
+		     struct gfs2_holder **p)
+{
+	unsigned int x;
+	int error = 0;
+
+	for (x = 0; x < num_gh; x++)
+		p[x] = &ghs[x];
+
+	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
+
+	for (x = 0; x < num_gh; x++) {
+		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+
+		error = gfs2_glock_nq(p[x]);
+		if (error) {
+			while (x--)
+				gfs2_glock_dq(p[x]);
+			break;
+		}
+	}
+
+	return error;
+}
+
+/**
+ * gfs2_glock_nq_m - acquire multiple glocks
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs2_holder structures
+ *
+ * Figure out how big an impact this function has.  Either:
+ * 1) Replace this code with code that calls gfs2_glock_prefetch()
+ * 2) Forget async stuff and just call nq_m_sync()
+ * 3) Leave it like it is
+ *
+ * Returns: 0 on success (all glocks acquired),
+ *          errno on failure (no glocks acquired)
+ */
+
+int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+	int *e;
+	unsigned int x;
+	int borked = FALSE, serious = 0;
+	int error = 0;
+
+	if (!num_gh)
+		return 0;
+
+	if (num_gh == 1) {
+		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
+		return gfs2_glock_nq(ghs);
+	}
+
+	e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+
+	for (x = 0; x < num_gh; x++) {
+		ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
+		error = gfs2_glock_nq(&ghs[x]);
+		if (error) {
+			borked = TRUE;
+			serious = error;
+			num_gh = x;
+			break;
+		}
+	}
+
+	for (x = 0; x < num_gh; x++) {
+		error = e[x] = glock_wait_internal(&ghs[x]);
+		if (error) {
+			borked = TRUE;
+			if (error != GLR_TRYFAILED && error != GLR_CANCELED)
+				serious = error;
+		}
+	}
+
+	if (!borked) {
+		kfree(e);
+		return 0;
+	}
+
+	for (x = 0; x < num_gh; x++)
+		if (!e[x])
+			gfs2_glock_dq(&ghs[x]);
+
+	if (serious)
+		error = serious;
+	else {
+		for (x = 0; x < num_gh; x++)
+			gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
+					  &ghs[x]);
+		error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
+	}
+
+	kfree(e);
+
+	return error;
+}
+
+/**
+ * gfs2_glock_dq_m - release multiple glocks
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs2_holder structures
+ *
+ */
+
+void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+	unsigned int x;
+
+	for (x = 0; x < num_gh; x++)
+		gfs2_glock_dq(&ghs[x]);
+}
+
+/**
+ * gfs2_glock_dq_uninit_m - release multiple glocks
+ * @num_gh: the number of structures
+ * @ghs: an array of struct gfs2_holder structures
+ *
+ */
+
+void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+	unsigned int x;
+
+	for (x = 0; x < num_gh; x++)
+		gfs2_glock_dq_uninit(&ghs[x]);
+}
+
+/**
+ * gfs2_glock_prefetch_num - prefetch a glock based on lock number
+ * @sdp: the filesystem
+ * @number: the lock number
+ * @glops: the glock operations for the type of glock
+ * @state: the state to acquire the glock in
+ * @flags: modifier flags for the aquisition
+ *
+ * Returns: errno
+ */
+
+void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
+			     struct gfs2_glock_operations *glops,
+			     unsigned int state, int flags)
+{
+	struct gfs2_glock *gl;
+	int error;
+
+	if (atomic_read(&sdp->sd_reclaim_count) <
+	    gfs2_tune_get(sdp, gt_reclaim_limit)) {
+		error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
+		if (!error) {
+			gfs2_glock_prefetch(gl, state, flags);
+			glock_put(gl);
+		}
+	}
+}
+
+/**
+ * gfs2_lvb_hold - attach a LVB from a glock
+ * @gl: The glock in question
+ *
+ */
+
+int gfs2_lvb_hold(struct gfs2_glock *gl)
+{
+	int error;
+
+	gfs2_glmutex_lock(gl);
+
+	if (!atomic_read(&gl->gl_lvb_count)) {
+		error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
+		if (error) {
+			gfs2_glmutex_unlock(gl);
+			return error;
+		}
+		glock_hold(gl);
+	}
+	atomic_inc(&gl->gl_lvb_count);
+
+	gfs2_glmutex_unlock(gl);
+
+	return 0;
+}
+
+/**
+ * gfs2_lvb_unhold - detach a LVB from a glock
+ * @gl: The glock in question
+ *
+ */
+
+void gfs2_lvb_unhold(struct gfs2_glock *gl)
+{
+	glock_hold(gl);
+	gfs2_glmutex_lock(gl);
+
+	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0,);
+	if (atomic_dec_and_test(&gl->gl_lvb_count)) {
+		gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
+		gl->gl_lvb = NULL;
+		glock_put(gl);
+	}
+
+	gfs2_glmutex_unlock(gl);
+	glock_put(gl);
+}
+
+void gfs2_lvb_sync(struct gfs2_glock *gl)
+{
+	gfs2_glmutex_lock(gl);
+
+	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count),);
+	if (!gfs2_assert_warn(gl->gl_sbd, gfs2_glock_is_held_excl(gl)))
+		gfs2_lm_sync_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
+
+	gfs2_glmutex_unlock(gl);
+}
+
+static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
+			unsigned int state)
+{
+	struct gfs2_glock *gl;
+
+	gl = gfs2_glock_find(sdp, name);
+	if (!gl)
+		return;
+
+	if (gl->gl_ops->go_callback)
+		gl->gl_ops->go_callback(gl, state);
+	handle_callback(gl, state);
+
+	spin_lock(&gl->gl_spin);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+
+	glock_put(gl);
+}
+
+/**
+ * gfs2_glock_cb - Callback used by locking module
+ * @fsdata: Pointer to the superblock
+ * @type: Type of callback
+ * @data: Type dependent data pointer
+ *
+ * Called by the locking module when it wants to tell us something.
+ * Either we need to drop a lock, one of our ASYNC requests completed, or
+ * a journal from another client needs to be recovered.
+ */
+
+void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data)
+{
+	struct gfs2_sbd *sdp = (struct gfs2_sbd *)fsdata;
+
+	atomic_inc(&sdp->sd_lm_callbacks);
+
+	switch (type) {
+	case LM_CB_NEED_E:
+		blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_UNLOCKED);
+		return;
+
+	case LM_CB_NEED_D:
+		blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_DEFERRED);
+		return;
+
+	case LM_CB_NEED_S:
+		blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_SHARED);
+		return;
+
+	case LM_CB_ASYNC: {
+		struct lm_async_cb *async = (struct lm_async_cb *)data;
+		struct gfs2_glock *gl;
+
+		gl = gfs2_glock_find(sdp, &async->lc_name);
+		if (gfs2_assert_warn(sdp, gl))
+			return;
+		if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
+			gl->gl_req_bh(gl, async->lc_ret);
+		glock_put(gl);
+
+		return;
+	}
+
+	case LM_CB_NEED_RECOVERY:
+		gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
+		if (sdp->sd_recoverd_process)
+			wake_up_process(sdp->sd_recoverd_process);
+		return;
+
+	case LM_CB_DROPLOCKS:
+		gfs2_gl_hash_clear(sdp, NO_WAIT);
+		gfs2_quota_scan(sdp);
+		return;
+
+	default:
+		gfs2_assert_warn(sdp, FALSE);
+		return;
+	}
+}
+
+/**
+ * gfs2_try_toss_inode - try to remove a particular inode struct from cache
+ * sdp: the filesystem
+ * inum: the inode number
+ *
+ */
+
+void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum)
+{
+	struct gfs2_glock *gl;
+	struct gfs2_inode *ip;
+	int error;
+
+	error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
+			       NO_CREATE, &gl);
+	if (error || !gl)
+		return;
+
+	if (!gfs2_glmutex_trylock(gl))
+		goto out;
+
+	ip = get_gl2ip(gl);
+	if (!ip)
+		goto out_unlock;
+
+	if (atomic_read(&ip->i_count))
+		goto out_unlock;
+
+	gfs2_inode_destroy(ip);
+
+ out_unlock:
+	gfs2_glmutex_unlock(gl);
+
+ out:
+	glock_put(gl);
+}
+
+/**
+ * gfs2_iopen_go_callback - Try to kick the inode/vnode associated with an
+ *                          iopen glock from memory
+ * @io_gl: the iopen glock
+ * @state: the state into which the glock should be put
+ *
+ */
+
+void gfs2_iopen_go_callback(struct gfs2_glock *io_gl, unsigned int state)
+{
+	struct gfs2_glock *i_gl;
+
+	if (state != LM_ST_UNLOCKED)
+		return;
+
+	spin_lock(&io_gl->gl_spin);
+	i_gl = get_gl2gl(io_gl);
+	if (i_gl) {
+		glock_hold(i_gl);
+		spin_unlock(&io_gl->gl_spin);
+	} else {
+		spin_unlock(&io_gl->gl_spin);
+		return;
+	}
+
+	if (gfs2_glmutex_trylock(i_gl)) {
+		struct gfs2_inode *ip = get_gl2ip(i_gl);
+		if (ip) {
+			gfs2_try_toss_vnode(ip);
+			gfs2_glmutex_unlock(i_gl);
+			gfs2_glock_schedule_for_reclaim(i_gl);
+				goto out;
+		}
+		gfs2_glmutex_unlock(i_gl);
+	}
+
+ out:
+	glock_put(i_gl);
+}
+
+/**
+ * demote_ok - Check to see if it's ok to unlock a glock
+ * @gl: the glock
+ *
+ * Returns: TRUE if it's ok
+ */
+
+static int demote_ok(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_glock_operations *glops = gl->gl_ops;
+	int demote = TRUE;
+
+	if (test_bit(GLF_STICKY, &gl->gl_flags))
+		demote = FALSE;
+	else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
+		demote = time_after_eq(jiffies,
+				    gl->gl_stamp +
+				    gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
+	else if (glops->go_demote_ok)
+		demote = glops->go_demote_ok(gl);
+
+	return demote;
+}
+
+/**
+ * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * @gl: the glock
+ *
+ */
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+	if (list_empty(&gl->gl_reclaim)) {
+		glock_hold(gl);
+		list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
+		atomic_inc(&sdp->sd_reclaim_count);
+	}
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	wake_up(&sdp->sd_reclaim_wq);
+}
+
+/**
+ * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
+ * @sdp: the filesystem
+ *
+ * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
+ * different glock and we notice that there are a lot of glocks in the
+ * reclaim list.
+ *
+ */
+
+void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
+{
+	struct gfs2_glock *gl;
+	struct gfs2_gl_hash_bucket *bucket;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+
+	if (list_empty(&sdp->sd_reclaim_list)) {
+		spin_unlock(&sdp->sd_reclaim_lock);
+		return;
+	}
+
+	gl = list_entry(sdp->sd_reclaim_list.next,
+			struct gfs2_glock, gl_reclaim);
+	list_del_init(&gl->gl_reclaim);
+
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	atomic_dec(&sdp->sd_reclaim_count);
+	atomic_inc(&sdp->sd_reclaimed);
+
+	if (gfs2_glmutex_trylock(gl)) {
+		if (gl->gl_ops == &gfs2_inode_glops) {
+			struct gfs2_inode *ip = get_gl2ip(gl);
+			if (ip && !atomic_read(&ip->i_count))
+				gfs2_inode_destroy(ip);
+		}
+		if (queue_empty(gl, &gl->gl_holders) &&
+		    gl->gl_state != LM_ST_UNLOCKED &&
+		    demote_ok(gl))
+			handle_callback(gl, LM_ST_UNLOCKED);
+		gfs2_glmutex_unlock(gl);
+	}
+
+	bucket = gl->gl_bucket;
+
+	write_lock(&bucket->hb_lock);
+	if (atomic_read(&gl->gl_count) == 1) {
+		list_del_init(&gl->gl_list);
+		write_unlock(&bucket->hb_lock);
+		glock_free(gl);
+	} else {
+		write_unlock(&bucket->hb_lock);
+		glock_put(gl);
+	}
+}
+
+/**
+ * examine_bucket - Call a function for glock in a hash bucket
+ * @examiner: the function
+ * @sdp: the filesystem
+ * @bucket: the bucket
+ *
+ * Returns: TRUE if the bucket has entries
+ */
+
+static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
+			  struct gfs2_gl_hash_bucket *bucket)
+{
+	struct glock_plug plug;
+	struct list_head *tmp;
+	struct gfs2_glock *gl;
+	int entries;
+
+	/* Add "plug" to end of bucket list, work back up list from there */
+	memset(&plug.gl_flags, 0, sizeof(unsigned long));
+	set_bit(GLF_PLUG, &plug.gl_flags);
+
+	write_lock(&bucket->hb_lock);
+	list_add(&plug.gl_list, &bucket->hb_list);
+	write_unlock(&bucket->hb_lock);
+
+	for (;;) {
+		write_lock(&bucket->hb_lock);
+
+		for (;;) {
+			tmp = plug.gl_list.next;
+
+			if (tmp == &bucket->hb_list) {
+				list_del(&plug.gl_list);
+				entries = !list_empty(&bucket->hb_list);
+				write_unlock(&bucket->hb_lock);
+				return entries;
+			}
+			gl = list_entry(tmp, struct gfs2_glock, gl_list);
+
+			/* Move plug up list */
+			list_move(&plug.gl_list, &gl->gl_list);
+
+			if (test_bit(GLF_PLUG, &gl->gl_flags))
+				continue;
+
+			/* examiner must glock_put() */
+			atomic_inc(&gl->gl_count);
+
+			break;
+		}
+
+		write_unlock(&bucket->hb_lock);
+
+		examiner(gl);
+	}
+}
+
+/**
+ * scan_glock - look at a glock and see if we can reclaim it
+ * @gl: the glock to look at
+ *
+ */
+
+static void scan_glock(struct gfs2_glock *gl)
+{
+	if (gfs2_glmutex_trylock(gl)) {
+		if (gl->gl_ops == &gfs2_inode_glops) {
+			struct gfs2_inode *ip = get_gl2ip(gl);
+			if (ip && !atomic_read(&ip->i_count))
+				goto out_schedule;
+		}
+		if (queue_empty(gl, &gl->gl_holders) &&
+		    gl->gl_state != LM_ST_UNLOCKED &&
+		    demote_ok(gl))
+			goto out_schedule;
+
+		gfs2_glmutex_unlock(gl);
+	}
+
+	glock_put(gl);
+
+	return;
+
+ out_schedule:
+	gfs2_glmutex_unlock(gl);
+	gfs2_glock_schedule_for_reclaim(gl);
+	glock_put(gl);
+}
+
+/**
+ * gfs2_scand_internal - Look for glocks and inodes to toss from memory
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_scand_internal(struct gfs2_sbd *sdp)
+{
+	unsigned int x;
+
+	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
+		examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
+		cond_resched();
+	}
+}
+
+/**
+ * clear_glock - look at a glock and see if we can free it from glock cache
+ * @gl: the glock to look at
+ *
+ */
+
+static void clear_glock(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_gl_hash_bucket *bucket = gl->gl_bucket;
+
+	spin_lock(&sdp->sd_reclaim_lock);
+	if (!list_empty(&gl->gl_reclaim)) {
+		list_del_init(&gl->gl_reclaim);
+		atomic_dec(&sdp->sd_reclaim_count);
+		glock_put(gl);
+	}
+	spin_unlock(&sdp->sd_reclaim_lock);
+
+	if (gfs2_glmutex_trylock(gl)) {
+		if (gl->gl_ops == &gfs2_inode_glops) {
+			struct gfs2_inode *ip = get_gl2ip(gl);
+			if (ip && !atomic_read(&ip->i_count))
+				gfs2_inode_destroy(ip);
+		}
+		if (queue_empty(gl, &gl->gl_holders) &&
+		    gl->gl_state != LM_ST_UNLOCKED)
+			handle_callback(gl, LM_ST_UNLOCKED);
+
+		gfs2_glmutex_unlock(gl);
+	}
+
+	write_lock(&bucket->hb_lock);
+	if (atomic_read(&gl->gl_count) == 1) {
+		list_del_init(&gl->gl_list);
+		write_unlock(&bucket->hb_lock);
+		glock_free(gl);
+	} else {
+		write_unlock(&bucket->hb_lock);
+		glock_put(gl);
+	}
+}
+
+/**
+ * gfs2_gl_hash_clear - Empty out the glock hash table
+ * @sdp: the filesystem
+ * @wait: wait until it's all gone
+ *
+ * Called when unmounting the filesystem, or when inter-node lock manager
+ * requests DROPLOCKS because it is running out of capacity.
+ */
+
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
+{
+	unsigned long t;
+	unsigned int x;
+	int cont;
+
+	t = jiffies;
+
+	for (;;) {
+		cont = FALSE;
+
+		for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+			if (examine_bucket(clear_glock, sdp,
+					   &sdp->sd_gl_hash[x]))
+				cont = TRUE;
+
+		if (!wait || !cont)
+			break;
+
+		if (time_after_eq(jiffies,
+				  t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
+			fs_warn(sdp, "Unmount seems to be stalled. "
+				     "Dumping lock state...\n");
+			gfs2_dump_lockstate(sdp, NULL);
+			t = jiffies;
+		}
+
+		invalidate_inodes(sdp->sd_vfs);
+		yield();
+	}
+}
+
+/*
+ *  Diagnostic routines to help debug distributed deadlock
+ */
+
+/**
+ * dump_holder - print information about a glock holder
+ * @str: a string naming the type of holder
+ * @gh: the glock holder
+ * @buf: the buffer
+ * @size: the size of the buffer
+ * @count: where we are in the buffer
+ *
+ * Returns: 0 on success, -ENOBUFS when we run out of space
+ */
+
+static int dump_holder(char *str, struct gfs2_holder *gh, char *buf,
+		       unsigned int size, unsigned int *count)
+{
+	unsigned int x;
+	int error = -ENOBUFS;
+
+	gfs2_printf("  %s\n", str);
+	gfs2_printf("    owner = %ld\n",
+		   (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
+	gfs2_printf("    gh_state = %u\n", gh->gh_state);
+	gfs2_printf("    gh_flags =");
+	for (x = 0; x < 32; x++)
+		if (gh->gh_flags & (1 << x))
+			gfs2_printf(" %u", x);
+	gfs2_printf(" \n");
+	gfs2_printf("    error = %d\n", gh->gh_error);
+	gfs2_printf("    gh_iflags =");
+	for (x = 0; x < 32; x++)
+		if (test_bit(x, &gh->gh_iflags))
+			gfs2_printf(" %u", x);
+	gfs2_printf(" \n");
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * dump_inode - print information about an inode
+ * @ip: the inode
+ * @buf: the buffer
+ * @size: the size of the buffer
+ * @count: where we are in the buffer
+ *
+ * Returns: 0 on success, -ENOBUFS when we run out of space
+ */
+
+static int dump_inode(struct gfs2_inode *ip, char *buf, unsigned int size,
+		      unsigned int *count)
+{
+	unsigned int x;
+	int error = -ENOBUFS;
+
+	gfs2_printf("  Inode:\n");
+	gfs2_printf("    num = %"PRIu64"/%"PRIu64"\n",
+		    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+	gfs2_printf("    type = %u\n", IF2DT(ip->i_di.di_mode));
+	gfs2_printf("    i_count = %d\n", atomic_read(&ip->i_count));
+	gfs2_printf("    i_flags =");
+	for (x = 0; x < 32; x++)
+		if (test_bit(x, &ip->i_flags))
+			gfs2_printf(" %u", x);
+	gfs2_printf(" \n");
+	gfs2_printf("    vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
+
+	error = 0;
+
+ out:
+	return error;
+}
+
+/**
+ * dump_glock - print information about a glock
+ * @gl: the glock
+ * @buf: the buffer
+ * @size: the size of the buffer
+ * @count: where we are in the buffer
+ *
+ * Returns: 0 on success, -ENOBUFS when we run out of space
+ */
+
+static int dump_glock(struct gfs2_glock *gl, char *buf, unsigned int size,
+		      unsigned int *count)
+{
+	struct gfs2_holder *gh;
+	unsigned int x;
+	int error = -ENOBUFS;
+
+	spin_lock(&gl->gl_spin);
+
+	gfs2_printf("Glock (%u, %"PRIu64")\n",
+		    gl->gl_name.ln_type,
+		    gl->gl_name.ln_number);
+	gfs2_printf("  gl_flags =");
+	for (x = 0; x < 32; x++)
+		if (test_bit(x, &gl->gl_flags))
+			gfs2_printf(" %u", x);
+	gfs2_printf(" \n");
+	gfs2_printf("  gl_count = %d\n", atomic_read(&gl->gl_count));
+	gfs2_printf("  gl_state = %u\n", gl->gl_state);
+	gfs2_printf("  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
+	gfs2_printf("  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
+	gfs2_printf("  lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
+	gfs2_printf("  object = %s\n", (gl->gl_object) ? "yes" : "no");
+	gfs2_printf("  le = %s\n",
+		   (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
+	gfs2_printf("  reclaim = %s\n",
+		    (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
+	if (gl->gl_aspace)
+		gfs2_printf("  aspace = %lu\n",
+			    gl->gl_aspace->i_mapping->nrpages);
+	else
+		gfs2_printf("  aspace = no\n");
+	gfs2_printf("  ail = %d\n", atomic_read(&gl->gl_ail_count));
+	if (gl->gl_req_gh) {
+		error = dump_holder("Request", gl->gl_req_gh, buf, size, count);
+		if (error)
+			goto out;
+	}
+	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+		error = dump_holder("Holder", gh, buf, size, count);
+		if (error)
+			goto out;
+	}
+	list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
+		error = dump_holder("Waiter1", gh, buf, size, count);
+		if (error)
+			goto out;
+	}
+	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
+		error = dump_holder("Waiter2", gh, buf, size, count);
+		if (error)
+			goto out;
+	}
+	list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
+		error = dump_holder("Waiter3", gh, buf, size, count);
+		if (error)
+			goto out;
+	}
+	if (gl->gl_ops == &gfs2_inode_glops && get_gl2ip(gl)) {
+		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
+		    list_empty(&gl->gl_holders)) {
+			error = dump_inode(get_gl2ip(gl), buf, size, count);
+			if (error)
+				goto out;
+		} else {
+			error = -ENOBUFS;
+			gfs2_printf("  Inode: busy\n");
+		}
+	}
+
+	error = 0;
+
+ out:
+	spin_unlock(&gl->gl_spin);
+
+	return error;
+}
+
+/**
+ * gfs2_dump_lockstate - print out the current lockstate
+ * @sdp: the filesystem
+ * @ub: the buffer to copy the information into
+ *
+ * If @ub is NULL, dump the lockstate to the console.
+ *
+ */
+
+int gfs2_dump_lockstate(struct gfs2_sbd *sdp, struct gfs2_user_buffer *ub)
+{
+	struct gfs2_gl_hash_bucket *bucket;
+	struct gfs2_glock *gl;
+	char *buf = NULL;
+	unsigned int size = gfs2_tune_get(sdp, gt_lockdump_size);
+	unsigned int x, count;
+	int error = 0;
+
+	if (ub) {
+		buf = kmalloc(size, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
+	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
+		bucket = &sdp->sd_gl_hash[x];
+		count = 0;
+
+		read_lock(&bucket->hb_lock);
+
+		list_for_each_entry(gl, &bucket->hb_list, gl_list) {
+			if (test_bit(GLF_PLUG, &gl->gl_flags))
+				continue;
+
+			error = dump_glock(gl, buf, size, &count);
+			if (error)
+				break;
+		}
+
+		read_unlock(&bucket->hb_lock);
+
+		if (error)
+			break;
+
+		if (ub) {
+			if (ub->ub_count + count > ub->ub_size) {
+				error = -ENOMEM;
+				break;
+			}
+			if (copy_to_user(ub->ub_data + ub->ub_count, buf,
+					 count)) {
+				error = -EFAULT;
+				break;
+			}
+			ub->ub_count += count;
+		}
+	}
+
+	kfree(buf);
+
+	return error;
+}
+
--- a/fs/gfs2/glock.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/glock.h	2005-09-01 17:36:55.262123528 +0800
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __GLOCK_DOT_H__
+#define __GLOCK_DOT_H__
+
+/* Flags for lock requests; used in gfs2_holder gh_flag field.
+   From lm_interface.h:
+#define LM_FLAG_TRY		0x00000001
+#define LM_FLAG_TRY_1CB		0x00000002
+#define LM_FLAG_NOEXP		0x00000004
+#define LM_FLAG_ANY		0x00000008
+#define LM_FLAG_PRIORITY	0x00000010 */
+
+#define GL_LOCAL_EXCL		0x00000020
+#define GL_ASYNC		0x00000040
+#define GL_EXACT		0x00000080
+#define GL_SKIP			0x00000100
+#define GL_ATIME		0x00000200
+#define GL_NOCACHE		0x00000400
+#define GL_SYNC			0x00000800
+#define GL_NOCANCEL		0x00001000
+#define GL_NEVER_RECURSE	0x00002000
+
+#define GLR_TRYFAILED		13
+#define GLR_CANCELED		14
+
+static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
+{
+	struct list_head *tmp, *head;
+	struct gfs2_holder *gh;
+	int locked = FALSE;
+
+	/* Look in glock's list of holders for one with current task as owner */
+	spin_lock(&gl->gl_spin);
+	for (head = &gl->gl_holders, tmp = head->next;
+	     tmp != head;
+	     tmp = tmp->next) {
+		gh = list_entry(tmp, struct gfs2_holder, gh_list);
+		if (gh->gh_owner == current) {
+			locked = TRUE;
+			break;
+		}
+	}
+	spin_unlock(&gl->gl_spin);
+
+	return locked;
+}
+
+static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
+{
+	return (gl->gl_state == LM_ST_EXCLUSIVE);
+}
+
+static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
+{
+	return (gl->gl_state == LM_ST_DEFERRED);
+}
+
+static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
+{
+	return (gl->gl_state == LM_ST_SHARED);
+}
+
+static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
+{
+	int ret;
+	spin_lock(&gl->gl_spin);
+	ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
+	spin_unlock(&gl->gl_spin);
+	return ret;
+}
+
+struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
+				   struct lm_lockname *name);
+int gfs2_glock_get(struct gfs2_sbd *sdp,
+		   uint64_t number, struct gfs2_glock_operations *glops,
+		   int create, struct gfs2_glock **glp);
+void gfs2_glock_hold(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
+
+void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
+		      struct gfs2_holder *gh);
+void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh);
+void gfs2_holder_uninit(struct gfs2_holder *gh);
+struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
+				    int flags, int gfp_flags);
+void gfs2_holder_put(struct gfs2_holder *gh);
+
+void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
+void gfs2_glock_drop_th(struct gfs2_glock *gl);
+
+void gfs2_glmutex_lock(struct gfs2_glock *gl);
+int gfs2_glmutex_trylock(struct gfs2_glock *gl);
+void gfs2_glmutex_unlock(struct gfs2_glock *gl);
+
+int gfs2_glock_nq(struct gfs2_holder *gh);
+int gfs2_glock_poll(struct gfs2_holder *gh);
+int gfs2_glock_wait(struct gfs2_holder *gh);
+void gfs2_glock_dq(struct gfs2_holder *gh);
+
+void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags);
+void gfs2_glock_force_drop(struct gfs2_glock *gl);
+
+int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
+
+int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
+		       struct gfs2_holder *gh);
+void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
+int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
+		      uint64_t number, struct gfs2_glock_operations *glops,
+		      unsigned int state, int flags, struct gfs2_holder *gh);
+
+int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+
+void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
+			     struct gfs2_glock_operations *glops,
+			     unsigned int state, int flags);
+
+/*  Lock Value Block functions  */
+
+int gfs2_lvb_hold(struct gfs2_glock *gl);
+void gfs2_lvb_unhold(struct gfs2_glock *gl);
+void gfs2_lvb_sync(struct gfs2_glock *gl);
+
+void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data);
+
+void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum);
+void gfs2_iopen_go_callback(struct gfs2_glock *gl, unsigned int state);
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
+
+void gfs2_scand_internal(struct gfs2_sbd *sdp);
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
+
+int gfs2_dump_lockstate(struct gfs2_sbd *sdp, struct gfs2_user_buffer *ub);
+
+#endif /* __GLOCK_DOT_H__ */
--- a/fs/gfs2/lm.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lm.c	2005-09-01 17:36:55.338111976 +0800
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/delay.h>
+#include <asm/semaphore.h>
+
+#include "gfs2.h"
+#include "glock.h"
+#include "lm.h"
+#include "super.h"
+
+/**
+ * gfs2_lm_mount - mount a locking protocol
+ * @sdp: the filesystem
+ * @args: mount arguements
+ * @silent: if TRUE, don't complain if the FS isn't a GFS2 fs
+ *
+ * Returns: errno
+ */
+
+int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
+{
+	struct gfs2_sb *sb = NULL;
+	char *proto, *table;
+	int flags = 0;
+	int error;
+
+	if (sdp->sd_args.ar_spectator)
+		flags |= LM_MFLAG_SPECTATOR;
+
+	proto = sdp->sd_args.ar_lockproto;
+	table = sdp->sd_args.ar_locktable;
+
+	/*  Try to autodetect  */
+
+	if (!proto[0] || !table[0]) {
+		struct buffer_head *bh;
+		bh = sb_getblk(sdp->sd_vfs,
+			       GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+		lock_buffer(bh);
+		clear_buffer_uptodate(bh);
+		clear_buffer_dirty(bh);
+		unlock_buffer(bh);
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+
+		if (!buffer_uptodate(bh)) {
+			brelse(bh);
+			return -EIO;
+		}
+
+		sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
+		if (!sb) {
+			brelse(bh);
+			return -ENOMEM;
+		}
+		gfs2_sb_in(sb, bh->b_data);
+		brelse(bh);
+
+		error = gfs2_check_sb(sdp, sb, silent);
+		if (error)
+			goto out;
+
+		if (!proto[0])
+			proto = sb->sb_lockproto;
+
+		if (!table[0])
+			table = sb->sb_locktable;
+	}
+
+	fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
+
+	error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
+			 gfs2_glock_cb, sdp,
+			 GFS2_MIN_LVB_SIZE, flags,
+			 &sdp->sd_lockstruct);
+	if (error) {
+		fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
+			proto, table, sdp->sd_args.ar_hostdata);
+		goto out;
+	}
+
+	if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
+	    gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
+	    gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= GFS2_MIN_LVB_SIZE)) {
+		lm_unmount(&sdp->sd_lockstruct);
+		goto out;
+	}
+
+	if (sdp->sd_args.ar_spectator)
+		snprintf(sdp->sd_fsname, 256, "%s.s",
+			 (*table) ? table : sdp->sd_vfs->s_id);
+	else
+		snprintf(sdp->sd_fsname, 256, "%s.%u",
+			 (*table) ? table : sdp->sd_vfs->s_id,
+			 sdp->sd_lockstruct.ls_jid);
+
+	fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+
+	if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
+	    !sdp->sd_args.ar_ignore_local_fs) {
+		/* Force local [p|f]locks */
+		sdp->sd_args.ar_localflocks = TRUE;
+
+		/* Force local read ahead and caching */
+		sdp->sd_args.ar_localcaching = TRUE;
+
+		/* Allow the machine to oops */
+		sdp->sd_args.ar_oopses_ok = TRUE;
+	}
+
+ out:
+	kfree(sb);
+
+	return error;
+}
+
+void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_others_may_mount(sdp->sd_lockstruct.ls_lockspace);
+}
+
+void gfs2_lm_unmount(struct gfs2_sbd *sdp)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		lm_unmount(&sdp->sd_lockstruct);
+}
+
+int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
+{
+	va_list args;
+
+	if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return 0;
+
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+
+	fs_err(sdp, "about to withdraw from the cluster\n");
+	if (sdp->sd_args.ar_debug)
+		BUG();
+
+	fs_err(sdp, "waiting for outstanding I/O\n");
+
+	/* FIXME: suspend dm device so oustanding bio's complete
+	   and all further io requests fail */
+
+	fs_err(sdp, "telling LM to withdraw\n");
+	lm_withdraw(&sdp->sd_lockstruct);
+	fs_err(sdp, "withdrawn\n");
+
+	return -1;
+}
+
+int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
+		     lm_lock_t **lockp)
+{
+	int error;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+	else
+		error = sdp->sd_lockstruct.ls_ops->lm_get_lock(sdp->sd_lockstruct.ls_lockspace, name, lockp);
+	return error;
+}
+
+void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
+}
+
+unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
+			  unsigned int cur_state, unsigned int req_state,
+			  unsigned int flags)
+{
+	int ret;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		ret = 0;
+	else
+		ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
+							 cur_state,
+							 req_state, flags);
+	return ret;
+}
+
+unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
+			    unsigned int cur_state)
+{
+	int ret;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		ret = 0;
+	else
+		ret =  sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
+	return ret;
+}
+
+void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
+}
+
+int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp)
+{
+	int error;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+	else
+		error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
+	return error;
+}
+
+void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
+}
+
+void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_sync_lvb(lock, lvb);
+}
+
+int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
+		      struct file *file, struct file_lock *fl)
+{
+	int error;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+	else
+		error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
+			sdp->sd_lockstruct.ls_lockspace,
+			name, file, fl);
+	return error;
+}
+
+int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
+		  struct file *file, int cmd, struct file_lock *fl)
+{
+	int error;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+	else
+		error = sdp->sd_lockstruct.ls_ops->lm_plock(
+			sdp->sd_lockstruct.ls_lockspace,
+			name, file, cmd, fl);
+	return error;
+}
+
+int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
+		    struct file *file, struct file_lock *fl)
+{
+	int error;
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		error = -EIO;
+	else
+		error = sdp->sd_lockstruct.ls_ops->lm_punlock(
+			sdp->sd_lockstruct.ls_lockspace,
+			name, file, fl);
+	return error;
+}
+
+void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
+			   unsigned int message)
+{
+	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace, jid, message);
+}
+
--- a/fs/gfs2/lm.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/lm.h	2005-09-01 17:36:55.338111976 +0800
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LM_DOT_H__
+#define __LM_DOT_H__
+
+int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
+void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
+void gfs2_lm_unmount(struct gfs2_sbd *sdp);
+int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
+__attribute__ ((format(printf, 2, 3)));
+int gfs2_lm_get_lock(struct gfs2_sbd *sdp,
+		    struct lm_lockname *name, lm_lock_t **lockp);
+void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock);
+unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
+			 unsigned int cur_state, unsigned int req_state,
+			 unsigned int flags);
+unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
+			   unsigned int cur_state);
+void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock);
+int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp);
+void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
+void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
+int gfs2_lm_plock_get(struct gfs2_sbd *sdp,
+		     struct lm_lockname *name,
+		     struct file *file, struct file_lock *fl);
+int gfs2_lm_plock(struct gfs2_sbd *sdp,
+		 struct lm_lockname *name,
+		 struct file *file, int cmd, struct file_lock *fl);
+int gfs2_lm_punlock(struct gfs2_sbd *sdp,
+		   struct lm_lockname *name,
+		   struct file *file, struct file_lock *fl);
+void gfs2_lm_recovery_done(struct gfs2_sbd *sdp,
+			  unsigned int jid, unsigned int message);
+
+#endif /* __LM_DOT_H__ */

[PATCH 10/13] GFS: build and documentation

Add gfs to the build system and gfs2.txt to Documentation.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 Documentation/filesystems/gfs2.txt |  194 +++++++++++++++++++++++++++++++++++++
 fs/Kconfig                         |   15 ++
 fs/Makefile                        |    1 
 fs/gfs2/Makefile                   |   45 ++++++++
 4 files changed, 255 insertions(+)

--- a/fs/gfs2/Makefile	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/Makefile	2005-09-01 17:36:55.572076408 +0800
@@ -0,0 +1,45 @@
+obj-$(CONFIG_GFS2_FS) += gfs2.o
+gfs2-y := \
+	acl.o \
+	bits.o \
+	bmap.o \
+	daemon.o \
+	dir.o \
+	eaops.o \
+	eattr.o \
+	glock.o \
+	glops.o \
+	inode.o \
+	ioctl.o \
+	jdata.o \
+	lm.o \
+	log.o \
+	lops.o \
+	lvb.o \
+	main.o \
+	meta_io.o \
+	mount.o \
+	ondisk.o \
+	ops_address.o \
+	ops_dentry.o \
+	ops_export.o \
+	ops_file.o \
+	ops_fstype.o \
+	ops_inode.o \
+	ops_super.o \
+	ops_vm.o \
+	page.o \
+	quota.o \
+	resize.o \
+	recovery.o \
+	rgrp.o \
+	super.o \
+	sys.o \
+	trans.o \
+	unlinked.o \
+	util.o
+
+obj-$(CONFIG_GFS2_FS) += locking/harness/
+obj-$(CONFIG_GFS2_FS) += locking/nolock/
+obj-$(CONFIG_GFS2_FS) += locking/dlm/
+
--- a/fs/Makefile	2005-09-01 16:59:28.042752800 +0800
+++ b/fs/Makefile	2005-09-01 17:10:11.211976216 +0800
@@ -105,3 +105,4 @@
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_RELAYFS_FS)	+= relayfs/
 obj-$(CONFIG_9P_FS)		+= 9p/
+obj-$(CONFIG_GFS2_FS)		+= gfs2/
--- a/fs/Kconfig	2005-09-01 16:59:28.038753408 +0800
+++ b/fs/Kconfig	2005-09-01 17:09:39.810749928 +0800
@@ -360,6 +360,21 @@
 	          - POSIX ACLs
 	          - readpages / writepages (not user visible)
 
+config GFS2_FS
+	tristate "GFS2 file system support"
+	depends on DLM
+	select FS_POSIX_ACL
+	help
+	A cluster filesystem.
+
+	Allows a cluster of computers to simultaneously use a block device
+	that is shared between them (with FC, iSCSI, NBD, etc...).  GFS reads
+	and writes to the block device like a local filesystem, but also uses
+	a lock module to allow the computers coordinate their I/O so
+	filesystem consistency is maintained.  One of the nifty features of
+	GFS is perfect consistency -- changes made to the filesystem on one
+	machine show up immediately on all other machines in the cluster.
+
 config MINIX_FS
 	tristate "Minix fs support"
 	help
--- a/Documentation/filesystems/gfs2.txt	1970-01-01 07:30:00.000000000 +0730
+++ b/Documentation/filesystems/gfs2.txt	2005-09-01 17:36:55.593073216 +0800
@@ -0,0 +1,194 @@
+Global File System
+------------------
+
+http://sources.redhat.com/cluster/
+
+GFS is a cluster file system. It allows a cluster of computers to
+simultaneously use a block device that is shared between them (with FC,
+iSCSI, NBD, etc).  GFS reads and writes to the block device like a local
+file system, but also uses a lock module to allow the computers coordinate
+their I/O so file system consistency is maintained.  One of the nifty
+features of GFS is perfect consistency -- changes made to the file system
+on one machine show up immediately on all other machines in the cluster.
+
+GFS uses interchangable inter-node locking mechanisms.  GFS plugs into one
+side of a module called "lock_harness" and different lock modules can plug
+into the other side of the harness.  Each gfs file system selects the
+appropriate lock module at mount time.  Lock modules include:
+
+  lock_nolock -- does no real locking and allows gfs to be used as a
+  local file system
+
+  lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
+  The dlm is found at linux/drivers/dlm/
+
+In addition to interfacing with an external locking manager, a gfs lock
+module is responsible for interacting with external cluster management
+systems.  Lock_dlm depends on user space cluster management systems found
+at the location above.
+
+To use gfs as a local file system, no external clustering systems are
+needed, simply:
+
+  $ gfs2_mkfs -p lock_nolock -j 1 /dev/block_device
+  $ mount -t gfs2 /dev/block_device /dir
+
+GFS2 is not on-disk compatible with previous versions of GFS.
+
+
+The following man pages can be found at the location above:
+  gfs2_mkfs	to make a filesystem
+  gfs2_fsck	to repair a filesystem
+  gfs2_grow	to expand a filesystem online
+  gfs2_jadd	to add journals to a filesystem online
+  gfs2_tool	to manipulate, examine and tune a filesystem
+  gfs2_quota	to examine and change quota values in a filesystem
+  gfs2_mount	to find mount options
+
+Mount options (from the gfs2_mount man page)
+
+       lockproto=LockModuleName
+              This  specifies  which  inter-node lock protocol is used by the
+              GFS2 filesystem for this mount,  overriding  the  default  lock
+              protocol name stored in the filesystem's on-disk superblock.
+
+              The  LockModuleName must be an exact match of the protocol name
+              presented by the lock module when it registers  with  the  lock
+              harness.   Traditionally,  this  matches the .o filename of the
+              lock module, e.g. lock_dlm, lock_gulm, or lock_nolock.
+
+              The default lock protocol name is  written  to  disk  initially
+              when  creating the filesystem with gfs2_mkfs(8), -p option.  It
+              can be changed on-disk by using the gfs2_tool(8)  utility's  sb
+              proto command.
+
+              The  lockproto  mount  option should be used only under special
+              circumstances in which you want to temporarily use a  different
+              lock protocol without changing the on-disk default.
+
+       locktable=LockTableName
+              This  specifies the identity of the cluster and of the filesys-
+              tem for this mount, overriding the  default  cluster/filesystem
+              identify  stored  in  the filesystem's on-disk superblock.  The
+              cluster/filesystem name is recognized globally  throughout  the
+              cluster,  and establishes a unique namespace for the inter-node
+              locking system, enabling the mounting of multiple GFS2 filesys-
+              tems.
+
+              The  format  of  LockTableName  is  lock-module-specific.   For
+              lock_gulm and lock_dlm, the format is clustername:fsname.   For
+              lock_nolock, the field is ignored.
+
+              The  default  cluster/filesystem  name  is written to disk ini-
+              tially when  creating  the  filesystem  with  gfs2_mkfs(8),  -t
+              option.   It  can  be changed on-disk by using the gfs2_tool(8)
+              utility's sb table command.
+
+              The locktable mount option should be used  only  under  special
+              circumstances  in  which  you want to mount the filesystem in a
+              different cluster, or mount it as a different filesystem  name,
+              without changing the on-disk default.
+
+       hostdata=HostIDInfo
+              This  field sends host (the computer on which the filesystem is
+              being mounted) identity information to the lock module.
+
+              The format and behavior of HostIDInfo is  lock-module-specific.
+              For  lock_gulm,  it overrides the uname(1) -n network node name
+              used as default by lock_gulm.  For lock_nolock, a HostIDInfo of
+              "jid=x" tells GFS to mount journal number x.
+
+              This field is ignored by lock_dlm.
+
+       localcaching
+              This  flag  tells GFS2 that it is running as a local (not clus-
+              tered) filesystem, so it can turn on some block  caching  opti-
+              mizations that can't be used when running in cluster mode.
+
+              This  is turned on automatically by the lock_nolock module, but
+              can be overridden by using the ignore_local_fs option.
+
+       localflocks
+              This flag tells GFS2 that it is running as a local  (not  clus-
+              tered)  filesystem,  so it can allow the kernel VFS layer to do
+              all flock and fcntl file  locking.   When  running  in  cluster
+              mode,  these  file  locks require inter-node locks, and require
+              the support of GFS2.  When running locally, better  performance
+              is achieved by letting VFS handle the whole job.
+
+              This  is turned on automatically by the lock_nolock module, but
+              can be overridden by using the ignore_local_fs option.
+
+       oopses_ok
+              When GFS2 detects certain errors (mostly some  type  of  memory
+              corruption) it will panic the machine rather than risk corrupt-
+              ing the ondisk state.  Using this option  will  cause  GFS2  to
+              oops  instead of panic.  This may produce more useful debugging
+              information than a panic will.  The downside of this option  is
+              that  an  oops on one machine of a cluster filesystem may cause
+              the filesystem to stall on all machines in the cluster.   (Pan-
+              ics don't have this "feature".)  Use this option with care.
+
+              This  is turned on automatically by the lock_nolock module, but
+              can be overridden by using the ignore_local_fs option.
+
+       debug  Causes GFS2 to oops when encountering an error that would cause
+              the  mount  to  withdraw  or  print an assertion warning.  This
+              option should probably not be used in a production system.
+
+       ignore_local_fs
+              By default, using the nolock lock module automatically turns on
+              the     localcaching     and     localflocks     optimizations.
+              ignore_local_fs forces GFS2 to treat the filesystem  as  if  it
+              were  a multihost (clustered) filesystem, with localcaching and
+              localflocks optimizations turned off.
+
+       upgrade
+              This flag tells GFS2 to upgrade the filesystem's on-disk format
+              to the version supported by the current GFS2 software installa-
+              tion on this computer.  If you try to mount an old-version disk
+              image,  GFS2 will notify you via a syslog message that you need
+              to upgrade.  Try mounting again, using the -o  upgrade  option.
+              When upgrading, only one node may mount the GFS2 filesystem.
+
+       num_glockd=Number
+              Tunes  GFS2  to alleviate memory pressure when rapidly aquiring
+              many locks  (e.g.   several  processes  scanning  through  huge
+              directory  trees).  GFS2' glockd kernel daemon cleans up memory
+              for no-longer-needed glocks.  Multiple instances of the  daemon
+              clean  up  faster than a single instance.  The default value is
+              one daemon, with a maximum of 16.  Since this option was intro-
+              duced,  other  methods  of  rapid  cleanup  have been developed
+              within GFS2, so this option may go away in the future.
+
+       acl    Enables POSIX Access Control List acl(5) support within GFS2.
+
+       spectator
+              Mount this filesystem using a special form of read-only  mount.
+              The mount does not use one of the filesystem's journals.
+
+       suiddir
+              Sets owner of any newly created file or directory to be that of
+              parent directory, if parent directory  has  S_ISUID  permission
+              attribute  bit  set.  Sets S_ISUID in any new directory, if its
+              parent directory's S_ISUID is set.  Strips all  execution  bits
+              on  a  new  file,  if  parent directory owner is different from
+              owner of process creating the file.  Set this  option  only  if
+              you know why you are setting it.
+
+       quota=[off/account/on]
+              Turns quotas on or off for a filesystem.  Setting the quotas to
+              be in the "account" state causes the per UID/GID usage  statis-
+              tics  to  be  correctly maintained by the filesystem, limit and
+              warn values are ignored.  The default value is "off".
+
+       data=[ordered/writeback]
+              When data=ordered is set, the user data modified by a  transac-
+              tion  is flushed to the disk before the transaction is commited
+              to disk.  This should prevent the user from  seeing  uninitial-
+              ized  blocks  in  a  file  after  a crash.  Data=writeback mode
+              writes the user data to the disk at any time after  it's  dirt-
+              ied.   This  doesn't  provide the same consistency guarantee as
+              ordered mode, but it should be slightly faster for  some  work-
+              loads.  The default is ordered mode.
+

[PATCH 11/13] GFS: lock_harness module

The lock_harness module allows a gfs file system to connect to a given
lock module.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/locking/harness/Makefile       |    3 
 fs/gfs2/locking/harness/lm_interface.h |  286 +++++++++++++++++++++++++++++++++
 fs/gfs2/locking/harness/main.c         |  206 +++++++++++++++++++++++
 3 files changed, 495 insertions(+)

diff -urpN a/fs/gfs2/locking/harness/Makefile b/fs/gfs2/locking/harness/Makefile
--- a/fs/gfs2/locking/harness/Makefile	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/harness/Makefile	2005-09-01 17:23:36.150606944 +0800
@@ -0,0 +1,3 @@
+obj-$(CONFIG_GFS2_FS) += lock_harness.o
+lock_harness-y := main.o
+
diff -urpN a/fs/gfs2/locking/harness/lm_interface.h b/fs/gfs2/locking/harness/lm_interface.h
--- a/fs/gfs2/locking/harness/lm_interface.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/harness/lm_interface.h	2005-09-01 17:23:36.119611656 +0800
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LM_INTERFACE_DOT_H__
+#define __LM_INTERFACE_DOT_H__
+
+/*
+ * Opaque handles represent the lock module's lockspace structure, the lock
+ * module's lock structures, and GFS's file system (superblock) structure.
+ */
+
+typedef void lm_lockspace_t;
+typedef void lm_lock_t;
+typedef void lm_fsdata_t;
+
+typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
+			       void *data);
+
+/*
+ * lm_mount() flags
+ *
+ * LM_MFLAG_SPECTATOR
+ * GFS is asking to join the filesystem's lockspace, but it doesn't want to
+ * modify the filesystem.  The lock module shouldn't assign a journal to the FS
+ * mount.  It shouldn't send recovery callbacks to the FS mount.  If the node
+ * dies or withdraws, all locks can be wiped immediately.
+ */
+
+#define LM_MFLAG_SPECTATOR	0x00000001
+
+/*
+ * lm_lockstruct flags
+ *
+ * LM_LSFLAG_LOCAL
+ * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS
+ * can make single-node optimizations.
+ */
+
+#define LM_LSFLAG_LOCAL		0x00000001
+
+/*
+ * lm_lockname types
+ */
+
+#define LM_TYPE_RESERVED	0x00
+#define LM_TYPE_NONDISK		0x01
+#define LM_TYPE_INODE		0x02
+#define LM_TYPE_RGRP		0x03
+#define LM_TYPE_META		0x04
+#define LM_TYPE_IOPEN		0x05
+#define LM_TYPE_FLOCK		0x06
+#define LM_TYPE_PLOCK		0x07
+#define LM_TYPE_QUOTA		0x08
+#define LM_TYPE_JOURNAL		0x09
+
+/*
+ * lm_lock() states
+ *
+ * SHARED is compatible with SHARED, not with DEFERRED or EX.
+ * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
+ */
+
+#define LM_ST_UNLOCKED		0
+#define LM_ST_EXCLUSIVE		1
+#define LM_ST_DEFERRED		2
+#define LM_ST_SHARED		3
+
+/*
+ * lm_lock() flags
+ *
+ * LM_FLAG_TRY
+ * Don't wait to acquire the lock if it can't be granted immediately.
+ *
+ * LM_FLAG_TRY_1CB
+ * Send one blocking callback if TRY is set and the lock is not granted.
+ *
+ * LM_FLAG_NOEXP
+ * GFS sets this flag on lock requests it makes while doing journal recovery.
+ * These special requests should not be blocked due to the recovery like
+ * ordinary locks would be.
+ *
+ * LM_FLAG_ANY
+ * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
+ * also be granted in SHARED.  The preferred state is whichever is compatible
+ * with other granted locks, or the specified state if no other locks exist.
+ *
+ * LM_FLAG_PRIORITY
+ * Override fairness considerations.  Suppose a lock is held in a shared state
+ * and there is a pending request for the deferred state.  A shared lock
+ * request with the priority flag would be allowed to bypass the deferred
+ * request and directly join the other shared lock.  A shared lock request
+ * without the priority flag might be forced to wait until the deferred
+ * requested had acquired and released the lock.
+ */
+
+#define LM_FLAG_TRY		0x00000001
+#define LM_FLAG_TRY_1CB		0x00000002
+#define LM_FLAG_NOEXP		0x00000004
+#define LM_FLAG_ANY		0x00000008
+#define LM_FLAG_PRIORITY	0x00000010
+
+/*
+ * lm_lock() and lm_async_cb return flags
+ *
+ * LM_OUT_ST_MASK
+ * Masks the lower two bits of lock state in the returned value.
+ *
+ * LM_OUT_CACHEABLE
+ * The lock hasn't been released so GFS can continue to cache data for it.
+ *
+ * LM_OUT_CANCELED
+ * The lock request was canceled.
+ *
+ * LM_OUT_ASYNC
+ * The result of the request will be returned in an LM_CB_ASYNC callback.
+ */
+
+#define LM_OUT_ST_MASK		0x00000003
+#define LM_OUT_CACHEABLE	0x00000004
+#define LM_OUT_CANCELED		0x00000008
+#define LM_OUT_ASYNC		0x00000080
+
+/*
+ * lm_callback_t types
+ *
+ * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S
+ * Blocking callback, a remote node is requesting the given lock in
+ * EXCLUSIVE, DEFERRED, or SHARED.
+ *
+ * LM_CB_NEED_RECOVERY
+ * The given journal needs to be recovered.
+ *
+ * LM_CB_DROPLOCKS
+ * Reduce the number of cached locks.
+ *
+ * LM_CB_ASYNC
+ * The given lock has been granted.
+ */
+
+#define LM_CB_NEED_E		257
+#define LM_CB_NEED_D		258
+#define LM_CB_NEED_S		259
+#define LM_CB_NEED_RECOVERY	260
+#define LM_CB_DROPLOCKS		261
+#define LM_CB_ASYNC		262
+
+/*
+ * lm_recovery_done() messages
+ */
+
+#define LM_RD_GAVEUP		308
+#define LM_RD_SUCCESS		309
+
+
+struct lm_lockname {
+	uint64_t ln_number;
+	unsigned int ln_type;
+};
+
+#define lm_name_equal(name1, name2) \
+	(((name1)->ln_number == (name2)->ln_number) && \
+	 ((name1)->ln_type == (name2)->ln_type)) \
+
+struct lm_async_cb {
+	struct lm_lockname lc_name;
+	int lc_ret;
+};
+
+struct lm_lockstruct;
+
+struct lm_lockops {
+	char lm_proto_name[256];
+
+	/*
+	 * Mount/Unmount
+	 */
+
+	int (*lm_mount) (char *table_name, char *host_data,
+			 lm_callback_t cb, lm_fsdata_t *fsdata,
+			 unsigned int min_lvb_size, int flags,
+			 struct lm_lockstruct *lockstruct);
+
+	void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
+
+	void (*lm_unmount) (lm_lockspace_t *lockspace);
+
+	void (*lm_withdraw) (lm_lockspace_t *lockspace);
+
+	/*
+	 * Lock oriented operations
+	 */
+
+	int (*lm_get_lock) (lm_lockspace_t *lockspace,
+			    struct lm_lockname *name, lm_lock_t **lockp);
+
+	void (*lm_put_lock) (lm_lock_t *lock);
+
+	unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
+				 unsigned int req_state, unsigned int flags);
+
+	unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
+
+	void (*lm_cancel) (lm_lock_t *lock);
+
+	int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
+	void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
+	void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
+
+	/*
+	 * Posix Lock oriented operations
+	 */
+
+	int (*lm_plock_get) (lm_lockspace_t *lockspace,
+			     struct lm_lockname *name,
+			     struct file *file, struct file_lock *fl);
+
+	int (*lm_plock) (lm_lockspace_t *lockspace,
+			 struct lm_lockname *name,
+			 struct file *file, int cmd, struct file_lock *fl);
+
+	int (*lm_punlock) (lm_lockspace_t *lockspace,
+			   struct lm_lockname *name,
+			   struct file *file, struct file_lock *fl);
+
+	/*
+	 * Client oriented operations
+	 */
+
+	void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
+				  unsigned int message);
+
+	struct module *lm_owner;
+};
+
+/*
+ * lm_mount() return values
+ *
+ * ls_jid - the journal ID this node should use
+ * ls_first - this node is the first to mount the file system
+ * ls_lvb_size - size in bytes of lock value blocks
+ * ls_lockspace - lock module's context for this file system
+ * ls_ops - lock module's functions
+ * ls_flags - lock module features
+ */
+
+struct lm_lockstruct {
+	unsigned int ls_jid;
+	unsigned int ls_first;
+	unsigned int ls_lvb_size;
+	lm_lockspace_t *ls_lockspace;
+	struct lm_lockops *ls_ops;
+	int ls_flags;
+};
+
+/*
+ * Lock module bottom interface.  A lock module makes itself available to GFS
+ * with these functions.
+ */
+
+int lm_register_proto(struct lm_lockops *proto);
+
+void lm_unregister_proto(struct lm_lockops *proto);
+
+/*
+ * Lock module top interface.  GFS calls these functions when mounting or
+ * unmounting a file system.
+ */
+
+int lm_mount(char *proto_name,
+	     char *table_name, char *host_data,
+	     lm_callback_t cb, lm_fsdata_t *fsdata,
+	     unsigned int min_lvb_size, int flags,
+	     struct lm_lockstruct *lockstruct);
+
+void lm_unmount(struct lm_lockstruct *lockstruct);
+
+void lm_withdraw(struct lm_lockstruct *lockstruct);
+
+#endif /* __LM_INTERFACE_DOT_H__ */
+
diff -urpN a/fs/gfs2/locking/harness/main.c b/fs/gfs2/locking/harness/main.c
--- a/fs/gfs2/locking/harness/main.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/harness/main.c	2005-09-01 17:23:36.141608312 +0800
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include <linux/fs.h>
+
+#include "lm_interface.h"
+
+struct lmh_wrapper {
+	struct list_head lw_list;
+	struct lm_lockops *lw_ops;
+};
+
+static struct semaphore lmh_lock;
+static struct list_head lmh_list;
+
+/**
+ * lm_register_proto - Register a low-level locking protocol
+ * @proto: the protocol definition
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int lm_register_proto(struct lm_lockops *proto)
+{
+	struct lmh_wrapper *lw;
+
+	down(&lmh_lock);
+
+	list_for_each_entry(lw, &lmh_list, lw_list) {
+		if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
+			up(&lmh_lock);
+			printk("lock_harness:  protocol %s already exists\n",
+			       proto->lm_proto_name);
+			return -EEXIST;
+		}
+	}
+
+	lw = kmalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
+	if (!lw) {
+		up(&lmh_lock);
+		return -ENOMEM;
+	}
+	memset(lw, 0, sizeof(struct lmh_wrapper));
+
+	lw->lw_ops = proto;
+	list_add(&lw->lw_list, &lmh_list);
+
+	up(&lmh_lock);
+
+	return 0;
+}
+
+/**
+ * lm_unregister_proto - Unregister a low-level locking protocol
+ * @proto: the protocol definition
+ *
+ */
+
+void lm_unregister_proto(struct lm_lockops *proto)
+{
+	struct lmh_wrapper *lw;
+
+	down(&lmh_lock);
+
+	list_for_each_entry(lw, &lmh_list, lw_list) {
+		if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
+			list_del(&lw->lw_list);
+			up(&lmh_lock);
+			kfree(lw);
+			return;
+		}
+	}
+
+	up(&lmh_lock);
+
+	printk("lock_harness:  can't unregister lock protocol %s\n",
+	       proto->lm_proto_name);
+}
+
+/**
+ * lm_mount - Mount a lock protocol
+ * @proto_name - the name of the protocol
+ * @table_name - the name of the lock space
+ * @host_data - data specific to this host
+ * @cb - the callback to the code using the lock module
+ * @fsdata - data to pass back with the callback
+ * @min_lvb_size - the mininum LVB size that the caller can deal with
+ * @flags - LM_MFLAG_*
+ * @lockstruct - a structure returned describing the mount
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int lm_mount(char *proto_name, char *table_name, char *host_data,
+	 lm_callback_t cb, lm_fsdata_t * fsdata,
+	 unsigned int min_lvb_size, int flags,
+	 struct lm_lockstruct *lockstruct)
+{
+	struct lmh_wrapper *lw = NULL;
+	int try = 0;
+	int error, found;
+
+ retry:
+	down(&lmh_lock);
+
+	found = 0;
+	list_for_each_entry(lw, &lmh_list, lw_list) {
+		if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		if (!try && capable(CAP_SYS_MODULE)) {
+			try = 1;
+			up(&lmh_lock);
+			request_module(proto_name);
+			goto retry;
+		}
+		printk("lock_harness:  can't find protocol %s\n", proto_name);
+		error = -ENOENT;
+		goto out;
+	}
+
+	if (!try_module_get(lw->lw_ops->lm_owner)) {
+		try = 0;
+		up(&lmh_lock);
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ);
+		goto retry;
+	}
+
+	error = lw->lw_ops->lm_mount(table_name, host_data,
+				     cb, fsdata,
+				     min_lvb_size, flags,
+				     lockstruct);
+	if (error)
+		module_put(lw->lw_ops->lm_owner);
+ out:
+	up(&lmh_lock);
+	return error;
+}
+
+void lm_unmount(struct lm_lockstruct *lockstruct)
+{
+	down(&lmh_lock);
+	lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
+	if (lockstruct->ls_ops->lm_owner)
+		module_put(lockstruct->ls_ops->lm_owner);
+	up(&lmh_lock);
+}
+
+/**
+ * lm_withdraw - abnormally unmount a lock module
+ * @lockstruct: the lockstruct passed into mount
+ *
+ */
+
+void lm_withdraw(struct lm_lockstruct *lockstruct)
+{
+	down(&lmh_lock);
+	lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
+	if (lockstruct->ls_ops->lm_owner)
+		module_put(lockstruct->ls_ops->lm_owner);
+	up(&lmh_lock);
+}
+
+int __init init_lmh(void)
+{
+	init_MUTEX(&lmh_lock);
+	INIT_LIST_HEAD(&lmh_list);
+	printk("Lock_Harness (built %s %s) installed\n", __DATE__, __TIME__);
+	return 0;
+}
+
+void __exit exit_lmh(void)
+{
+}
+
+module_init(init_lmh);
+module_exit(exit_lmh);
+
+MODULE_DESCRIPTION("GFS Lock Module Harness");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL_GPL(lm_register_proto);
+EXPORT_SYMBOL_GPL(lm_unregister_proto);
+EXPORT_SYMBOL_GPL(lm_mount);
+EXPORT_SYMBOL_GPL(lm_unmount);
+EXPORT_SYMBOL_GPL(lm_withdraw);
+

[PATCH 12/13] GFS: lock_nolock module

The lock_nolock module does no inter-node locking and allows gfs to be
used as a local file system.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/locking/nolock/Makefile |    3 
 fs/gfs2/locking/nolock/main.c   |  267 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 270 insertions(+)

diff -urpN a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
--- a/fs/gfs2/locking/nolock/Makefile	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/nolock/Makefile	2005-09-01 17:23:56.963442912 +0800
@@ -0,0 +1,3 @@
+obj-$(CONFIG_GFS2_FS) += lock_nolock.o
+lock_nolock-y := main.o
+
diff -urpN a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
--- a/fs/gfs2/locking/nolock/main.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/nolock/main.c	2005-09-01 17:23:56.952444584 +0800
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+
+#include "../harness/lm_interface.h"
+
+struct nolock_lockspace {
+	unsigned int nl_lvb_size;
+};
+
+struct lm_lockops nolock_ops;
+
+static int nolock_mount(char *table_name, char *host_data,
+			lm_callback_t cb, lm_fsdata_t *fsdata,
+			unsigned int min_lvb_size, int flags,
+			struct lm_lockstruct *lockstruct)
+{
+	char *c;
+	unsigned int jid;
+	struct nolock_lockspace *nl;
+
+	/* If there is a "jid=" in the hostdata, return that jid.
+	   Otherwise, return zero. */
+
+	c = strstr(host_data, "jid=");
+	if (!c)
+		jid = 0;
+	else {
+		c += 4;
+		sscanf(c, "%u", &jid);
+	}
+
+	nl = kmalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
+	if (!nl)
+		return -ENOMEM;
+
+	memset(nl, 0, sizeof(struct nolock_lockspace));
+	nl->nl_lvb_size = min_lvb_size;
+
+	lockstruct->ls_jid = jid;
+	lockstruct->ls_first = 1;
+	lockstruct->ls_lvb_size = min_lvb_size;
+	lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
+	lockstruct->ls_ops = &nolock_ops;
+	lockstruct->ls_flags = LM_LSFLAG_LOCAL;
+
+	return 0;
+}
+
+static void nolock_others_may_mount(lm_lockspace_t *lockspace)
+{
+}
+
+static void nolock_unmount(lm_lockspace_t *lockspace)
+{
+	struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
+	kfree(nl);
+}
+
+static void nolock_withdraw(lm_lockspace_t *lockspace)
+{
+}
+
+/**
+ * nolock_get_lock - get a lm_lock_t given a descripton of the lock
+ * @lockspace: the lockspace the lock lives in
+ * @name: the name of the lock
+ * @lockp: return the lm_lock_t here
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+			   lm_lock_t **lockp)
+{
+	*lockp = (lm_lock_t *)lockspace;
+	return 0;
+}
+
+/**
+ * nolock_put_lock - get rid of a lock structure
+ * @lock: the lock to throw away
+ *
+ */
+
+static void nolock_put_lock(lm_lock_t *lock)
+{
+}
+
+/**
+ * nolock_lock - acquire a lock
+ * @lock: the lock to manipulate
+ * @cur_state: the current state
+ * @req_state: the requested state
+ * @flags: modifier flags
+ *
+ * Returns: A bitmap of LM_OUT_*
+ */
+
+static unsigned int nolock_lock(lm_lock_t *lock, unsigned int cur_state,
+				unsigned int req_state, unsigned int flags)
+{
+	return req_state | LM_OUT_CACHEABLE;
+}
+
+/**
+ * nolock_unlock - unlock a lock
+ * @lock: the lock to manipulate
+ * @cur_state: the current state
+ *
+ * Returns: 0
+ */
+
+static unsigned int nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
+{
+	return 0;
+}
+
+static void nolock_cancel(lm_lock_t *lock)
+{
+}
+
+/**
+ * nolock_hold_lvb - hold on to a lock value block
+ * @lock: the lock the LVB is associated with
+ * @lvbp: return the lm_lvb_t here
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+static int nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
+{
+	struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
+	int error = 0;
+
+	*lvbp = kmalloc(nl->nl_lvb_size, GFP_KERNEL);
+	if (*lvbp)
+		memset(*lvbp, 0, nl->nl_lvb_size);
+	else
+		error = -ENOMEM;
+
+	return error;
+}
+
+/**
+ * nolock_unhold_lvb - release a LVB
+ * @lock: the lock the LVB is associated with
+ * @lvb: the lock value block
+ *
+ */
+
+static void nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
+{
+	kfree(lvb);
+}
+
+/**
+ * nolock_sync_lvb - sync out the value of a lvb
+ * @lock: the lock the LVB is associated with
+ * @lvb: the lock value block
+ *
+ */
+
+static void nolock_sync_lvb(lm_lock_t *lock, char *lvb)
+{
+}
+
+static int nolock_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
+			    struct file *file, struct file_lock *fl)
+{
+	struct file_lock *tmp;
+
+	lock_kernel();
+	tmp = posix_test_lock(file, fl);
+	fl->fl_type = F_UNLCK;
+	if (tmp)
+		memcpy(fl, tmp, sizeof(struct file_lock));
+	unlock_kernel();
+
+	return 0;
+}
+
+static int nolock_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+			struct file *file, int cmd, struct file_lock *fl)
+{
+	int error;
+	lock_kernel();
+	error = posix_lock_file_wait(file, fl);
+	unlock_kernel();
+	return error;
+}
+
+static int nolock_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+			  struct file *file, struct file_lock *fl)
+{
+	int error;
+	lock_kernel();
+	error = posix_lock_file_wait(file, fl);
+	unlock_kernel();
+	return error;
+}
+
+static void nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+				 unsigned int message)
+{
+}
+
+struct lm_lockops nolock_ops = {
+	.lm_proto_name = "lock_nolock",
+	.lm_mount = nolock_mount,
+	.lm_others_may_mount = nolock_others_may_mount,
+	.lm_unmount = nolock_unmount,
+	.lm_withdraw = nolock_withdraw,
+	.lm_get_lock = nolock_get_lock,
+	.lm_put_lock = nolock_put_lock,
+	.lm_lock = nolock_lock,
+	.lm_unlock = nolock_unlock,
+	.lm_cancel = nolock_cancel,
+	.lm_hold_lvb = nolock_hold_lvb,
+	.lm_unhold_lvb = nolock_unhold_lvb,
+	.lm_sync_lvb = nolock_sync_lvb,
+	.lm_plock_get = nolock_plock_get,
+	.lm_plock = nolock_plock,
+	.lm_punlock = nolock_punlock,
+	.lm_recovery_done = nolock_recovery_done,
+	.lm_owner = THIS_MODULE,
+};
+
+int __init init_nolock(void)
+{
+	int error;
+
+	error = lm_register_proto(&nolock_ops);
+	if (error) {
+		printk("lock_nolock: can't register protocol: %d\n", error);
+		return error;
+	}
+
+	printk("Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
+	return 0;
+}
+
+void __exit exit_nolock(void)
+{
+	lm_unregister_proto(&nolock_ops);
+}
+
+module_init(init_nolock);
+module_exit(exit_nolock);
+
+MODULE_DESCRIPTION("GFS Nolock Locking Module");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+

[PATCH 13/13] GFS: lock_dlm module

The lock_dlm module uses the DLM in linux/drivers/dlm/ for inter-node
locking.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/locking/dlm/Makefile   |    3 
 fs/gfs2/locking/dlm/lock.c     |  533 +++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/locking/dlm/lock_dlm.h |  200 +++++++++++++++
 fs/gfs2/locking/dlm/main.c     |   62 ++++
 fs/gfs2/locking/dlm/mount.c    |  218 ++++++++++++++++
 fs/gfs2/locking/dlm/plock.c    |  274 +++++++++++++++++++++
 fs/gfs2/locking/dlm/sysfs.c    |  283 +++++++++++++++++++++
 fs/gfs2/locking/dlm/thread.c   |  355 +++++++++++++++++++++++++++
 include/linux/lock_dlm_plock.h |   40 +++
 9 files changed, 1968 insertions(+)

diff -urpN a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
--- a/fs/gfs2/locking/dlm/Makefile	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/Makefile	2005-09-01 17:48:48.143749048 +0800
@@ -0,0 +1,3 @@
+obj-$(CONFIG_GFS2_FS) += lock_dlm.o
+lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
+
diff -urpN a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
--- a/fs/gfs2/locking/dlm/lock.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock.c	2005-09-01 17:48:48.139749656 +0800
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+static char junk_lvb[GDLM_LVB_SIZE];
+
+static void queue_complete(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+
+	clear_bit(LFL_ACTIVE, &lp->flags);
+
+	spin_lock(&ls->async_lock);
+	list_add_tail(&lp->clist, &ls->complete);
+	spin_unlock(&ls->async_lock);
+	wake_up(&ls->thread_wait);
+}
+
+static inline void gdlm_ast(void *astarg)
+{
+	queue_complete((struct gdlm_lock *) astarg);
+}
+
+static inline void gdlm_bast(void *astarg, int mode)
+{
+	struct gdlm_lock *lp = astarg;
+	struct gdlm_ls *ls = lp->ls;
+
+	if (!mode) {
+		printk("lock_dlm: bast mode zero %x,%"PRIx64"\n",
+			lp->lockname.ln_type, lp->lockname.ln_number);
+		return;
+	}
+
+	spin_lock(&ls->async_lock);
+	if (!lp->bast_mode) {
+		list_add_tail(&lp->blist, &ls->blocking);
+		lp->bast_mode = mode;
+	} else if (lp->bast_mode < mode)
+		lp->bast_mode = mode;
+	spin_unlock(&ls->async_lock);
+	wake_up(&ls->thread_wait);
+}
+
+void gdlm_queue_delayed(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+
+	spin_lock(&ls->async_lock);
+	list_add_tail(&lp->delay_list, &ls->delayed);
+	spin_unlock(&ls->async_lock);
+}
+
+/* convert gfs lock-state to dlm lock-mode */
+
+static int16_t make_mode(int16_t lmstate)
+{
+	switch (lmstate) {
+	case LM_ST_UNLOCKED:
+		return DLM_LOCK_NL;
+	case LM_ST_EXCLUSIVE:
+		return DLM_LOCK_EX;
+	case LM_ST_DEFERRED:
+		return DLM_LOCK_CW;
+	case LM_ST_SHARED:
+		return DLM_LOCK_PR;
+	default:
+		GDLM_ASSERT(0, printk("unknown LM state %d\n", lmstate););
+	}
+}
+
+/* convert dlm lock-mode to gfs lock-state */
+
+int16_t gdlm_make_lmstate(int16_t dlmmode)
+{
+	switch (dlmmode) {
+	case DLM_LOCK_IV:
+	case DLM_LOCK_NL:
+		return LM_ST_UNLOCKED;
+	case DLM_LOCK_EX:
+		return LM_ST_EXCLUSIVE;
+	case DLM_LOCK_CW:
+		return LM_ST_DEFERRED;
+	case DLM_LOCK_PR:
+		return LM_ST_SHARED;
+	default:
+		GDLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode););
+	}
+}
+
+/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
+   DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
+
+static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
+{
+	int16_t cur = make_mode(cur_state);
+	if (lp->cur != DLM_LOCK_IV)
+		GDLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur););
+}
+
+static inline unsigned int make_flags(struct gdlm_lock *lp,
+				      unsigned int gfs_flags,
+				      int16_t cur, int16_t req)
+{
+	unsigned int lkf = 0;
+
+	if (gfs_flags & LM_FLAG_TRY)
+		lkf |= DLM_LKF_NOQUEUE;
+
+	if (gfs_flags & LM_FLAG_TRY_1CB) {
+		lkf |= DLM_LKF_NOQUEUE;
+		lkf |= DLM_LKF_NOQUEUEBAST;
+	}
+
+	if (gfs_flags & LM_FLAG_PRIORITY) {
+		lkf |= DLM_LKF_NOORDER;
+		lkf |= DLM_LKF_HEADQUE;
+	}
+
+	if (gfs_flags & LM_FLAG_ANY) {
+		if (req == DLM_LOCK_PR)
+			lkf |= DLM_LKF_ALTCW;
+		else if (req == DLM_LOCK_CW)
+			lkf |= DLM_LKF_ALTPR;
+	}
+
+	if (lp->lksb.sb_lkid != 0) {
+		lkf |= DLM_LKF_CONVERT;
+
+		/* Conversion deadlock avoidance by DLM */
+
+		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
+		    !(lkf & DLM_LKF_NOQUEUE) &&
+		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
+			lkf |= DLM_LKF_CONVDEADLK;
+	}
+
+	if (lp->lvb)
+		lkf |= DLM_LKF_VALBLK;
+
+	return lkf;
+}
+
+/* make_strname - convert GFS lock numbers to a string */
+
+static inline void make_strname(struct lm_lockname *lockname,
+				struct gdlm_strname *str)
+{
+	sprintf(str->name, "%8x%16"PRIx64, lockname->ln_type,
+		lockname->ln_number);
+	str->namelen = GDLM_STRNAME_BYTES;
+}
+
+int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
+		   struct gdlm_lock **lpp)
+{
+	struct gdlm_lock *lp;
+
+	lp = kmalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
+	if (!lp)
+		return -ENOMEM;
+
+	memset(lp, 0, sizeof(struct gdlm_lock));
+	lp->lockname = *name;
+	lp->ls = ls;
+	lp->cur = DLM_LOCK_IV;
+	lp->lvb = NULL;
+	lp->hold_null = NULL;
+	init_completion(&lp->ast_wait);
+	INIT_LIST_HEAD(&lp->clist);
+	INIT_LIST_HEAD(&lp->blist);
+	INIT_LIST_HEAD(&lp->delay_list);
+
+	spin_lock(&ls->async_lock);
+	list_add(&lp->all_list, &ls->all_locks);
+	ls->all_locks_count++;
+	spin_unlock(&ls->async_lock);
+
+	*lpp = lp;
+	return 0;
+}
+
+void gdlm_delete_lp(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+
+	spin_lock(&ls->async_lock);
+	if (!list_empty(&lp->clist))
+		list_del_init(&lp->clist);
+	if (!list_empty(&lp->blist))
+		list_del_init(&lp->blist);
+	if (!list_empty(&lp->delay_list))
+		list_del_init(&lp->delay_list);
+	GDLM_ASSERT(!list_empty(&lp->all_list),);
+	list_del_init(&lp->all_list);
+	ls->all_locks_count--;
+	spin_unlock(&ls->async_lock);
+
+	kfree(lp);
+}
+
+int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		  lm_lock_t **lockp)
+{
+	struct gdlm_lock *lp;
+	int error;
+
+	error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
+
+	*lockp = (lm_lock_t *) lp;
+	return error;
+}
+
+void gdlm_put_lock(lm_lock_t *lock)
+{
+	gdlm_delete_lp((struct gdlm_lock *) lock);
+}
+
+void gdlm_do_lock(struct gdlm_lock *lp, struct dlm_range *range)
+{
+	struct gdlm_ls *ls = lp->ls;
+	struct gdlm_strname str;
+	int error, bast = 1;
+
+	/*
+	 * When recovery is in progress, delay lock requests for submission
+	 * once recovery is done.  Requests for recovery (NOEXP) and unlocks
+	 * can pass.
+	 */
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+	    !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
+		gdlm_queue_delayed(lp);
+		return;
+	}
+
+	/*
+	 * Submit the actual lock request.
+	 */
+
+	if (test_bit(LFL_NOBAST, &lp->flags))
+		bast = 0;
+
+	make_strname(&lp->lockname, &str);
+
+	set_bit(LFL_ACTIVE, &lp->flags);
+
+	log_debug("lk %x,%"PRIx64" id %x %d,%d %x", lp->lockname.ln_type,
+		  lp->lockname.ln_number, lp->lksb.sb_lkid,
+		  lp->cur, lp->req, lp->lkf);
+
+	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
+			 str.name, str.namelen, 0, gdlm_ast, (void *) lp,
+			 bast ? gdlm_bast : NULL, range);
+
+	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
+		lp->lksb.sb_status = -EAGAIN;
+		queue_complete(lp);
+		error = 0;
+	}
+
+	GDLM_ASSERT(!error,
+		   printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n",
+			  ls->fsname, lp->lockname.ln_type,
+			  lp->lockname.ln_number, error, lp->cur, lp->req,
+			  lp->lkf););
+}
+
+void gdlm_do_unlock(struct gdlm_lock *lp)
+{
+	unsigned int lkf = 0;
+	int error;
+
+	set_bit(LFL_DLM_UNLOCK, &lp->flags);
+	set_bit(LFL_ACTIVE, &lp->flags);
+
+	if (lp->lvb)
+		lkf = DLM_LKF_VALBLK;
+
+	log_debug("un %x,%"PRIx64" %x %d %x", lp->lockname.ln_type,
+		  lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);
+
+	error = dlm_unlock(lp->ls->dlm_lockspace, lp->lksb.sb_lkid, lkf,
+			   NULL, lp);
+
+	GDLM_ASSERT(!error,
+		   printk("%s: error=%d num=%x,%"PRIx64" lkf=%x flags=%lx\n",
+			  lp->ls->fsname, error, lp->lockname.ln_type,
+			  lp->lockname.ln_number, lkf, lp->flags););
+}
+
+unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
+		       unsigned int req_state, unsigned int flags)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+	clear_bit(LFL_DLM_CANCEL, &lp->flags);
+	if (flags & LM_FLAG_NOEXP)
+		set_bit(LFL_NOBLOCK, &lp->flags);
+
+	check_cur_state(lp, cur_state);
+	lp->req = make_mode(req_state);
+	lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
+
+	gdlm_do_lock(lp, NULL);
+	return LM_OUT_ASYNC;
+}
+
+unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+	clear_bit(LFL_DLM_CANCEL, &lp->flags);
+	if (lp->cur == DLM_LOCK_IV)
+		return 0;
+	gdlm_do_unlock(lp);
+	return LM_OUT_ASYNC;
+}
+
+void gdlm_cancel(lm_lock_t *lock)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	struct gdlm_ls *ls = lp->ls;
+	int error, delay_list = 0;
+
+	if (test_bit(LFL_DLM_CANCEL, &lp->flags))
+		return;
+
+	log_info("gdlm_cancel %x,%"PRIx64" flags %lx",
+		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+
+	spin_lock(&ls->async_lock);
+	if (!list_empty(&lp->delay_list)) {
+		list_del_init(&lp->delay_list);
+		delay_list = 1;
+	}
+	spin_unlock(&ls->async_lock);
+
+	if (delay_list) {
+		set_bit(LFL_CANCEL, &lp->flags);
+		set_bit(LFL_ACTIVE, &lp->flags);
+		queue_complete(lp);
+		return;
+	}
+
+	if (!test_bit(LFL_ACTIVE, &lp->flags) ||
+	    test_bit(LFL_DLM_UNLOCK, &lp->flags))	{
+		log_info("gdlm_cancel skip %x,%"PRIx64" flags %lx",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number,
+			 lp->flags);
+		return;
+	}
+
+	/* the lock is blocked in the dlm */
+
+	set_bit(LFL_DLM_CANCEL, &lp->flags);
+	set_bit(LFL_ACTIVE, &lp->flags);
+
+	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
+			   NULL, lp);
+
+	log_info("gdlm_cancel rv %d %x,%"PRIx64" flags %lx", error,
+		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+
+	if (error == -EBUSY)
+		clear_bit(LFL_DLM_CANCEL, &lp->flags);
+}
+
+int gdlm_add_lvb(struct gdlm_lock *lp)
+{
+	char *lvb;
+
+	lvb = kmalloc(GDLM_LVB_SIZE, GFP_KERNEL);
+	if (!lvb)
+		return -ENOMEM;
+
+	memset(lvb, 0, GDLM_LVB_SIZE);
+
+	lp->lksb.sb_lvbptr = lvb;
+	lp->lvb = lvb;
+	return 0;
+}
+
+void gdlm_del_lvb(struct gdlm_lock *lp)
+{
+	kfree(lp->lvb);
+	lp->lvb = NULL;
+	lp->lksb.sb_lvbptr = NULL;
+}
+
+/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs won't call hold_lvb() during a callback (from
+   the context of a lock_dlm thread). */
+
+static int hold_null_lock(struct gdlm_lock *lp)
+{
+	struct gdlm_lock *lpn = NULL;
+	int error;
+
+	if (lp->hold_null) {
+		printk("lock_dlm: lvb already held\n");
+		return 0;
+	}
+
+	error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
+	if (error)
+		goto out;
+
+	lpn->lksb.sb_lvbptr = junk_lvb;
+	lpn->lvb = junk_lvb;
+
+	lpn->req = DLM_LOCK_NL;
+	lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
+	set_bit(LFL_NOBAST, &lpn->flags);
+	set_bit(LFL_INLOCK, &lpn->flags);
+
+	init_completion(&lpn->ast_wait);
+	gdlm_do_lock(lpn, NULL);
+	wait_for_completion(&lpn->ast_wait);
+	error = lp->lksb.sb_status;
+	if (error) {
+		printk("lock_dlm: hold_null_lock dlm error %d\n", error);
+		gdlm_delete_lp(lpn);
+		lpn = NULL;
+	}
+ out:
+	lp->hold_null = lpn;
+	return error;
+}
+
+/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs may call unhold_lvb() during a callback (from
+   the context of a lock_dlm thread) which could cause a deadlock since the
+   other lock_dlm thread could be engaged in recovery. */
+
+static void unhold_null_lock(struct gdlm_lock *lp)
+{
+	struct gdlm_lock *lpn = lp->hold_null;
+
+	GDLM_ASSERT(lpn,);
+	lpn->lksb.sb_lvbptr = NULL;
+	lpn->lvb = NULL;
+	set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
+	gdlm_do_unlock(lpn);
+	lp->hold_null = NULL;
+}
+
+/* Acquire a NL lock because gfs requires the value block to remain
+   intact on the resource while the lvb is "held" even if it's holding no locks
+   on the resource. */
+
+int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	int error;
+
+	error = gdlm_add_lvb(lp);
+	if (error)
+		return error;
+
+	*lvbp = lp->lvb;
+
+	error = hold_null_lock(lp);
+	if (error)
+		gdlm_del_lvb(lp);
+
+	return error;
+}
+
+void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+	unhold_null_lock(lp);
+	gdlm_del_lvb(lp);
+}
+
+void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+	if (lp->cur != DLM_LOCK_EX)
+		return;
+
+	init_completion(&lp->ast_wait);
+	set_bit(LFL_SYNC_LVB, &lp->flags);
+
+	lp->req = DLM_LOCK_EX;
+	lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
+
+	gdlm_do_lock(lp, NULL);
+	wait_for_completion(&lp->ast_wait);
+}
+
+void gdlm_submit_delayed(struct gdlm_ls *ls)
+{
+	struct gdlm_lock *lp, *safe;
+
+	spin_lock(&ls->async_lock);
+	list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
+		list_del_init(&lp->delay_list);
+		list_add_tail(&lp->delay_list, &ls->submit);
+	}
+	spin_unlock(&ls->async_lock);
+	wake_up(&ls->thread_wait);
+}
+
+int gdlm_release_all_locks(struct gdlm_ls *ls)
+{
+	struct gdlm_lock *lp, *safe;
+	int count = 0;
+
+	spin_lock(&ls->async_lock);
+	list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
+		list_del_init(&lp->all_list);
+
+		if (lp->lvb && lp->lvb != junk_lvb)
+			kfree(lp->lvb);
+		kfree(lp);
+		count++;
+	}
+	spin_unlock(&ls->async_lock);
+
+	return count;
+}
+
diff -urpN a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
--- a/fs/gfs2/locking/dlm/lock_dlm.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock_dlm.h	2005-09-01 17:48:48.147748440 +0800
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef LOCK_DLM_DOT_H
+#define LOCK_DLM_DOT_H
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/fcntl.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+#include <linux/dlm.h>
+#include "../harness/lm_interface.h"
+
+/*
+ * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
+ * prefix of lock_dlm_ gets awkward.  Externally, GFS refers to this module
+ * as "lock_dlm".
+ */
+
+#define GDLM_STRNAME_BYTES	24
+#define GDLM_LVB_SIZE		32
+#define GDLM_DROP_COUNT		50000
+#define GDLM_DROP_PERIOD	60
+
+/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
+   We sprintf these numbers into a 24 byte string of hex values to make them
+   human-readable (to make debugging simpler.) */
+
+struct gdlm_strname {
+	unsigned char		name[GDLM_STRNAME_BYTES];
+	unsigned short		namelen;
+};
+
+#define DFL_BLOCK_LOCKS		0
+#define DFL_JOIN_DONE		1
+#define DFL_LEAVE_DONE		2
+#define DFL_TERMINATE		3
+#define DFL_SPECTATOR		4
+#define DFL_WITHDRAW		5
+
+struct gdlm_ls {
+	uint32_t		id;
+	int			jid;
+	int			first;
+	int			first_done;
+	unsigned long		flags;
+	struct kobject		kobj;
+	char			clustername[128];
+	char			fsname[128];
+	int			fsflags;
+	dlm_lockspace_t		*dlm_lockspace;
+	lm_callback_t		fscb;
+	lm_fsdata_t		*fsdata;
+	int			recover_jid;
+	int			recover_done;
+	spinlock_t		async_lock;
+	struct list_head	complete;
+	struct list_head	blocking;
+	struct list_head	delayed;
+	struct list_head	submit;
+	struct list_head	all_locks;
+	uint32_t		all_locks_count;
+	wait_queue_head_t	wait_control;
+	struct task_struct	*thread1;
+	struct task_struct	*thread2;
+	wait_queue_head_t	thread_wait;
+	unsigned long		drop_time;
+	int			drop_locks_count;
+	int			drop_locks_period;
+};
+
+#define LFL_NOBLOCK		0
+#define LFL_NOCACHE		1
+#define LFL_DLM_UNLOCK		2
+#define LFL_DLM_CANCEL		3
+#define LFL_SYNC_LVB		4
+#define LFL_FORCE_PROMOTE	5
+#define LFL_REREQUEST		6
+#define LFL_ACTIVE		7
+#define LFL_INLOCK		8
+#define LFL_CANCEL		9
+#define LFL_NOBAST		10
+#define LFL_HEADQUE		11
+#define LFL_UNLOCK_DELETE	12
+
+struct gdlm_lock {
+	struct gdlm_ls		*ls;
+	struct lm_lockname	lockname;
+	char			*lvb;
+	struct dlm_lksb		lksb;
+
+	int16_t			cur;
+	int16_t			req;
+	int16_t			prev_req;
+	uint32_t		lkf;		/* dlm flags DLM_LKF_ */
+	unsigned long		flags;		/* lock_dlm flags LFL_ */
+
+	int			bast_mode;	/* protected by async_lock */
+	struct completion	ast_wait;
+
+	struct list_head	clist;		/* complete */
+	struct list_head	blist;		/* blocking */
+	struct list_head	delay_list;	/* delayed */
+	struct list_head	all_list;	/* all locks for the fs */
+	struct gdlm_lock	*hold_null;	/* NL lock for hold_lvb */
+};
+
+#if (BITS_PER_LONG == 64)
+#define PRIx64 "lx"
+#else
+#define PRIx64 "Lx"
+#endif
+
+#define GDLM_ASSERT(x, do) \
+{ \
+  if (!(x)) \
+  { \
+    printk("\nlock_dlm:  Assertion failed on line %d of file %s\n" \
+           "lock_dlm:  assertion:  \"%s\"\n" \
+           "lock_dlm:  time = %lu\n", \
+           __LINE__, __FILE__, #x, jiffies); \
+    {do} \
+    printk("\n"); \
+    BUG(); \
+    panic("lock_dlm:  Record message above and reboot.\n"); \
+  } \
+}
+
+#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
+#define log_info(fmt, arg...)  log_print(KERN_INFO , fmt , ## arg)
+#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
+#ifdef LOCK_DLM_LOG_DEBUG
+#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
+#else
+#define log_debug(fmt, arg...)
+#endif
+
+/* sysfs.c */
+
+int gdlm_sysfs_init(void);
+void gdlm_sysfs_exit(void);
+int gdlm_kobject_setup(struct gdlm_ls *);
+void gdlm_kobject_release(struct gdlm_ls *);
+
+/* thread.c */
+
+int gdlm_init_threads(struct gdlm_ls *);
+void gdlm_release_threads(struct gdlm_ls *);
+
+/* lock.c */
+
+int16_t gdlm_make_lmstate(int16_t);
+void gdlm_queue_delayed(struct gdlm_lock *);
+void gdlm_submit_delayed(struct gdlm_ls *);
+int gdlm_release_all_locks(struct gdlm_ls *);
+int gdlm_create_lp(struct gdlm_ls *, struct lm_lockname *, struct gdlm_lock **);
+void gdlm_delete_lp(struct gdlm_lock *);
+int gdlm_add_lvb(struct gdlm_lock *);
+void gdlm_del_lvb(struct gdlm_lock *);
+void gdlm_do_lock(struct gdlm_lock *, struct dlm_range *);
+void gdlm_do_unlock(struct gdlm_lock *);
+
+int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
+void gdlm_put_lock(lm_lock_t *);
+unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
+unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
+void gdlm_cancel(lm_lock_t *);
+int gdlm_hold_lvb(lm_lock_t *, char **);
+void gdlm_unhold_lvb(lm_lock_t *, char *);
+void gdlm_sync_lvb(lm_lock_t *, char *);
+
+/* plock.c */
+
+int gdlm_plock_init(void);
+void gdlm_plock_exit(void);
+int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
+		struct file_lock *);
+int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
+		struct file_lock *);
+int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
+		struct file_lock *);
+#endif
+
diff -urpN a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
--- a/fs/gfs2/locking/dlm/main.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/main.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/init.h>
+
+#include "lock_dlm.h"
+
+extern int gdlm_drop_count;
+extern int gdlm_drop_period;
+
+extern struct lm_lockops gdlm_ops;
+
+int __init init_lock_dlm(void)
+{
+	int error;
+
+	error = lm_register_proto(&gdlm_ops);
+	if (error) {
+		printk("lock_dlm:  can't register protocol: %d\n", error);
+		return error;
+	}
+
+	error = gdlm_sysfs_init();
+	if (error) {
+		lm_unregister_proto(&gdlm_ops);
+		return error;
+	}
+
+	error = gdlm_plock_init();
+	if (error) {
+		gdlm_sysfs_exit();
+		lm_unregister_proto(&gdlm_ops);
+		return error;
+	}
+
+	gdlm_drop_count = GDLM_DROP_COUNT;
+	gdlm_drop_period = GDLM_DROP_PERIOD;
+
+	printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
+	return 0;
+}
+
+void __exit exit_lock_dlm(void)
+{
+	gdlm_plock_exit();
+	gdlm_sysfs_exit();
+	lm_unregister_proto(&gdlm_ops);
+}
+
+module_init(init_lock_dlm);
+module_exit(exit_lock_dlm);
+
+MODULE_DESCRIPTION("GFS DLM Locking Module");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
diff -urpN a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
--- a/fs/gfs2/locking/dlm/mount.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/mount.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+int gdlm_drop_count;
+int gdlm_drop_period;
+struct lm_lockops gdlm_ops;
+
+
+static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
+				 int flags, char *table_name)
+{
+	struct gdlm_ls *ls;
+	char buf[256], *p;
+
+	ls = kmalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
+	if (!ls)
+		return NULL;
+
+	memset(ls, 0, sizeof(struct gdlm_ls));
+
+	ls->drop_locks_count = gdlm_drop_count;
+	ls->drop_locks_period = gdlm_drop_period;
+
+	ls->fscb = cb;
+	ls->fsdata = fsdata;
+	ls->fsflags = flags;
+
+	spin_lock_init(&ls->async_lock);
+
+	INIT_LIST_HEAD(&ls->complete);
+	INIT_LIST_HEAD(&ls->blocking);
+	INIT_LIST_HEAD(&ls->delayed);
+	INIT_LIST_HEAD(&ls->submit);
+	INIT_LIST_HEAD(&ls->all_locks);
+
+	init_waitqueue_head(&ls->thread_wait);
+	init_waitqueue_head(&ls->wait_control);
+	ls->thread1 = NULL;
+	ls->thread2 = NULL;
+	ls->drop_time = jiffies;
+	ls->jid = -1;
+
+	strncpy(buf, table_name, 256);
+	buf[255] = '\0';
+
+	p = strstr(buf, ":");
+	if (!p) {
+		printk("lock_dlm: invalid table_name \"%s\"\n", table_name);
+		kfree(ls);
+		return NULL;
+	}
+	*p = '\0';
+	p++;
+
+	strncpy(ls->clustername, buf, 128);
+	strncpy(ls->fsname, p, 128);
+
+	return ls;
+}
+
+static int gdlm_mount(char *table_name, char *host_data,
+			lm_callback_t cb, lm_fsdata_t *fsdata,
+			unsigned int min_lvb_size, int flags,
+			struct lm_lockstruct *lockstruct)
+{
+	struct gdlm_ls *ls;
+	int error = -ENOMEM;
+
+	if (min_lvb_size > GDLM_LVB_SIZE)
+		goto out;
+
+	ls = init_gdlm(cb, fsdata, flags, table_name);
+	if (!ls)
+		goto out;
+
+	error = gdlm_init_threads(ls);
+	if (error)
+		goto out_free;
+
+	error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
+				  &ls->dlm_lockspace, 0, GDLM_LVB_SIZE);
+	if (error) {
+		printk("lock_dlm: dlm_new_lockspace error %d\n", error);
+		goto out_thread;
+	}
+
+	error = gdlm_kobject_setup(ls);
+	if (error)
+		goto out_dlm;
+	kobject_uevent(&ls->kobj, KOBJ_MOUNT, NULL);
+
+	/* Now we depend on userspace to notice the new mount,
+	   join the appropriate group, and do a write to our sysfs
+	   "mounted" or "terminate" file.  Before the start, userspace
+	   must set "jid" and "first". */
+
+	error = wait_event_interruptible(ls->wait_control,
+			test_bit(DFL_JOIN_DONE, &ls->flags));
+	if (error)
+		goto out_sysfs;
+
+	if (test_bit(DFL_TERMINATE, &ls->flags)) {
+		error = -ERESTARTSYS;
+		goto out_sysfs;
+	}
+
+	lockstruct->ls_jid = ls->jid;
+	lockstruct->ls_first = ls->first;
+	lockstruct->ls_lockspace = ls;
+	lockstruct->ls_ops = &gdlm_ops;
+	lockstruct->ls_flags = 0;
+	lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
+	return 0;
+
+ out_sysfs:
+	gdlm_kobject_release(ls);
+ out_dlm:
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+ out_thread:
+	gdlm_release_threads(ls);
+ out_free:
+	kfree(ls);
+ out:
+	return error;
+}
+
+static void gdlm_unmount(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	int rv;
+
+	log_debug("unmount flags %lx", ls->flags);
+
+	if (test_bit(DFL_WITHDRAW, &ls->flags)) {
+		gdlm_kobject_release(ls);
+		goto out;
+	}
+
+	kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+
+	wait_event_interruptible(ls->wait_control,
+				 test_bit(DFL_LEAVE_DONE, &ls->flags));
+
+	gdlm_kobject_release(ls);
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+	gdlm_release_threads(ls);
+	rv = gdlm_release_all_locks(ls);
+	if (rv)
+		log_info("lm_dlm_unmount: %d stray locks freed", rv);
+ out:
+	kfree(ls);
+}
+
+static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+                               unsigned int message)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	ls->recover_done = jid;
+	kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	ls->first_done = 1;
+	kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+static void gdlm_withdraw(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+
+	/* userspace suspends locking on all other members */
+
+	kobject_uevent(&ls->kobj, KOBJ_OFFLINE, NULL);
+
+	wait_event_interruptible(ls->wait_control,
+				 test_bit(DFL_WITHDRAW, &ls->flags));
+
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+	gdlm_release_threads(ls);
+	gdlm_release_all_locks(ls);
+
+	kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+
+	/* userspace leaves the mount group, we don't need to wait for
+	   that to complete */
+}
+
+struct lm_lockops gdlm_ops = {
+	.lm_proto_name = "lock_dlm",
+	.lm_mount = gdlm_mount,
+	.lm_others_may_mount = gdlm_others_may_mount,
+	.lm_unmount = gdlm_unmount,
+	.lm_withdraw = gdlm_withdraw,
+	.lm_get_lock = gdlm_get_lock,
+	.lm_put_lock = gdlm_put_lock,
+	.lm_lock = gdlm_lock,
+	.lm_unlock = gdlm_unlock,
+	.lm_plock = gdlm_plock,
+	.lm_punlock = gdlm_punlock,
+	.lm_plock_get = gdlm_plock_get,
+	.lm_cancel = gdlm_cancel,
+	.lm_hold_lvb = gdlm_hold_lvb,
+	.lm_unhold_lvb = gdlm_unhold_lvb,
+	.lm_sync_lvb = gdlm_sync_lvb,
+	.lm_recovery_done = gdlm_recovery_done,
+	.lm_owner = THIS_MODULE,
+};
+
diff -urpN a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
--- a/fs/gfs2/locking/dlm/plock.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/plock.c	2005-09-01 17:48:48.148748288 +0800
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+#include <linux/lock_dlm_plock.h>
+
+#include <linux/miscdevice.h>
+
+static spinlock_t ops_lock;
+static struct list_head send_list;
+static struct list_head recv_list;
+static wait_queue_head_t send_wq;
+static wait_queue_head_t recv_wq;
+
+struct plock_op {
+	struct list_head list;
+	int done;
+	struct gdlm_plock_info info;
+};
+
+static inline void set_version(struct gdlm_plock_info *info)
+{
+	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
+	info->version[1] = GDLM_PLOCK_VERSION_MINOR;
+	info->version[2] = GDLM_PLOCK_VERSION_PATCH;
+}
+
+static int check_version(struct gdlm_plock_info *info)
+{
+	if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
+	    (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
+		log_error("plock device version mismatch: "
+			  "kernel (%u.%u.%u), user (%u.%u.%u)",
+			  GDLM_PLOCK_VERSION_MAJOR,
+			  GDLM_PLOCK_VERSION_MINOR,
+			  GDLM_PLOCK_VERSION_PATCH,
+			  info->version[0],
+			  info->version[1],
+			  info->version[2]);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+	       struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	struct plock_op *op;
+	int rv;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	log_debug("en plock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+	set_version(&op->info);
+	op->info.optype		= GDLM_PLOCK_OP_LOCK;
+	op->info.pid		= (uint32_t) fl->fl_owner;
+	op->info.ex		= (fl->fl_type == F_WRLCK);
+	op->info.wait		= IS_SETLKW(cmd);
+	op->info.fsid		= ls->id;
+	op->info.number		= name->ln_number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+
+	INIT_LIST_HEAD(&op->list);
+	spin_lock(&ops_lock);
+	list_add_tail(&op->list, &send_list);
+	spin_unlock(&ops_lock);
+	wake_up(&send_wq);
+
+	wait_event(recv_wq, (op->done != 0));
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		printk("plock op on list\n");
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	log_debug("ex plock done %d rv %d", op->done, op->info.rv);
+
+	rv = op->info.rv;
+
+	if (!rv) {
+		if (posix_lock_file_wait(file, fl) < 0)
+			log_error("gdlm_plock: vfs lock error %x,%"PRIx64"",
+				  name->ln_type, name->ln_number);
+	}
+
+	kfree(op);
+	return rv;
+}
+
+int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		 struct file *file, struct file_lock *fl)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	struct plock_op *op;
+	int rv;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	log_debug("en punlock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+	if (posix_lock_file_wait(file, fl) < 0)
+		log_error("gdlm_punlock: vfs unlock error %x,%"PRIx64"",
+			  name->ln_type, name->ln_number);
+
+	set_version(&op->info);
+	op->info.optype		= GDLM_PLOCK_OP_UNLOCK;
+	op->info.pid		= (uint32_t) fl->fl_owner;
+	op->info.fsid		= ls->id;
+	op->info.number		= name->ln_number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+
+	INIT_LIST_HEAD(&op->list);
+	spin_lock(&ops_lock);
+	list_add_tail(&op->list, &send_list);
+	spin_unlock(&ops_lock);
+	wake_up(&send_wq);
+
+	wait_event(recv_wq, (op->done != 0));
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		printk("plock op on list\n");
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	log_debug("ex punlock done %d rv %d", op->done, op->info.rv);
+
+	rv = op->info.rv;
+
+	kfree(op);
+	return rv;
+}
+
+int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		   struct file *file, struct file_lock *fl)
+{
+	return -ENOSYS;
+}
+
+/* a read copies out one plock request from the send list */
+static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+			loff_t *ppos)
+{
+	struct gdlm_plock_info info;
+	struct plock_op *op = NULL;
+
+	if (count < sizeof(info))
+		return -EINVAL;
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list)) {
+		op = list_entry(send_list.next, struct plock_op, list);
+		list_move(&op->list, &recv_list);
+		memcpy(&info, &op->info, sizeof(info));
+	}
+	spin_unlock(&ops_lock);
+
+	if (!op)
+		return -EAGAIN;
+
+	log_debug("send %"PRIx64" op %d ex %d wait %d", info.number,
+		  info.optype, info.ex, info.wait);
+
+	if (copy_to_user(u, &info, sizeof(info)))
+		return -EFAULT;
+	return sizeof(info);
+}
+
+/* a write copies in one plock result that should match a plock_op
+   on the recv list */
+static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+			 loff_t *ppos)
+{
+	struct gdlm_plock_info info;
+	struct plock_op *op;
+	int found = 0;
+
+	if (count != sizeof(info))
+		return -EINVAL;
+
+	if (copy_from_user(&info, u, sizeof(info)))
+		return -EFAULT;
+
+	if (check_version(&info))
+		return -EINVAL;
+
+	log_debug("recv %"PRIx64" op %d ex %d wait %d", info.number,
+		  info.optype, info.ex, info.wait);
+
+	spin_lock(&ops_lock);
+	list_for_each_entry(op, &recv_list, list) {
+		if (op->info.fsid == info.fsid &&
+		    op->info.number == info.number) {
+			list_del_init(&op->list);
+			found = 1;
+			op->done = 1;
+			memcpy(&op->info, &info, sizeof(info));
+			break;
+		}
+	}
+	spin_unlock(&ops_lock);
+
+	if (found)
+		wake_up(&recv_wq);
+	else
+		printk("gdlm dev_write no op %x %"PRIx64"\n", info.fsid,
+			info.number);
+	return count;
+}
+
+static unsigned int dev_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &send_wq, wait);
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list)) {
+		spin_unlock(&ops_lock);
+		return POLLIN | POLLRDNORM;
+	}
+	spin_unlock(&ops_lock);
+	return 0;
+}
+
+static struct file_operations dev_fops = {
+	.read    = dev_read,
+	.write   = dev_write,
+	.poll    = dev_poll,
+	.owner   = THIS_MODULE
+};
+
+static struct miscdevice plock_dev_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = GDLM_PLOCK_MISC_NAME,
+	.fops = &dev_fops
+};
+
+int gdlm_plock_init(void)
+{
+	int rv;
+
+	spin_lock_init(&ops_lock);
+	INIT_LIST_HEAD(&send_list);
+	INIT_LIST_HEAD(&recv_list);
+	init_waitqueue_head(&send_wq);
+	init_waitqueue_head(&recv_wq);
+
+	rv = misc_register(&plock_dev_misc);
+	if (rv)
+		printk("gdlm_plock_init: misc_register failed %d", rv);
+	return rv;
+}
+
+void gdlm_plock_exit(void)
+{
+	if (misc_deregister(&plock_dev_misc) < 0)
+		printk("gdlm_plock_exit: misc_deregister failed");
+}
+
diff -urpN a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
--- a/fs/gfs2/locking/dlm/sysfs.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/sysfs.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/ctype.h>
+#include <linux/stat.h>
+
+#include "lock_dlm.h"
+
+static ssize_t gdlm_block_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret;
+	int val = 0;
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
+		val = 1;
+	ret = sprintf(buf, "%d\n", val);
+	return ret;
+}
+
+static ssize_t gdlm_block_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ssize_t ret = len;
+	int val;
+
+	val = simple_strtol(buf, NULL, 0);
+
+	if (val == 1)
+		set_bit(DFL_BLOCK_LOCKS, &ls->flags);
+	else if (val == 0) {
+		clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
+		gdlm_submit_delayed(ls);
+	} else
+		ret = -EINVAL;
+	return ret;
+}
+
+static ssize_t gdlm_mounted_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret;
+	int val = -2;
+
+	if (test_bit(DFL_TERMINATE, &ls->flags))
+		val = -1;
+	else if (test_bit(DFL_LEAVE_DONE, &ls->flags))
+		val = 0;
+	else if (test_bit(DFL_JOIN_DONE, &ls->flags))
+		val = 1;
+	ret = sprintf(buf, "%d\n", val);
+	return ret;
+}
+
+static ssize_t gdlm_mounted_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ssize_t ret = len;
+	int val;
+
+	val = simple_strtol(buf, NULL, 0);
+
+	if (val == 1)
+		set_bit(DFL_JOIN_DONE, &ls->flags);
+	else if (val == 0)
+		set_bit(DFL_LEAVE_DONE, &ls->flags);
+	else if (val == -1) {
+		set_bit(DFL_TERMINATE, &ls->flags);
+		set_bit(DFL_JOIN_DONE, &ls->flags);
+		set_bit(DFL_LEAVE_DONE, &ls->flags);
+	} else
+		ret = -EINVAL;
+	wake_up(&ls->wait_control);
+	return ret;
+}
+
+static ssize_t gdlm_withdraw_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret;
+	int val = 0;
+
+	if (test_bit(DFL_WITHDRAW, &ls->flags))
+		val = 1;
+	ret = sprintf(buf, "%d\n", val);
+	return ret;
+}
+
+static ssize_t gdlm_withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ssize_t ret = len;
+	int val;
+
+	val = simple_strtol(buf, NULL, 0);
+
+	if (val == 1)
+		set_bit(DFL_WITHDRAW, &ls->flags);
+	else
+		ret = -EINVAL;
+	wake_up(&ls->wait_control);
+	return ret;
+}
+
+static ssize_t gdlm_id_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%u\n", ls->id);
+}
+
+static ssize_t gdlm_id_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->id = simple_strtoul(buf, NULL, 0);
+	return len;
+}
+
+static ssize_t gdlm_jid_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->jid);
+}
+
+static ssize_t gdlm_jid_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->jid = simple_strtol(buf, NULL, 0);
+	return len;
+}
+
+static ssize_t gdlm_first_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->first);
+}
+
+static ssize_t gdlm_first_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->first = simple_strtol(buf, NULL, 0);
+	return len;
+}
+
+static ssize_t gdlm_first_done_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->first_done);
+}
+
+static ssize_t gdlm_recover_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->recover_jid);
+}
+
+static ssize_t gdlm_recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->recover_jid = simple_strtol(buf, NULL, 0);
+	ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
+	return len;
+}
+
+static ssize_t gdlm_recover_done_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret;
+	ret = sprintf(buf, "%d\n", ls->recover_done);
+	return ret;
+}
+
+static ssize_t gdlm_cluster_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret;
+	ret = sprintf(buf, "%s\n", ls->clustername);
+	return ret;
+}
+
+static ssize_t gdlm_options_show(struct gdlm_ls *ls, char *buf)
+{
+	ssize_t ret = 0;
+
+	if (ls->fsflags & LM_MFLAG_SPECTATOR)
+		ret += sprintf(buf, "spectator ");
+
+	return ret;
+}
+
+struct gdlm_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gdlm_ls *, char *);
+	ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
+};
+
+#define GDLM_ATTR(_name,_mode,_show,_store) \
+static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+GDLM_ATTR(block, S_IRUGO | S_IWUSR, gdlm_block_show, gdlm_block_store);
+GDLM_ATTR(mounted, S_IRUGO | S_IWUSR, gdlm_mounted_show, gdlm_mounted_store);
+GDLM_ATTR(withdraw, S_IRUGO | S_IWUSR, gdlm_withdraw_show, gdlm_withdraw_store);
+GDLM_ATTR(id, S_IRUGO | S_IWUSR, gdlm_id_show, gdlm_id_store);
+GDLM_ATTR(jid, S_IRUGO | S_IWUSR, gdlm_jid_show, gdlm_jid_store);
+GDLM_ATTR(first, S_IRUGO | S_IWUSR, gdlm_first_show, gdlm_first_store);
+GDLM_ATTR(first_done, S_IRUGO, gdlm_first_done_show, NULL);
+GDLM_ATTR(recover, S_IRUGO | S_IWUSR, gdlm_recover_show, gdlm_recover_store);
+GDLM_ATTR(recover_done, S_IRUGO, gdlm_recover_done_show, NULL);
+GDLM_ATTR(cluster, S_IRUGO, gdlm_cluster_show, NULL);
+GDLM_ATTR(options, S_IRUGO, gdlm_options_show, NULL);
+
+static struct attribute *gdlm_attrs[] = {
+	&gdlm_attr_block.attr,
+	&gdlm_attr_mounted.attr,
+	&gdlm_attr_withdraw.attr,
+	&gdlm_attr_id.attr,
+	&gdlm_attr_jid.attr,
+	&gdlm_attr_first.attr,
+	&gdlm_attr_first_done.attr,
+	&gdlm_attr_recover.attr,
+	&gdlm_attr_recover_done.attr,
+	&gdlm_attr_cluster.attr,
+	&gdlm_attr_options.attr,
+	NULL,
+};
+
+static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+	return a->show ? a->show(ls, buf) : 0;
+}
+
+static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+	return a->store ? a->store(ls, buf, len) : len;
+}
+
+static struct sysfs_ops gdlm_attr_ops = {
+	.show  = gdlm_attr_show,
+	.store = gdlm_attr_store,
+};
+
+static struct kobj_type gdlm_ktype = {
+	.default_attrs = gdlm_attrs,
+	.sysfs_ops     = &gdlm_attr_ops,
+};
+
+static struct kset gdlm_kset = {
+	.subsys = &kernel_subsys,
+	.kobj   = {.name = "lock_dlm",},
+	.ktype  = &gdlm_ktype,
+};
+
+int gdlm_kobject_setup(struct gdlm_ls *ls)
+{
+	int error;
+
+	error = kobject_set_name(&ls->kobj, "%s", ls->fsname);
+	if (error)
+		return error;
+
+	ls->kobj.kset = &gdlm_kset;
+	ls->kobj.ktype = &gdlm_ktype;
+
+	error = kobject_register(&ls->kobj);
+
+	return 0;
+}
+
+void gdlm_kobject_release(struct gdlm_ls *ls)
+{
+	kobject_unregister(&ls->kobj);
+}
+
+int gdlm_sysfs_init(void)
+{
+	int error;
+
+	error = kset_register(&gdlm_kset);
+	if (error)
+		printk("lock_dlm: cannot register kset %d\n", error);
+
+	return error;
+}
+
+void gdlm_sysfs_exit(void)
+{
+	kset_unregister(&gdlm_kset);
+}
+
diff -urpN a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
--- a/fs/gfs2/locking/dlm/thread.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/thread.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
+   thread gets to it. */
+
+static void queue_submit(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+
+	spin_lock(&ls->async_lock);
+	list_add_tail(&lp->delay_list, &ls->submit);
+	spin_unlock(&ls->async_lock);
+	wake_up(&ls->thread_wait);
+}
+
+static void process_submit(struct gdlm_lock *lp)
+{
+	gdlm_do_lock(lp, NULL);
+}
+
+static void process_blocking(struct gdlm_lock *lp, int bast_mode)
+{
+	struct gdlm_ls *ls = lp->ls;
+	unsigned int cb;
+
+	switch (gdlm_make_lmstate(bast_mode)) {
+	case LM_ST_EXCLUSIVE:
+		cb = LM_CB_NEED_E;
+		break;
+	case LM_ST_DEFERRED:
+		cb = LM_CB_NEED_D;
+		break;
+	case LM_ST_SHARED:
+		cb = LM_CB_NEED_S;
+		break;
+	default:
+		GDLM_ASSERT(0, printk("unknown bast mode %u\n",lp->bast_mode););
+	}
+
+	ls->fscb(ls->fsdata, cb, &lp->lockname);
+}
+
+static void process_complete(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+	struct lm_async_cb acb;
+	int16_t prev_mode = lp->cur;
+
+	memset(&acb, 0, sizeof(acb));
+
+	if (lp->lksb.sb_status == -DLM_ECANCEL) {
+		log_info("complete dlm cancel %x,%"PRIx64" flags %lx",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number,
+			 lp->flags);
+
+		lp->req = lp->cur;
+		acb.lc_ret |= LM_OUT_CANCELED;
+		if (lp->cur == DLM_LOCK_IV)
+			lp->lksb.sb_lkid = 0;
+		goto out;
+	}
+
+	if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
+		if (lp->lksb.sb_status != -DLM_EUNLOCK) {
+			log_info("unlock sb_status %d %x,%"PRIx64" flags %lx",
+				 lp->lksb.sb_status, lp->lockname.ln_type,
+				 lp->lockname.ln_number, lp->flags);
+			return;
+		}
+
+		lp->cur = DLM_LOCK_IV;
+		lp->req = DLM_LOCK_IV;
+		lp->lksb.sb_lkid = 0;
+
+		if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
+			gdlm_delete_lp(lp);
+			return;
+		}
+		goto out;
+	}
+
+	if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
+		memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
+
+	if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
+		if (lp->req == DLM_LOCK_PR)
+			lp->req = DLM_LOCK_CW;
+		else if (lp->req == DLM_LOCK_CW)
+			lp->req = DLM_LOCK_PR;
+	}
+
+	/*
+	 * A canceled lock request.  The lock was just taken off the delayed
+	 * list and was never even submitted to dlm.
+	 */
+
+	if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
+		log_info("complete internal cancel %x,%"PRIx64"",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number);
+		lp->req = lp->cur;
+		acb.lc_ret |= LM_OUT_CANCELED;
+		goto out;
+	}
+
+	/*
+	 * An error occured.
+	 */
+
+	if (lp->lksb.sb_status) {
+		/* a "normal" error */
+		if ((lp->lksb.sb_status == -EAGAIN) &&
+		    (lp->lkf & DLM_LKF_NOQUEUE)) {
+			lp->req = lp->cur;
+			if (lp->cur == DLM_LOCK_IV)
+				lp->lksb.sb_lkid = 0;
+			goto out;
+		}
+
+		/* this could only happen with cancels I think */
+		log_info("ast sb_status %d %x,%"PRIx64" flags %lx",
+			 lp->lksb.sb_status, lp->lockname.ln_type,
+			 lp->lockname.ln_number, lp->flags);
+		return;
+	}
+
+	/*
+	 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
+	 */
+
+	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
+		complete(&lp->ast_wait);
+		return;
+	}
+
+	/*
+	 * A lock has been demoted to NL because it initially completed during
+	 * BLOCK_LOCKS.  Now it must be requested in the originally requested
+	 * mode.
+	 */
+
+	if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
+		GDLM_ASSERT(lp->req == DLM_LOCK_NL,);
+		GDLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
+
+		lp->cur = DLM_LOCK_NL;
+		lp->req = lp->prev_req;
+		lp->prev_req = DLM_LOCK_IV;
+		lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+		set_bit(LFL_NOCACHE, &lp->flags);
+
+		if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+		    !test_bit(LFL_NOBLOCK, &lp->flags))
+			gdlm_queue_delayed(lp);
+		else
+			queue_submit(lp);
+		return;
+	}
+
+	/*
+	 * A request is granted during dlm recovery.  It may be granted
+	 * because the locks of a failed node were cleared.  In that case,
+	 * there may be inconsistent data beneath this lock and we must wait
+	 * for recovery to complete to use it.  When gfs recovery is done this
+	 * granted lock will be converted to NL and then reacquired in this
+	 * granted state.
+	 */
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+	    !test_bit(LFL_NOBLOCK, &lp->flags) &&
+	    lp->req != DLM_LOCK_NL) {
+
+		lp->cur = lp->req;
+		lp->prev_req = lp->req;
+		lp->req = DLM_LOCK_NL;
+		lp->lkf |= DLM_LKF_CONVERT;
+		lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+		log_debug("rereq %x,%"PRIx64" id %x %d,%d",
+			  lp->lockname.ln_type, lp->lockname.ln_number,
+			  lp->lksb.sb_lkid, lp->cur, lp->req);
+
+		set_bit(LFL_REREQUEST, &lp->flags);
+		queue_submit(lp);
+		return;
+	}
+
+	/*
+	 * DLM demoted the lock to NL before it was granted so GFS must be
+	 * told it cannot cache data for this lock.
+	 */
+
+	if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+		set_bit(LFL_NOCACHE, &lp->flags);
+
+ out:
+	/*
+	 * This is an internal lock_dlm lock
+	 */
+
+	if (test_bit(LFL_INLOCK, &lp->flags)) {
+		clear_bit(LFL_NOBLOCK, &lp->flags);
+		lp->cur = lp->req;
+		complete(&lp->ast_wait);
+		return;
+	}
+
+	/*
+	 * Normal completion of a lock request.  Tell GFS it now has the lock.
+	 */
+
+	clear_bit(LFL_NOBLOCK, &lp->flags);
+	lp->cur = lp->req;
+
+	acb.lc_name = lp->lockname;
+	acb.lc_ret |= gdlm_make_lmstate(lp->cur);
+
+	if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
+	    (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
+		acb.lc_ret |= LM_OUT_CACHEABLE;
+
+	ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
+}
+
+static inline int no_work(struct gdlm_ls *ls, int blocking)
+{
+	int ret;
+
+	spin_lock(&ls->async_lock);
+	ret = list_empty(&ls->complete) && list_empty(&ls->submit);
+	if (ret && blocking)
+		ret = list_empty(&ls->blocking);
+	spin_unlock(&ls->async_lock);
+
+	return ret;
+}
+
+static inline int check_drop(struct gdlm_ls *ls)
+{
+	if (!ls->drop_locks_count)
+		return 0;
+
+	if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
+		ls->drop_time = jiffies;
+		if (ls->all_locks_count >= ls->drop_locks_count)
+			return 1;
+	}
+	return 0;
+}
+
+static int gdlm_thread(void *data)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) data;
+	struct gdlm_lock *lp = NULL;
+	int blist = 0;
+	uint8_t complete, blocking, submit, drop;
+	DECLARE_WAITQUEUE(wait, current);
+
+	/* Only thread1 is allowed to do blocking callbacks since gfs
+	   may wait for a completion callback within a blocking cb. */
+
+	if (current == ls->thread1)
+		blist = 1;
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&ls->thread_wait, &wait);
+		if (no_work(ls, blist))
+			schedule();
+		remove_wait_queue(&ls->thread_wait, &wait);
+		set_current_state(TASK_RUNNING);
+
+		complete = blocking = submit = drop = 0;
+
+		spin_lock(&ls->async_lock);
+
+		if (blist && !list_empty(&ls->blocking)) {
+			lp = list_entry(ls->blocking.next, struct gdlm_lock,
+					blist);
+			list_del_init(&lp->blist);
+			blocking = lp->bast_mode;
+			lp->bast_mode = 0;
+		} else if (!list_empty(&ls->complete)) {
+			lp = list_entry(ls->complete.next, struct gdlm_lock,
+					clist);
+			list_del_init(&lp->clist);
+			complete = 1;
+		} else if (!list_empty(&ls->submit)) {
+			lp = list_entry(ls->submit.next, struct gdlm_lock,
+					delay_list);
+			list_del_init(&lp->delay_list);
+			submit = 1;
+		}
+
+		drop = check_drop(ls);
+		spin_unlock(&ls->async_lock);
+
+		if (complete)
+			process_complete(lp);
+
+		else if (blocking)
+			process_blocking(lp, blocking);
+
+		else if (submit)
+			process_submit(lp);
+
+		if (drop)
+			ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);
+
+		schedule();
+	}
+
+	return 0;
+}
+
+int gdlm_init_threads(struct gdlm_ls *ls)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(gdlm_thread, ls, "lock_dlm1");
+	error = IS_ERR(p);
+	if (error) {
+		log_error("can't start lock_dlm1 thread %d", error);
+		return error;
+	}
+	ls->thread1 = p;
+
+	p = kthread_run(gdlm_thread, ls, "lock_dlm2");
+	error = IS_ERR(p);
+	if (error) {
+		log_error("can't start lock_dlm2 thread %d", error);
+		kthread_stop(ls->thread1);
+		return error;
+	}
+	ls->thread2 = p;
+
+	return 0;
+}
+
+void gdlm_release_threads(struct gdlm_ls *ls)
+{
+	kthread_stop(ls->thread1);
+	kthread_stop(ls->thread2);
+}
+
diff -urpN a/include/linux/lock_dlm_plock.h b/include/linux/lock_dlm_plock.h
--- a/include/linux/lock_dlm_plock.h	1970-01-01 07:30:00.000000000 +0730
+++ b/include/linux/lock_dlm_plock.h	2005-09-01 17:48:48.142749200 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LOCK_DLM_PLOCK_DOT_H__
+#define __LOCK_DLM_PLOCK_DOT_H__
+
+#define GDLM_PLOCK_MISC_NAME		"lock_dlm_plock"
+
+#define GDLM_PLOCK_VERSION_MAJOR	1
+#define GDLM_PLOCK_VERSION_MINOR	0
+#define GDLM_PLOCK_VERSION_PATCH	0
+
+enum {
+	GDLM_PLOCK_OP_LOCK = 1,
+	GDLM_PLOCK_OP_UNLOCK,
+	GDLM_PLOCK_OP_GET,
+};
+
+struct gdlm_plock_info {
+	__u32 version[3];
+	__u8 optype;
+	__u8 ex;
+	__u8 wait;
+	__u8 pad;
+	__u32 pid;
+	__s32 nodeid;
+	__s32 rv;
+	__u32 fsid;
+	__u64 number;
+	__u64 start;
+	__u64 end;
+};
+
+#endif
+