diff mbox

[V2,2/5] Orangefs: protocol between kernel and userspace

Message ID 1421787111-27162-3-git-send-email-root@logtruck.clemson.edu (mailing list archive)
State New, archived
Headers show

Commit Message

Mike Marshall Jan. 20, 2015, 8:51 p.m. UTC
From: Mike Marshall <hubcap@omnibond.com>

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/pvfs2-bufmap.c | 970 +++++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/pvfs2-cache.c  | 258 ++++++++++++
 fs/orangefs/pvfs2-mod.c    | 346 ++++++++++++++++
 fs/orangefs/pvfs2-proc.c   | 698 ++++++++++++++++++++++++++++++++
 fs/orangefs/waitqueue.c    | 522 ++++++++++++++++++++++++
 5 files changed, 2794 insertions(+)
 create mode 100644 fs/orangefs/pvfs2-bufmap.c
 create mode 100644 fs/orangefs/pvfs2-cache.c
 create mode 100644 fs/orangefs/pvfs2-mod.c
 create mode 100644 fs/orangefs/pvfs2-proc.c
 create mode 100644 fs/orangefs/waitqueue.c
diff mbox

Patch

diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c
new file mode 100644
index 0000000..9a756ee
--- /dev/null
+++ b/fs/orangefs/pvfs2-bufmap.c
@@ -0,0 +1,970 @@ 
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
+
+/*
+ * State for the buffer shared with userspace.  The installed instance is
+ * published through __pvfs2_bufmap under pvfs2_bufmap_lock;
+ * pvfs_bufmap_initialize() refuses to install a second one.
+ */
+struct pvfs2_bufmap {
+	/* set to 1 at allocation; the bufmap is unmapped and freed at zero */
+	atomic_t		refcnt;
+
+	int			desc_size;	/* user_desc->size */
+	int			desc_shift;	/* ilog2(desc_size) */
+	int			desc_count;	/* user_desc->count */
+	int			total_size;	/* user_desc->total_size */
+	int			page_count;	/* total_size / PAGE_SIZE */
+
+	/* page_count entries, filled in by get_user_pages() */
+	struct page		**page_array;
+	/* desc_count entries, each pointing into page_array */
+	struct pvfs_bufmap_desc	*desc_array;
+
+	/*
+	 * array to track usage of buffer descriptors: desc_count entries,
+	 * 0 = free, 1 = claimed; guarded by buffer_index_lock
+	 */
+	int			*buffer_index_array;
+	spinlock_t		buffer_index_lock;
+
+	/*
+	 * array to track usage of buffer descriptors for readdir, same
+	 * 0/1 convention; guarded by readdir_index_lock
+	 */
+	int			readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
+	spinlock_t		readdir_index_lock;
+} *__pvfs2_bufmap;
+
+static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
+
+/*
+ * Drop one page reference for each of the page_count entries in
+ * bufmap->page_array, in array order.
+ */
+static void pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
+{
+	int idx = 0;
+
+	while (idx < bufmap->page_count) {
+		page_cache_release(bufmap->page_array[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Free a bufmap together with its index, descriptor and page-pointer
+ * arrays.  The pages themselves are not released here.
+ */
+static void pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
+{
+	kfree(bufmap->buffer_index_array);
+	kfree(bufmap->desc_array);
+	kfree(bufmap->page_array);
+	kfree(bufmap);
+}
+
+/*
+ * Take a reference on the installed bufmap.
+ *
+ * returns the bufmap with its refcnt incremented, or NULL when no
+ * bufmap is installed (in which case no reference is taken)
+ */
+struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
+{
+	struct pvfs2_bufmap *map;
+
+	spin_lock(&pvfs2_bufmap_lock);
+	map = __pvfs2_bufmap;
+	if (map)
+		atomic_inc(&map->refcnt);
+	spin_unlock(&pvfs2_bufmap_lock);
+
+	return map;
+}
+
+/*
+ * Drop a reference on bufmap.  The final reference clears
+ * __pvfs2_bufmap, then releases the pages and frees the bufmap.
+ * bufmap must not be NULL.
+ */
+void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
+{
+	/* true, with pvfs2_bufmap_lock held, only when refcnt reaches zero */
+	if (!atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock))
+		return;
+
+	__pvfs2_bufmap = NULL;
+	spin_unlock(&pvfs2_bufmap_lock);
+
+	pvfs2_bufmap_unmap(bufmap);
+	pvfs2_bufmap_free(bufmap);
+}
+
+/*
+ * pvfs_bufmap_size_query()
+ *
+ * returns desc_size of the installed bufmap, or 0 when no bufmap is
+ * installed
+ */
+inline int pvfs_bufmap_size_query(void)
+{
+	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
+	int size;
+
+	/*
+	 * pvfs2_bufmap_ref() takes no reference when it returns NULL, and
+	 * pvfs2_bufmap_unref() dereferences its argument, so there is
+	 * nothing to drop in that case.
+	 */
+	if (!bufmap)
+		return 0;
+
+	size = bufmap->desc_size;
+	pvfs2_bufmap_unref(bufmap);
+	return size;
+}
+
+/*
+ * pvfs_bufmap_shift_query()
+ *
+ * returns desc_shift of the installed bufmap, or 0 when no bufmap is
+ * installed
+ */
+inline int pvfs_bufmap_shift_query(void)
+{
+	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
+	int shift;
+
+	/*
+	 * pvfs2_bufmap_ref() takes no reference when it returns NULL, and
+	 * pvfs2_bufmap_unref() dereferences its argument, so there is
+	 * nothing to drop in that case.
+	 */
+	if (!bufmap)
+		return 0;
+
+	shift = bufmap->desc_shift;
+	pvfs2_bufmap_unref(bufmap);
+	return shift;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
+static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
+
+/*
+ * get_bufmap_init
+ *
+ * Reports whether a bufmap is currently installed in __pvfs2_bufmap,
+ * i.e. whether the shared memory system, including the
+ * buffer_index_array, is available.  The pointer is sampled without
+ * taking pvfs2_bufmap_lock.
+ *
+ * returns 1 if a bufmap is installed, 0 otherwise
+ */
+int get_bufmap_init(void)
+{
+	if (__pvfs2_bufmap)
+		return 1;
+	return 0;
+}
+
+
+/*
+ * Allocate a bufmap and its tracking arrays, sized from user_desc.
+ *
+ * The new bufmap starts with a refcnt of 1 and zero-filled index,
+ * descriptor and page-pointer arrays.  No pages are pinned here.
+ *
+ * returns the new bufmap, or NULL if any allocation fails
+ */
+static struct pvfs2_bufmap *
+pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
+{
+	struct pvfs2_bufmap *map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map)
+		return NULL;
+
+	atomic_set(&map->refcnt, 1);
+	spin_lock_init(&map->buffer_index_lock);
+	spin_lock_init(&map->readdir_index_lock);
+
+	/* geometry as supplied by the caller */
+	map->total_size = user_desc->total_size;
+	map->desc_count = user_desc->count;
+	map->desc_size = user_desc->size;
+	map->desc_shift = ilog2(map->desc_size);
+	map->page_count = map->total_size / PAGE_SIZE;
+
+	map->buffer_index_array =
+		kcalloc(map->desc_count, sizeof(int), GFP_KERNEL);
+	if (!map->buffer_index_array) {
+		gossip_err("pvfs2: could not allocate %d buffer indices\n",
+				map->desc_count);
+		goto fail_map;
+	}
+
+	map->desc_array =
+		kcalloc(map->desc_count, sizeof(struct pvfs_bufmap_desc),
+			GFP_KERNEL);
+	if (!map->desc_array) {
+		gossip_err("pvfs2: could not allocate %d descriptors\n",
+				map->desc_count);
+		goto fail_index_array;
+	}
+
+	/* allocate storage to track our page mappings */
+	map->page_array =
+		kcalloc(map->page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!map->page_array)
+		goto fail_desc_array;
+
+	return map;
+
+fail_desc_array:
+	kfree(map->desc_array);
+fail_index_array:
+	kfree(map->buffer_index_array);
+fail_map:
+	kfree(map);
+	return NULL;
+}
+
+/*
+ * Obtain the pages backing the user buffer and carve them into
+ * descriptors.
+ *
+ * get_user_pages() is asked for bufmap->page_count pages starting at
+ * user_desc->ptr and fills bufmap->page_array.  Each of the desc_count
+ * descriptors is then pointed at its own run of desc_size / PAGE_SIZE
+ * consecutive entries of page_array, together with the matching user
+ * address.
+ *
+ * returns 0 on success, the negative get_user_pages() result on error,
+ * or -ENOMEM when fewer pages than requested were obtained (the pages
+ * that were obtained are released again first)
+ */
+static int
+pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
+		struct PVFS_dev_map_desc *user_desc)
+{
+	int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
+	int offset = 0, ret, i;
+
+	/* map the pages */
+	down_write(&current->mm->mmap_sem);
+	/*
+	 * NOTE(review): the literal 1 and 0 below are presumably the
+	 * write and force arguments of this kernel's get_user_pages() --
+	 * confirm against its prototype.
+	 */
+	ret = get_user_pages(current,
+			     current->mm,
+			     (unsigned long)user_desc->ptr,
+			     bufmap->page_count,
+			     1,
+			     0,
+			     bufmap->page_array,
+			     NULL);
+	up_write(&current->mm->mmap_sem);
+
+	if (ret < 0)
+		return ret;
+
+	/* a short count is treated as failure: give back what we got */
+	if (ret != bufmap->page_count) {
+		gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
+				bufmap->page_count, ret);
+
+		for (i = 0; i < ret; i++) {
+			SetPageError(bufmap->page_array[i]);
+			page_cache_release(bufmap->page_array[i]);
+		}
+		return -ENOMEM;
+	}
+
+	/*
+	 * ideally we want to get kernel space pointers for each page, but
+	 * we can't kmap that many pages at once if highmem is being used.
+	 * so instead, we just kmap/kunmap the page address each time the
+	 * kaddr is needed.
+	 */
+	for (i = 0; i < bufmap->page_count; i++)
+		flush_dcache_page(bufmap->page_array[i]);
+
+	/* build a list of available descriptors */
+	for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
+		/* offset is this descriptor's first index into page_array */
+		bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
+		bufmap->desc_array[i].array_count = pages_per_desc;
+		bufmap->desc_array[i].uaddr =
+		    (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
+		offset += pages_per_desc;
+	}
+
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_initialize()
+ *
+ * initializes the mapped buffer interface
+ *
+ * user_desc describes a userspace buffer of total_size bytes made up
+ * of count descriptors of size bytes each.  The buffer must start and
+ * end on a page boundary, size must be a positive multiple of
+ * PAGE_SIZE, count must be positive and total_size must equal
+ * size * count.
+ *
+ * returns 0 on success, -errno on failure:
+ *   -EINVAL   the description fails one of the checks above
+ *   -ENOMEM   a tracking allocation failed
+ *   -EALREADY a bufmap is already installed
+ *   or the error returned by pvfs2_bufmap_map()
+ */
+int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
+{
+	struct pvfs2_bufmap *bufmap;
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_initialize: called (ptr ("
+		     "%p) sz (%d) cnt(%d).\n",
+		     user_desc->ptr,
+		     user_desc->size,
+		     user_desc->count);
+
+	/*
+	 * Reject empty or negative geometry up front.  A zero size passes
+	 * the divisibility check below and would reach ilog2(0) and a
+	 * zero-page get_user_pages() call; negative values would be used
+	 * as allocation and page counts.
+	 */
+	if (user_desc->size <= 0 ||
+	    user_desc->count <= 0 ||
+	    user_desc->total_size <= 0) {
+		gossip_err("pvfs2 error: bufmap sizes must be positive: (%d, %d, %d)\n",
+			   user_desc->total_size,
+			   user_desc->size,
+			   user_desc->count);
+		goto out;
+	}
+
+	/*
+	 * sanity check alignment and size of buffer that caller wants to
+	 * work with
+	 */
+	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
+	    (unsigned long)user_desc->ptr) {
+		gossip_err("pvfs2 error: memory alignment (front). %p\n",
+			   user_desc->ptr);
+		goto out;
+	}
+
+	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
+	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
+		gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
+			   user_desc->ptr,
+			   user_desc->total_size);
+		goto out;
+	}
+
+	/* multiply in 64 bits so that size * count cannot wrap */
+	if ((long long)user_desc->total_size !=
+	    (long long)user_desc->size * user_desc->count) {
+		gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
+			   user_desc->total_size,
+			   user_desc->size,
+			   user_desc->count);
+		goto out;
+	}
+
+	if ((user_desc->size % PAGE_SIZE) != 0) {
+		gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
+			   user_desc->size);
+		goto out;
+	}
+
+	ret = -ENOMEM;
+	bufmap = pvfs2_bufmap_alloc(user_desc);
+	if (!bufmap)
+		goto out;
+
+	ret = pvfs2_bufmap_map(bufmap, user_desc);
+	if (ret)
+		goto out_free_bufmap;
+
+	/* publish the new bufmap unless one is already installed */
+	spin_lock(&pvfs2_bufmap_lock);
+	if (__pvfs2_bufmap) {
+		spin_unlock(&pvfs2_bufmap_lock);
+		gossip_err("pvfs2: error: bufmap already initialized.\n");
+		ret = -EALREADY;
+		goto out_unmap_bufmap;
+	}
+	__pvfs2_bufmap = bufmap;
+	spin_unlock(&pvfs2_bufmap_lock);
+
+	/*
+	 * If there are operations in pvfs2_bufmap_init_waitq, wake them up.
+	 * This scenario occurs when the client-core is restarted and I/O
+	 * requests in the in-progress or waiting tables are restarted.  I/O
+	 * requests cannot be restarted until the shared memory system is
+	 * completely re-initialized, so we put the I/O requests in this
+	 * waitq until initialization has completed.  NOTE:  the I/O requests
+	 * are also on a timer, so they don't wait forever just in case the
+	 * client-core doesn't come back up.
+	 */
+	wake_up_interruptible(&pvfs2_bufmap_init_waitq);
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_initialize: exiting normally\n");
+	return 0;
+
+out_unmap_bufmap:
+	pvfs2_bufmap_unmap(bufmap);
+out_free_bufmap:
+	pvfs2_bufmap_free(bufmap);
+out:
+	return ret;
+}
+
+/*
+ * pvfs_bufmap_finalize()
+ *
+ * shuts down the mapped buffer interface by dropping one reference on
+ * the installed bufmap; the resources associated with it are released
+ * once the last reference is gone (see pvfs2_bufmap_unref())
+ *
+ * no return value
+ */
+void pvfs_bufmap_finalize(void)
+{
+	gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
+	/* XXX: calling this with no bufmap installed is treated as fatal */
+	BUG_ON(!__pvfs2_bufmap);	/* XXX */
+	pvfs2_bufmap_unref(__pvfs2_bufmap);
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs2_bufmap_finalize: exiting normally\n");
+}
+
+/*
+ * Describes one pool of slots so that wait_for_a_slot() and
+ * put_back_slot() can serve both the I/O and the readdir index arrays.
+ */
+struct slot_args {
+	int slot_count;			/* number of entries in slot_array */
+	int *slot_array;		/* 0 = free, 1 = claimed */
+	spinlock_t *slot_lock;		/* guards slot_array */
+	wait_queue_head_t *slot_wq;	/* where claimers sleep */
+};
+
+/*
+ * Claim the first free slot in slargs->slot_array, sleeping on
+ * slargs->slot_wq until one is released.
+ *
+ * On success the slot is marked 1 and its index stored in
+ * *buffer_index.
+ *
+ * returns 0 on success, -ETIMEDOUT if a sleep of slot_timeout_secs
+ * seconds runs out, -EINTR if a signal is pending
+ */
+static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
+{
+	int ret = -1;
+	int i = 0;
+	DECLARE_WAITQUEUE(my_wait, current);
+
+
+	add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
+
+	while (1) {
+		/*
+		 * the task state is set before the array is scanned, so the
+		 * scan and the sleep below happen with the task already
+		 * marked interruptible
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * check for available desc, slot_lock is the appropriate
+		 * index_lock
+		 */
+		spin_lock(slargs->slot_lock);
+		for (i = 0; i < slargs->slot_count; i++)
+			if (slargs->slot_array[i] == 0) {
+				slargs->slot_array[i] = 1;
+				*buffer_index = i;
+				ret = 0;
+				break;
+			}
+		spin_unlock(slargs->slot_lock);
+
+		/* if we acquired a buffer, then break out of while */
+		if (ret == 0)
+			break;
+
+		if (!signal_pending(current)) {
+			int timeout =
+			    MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+				     "[BUFMAP]: waiting %d "
+				     "seconds for a slot\n",
+				     slot_timeout_secs);
+			/* a zero return means the full timeout elapsed */
+			if (!schedule_timeout(timeout)) {
+				gossip_debug(GOSSIP_BUFMAP_DEBUG,
+					     "*** wait_for_a_slot timed out\n");
+				ret = -ETIMEDOUT;
+				break;
+			}
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+			  "[BUFMAP]: woken up by a slot becoming available.\n");
+			continue;
+		}
+
+		gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
+			     __func__);
+		ret = -EINTR;
+		break;
+	}
+
+	/* every exit path leaves the waitqueue in the running state */
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(slargs->slot_wq, &my_wait);
+	return ret;
+}
+
+/*
+ * Mark slot buffer_index free again and wake anyone sleeping on the
+ * slot waitqueue.  An out-of-range index is ignored and triggers no
+ * wakeup.
+ */
+static void put_back_slot(struct slot_args *slargs, int buffer_index)
+{
+	int in_range;
+
+	/* slot_lock is the appropriate index_lock */
+	spin_lock(slargs->slot_lock);
+	in_range = buffer_index >= 0 && buffer_index < slargs->slot_count;
+	if (in_range)
+		slargs->slot_array[buffer_index] = 0;
+	spin_unlock(slargs->slot_lock);
+
+	/* the wakeup is issued after the lock has been dropped */
+	if (in_range)
+		wake_up_interruptible(slargs->slot_wq);
+}
+
+/*
+ * pvfs_bufmap_get()
+ *
+ * gets a free mapped buffer descriptor, will sleep until one becomes
+ * available if necessary
+ *
+ * On success *mapp holds a referenced bufmap and *buffer_index the
+ * claimed slot; hand both back with pvfs_bufmap_put().  On failure no
+ * reference is held and *mapp is set to NULL.
+ *
+ * returns 0 on success, -errno on failure (-EIO when no bufmap is
+ * installed, otherwise the error from wait_for_a_slot())
+ */
+int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
+{
+	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
+	struct slot_args slargs;
+	int ret;
+
+	if (!bufmap) {
+		gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
+		*mapp = NULL;
+		return -EIO;
+	}
+
+	slargs.slot_count = bufmap->desc_count;
+	slargs.slot_array = bufmap->buffer_index_array;
+	slargs.slot_lock = &bufmap->buffer_index_lock;
+	slargs.slot_wq = &bufmap_waitq;
+	ret = wait_for_a_slot(&slargs, buffer_index);
+	if (ret) {
+		/*
+		 * Our reference is dropped here, so the pointer must not be
+		 * handed to the caller: it may no longer be valid.
+		 */
+		pvfs2_bufmap_unref(bufmap);
+		bufmap = NULL;
+	}
+	*mapp = bufmap;
+	return ret;
+}
+
+/*
+ * pvfs_bufmap_put()
+ *
+ * returns a mapped buffer descriptor to the collection and drops the
+ * bufmap reference taken by pvfs_bufmap_get()
+ *
+ * no return value
+ */
+void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
+{
+	struct slot_args slargs = {
+		.slot_count = bufmap->desc_count,
+		.slot_array = bufmap->buffer_index_array,
+		.slot_lock = &bufmap->buffer_index_lock,
+		.slot_wq = &bufmap_waitq,
+	};
+
+	put_back_slot(&slargs, buffer_index);
+	pvfs2_bufmap_unref(bufmap);
+}
+
+/*
+ * readdir_index_get()
+ *
+ * gets a free descriptor, will sleep until one becomes
+ * available if necessary.
+ * Although the readdir buffers are not mapped into kernel space
+ * we could do that at a later point of time. Regardless, these
+ * indices are used by the client-core.
+ *
+ * On success *mapp holds a referenced bufmap and *buffer_index the
+ * claimed slot; hand both back with readdir_index_put().  On failure
+ * no reference is held and *mapp is set to NULL.
+ *
+ * returns 0 on success, -errno on failure (-EIO when no bufmap is
+ * installed, otherwise the error from wait_for_a_slot())
+ */
+int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
+{
+	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
+	struct slot_args slargs;
+	int ret;
+
+	if (!bufmap) {
+		gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
+		*mapp = NULL;
+		return -EIO;
+	}
+
+	slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
+	slargs.slot_array = bufmap->readdir_index_array;
+	slargs.slot_lock = &bufmap->readdir_index_lock;
+	slargs.slot_wq = &readdir_waitq;
+	ret = wait_for_a_slot(&slargs, buffer_index);
+	if (ret) {
+		/*
+		 * Our reference is dropped here, so the pointer must not be
+		 * handed to the caller: it may no longer be valid.
+		 */
+		pvfs2_bufmap_unref(bufmap);
+		bufmap = NULL;
+	}
+	*mapp = bufmap;
+	return ret;
+}
+
+/*
+ * readdir_index_put()
+ *
+ * returns a readdir slot to the collection and drops the bufmap
+ * reference taken by readdir_index_get()
+ *
+ * no return value
+ */
+void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
+{
+	struct slot_args slargs = {
+		.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT,
+		.slot_array = bufmap->readdir_index_array,
+		.slot_lock = &bufmap->readdir_index_lock,
+		.slot_wq = &readdir_waitq,
+	};
+
+	put_back_slot(&slargs, buffer_index);
+	pvfs2_bufmap_unref(bufmap);
+}
+
+/*
+ * pvfs_bufmap_copy_iovec_from_user()
+ *
+ * copies data from several user space addresses in an iovec
+ * to a mapped buffer
+ *
+ * Note that the mapped buffer is a series of pages and therefore
+ * the copies have to be split by PAGE_SIZE bytes at a time.
+ * Note that this routine checks that summation of iov_len
+ * across all the elements of iov is equal to size.
+ *
+ * returns 0 on success, -errno on failure:
+ *   -ENOMEM  the private iovec copy could not be allocated
+ *   -EINVAL  the iov_len values do not add up to size
+ *   -EFAULT  copy_from_user() could not copy everything
+ */
+int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap,
+				     int buffer_index,
+				     const struct iovec *iov,
+				     unsigned long nr_segs,
+				     size_t size)
+{
+	size_t ret = 0;
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	unsigned int to_page_offset = 0;
+	unsigned int to_page_index = 0;
+	void *to_kaddr = NULL;
+	void __user *from_addr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *to;
+	unsigned int seg;
+	char *tmp_printer = NULL;
+	int tmp_int = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_iovec_from_user: index %d, "
+		     "size %zd\n",
+		     buffer_index,
+		     size);
+
+	to = &bufmap->desc_array[buffer_index];
+
+	/*
+	 * copy the passed in iovec so that we can change some of its fields;
+	 * kmalloc_array() fails cleanly if nr_segs * sizeof() would overflow
+	 */
+	copied_iovec = kmalloc_array(nr_segs, sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL) {
+		gossip_err("pvfs2_bufmap_copy_iovec_from_user: failed allocating memory\n");
+		return -ENOMEM;
+	}
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len matches the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied != size) {
+		gossip_err(
+		    "pvfs2_bufmap_copy_iovec_from_user: computed total ("
+		    "%zd) is not equal to (%zd)\n",
+		    amt_copied,
+		    size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	amt_copied = 0;
+	seg = 0;
+	/*
+	 * Go through each segment in the iovec and copy its
+	 * buffer into the mapped buffer one page at a time though
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		size_t page_room = PAGE_SIZE - to_page_offset;
+		/* both flags are sampled before iv is adjusted below */
+		int seg_done = iv->iov_len <= page_room;
+		int inc_to_page_index = iv->iov_len >= page_room;
+
+		/* never cross a page, a segment, or the requested size */
+		cur_copy_size = PVFS_util_min(iv->iov_len, page_room);
+		cur_copy_size = PVFS_util_min(cur_copy_size,
+					      size - amt_copied);
+		from_addr = iv->iov_base;
+		if (seg_done) {
+			seg++;
+		} else {
+			/* the rest of this segment goes to the next page */
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+		}
+
+		to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
+		ret =
+		    copy_from_user(to_kaddr + to_page_offset,
+				   from_addr,
+				   cur_copy_size);
+		if (!PageReserved(to->page_array[to_page_index]))
+			SetPageDirty(to->page_array[to_page_index]);
+
+		/* debug aid: report the first byte placed in the buffer */
+		if (!tmp_printer) {
+			tmp_printer = (char *)(to_kaddr + to_page_offset);
+			tmp_int += tmp_printer[0];
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+				     "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n",
+				     tmp_int);
+		}
+
+		pvfs2_kunmap(to->page_array[to_page_index]);
+		if (ret) {
+			gossip_err("Failed to copy data from user space\n");
+			kfree(copied_iovec);
+			return -EFAULT;
+		}
+
+		amt_copied += cur_copy_size;
+		if (inc_to_page_index) {
+			to_page_offset = 0;
+			to_page_index++;
+		} else {
+			to_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_iovec_from_kernel()
+ *
+ * copies data from several kernel space addresses in an iovec
+ * to a mapped buffer
+ *
+ * Note that the mapped buffer is a series of pages and therefore
+ * the copies have to be split by PAGE_SIZE bytes at a time.
+ * Note that this routine checks that summation of iov_len
+ * across all the elements of iov is equal to size.
+ *
+ * returns 0 on success, -errno on failure:
+ *   -ENOMEM  the private iovec copy could not be allocated
+ *   -EINVAL  the iov_len values do not add up to size
+ */
+int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int to_page_index = 0;
+	void *to_kaddr = NULL;
+	void *from_kaddr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *to;
+	unsigned int seg;
+	unsigned to_page_offset = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_iovec_from_kernel: index %d, "
+		     "size %zd\n",
+		     buffer_index,
+		     size);
+
+	to = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields;
+	 * kmalloc_array() fails cleanly if nr_segs * sizeof() would overflow
+	 */
+	copied_iovec = kmalloc_array(nr_segs, sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL) {
+		gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: failed allocating memory\n");
+		return -ENOMEM;
+	}
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len matches the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied != size) {
+		gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: computed total(%zd) is not equal to (%zd)\n",
+			   amt_copied,
+			   size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	amt_copied = 0;
+	seg = 0;
+	/*
+	 * Go through each segment in the iovec and copy its
+	 * buffer into the mapped buffer one page at a time though
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		size_t page_room = PAGE_SIZE - to_page_offset;
+		/* both flags are sampled before iv is adjusted below */
+		int seg_done = iv->iov_len <= page_room;
+		int inc_to_page_index = iv->iov_len >= page_room;
+
+		/* never cross a page, a segment, or the requested size */
+		cur_copy_size = PVFS_util_min(iv->iov_len, page_room);
+		cur_copy_size = PVFS_util_min(cur_copy_size,
+					      size - amt_copied);
+		from_kaddr = iv->iov_base;
+		if (seg_done) {
+			seg++;
+		} else {
+			/* the rest of this segment goes to the next page */
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+		}
+
+		to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
+		memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size);
+		if (!PageReserved(to->page_array[to_page_index]))
+			SetPageDirty(to->page_array[to_page_index]);
+		pvfs2_kunmap(to->page_array[to_page_index]);
+
+		amt_copied += cur_copy_size;
+		if (inc_to_page_index) {
+			to_page_offset = 0;
+			to_page_index++;
+		} else {
+			to_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_to_user_iovec()
+ *
+ * copies data to several user space addresses in an iovec
+ * from a mapped buffer
+ *
+ * The iovec may describe more than size bytes; only the first size
+ * bytes of it are filled.
+ *
+ * returns 0 on success, -errno on failure:
+ *   -ENOMEM  the private iovec copy could not be allocated
+ *   -EINVAL  the iov_len values add up to less than size
+ *   -EFAULT  copy_to_user() could not copy everything
+ */
+int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t ret = 0;
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int from_page_index = 0;
+	void *from_kaddr = NULL;
+	void __user *to_addr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *from;
+	unsigned int seg;
+	unsigned from_page_offset = 0;
+	char *tmp_printer = NULL;
+	int tmp_int = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_to_user_iovec: index %d, size %zd\n",
+		     buffer_index,
+		     size);
+
+	from = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields;
+	 * kmalloc_array() fails cleanly if nr_segs * sizeof() would overflow
+	 */
+	copied_iovec = kmalloc_array(nr_segs, sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL) {
+		gossip_err("pvfs2_bufmap_copy_to_user_iovec: failed allocating memory\n");
+		return -ENOMEM;
+	}
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len is at least the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied < size) {
+		gossip_err("pvfs2_bufmap_copy_to_user_iovec: computed total (%zd) is less than (%zd)\n",
+			   amt_copied,
+			   size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	amt_copied = 0;
+	seg = 0;
+	/*
+	 * Go through each segment in the iovec and copy from the mapper buffer,
+	 * but make sure that we do so one page at a time.
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		size_t page_room = PAGE_SIZE - from_page_offset;
+		/* both flags are sampled before iv is adjusted below */
+		int seg_done = iv->iov_len <= page_room;
+		int inc_from_page_index = iv->iov_len >= page_room;
+
+		/* never cross a page, a segment, or the requested size */
+		cur_copy_size = PVFS_util_min(iv->iov_len, page_room);
+		cur_copy_size = PVFS_util_min(cur_copy_size,
+					      size - amt_copied);
+		to_addr = iv->iov_base;
+		if (seg_done) {
+			seg++;
+		} else {
+			/* the rest of this segment is fed by the next page */
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+		}
+
+		from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
+		/* debug aid: report the first byte taken from the buffer */
+		if (!tmp_printer) {
+			tmp_printer = (char *)(from_kaddr + from_page_offset);
+			tmp_int += tmp_printer[0];
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+				     "First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n",
+				     tmp_int);
+		}
+		ret =
+		    copy_to_user(to_addr,
+				 from_kaddr + from_page_offset,
+				 cur_copy_size);
+		pvfs2_kunmap(from->page_array[from_page_index]);
+		if (ret) {
+			gossip_err("Failed to copy data to user space\n");
+			kfree(copied_iovec);
+			return -EFAULT;
+		}
+
+		amt_copied += cur_copy_size;
+		if (inc_from_page_index) {
+			from_page_offset = 0;
+			from_page_index++;
+		} else {
+			from_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_to_kernel_iovec()
+ *
+ * copies data to several kernel space addresses in an iovec
+ * from a mapped buffer
+ *
+ * The iovec may describe more than size bytes; only the first size
+ * bytes of it are filled.
+ *
+ * returns 0 on success, -errno on failure:
+ *   -ENOMEM  the private iovec copy could not be allocated
+ *   -EINVAL  the iov_len values add up to less than size
+ */
+int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int from_page_index = 0;
+	void *from_kaddr = NULL;
+	void *to_kaddr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *from;
+	unsigned int seg;
+	unsigned int from_page_offset = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_to_kernel_iovec: index %d, size %zd\n",
+		      buffer_index,
+		      size);
+
+	from = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields;
+	 * kmalloc_array() fails cleanly if nr_segs * sizeof() would overflow
+	 */
+	copied_iovec = kmalloc_array(nr_segs, sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL) {
+		gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: failed allocating memory\n");
+		return -ENOMEM;
+	}
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len is at least the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+
+	if (amt_copied < size) {
+		gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: computed total (%zd) is less than (%zd)\n",
+		     amt_copied,
+		     size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	amt_copied = 0;
+	seg = 0;
+	/*
+	 * Go through each segment in the iovec and copy from the mapper buffer,
+	 * but make sure that we do so one page at a time.
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		size_t page_room = PAGE_SIZE - from_page_offset;
+		/* both flags are sampled before iv is adjusted below */
+		int seg_done = iv->iov_len <= page_room;
+		int inc_from_page_index = iv->iov_len >= page_room;
+
+		/* never cross a page, a segment, or the requested size */
+		cur_copy_size = PVFS_util_min(iv->iov_len, page_room);
+		cur_copy_size = PVFS_util_min(cur_copy_size,
+					      size - amt_copied);
+		to_kaddr = iv->iov_base;
+		if (seg_done) {
+			seg++;
+		} else {
+			/* the rest of this segment is fed by the next page */
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+		}
+
+		from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
+		memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size);
+		pvfs2_kunmap(from->page_array[from_page_index]);
+
+		amt_copied += cur_copy_size;
+		if (inc_from_page_index) {
+			from_page_offset = 0;
+			from_page_index++;
+		} else {
+			from_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
diff --git a/fs/orangefs/pvfs2-cache.c b/fs/orangefs/pvfs2-cache.c
new file mode 100644
index 0000000..e449ba5
--- /dev/null
+++ b/fs/orangefs/pvfs2-cache.c
@@ -0,0 +1,258 @@ 
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+/* tags assigned to kernel upcall operations */
+static uint64_t next_tag_value;
+static DEFINE_SPINLOCK(next_tag_value_lock);
+
+/* the pvfs2 memory caches */
+
+/* a cache for pvfs2 upcall/downcall operations */
+static struct kmem_cache *op_cache;
+
+/* a cache for device (/dev/pvfs2-req) communication */
+static struct kmem_cache *dev_req_cache;
+
+/* a cache for pvfs2_kiocb objects (i.e pvfs2 iocb structures ) */
+static struct kmem_cache *pvfs2_kiocb_cache;
+
+/*
+ * Create the slab cache for pvfs2_kernel_op objects and reset the tag
+ * counter to its starting value of 100.
+ *
+ * returns 0 on success, -ENOMEM if the cache cannot be created
+ */
+int op_cache_initialize(void)
+{
+	struct kmem_cache *cache;
+
+	cache = kmem_cache_create("pvfs2_op_cache",
+				  sizeof(struct pvfs2_kernel_op),
+				  0,
+				  PVFS2_CACHE_CREATE_FLAGS,
+				  NULL);
+	op_cache = cache;
+	if (cache == NULL) {
+		gossip_err("Cannot create pvfs2_op_cache\n");
+		return -ENOMEM;
+	}
+
+	/* initialize our atomic tag counter */
+	spin_lock(&next_tag_value_lock);
+	next_tag_value = 100;
+	spin_unlock(&next_tag_value_lock);
+
+	return 0;
+}
+
+/* Destroy the op slab cache.  Always returns 0. */
+int op_cache_finalize(void)
+{
+	kmem_cache_destroy(op_cache);
+	return 0;
+}
+
+/*
+ * Map the upcall type of new_op to a printable name.
+ *
+ * returns a string literal; "OP_UNKNOWN?" for a NULL op or a type that
+ * is not listed here
+ */
+char *get_opname_string(struct pvfs2_kernel_op *new_op)
+{
+	int32_t op_type;
+
+	if (!new_op)
+		return "OP_UNKNOWN?";
+
+	op_type = new_op->upcall.type;
+
+	if (op_type == PVFS2_VFS_OP_FILE_IO)
+		return "OP_FILE_IO";
+	if (op_type == PVFS2_VFS_OP_LOOKUP)
+		return "OP_LOOKUP";
+	if (op_type == PVFS2_VFS_OP_CREATE)
+		return "OP_CREATE";
+	if (op_type == PVFS2_VFS_OP_GETATTR)
+		return "OP_GETATTR";
+	if (op_type == PVFS2_VFS_OP_REMOVE)
+		return "OP_REMOVE";
+	if (op_type == PVFS2_VFS_OP_MKDIR)
+		return "OP_MKDIR";
+	if (op_type == PVFS2_VFS_OP_READDIR)
+		return "OP_READDIR";
+	if (op_type == PVFS2_VFS_OP_READDIRPLUS)
+		return "OP_READDIRPLUS";
+	if (op_type == PVFS2_VFS_OP_SETATTR)
+		return "OP_SETATTR";
+	if (op_type == PVFS2_VFS_OP_SYMLINK)
+		return "OP_SYMLINK";
+	if (op_type == PVFS2_VFS_OP_RENAME)
+		return "OP_RENAME";
+	if (op_type == PVFS2_VFS_OP_STATFS)
+		return "OP_STATFS";
+	if (op_type == PVFS2_VFS_OP_TRUNCATE)
+		return "OP_TRUNCATE";
+	if (op_type == PVFS2_VFS_OP_MMAP_RA_FLUSH)
+		return "OP_MMAP_RA_FLUSH";
+	if (op_type == PVFS2_VFS_OP_FS_MOUNT)
+		return "OP_FS_MOUNT";
+	if (op_type == PVFS2_VFS_OP_FS_UMOUNT)
+		return "OP_FS_UMOUNT";
+	if (op_type == PVFS2_VFS_OP_GETXATTR)
+		return "OP_GETXATTR";
+	if (op_type == PVFS2_VFS_OP_SETXATTR)
+		return "OP_SETXATTR";
+	if (op_type == PVFS2_VFS_OP_LISTXATTR)
+		return "OP_LISTXATTR";
+	if (op_type == PVFS2_VFS_OP_REMOVEXATTR)
+		return "OP_REMOVEXATTR";
+	if (op_type == PVFS2_VFS_OP_PARAM)
+		return "OP_PARAM";
+	if (op_type == PVFS2_VFS_OP_PERF_COUNT)
+		return "OP_PERF_COUNT";
+	if (op_type == PVFS2_VFS_OP_CANCEL)
+		return "OP_CANCEL";
+	if (op_type == PVFS2_VFS_OP_FSYNC)
+		return "OP_FSYNC";
+	if (op_type == PVFS2_VFS_OP_FSKEY)
+		return "OP_FSKEY";
+	if (op_type == PVFS2_VFS_OP_FILE_IOX)
+		return "OP_FILE_IOX";
+
+	return "OP_UNKNOWN?";
+}
+
+/*
+ * Allocate and set up a kernel op of the given upcall type.
+ *
+ * The op is zeroed, given the next tag value, stamped with the
+ * caller's fsuid/fsgid as seen from the current user namespace, and
+ * has op_linger and op_linger_tmp set to op_linger.
+ *
+ * returns the new op, or NULL if the slab allocation fails
+ */
+static struct pvfs2_kernel_op *op_alloc_common(int32_t op_linger, int32_t type)
+{
+	struct pvfs2_kernel_op *op;
+
+	op = kmem_cache_alloc(op_cache, PVFS2_CACHE_ALLOC_FLAGS);
+	if (!op) {
+		gossip_err("op_alloc: kmem_cache_alloc failed!\n");
+		return NULL;
+	}
+
+	memset(op, 0, sizeof(*op));
+
+	INIT_LIST_HEAD(&op->list);
+	spin_lock_init(&op->lock);
+	init_waitqueue_head(&op->waitq);
+
+	init_waitqueue_head(&op->io_completion_waitq);
+	atomic_set(&op->aio_ref_count, 0);
+
+	pvfs2_op_initialize(op);
+
+	/*
+	 * initialize the op specific tag and upcall credentials; when the
+	 * counter wraps to 0 it restarts at 100
+	 */
+	spin_lock(&next_tag_value_lock);
+	op->tag = next_tag_value++;
+	if (next_tag_value == 0)
+		next_tag_value = 100;
+	spin_unlock(&next_tag_value_lock);
+
+	op->upcall.type = type;
+	op->attempts = 0;
+	gossip_debug(GOSSIP_CACHE_DEBUG,
+		     "Alloced OP (%p: %llu %s)\n",
+		     op,
+		     llu(op->tag),
+		     get_opname_string(op));
+
+	op->upcall.uid = from_kuid(current_user_ns(), current_fsuid());
+	op->upcall.gid = from_kgid(current_user_ns(), current_fsgid());
+
+	op->op_linger_tmp = op_linger;
+	op->op_linger = op_linger;
+
+	return op;
+}
+
+/*
+ * Allocate an op of the given upcall type with op_linger set to 1.
+ * Returns NULL if the allocation fails.
+ */
+struct pvfs2_kernel_op *op_alloc(int32_t type)
+{
+	return op_alloc_common(1, type);
+}
+
+/*
+ * Allocate an op of the given upcall type with op_linger set to 2.
+ * Returns NULL if the allocation fails.
+ */
+struct pvfs2_kernel_op *op_alloc_trailer(int32_t type)
+{
+	return op_alloc_common(2, type);
+}
+
+/*
+ * Return an op to the op slab cache after running
+ * pvfs2_op_initialize() on it.  A NULL op is logged and ignored.
+ */
+void op_release(struct pvfs2_kernel_op *pvfs2_op)
+{
+	if (!pvfs2_op) {
+		gossip_err("NULL pointer in op_release\n");
+		return;
+	}
+
+	gossip_debug(GOSSIP_CACHE_DEBUG,
+		     "Releasing OP (%p: %llu)\n",
+		     pvfs2_op,
+		     llu(pvfs2_op->tag));
+	pvfs2_op_initialize(pvfs2_op);
+	kmem_cache_free(op_cache, pvfs2_op);
+}
+
+/*
+ * Create the slab cache for device request buffers; each object is
+ * MAX_ALIGNED_DEV_REQ_DOWNSIZE bytes.
+ *
+ * returns 0 on success, -ENOMEM if the cache cannot be created
+ */
+int dev_req_cache_initialize(void)
+{
+	dev_req_cache = kmem_cache_create("pvfs2_devreqcache",
+					  MAX_ALIGNED_DEV_REQ_DOWNSIZE,
+					  0,
+					  PVFS2_CACHE_CREATE_FLAGS,
+					  NULL);
+	if (dev_req_cache)
+		return 0;
+
+	gossip_err("Cannot create pvfs2_dev_req_cache\n");
+	return -ENOMEM;
+}
+
+/* Destroy the device request slab cache.  Always returns 0. */
+int dev_req_cache_finalize(void)
+{
+	kmem_cache_destroy(dev_req_cache);
+	return 0;
+}
+
+/*
+ * Allocate a zero-filled device request buffer of
+ * MAX_ALIGNED_DEV_REQ_DOWNSIZE bytes from dev_req_cache.
+ *
+ * returns the buffer, or NULL if the allocation fails
+ */
+void *dev_req_alloc(void)
+{
+	void *buffer;
+
+	buffer = kmem_cache_alloc(dev_req_cache, PVFS2_CACHE_ALLOC_FLAGS);
+	if (buffer == NULL) {
+		gossip_err("Failed to allocate from dev_req_cache\n");
+		return NULL;
+	}
+
+	/*
+	 * Clear the whole object.  sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE)
+	 * would only be the size of the constant's type, leaving most of
+	 * the buffer uninitialized.
+	 */
+	memset(buffer, 0, MAX_ALIGNED_DEV_REQ_DOWNSIZE);
+	return buffer;
+}
+
+/*
+ * Return a buffer to dev_req_cache.  A NULL buffer is logged and
+ * ignored.
+ */
+void dev_req_release(void *buffer)
+{
+	if (!buffer) {
+		gossip_err("NULL pointer passed to dev_req_release\n");
+		return;
+	}
+	kmem_cache_free(dev_req_cache, buffer);
+}
+
+/*
+ * Create the slab cache for pvfs2_kiocb_s objects.
+ *
+ * returns 0 on success, -ENOMEM if the cache cannot be created
+ */
+int kiocb_cache_initialize(void)
+{
+	pvfs2_kiocb_cache = kmem_cache_create("pvfs2_kiocbcache",
+					      sizeof(struct pvfs2_kiocb_s),
+					      0,
+					      PVFS2_CACHE_CREATE_FLAGS,
+					      NULL);
+	if (pvfs2_kiocb_cache)
+		return 0;
+
+	gossip_err("Cannot create pvfs2_kiocb_cache!\n");
+	return -ENOMEM;
+}
+
+/* Destroy the pvfs2_kiocb_s slab cache.  Always returns 0. */
+int kiocb_cache_finalize(void)
+{
+	kmem_cache_destroy(pvfs2_kiocb_cache);
+	return 0;
+}
+
+/*
+ * Allocate a zero-filled pvfs2_kiocb_s from pvfs2_kiocb_cache.
+ *
+ * returns the object, or NULL if the allocation fails
+ */
+struct pvfs2_kiocb_s *kiocb_alloc(void)
+{
+	struct pvfs2_kiocb_s *kiocb;
+
+	kiocb = kmem_cache_alloc(pvfs2_kiocb_cache, PVFS2_CACHE_ALLOC_FLAGS);
+	if (!kiocb) {
+		gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n");
+		return NULL;
+	}
+
+	memset(kiocb, 0, sizeof(*kiocb));
+	return kiocb;
+}
+
+/*
+ * Return a pvfs2_kiocb_s to pvfs2_kiocb_cache.  A NULL pointer is
+ * logged and ignored.
+ */
+void kiocb_release(struct pvfs2_kiocb_s *x)
+{
+	if (!x) {
+		gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n");
+		return;
+	}
+	kmem_cache_free(pvfs2_kiocb_cache, x);
+}
diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c
new file mode 100644
index 0000000..b59bdf2
--- /dev/null
+++ b/fs/orangefs/pvfs2-mod.c
@@ -0,0 +1,346 @@ 
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add proc file handler for pvfs2 client
+ * parameters, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-proc.h"
+
+/* PVFS2_VERSION is a ./configure define */
+#ifndef PVFS2_VERSION
+#define PVFS2_VERSION "Unknown"
+#endif
+
+#define DEBUG_HELP_STRING_SIZE 4096
+
+/*
+ * global variables declared here
+ */
+
+/* the size of the hash tables for ops in progress */
+int hash_table_size = 509;
+
+/* the insmod command only understands "unsigned long" and NOT
+ * "unsigned long long" as an input parameter.  So, to accommodate
+ * both 32- and 64- bit machines, we will read the debug mask parameter
+ * as an unsigned long (4-bytes on a 32-bit machine and 8-bytes
+ * on a 64-bit machine) and then cast the "unsigned long" to an
+ * "unsigned long long" once we have the value in the kernel.  In this
+ * way, the gossip_debug_mask can remain as a "uint64_t" and the kernel
+ * and client may continue to use the same gossip functions.
+ * NOTE: the kernel debug mask currently does not have more than 32
+ * valid keywords, so only reading a 32-bit integer from the insmod
+ * command line is not a problem.  However, the
+ * /proc/sys/pvfs2/kernel-debug functionality can accommodate up to
+ * 64 keywords, in the event that the kernel debug mask supports more
+ * than 32 keywords.
+ */
+uint32_t module_parm_debug_mask = 0;
+uint64_t gossip_debug_mask = 0;
+unsigned int kernel_mask_set_mod_init = false;
+int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS;
+int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS;
+uint32_t DEBUG_LINE = 50;
+char debug_help_string[DEBUG_HELP_STRING_SIZE] = { 0 };
+
+int fake_mmap_shared = 0;
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("PVFS2 Development Team");
+MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2");
+MODULE_PARM_DESC(debug, "debugging level (see pvfs2-debug.h for values)");
+MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds");
+MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds");
+MODULE_PARM_DESC(hash_table_size,
+		 "size of hash table for operations in progress");
+MODULE_PARM_DESC(fake_mmap_shared,
+		 "perform mmap with MAP_SHARED flag as if called with MAP_PRIVATE");
+
+/*
+ * filesystem type registered under the name "pvfs2" by pvfs2_init() and
+ * unregistered again by pvfs2_exit()
+ */
+static struct file_system_type pvfs2_fs_type = {
+	.name = "pvfs2",
+	.mount = pvfs2_mount,
+	.kill_sb = pvfs2_kill_sb,
+	.owner = THIS_MODULE,
+};
+
+module_param(hash_table_size, int, 0);
+module_param(module_parm_debug_mask, uint, 0);
+module_param(op_timeout_secs, int, 0);
+module_param(slot_timeout_secs, int, 0);
+module_param(fake_mmap_shared, int, 0);
+
+/* synchronizes the request device file */
+struct mutex devreq_mutex;
+
+/*
+  blocks non-priority requests from being queued for servicing.  this
+  could be used for protecting the request list data structure, but
+  for now it's only being used to stall the op addition to the request
+  list
+*/
+struct mutex request_mutex;
+
+/* hash table for storing operations waiting for matching downcall */
+struct list_head *htable_ops_in_progress = NULL;
+DEFINE_SPINLOCK(htable_ops_in_progress_lock);
+
+/* list for queueing upcall operations */
+LIST_HEAD(pvfs2_request_list);
+
+/* used to protect the above pvfs2_request_list */
+DEFINE_SPINLOCK(pvfs2_request_list_lock);
+
+/* used for incoming request notification */
+DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq);
+
+/*
+ * Module entry point.
+ *
+ * Derives the kernel debug mask from the module parameter, fills in
+ * debug_help_string, and then brings up, in order: the backing dev info,
+ * the op / dev_req / inode / kiocb caches, the device subsystem, the
+ * ops-in-progress hash table, the fsid key table, the sysctl entries and
+ * finally the filesystem registration.
+ *
+ * Returns 0 on success.  On failure the steps that already succeeded are
+ * undone in reverse order through the cleanup labels at the bottom and the
+ * error code of the failing step is returned.
+ */
+static int __init pvfs2_init(void)
+{
+	int ret = -1;
+	uint32_t index = 0;
+	char client_title[] = "Client Debug Keywords:\n";
+	char kernel_title[] = "Kernel Debug Keywords:\n";
+	uint32_t i = 0;
+
+	/* convert input debug mask to a 64-bit unsigned integer */
+	gossip_debug_mask = (uint64_t) module_parm_debug_mask;
+
+	/*
+	 * set the kernel's gossip debug string; invalid mask values will
+	 * be ignored.
+	 */
+	PVFS_proc_kmod_mask_to_eventlog(gossip_debug_mask, kernel_debug_string);
+
+	/* remove any invalid values from the mask */
+	gossip_debug_mask =
+	    PVFS_proc_kmod_eventlog_to_mask(kernel_debug_string);
+
+	/*
+	 * if the mask has a non-zero value, then indicate that the mask
+	 * was set when the kernel module was loaded.  The pvfs2 dev ioctl
+	 * command will look at this boolean to determine if the kernel's
+	 * debug mask should be overwritten when the client-core is started.
+	 */
+	if (gossip_debug_mask != 0)
+		kernel_mask_set_mod_init = true;
+
+	/* print information message to the system log */
+	pr_info("pvfs2: pvfs2_init called with debug mask: \"%s\" (0x%08llx)\n",
+	       kernel_debug_string,
+	       gossip_debug_mask);
+
+	/*
+	 * load debug_help_string...this string is used during the
+	 * /proc/sys/pvfs2/debug-help operation
+	 *
+	 * NOTE(review): index is never checked against
+	 * DEBUG_HELP_STRING_SIZE while the string is built; presumably the
+	 * keyword maps are small enough to fit - confirm.
+	 */
+	if (strlen(client_title) < DEBUG_LINE) {
+		/*
+		 * sizeof() also copies the terminating NUL, but index only
+		 * advances by strlen(), so the next write overwrites it
+		 */
+		memcpy(&debug_help_string[index],
+		       client_title,
+		       sizeof(client_title));
+		index += strlen(client_title);
+	}
+
+	/* one "\t<keyword>\n" line per client keyword that fits DEBUG_LINE */
+	for (i = 0; i < num_keyword_mask_map; i++)
+		if ((strlen(s_keyword_mask_map[i].keyword) + 2) < DEBUG_LINE) {
+			debug_help_string[index] = '\t';
+			index++;
+			memcpy(&debug_help_string[index],
+			       s_keyword_mask_map[i].keyword,
+			       strlen(s_keyword_mask_map[i].keyword));
+			index += strlen(s_keyword_mask_map[i].keyword);
+			debug_help_string[index] = '\n';
+			index++;
+		}
+
+	/* blank separator line followed by the kernel keyword title */
+	if ((strlen(kernel_title) + 1) < DEBUG_LINE) {
+		debug_help_string[index] = '\n';
+		index++;
+
+		memcpy(&debug_help_string[index],
+		       kernel_title,
+		       sizeof(kernel_title));
+		index += strlen(kernel_title);
+	}
+
+	/* one "\t<keyword>\n" line per kernel keyword that fits DEBUG_LINE */
+	for (i = 0; i < num_kmod_keyword_mask_map; i++)
+		if ((strlen(s_kmod_keyword_mask_map[i].keyword) + 2) <
+		    DEBUG_LINE) {
+			debug_help_string[index] = '\t';
+			index++;
+			memcpy(&debug_help_string[index],
+			       s_kmod_keyword_mask_map[i].keyword,
+			       strlen(s_kmod_keyword_mask_map[i].keyword));
+			index += strlen(s_kmod_keyword_mask_map[i].keyword);
+			debug_help_string[index] = '\n';
+			index++;
+		}
+
+	ret = bdi_init(&pvfs2_backing_dev_info);
+
+	/* nothing else has been set up yet, so there is nothing to unwind */
+	if (ret)
+		return ret;
+
+	/* negative timeouts from the module parameters are clamped to 0 */
+	if (op_timeout_secs < 0)
+		op_timeout_secs = 0;
+
+	if (slot_timeout_secs < 0)
+		slot_timeout_secs = 0;
+
+	/* initialize global book keeping data structures */
+	ret = op_cache_initialize();
+	if (ret < 0)
+		goto err;
+
+	ret = dev_req_cache_initialize();
+	if (ret < 0)
+		goto cleanup_op;
+
+	ret = pvfs2_inode_cache_initialize();
+	if (ret < 0)
+		goto cleanup_req;
+
+	ret = kiocb_cache_initialize();
+	if (ret  < 0)
+		goto cleanup_inode;
+
+	/* Initialize the pvfsdev subsystem. */
+	ret = pvfs2_dev_init();
+	if (ret < 0) {
+		gossip_err("pvfs2: could not initialize device subsystem %d!\n",
+			   ret);
+		goto cleanup_kiocb;
+	}
+
+	mutex_init(&devreq_mutex);
+	mutex_init(&request_mutex);
+
+	/* one list head per bucket, sized by the hash_table_size parameter */
+	htable_ops_in_progress =
+	    kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
+	if (!htable_ops_in_progress) {
+		gossip_err("Failed to initialize op hashtable");
+		ret = -ENOMEM;
+		goto cleanup_device;
+	}
+
+	/* initialize a doubly linked list at each hash table index */
+	for (i = 0; i < hash_table_size; i++)
+		INIT_LIST_HEAD(&htable_ops_in_progress[i]);
+
+	ret = fsid_key_table_initialize();
+	if (ret < 0)
+		goto cleanup_progress_table;
+
+	pvfs2_proc_initialize();
+	ret = register_filesystem(&pvfs2_fs_type);
+	if (ret == 0) {
+		pr_info("pvfs2: module version %s loaded\n", PVFS2_VERSION);
+		return 0;
+	}
+
+	/*
+	 * registration failed: undo the last two steps here, then fall
+	 * through the labels below to undo everything else
+	 */
+	pvfs2_proc_finalize();
+	fsid_key_table_finalize();
+
+cleanup_progress_table:
+	kfree(htable_ops_in_progress);
+
+cleanup_device:
+	pvfs2_dev_cleanup();
+
+cleanup_kiocb:
+	kiocb_cache_finalize();
+
+cleanup_inode:
+	pvfs2_inode_cache_finalize();
+
+cleanup_req:
+	dev_req_cache_finalize();
+
+cleanup_op:
+	op_cache_finalize();
+
+err:
+	bdi_destroy(&pvfs2_backing_dev_info);
+	return ret;
+}
+
+/*
+ * Module exit point.
+ *
+ * Unregisters the filesystem, removes the sysctl entries, the fsid key
+ * table and the device, releases every op still sitting on the request
+ * list or in the ops-in-progress hash table, and finally destroys the
+ * caches, the hash table itself and the backing dev info.
+ */
+static void __exit pvfs2_exit(void)
+{
+	int i = 0;
+	struct pvfs2_kernel_op *cur_op = NULL;
+
+	gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_exit called\n");
+
+	unregister_filesystem(&pvfs2_fs_type);
+	pvfs2_proc_finalize();
+	fsid_key_table_finalize();
+	pvfs2_dev_cleanup();
+	/* clear out all pending upcall op requests */
+	spin_lock(&pvfs2_request_list_lock);
+	while (!list_empty(&pvfs2_request_list)) {
+		cur_op = list_entry(pvfs2_request_list.next,
+				    struct pvfs2_kernel_op,
+				    list);
+		list_del(&cur_op->list);
+		gossip_debug(GOSSIP_INIT_DEBUG,
+			     "Freeing unhandled upcall request type %d\n",
+			     cur_op->upcall.type);
+		op_release(cur_op);
+	}
+	spin_unlock(&pvfs2_request_list_lock);
+
+	/*
+	 * Drain the ops-in-progress hash table.  Each op has to be unlinked
+	 * before it is released, exactly as for the request list above;
+	 * otherwise the bucket never becomes empty and the loop keeps
+	 * revisiting an op that has already been handed back to the cache.
+	 */
+	for (i = 0; i < hash_table_size; i++)
+		while (!list_empty(&htable_ops_in_progress[i])) {
+			cur_op = list_entry(htable_ops_in_progress[i].next,
+					    struct pvfs2_kernel_op,
+					    list);
+			list_del(&cur_op->list);
+			op_release(cur_op);
+		}
+
+	kiocb_cache_finalize();
+	pvfs2_inode_cache_finalize();
+	dev_req_cache_finalize();
+	op_cache_finalize();
+
+	kfree(htable_ops_in_progress);
+
+	bdi_destroy(&pvfs2_backing_dev_info);
+
+	pr_info("pvfs2: module version %s unloaded\n", PVFS2_VERSION);
+}
+
+/*
+ * Visit every bucket of the ops-in-progress hash table, mark each op found
+ * there as purged (under the op's own lock) and wake up its waitqueue.
+ */
+void purge_inprogress_ops(void)
+{
+	struct pvfs2_kernel_op *cur;
+	struct pvfs2_kernel_op *tmp;
+	int bucket;
+
+	for (bucket = 0; bucket < hash_table_size; bucket++) {
+		struct list_head *head = &htable_ops_in_progress[bucket];
+
+		list_for_each_entry_safe(cur, tmp, head, list) {
+			spin_lock(&cur->lock);
+			gossip_debug(GOSSIP_INIT_DEBUG,
+				"pvfs2-client-core: purging in-progress op tag %llu %s\n",
+				llu(cur->tag),
+				get_opname_string(cur));
+			set_op_state_purged(cur);
+			spin_unlock(&cur->lock);
+			wake_up_interruptible(&cur->waitq);
+		}
+	}
+}
+
+module_init(pvfs2_init);
+module_exit(pvfs2_exit);
diff --git a/fs/orangefs/pvfs2-proc.c b/fs/orangefs/pvfs2-proc.c
new file mode 100644
index 0000000..789f1606
--- /dev/null
+++ b/fs/orangefs/pvfs2-proc.c
@@ -0,0 +1,698 @@ 
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add proc file handler for pvfs2 client
+ * parameters, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include "pvfs2-proc.h"
+
+/* PVFS2_VERSION is set in ./configure */
+#ifndef PVFS2_VERSION
+#define PVFS2_VERSION "Unknown"
+#endif
+
+/*
+ * CONFIG_SYSCTL is set in .config - most of this source file is inside
+ * this ifdef...
+ */
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+
+#define KERNEL_DEBUG "kernel-debug"
+#define CLIENT_DEBUG "client-debug"
+#define DEBUG_HELP "debug-help"
+
+/*
+ * these strings will be initialized by invoking the PVFS_DEV_DEBUG ioctl
+ * command when the client-core is started.  otherwise, these variables are
+ * only set via the proc sys calls.
+ */
+char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none";
+char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none";
+extern char debug_help_string[];
+
+/*
+ * extra parameters provided to pvfs2 param proc handlers; the tables in
+ * this file store a pointer to one of these in ctl_table.extra1 and
+ * pvfs2_param_proc_handler() reads it back from there
+ */
+struct pvfs2_param_extra {
+	int op;			/* parameter type */
+	int min;		/* minimum value */
+	int max;		/* maximum value */
+};
+
+/*
+ * pvfs2_proc_debug_mask_handler()
+ * proc file handler that will take a debug string and convert it
+ * into the proper debug value and then send a request to update the
+ * debug mask if client or update the local debug mask if kernel.
+ *
+ * The string itself is read or stored by proc_dostring(); a non-zero
+ * result from that call is returned unchanged.  -ENOMEM is returned when
+ * no op can be allocated for the client request.  Every other path
+ * returns 0.
+ */
+static int pvfs2_proc_debug_mask_handler(struct ctl_table *ctl,
+					 int write,
+					 void *buffer,
+					 size_t *lenp,
+					 loff_t *ppos)
+{
+	int ret = 0;
+	struct pvfs2_kernel_op *new_op = NULL;
+
+	gossip_debug(GOSSIP_PROC_DEBUG,
+		     "Executing pvfs2_proc_debug_mask_handler...\n");
+
+	/* use generic proc string handling function to retrieve/set string. */
+	ret = proc_dostring(ctl, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	gossip_debug(GOSSIP_PROC_DEBUG,
+		     "%s: debug string: %s\n",
+		     "pvfs2_proc_debug_mask_handler",
+		     (char *)ctl->data);
+
+	/*
+	 * For a user write, ctl->data will now contain the new debug
+	 * string as given by the user.  For a user read, the user's "buffer"
+	 * will now contain the string stored in ctl->data.
+	 */
+
+	/*
+	 * For a write, we must convert the debug string into the proper
+	 * debug mask. The conversion will ignore any invalid keywords sent
+	 * in by the user, so we re-convert the debug mask back into the
+	 * correct debug string.
+	 */
+	if (write && !strcmp(ctl->procname, KERNEL_DEBUG)) {
+		gossip_debug_mask =
+		    PVFS_proc_kmod_eventlog_to_mask((const char *)ctl->data);
+		ret = PVFS_proc_kmod_mask_to_eventlog(gossip_debug_mask,
+						      (char *)ctl->data);
+
+		gossip_debug(GOSSIP_PROC_DEBUG,
+			     "pvfs2_proc_debug_mask_handler: kernel debug mask:"
+			     " %lu\n",
+			     (unsigned long)gossip_debug_mask);
+		gossip_debug(GOSSIP_PROC_DEBUG,
+			     "New kernel debug string is %s.\n",
+			     kernel_debug_string);
+		pr_info("PVFS: kernel debug mask has been modified to \"%s\" (0x%08llx).\n",
+		       kernel_debug_string,
+		       llu(gossip_debug_mask));
+	} else if (write && !strcmp(ctl->procname, CLIENT_DEBUG)) {
+		/* forward the new string to the client-core in a param op */
+		new_op = op_alloc(PVFS2_VFS_OP_PARAM);
+		if (!new_op)
+			return -ENOMEM;
+		strcpy(new_op->upcall.req.param.s_value, ctl->data);
+		new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET;
+		new_op->upcall.req.param.op =
+		    PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG;
+
+		ret =
+		    service_operation(new_op,
+				      "pvfs2_param",
+				      PVFS2_OP_INTERRUPTIBLE);
+
+		if (ret == 0) {
+			gossip_debug(GOSSIP_PROC_DEBUG,
+				     "Downcall:\treturn status:%d"
+				     "\treturn value:%x\n",
+				     (int)new_op->downcall.status,
+				     (int)new_op->downcall.resp.param.value);
+
+			/* rebuild the string from the mask the client accepted */
+			ret =
+			    PVFS_proc_mask_to_eventlog(new_op->downcall.resp.
+						       param.value,
+						       client_debug_string);
+			gossip_debug(GOSSIP_PROC_DEBUG,
+				     "New client debug string is %s\n",
+				     client_debug_string);
+		}
+
+		/*
+		 * Log before releasing the op: the message reads the
+		 * downcall that lives inside new_op, so releasing first
+		 * would make this a use-after-free.
+		 */
+		pr_info("PVFS: client debug mask has been modified to \"%s\" (0x%08llx).\n",
+		       client_debug_string,
+		       llu(new_op->downcall.resp.param.value));
+		op_release(new_op);
+	} else if (write && !strcmp(ctl->procname, DEBUG_HELP)) {
+		/*do nothing...the user can only READ the debug help */
+		return 0;
+	}
+
+	return 0;
+}
+
+/*
+ * pvfs2_param_proc_handler()
+ *
+ * Shared proc handler for the integer pvfs2-client tunables.  ctl->extra1
+ * points at a struct pvfs2_param_extra naming the client parameter and its
+ * allowed range.  A write parses the user's value and sends it to the
+ * client as a SET request; a read sends a GET request and formats the
+ * value from the downcall for the user.
+ *
+ * Returns -ENOMEM when no op can be allocated, otherwise the result of the
+ * last proc_dointvec_minmax() or service_operation() call that ran.
+ */
+static int pvfs2_param_proc_handler(struct ctl_table *ctl,
+				    int write,
+				    void *buffer,
+				    size_t *lenp,
+				    loff_t *ppos)
+{
+	struct pvfs2_param_extra *limits = ctl->extra1;
+	struct ctl_table int_ctl = *ctl;
+	struct pvfs2_kernel_op *param_op;
+	int value = 0;
+	int rc;
+
+	/*
+	 * the generic integer handler works on a private copy of the table
+	 * entry that points at a local value and at this tunable's range
+	 */
+	int_ctl.data = &value;
+	int_ctl.extra1 = &limits->min;
+	int_ctl.extra2 = &limits->max;
+
+	/* the op carries the request up to pvfs2-client */
+	param_op = op_alloc(PVFS2_VFS_OP_PARAM);
+	if (!param_op)
+		return -ENOMEM;
+
+	if (!write) {
+		/* fetch the value from the client; it is printed below */
+		param_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_GET;
+	} else {
+		/* parse the user's input into value before sending it */
+		rc = proc_dointvec_minmax(&int_ctl, write, buffer, lenp, ppos);
+		if (rc != 0)
+			goto out;
+		gossip_debug(GOSSIP_PROC_DEBUG, "pvfs2: proc write %d\n", value);
+		param_op->upcall.req.param.value = value;
+		param_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET;
+	}
+
+	param_op->upcall.req.param.op = limits->op;
+
+	/* run the get or set request */
+	rc = service_operation(param_op, "pvfs2_param", PVFS2_OP_INTERRUPTIBLE);
+
+	if (rc == 0 && !write) {
+		/* hand the returned value to the user via the generic code */
+		value = (int)param_op->downcall.resp.param.value;
+		gossip_debug(GOSSIP_PROC_DEBUG, "pvfs2: proc read %d\n", value);
+		rc = proc_dointvec_minmax(&int_ctl, write, buffer, lenp, ppos);
+	}
+
+out:
+	op_release(param_op);
+	return rc;
+}
+
+/*
+ * pvfs2_pc_proc_handler()
+ *
+ * Read-only proc handler for the performance counter files.  ctl->extra1
+ * points at the counter type to request from pvfs2-client; the text that
+ * comes back in the downcall is copied to the user starting at *ppos.
+ *
+ * Returns -EPERM for writes, -ENOMEM when no op can be allocated, the
+ * service_operation() error when the request fails, -EFAULT when the copy
+ * to the user fails, the number of bytes copied on success, and 0 when
+ * *ppos leaves nothing to copy.
+ */
+static int pvfs2_pc_proc_handler(struct ctl_table *ctl,
+				 int write,
+				 void *buffer,
+				 size_t *lenp,
+				 loff_t *ppos)
+{
+	struct pvfs2_kernel_op *new_op = NULL;
+	int ret;
+	int *pc_type = ctl->extra1;
+	const char *perf_buf;
+	size_t perf_len;
+	size_t to_copy = 0;
+
+	if (write) {
+		/* don't allow writes to this file */
+		*lenp = 0;
+		return -EPERM;
+	}
+
+	/* build an op structure to send request to pvfs2-client */
+	new_op = op_alloc(PVFS2_VFS_OP_PERF_COUNT);
+
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.perf_count.type = *pc_type;
+
+	/* retrieve performance counters */
+	ret = service_operation(new_op,
+				"pvfs2_perf_count",
+				PVFS2_OP_INTERRUPTIBLE);
+	if (ret != 0)
+		goto out;
+
+	perf_buf = new_op->downcall.resp.perf_count.buffer;
+	perf_len = strlen(perf_buf);
+
+	/*
+	 * Only an offset inside the string leaves anything to copy.  The
+	 * offset is compared as a loff_t before any subtraction so that a
+	 * large or negative *ppos cannot be truncated into a small positive
+	 * length and send the copy outside the buffer.
+	 */
+	if (*ppos >= 0 && *ppos < (loff_t)perf_len) {
+		to_copy = perf_len - (size_t)*ppos;
+		if (to_copy > *lenp)
+			to_copy = *lenp;
+	}
+
+	if (!to_copy) {
+		/* nothing left to hand out; ret is already 0 here */
+		*lenp = 0;
+		goto out;
+	}
+
+	/* copy correct portion of the string buffer */
+	if (copy_to_user(buffer, perf_buf + *ppos, to_copy)) {
+		ret = -EFAULT;
+	} else {
+		/* update offsets etc. if successful */
+		*lenp = to_copy;
+		*ppos += to_copy;
+		ret = (int)to_copy;
+	}
+
+out:
+	op_release(new_op);
+
+	return ret;
+}
+
+static struct ctl_table_header *fs_table_header;
+
+/*
+ * Per-tunable descriptors referenced from ctl_table.extra1 and consumed by
+ * pvfs2_param_proc_handler(): .op is the client parameter placed in the
+ * upcall, and .min/.max are handed to proc_dointvec_minmax() as the
+ * allowed range.
+ */
+static struct pvfs2_param_extra acache_timeout_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra acache_hard_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra acache_soft_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra acache_rec_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE,
+	.min = 0,
+	.max = 100,
+};
+
+static struct pvfs2_param_extra static_acache_timeout_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra static_acache_hard_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra static_acache_soft_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra static_acache_rec_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE,
+	.min = 0,
+	.max = 100,
+};
+
+static struct pvfs2_param_extra ncache_timeout_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra ncache_hard_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra ncache_soft_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra ncache_rec_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE,
+	.min = 0,
+	.max = 100,
+};
+
+static struct pvfs2_param_extra perf_time_interval_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS,
+	.min = 0,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra perf_history_size_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE,
+	.min = 1,
+	.max = INT_MAX,
+};
+
+static struct pvfs2_param_extra perf_reset_extra = {
+	.op = PVFS2_PARAM_REQUEST_OP_PERF_RESET,
+	.min = 0,
+	.max = 1,
+};
+
+/*
+ * range limits for the op-timeout-secs and slot-timeout-secs entries,
+ * which use proc_dointvec_minmax directly as their handler
+ */
+static int min_op_timeout_secs[] = { 0 };
+static int max_op_timeout_secs[] = { INT_MAX };
+static int min_slot_timeout_secs[] = { 0 };
+static int max_slot_timeout_secs[] = { INT_MAX };
+
+/*
+ * UNNUMBERED_OR_VAL() passes its argument through unchanged; CTL_NAME()
+ * and CTL_STRATEGY() expand to nothing.
+ */
+#define UNNUMBERED_OR_VAL(x) x
+
+#define CTL_NAME(c_name)
+
+#define CTL_STRATEGY(strat)
+
+/* entries of the "acache" sysctl subdirectory */
+static struct ctl_table pvfs2_acache_table[] = {
+	{
+		/* acache timeout */
+		.procname	= "timeout-msecs",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &acache_timeout_extra,
+	},
+	{
+		/* acache hard limit */
+		.procname	= "hard-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &acache_hard_extra,
+	},
+	{
+		/* acache soft limit */
+		.procname	= "soft-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &acache_soft_extra,
+	},
+	{
+		/* acache reclaim percentage */
+		.procname	= "reclaim-percentage",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &acache_rec_extra,
+	},
+	{ }
+};
+
+/* entries of the "static-acache" sysctl subdirectory */
+static struct ctl_table pvfs2_static_acache_table[] = {
+	{
+		/* static acache timeout */
+		.procname	= "timeout-msecs",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &static_acache_timeout_extra,
+	},
+	{
+		/* static acache hard limit */
+		.procname	= "hard-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &static_acache_hard_extra,
+	},
+	{
+		/* static acache soft limit */
+		.procname	= "soft-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &static_acache_soft_extra,
+	},
+	{
+		/* static acache reclaim percentage */
+		.procname	= "reclaim-percentage",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &static_acache_rec_extra,
+	},
+	{ }
+};
+
+/* entries of the "ncache" sysctl subdirectory */
+static struct ctl_table pvfs2_ncache_table[] = {
+	{
+		/* ncache timeout */
+		.procname	= "timeout-msecs",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &ncache_timeout_extra,
+	},
+	{
+		/* ncache hard limit */
+		.procname	= "hard-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &ncache_hard_extra,
+	},
+	{
+		/* ncache soft limit */
+		.procname	= "soft-limit",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &ncache_soft_extra,
+	},
+	{
+		/* ncache reclaim percentage */
+		.procname	= "reclaim-percentage",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &ncache_rec_extra,
+	},
+	{ }
+};
+
+/* counter types passed to pvfs2_pc_proc_handler() through extra1 */
+static int acache_perf_count = PVFS2_PERF_COUNT_REQUEST_ACACHE;
+static int static_acache_perf_count = PVFS2_PERF_COUNT_REQUEST_STATIC_ACACHE;
+static int ncache_perf_count = PVFS2_PERF_COUNT_REQUEST_NCACHE;
+
+/* entries of the "perf-counters" sysctl subdirectory; all are read-only */
+static struct ctl_table pvfs2_pc_table[] = {
+	{
+		.procname	= "acache",
+		.maxlen		= 4096,
+		.mode		= 0444,
+		.proc_handler	= pvfs2_pc_proc_handler,
+		.extra1		= &acache_perf_count,
+	},
+	{
+		.procname	= "static-acache",
+		.maxlen		= 4096,
+		.mode		= 0444,
+		.proc_handler	= pvfs2_pc_proc_handler,
+		.extra1		= &static_acache_perf_count,
+	},
+	{
+		.procname	= "ncache",
+		.maxlen		= 4096,
+		.mode		= 0444,
+		.proc_handler	= pvfs2_pc_proc_handler,
+		.extra1		= &ncache_perf_count,
+	},
+	{ }
+};
+
+/* counters exported read-only through the "stats" sysctl subdirectory */
+struct pvfs2_stats g_pvfs2_stats;
+
+/*
+ * NOTE(review): every entry pairs maxlen = sizeof(unsigned long) with
+ * proc_dointvec, which works in units of int.  If the pvfs2_stats fields
+ * really are unsigned long, proc_doulongvec_minmax is presumably the
+ * intended handler - confirm against the struct definition.
+ */
+static struct ctl_table pvfs2_stats_table[] = {
+	{
+		/* number of hits in cache */
+		.procname	= "hits",
+		.data		= &g_pvfs2_stats.cache_hits,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "misses",
+		.data		= &g_pvfs2_stats.cache_misses,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "reads",
+		.data		= &g_pvfs2_stats.reads,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "writes",
+		.data		= &g_pvfs2_stats.writes,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{ }
+};
+
+/* entries and subdirectories of the "pvfs2" sysctl directory */
+static struct ctl_table pvfs2_table[] = {
+	{
+		/*
+		 * available debugging keywords (read-only)
+		 *
+		 * NOTE(review): maxlen is PVFS2_MAX_DEBUG_STRING_LEN while
+		 * the string is only declared here as an unsized extern
+		 * array - confirm the two sizes agree.
+		 */
+		.procname	= DEBUG_HELP,
+		.data		= &debug_help_string,
+		.maxlen		= PVFS2_MAX_DEBUG_STRING_LEN,
+		.mode		= 0444,
+		.proc_handler	= pvfs2_proc_debug_mask_handler,
+	},
+	{
+		/* client-core debugging level */
+		.procname	= CLIENT_DEBUG,
+		.data		= &client_debug_string,
+		.maxlen		= PVFS2_MAX_DEBUG_STRING_LEN,
+		.mode		= 0644,
+		.proc_handler	= pvfs2_proc_debug_mask_handler,
+	},
+	{
+		/* kernel debugging level, given as a string */
+		.procname	= KERNEL_DEBUG,
+		.data		= &kernel_debug_string,
+		.maxlen		= PVFS2_MAX_DEBUG_STRING_LEN,
+		.mode		= 0644,
+		.proc_handler	= pvfs2_proc_debug_mask_handler,
+	},
+	{
+		/* operation timeout */
+		.procname	= "op-timeout-secs",
+		.data		= &op_timeout_secs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &min_op_timeout_secs,
+		.extra2		= &max_op_timeout_secs,
+	},
+	{
+		/* slot timeout */
+		.procname	= "slot-timeout-secs",
+		.data		= &slot_timeout_secs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &min_slot_timeout_secs,
+		.extra2		= &max_slot_timeout_secs,
+	},
+	{
+		/* time interval for client side performance counters */
+		.procname	= "perf-time-interval-secs",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &perf_time_interval_extra,
+	},
+	{
+		/* history size for client side performance counters */
+		.procname	= "perf-history-size",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &perf_history_size_extra,
+	},
+	{
+		/* reset performance counters */
+		.procname	= "perf-counter-reset",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= pvfs2_param_proc_handler,
+		.extra1		= &perf_reset_extra,
+	},
+	{
+		/* subdir for acache control */
+		.procname	= "acache",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= pvfs2_acache_table,
+	},
+	{
+		/* subdir for static acache control */
+		.procname	= "static-acache",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= pvfs2_static_acache_table,
+	},
+	{
+		/* subdir for the performance counter files */
+		.procname	= "perf-counters",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= pvfs2_pc_table,
+	},
+	{
+		/* subdir for ncache control */
+		.procname	= "ncache",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= pvfs2_ncache_table,
+	},
+	{
+		/*
+		 * statistics maintained by the kernel module (output
+		 * only below this)
+		 */
+		.procname	= "stats",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= pvfs2_stats_table,
+	},
+	{ }
+};
+
+/* root of the tree handed to register_sysctl_table(): one "pvfs2" dir */
+static struct ctl_table fs_table[] = {
+	{
+		.procname	= "pvfs2",
+		.mode		= 0555,
+		.child		= pvfs2_table,
+	},
+	{ }
+};
+#endif
+
+/*
+ * Register the sysctl tree rooted at fs_table, unless it is already
+ * registered.  Does nothing when CONFIG_SYSCTL (set in .config) is not
+ * defined.
+ */
+void pvfs2_proc_initialize(void)
+{
+#ifdef CONFIG_SYSCTL
+	/* a non-NULL header means the table is already registered */
+	if (fs_table_header)
+		return;
+
+	fs_table_header = register_sysctl_table(fs_table);
+#endif
+}
+
+/*
+ * Unregister the sysctl tree if it is currently registered and forget its
+ * header.  Does nothing when CONFIG_SYSCTL (set in .config) is not
+ * defined.
+ */
+void pvfs2_proc_finalize(void)
+{
+#ifdef CONFIG_SYSCTL
+	if (!fs_table_header)
+		return;
+
+	unregister_sysctl_table(fs_table_header);
+	fs_table_header = NULL;
+#endif
+}
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
new file mode 100644
index 0000000..569e8f0
--- /dev/null
+++ b/fs/orangefs/waitqueue.c
@@ -0,0 +1,522 @@ 
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ * (C) 2011 Omnibond Systems
+ *
+ * Changes by Acxiom Corporation to implement generic service_operation()
+ * function, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  In-kernel waitqueue operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * What we do in this function is to walk the list of operations that are
+ * present in the request queue and mark them as purged.
+ * NOTE: This is called from the device close after client-core has
+ * guaranteed that no new operations could appear on the list since the
+ * client-core is anyway going to exit.
+ */
+void purge_waiting_ops(void)
+{
+	struct pvfs2_kernel_op *pending;
+
+	spin_lock(&pvfs2_request_list_lock);
+	list_for_each_entry(pending, &pvfs2_request_list, list) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "pvfs2-client-core: purging op tag %llu %s\n",
+			     llu(pending->tag), get_opname_string(pending));
+		/* mark it purged under its own lock, then wake its waitq */
+		spin_lock(&pending->lock);
+		set_op_state_purged(pending);
+		spin_unlock(&pending->lock);
+		wake_up_interruptible(&pending->waitq);
+	}
+	spin_unlock(&pvfs2_request_list_lock);
+}
+
+/*
+ * Queue @op for the client-core and, unless PVFS2_OP_ASYNC is set in @flags
+ * (then 0 is returned right after queueing), wait for its downcall.  @op_name
+ * is only used in log messages; the other @flags bits select signal masking,
+ * request_mutex use, priority queueing and the cancellation-style wait.
+ * Returns op->downcall.status in errno format, whether it came from
+ * pvfs2-client or from a failure to service the op; check the op state to
+ * tell which.  Exception: if the bufmap is still not initialized after the
+ * wait below, -EIO is returned while op->downcall.status stays -EAGAIN.
+ */
+int service_operation(struct pvfs2_kernel_op *op,
+		      const char *op_name,
+		      int flags)
+{
+	/* saved by mask_blocked_signals() for unmask_blocked_signals() */
+	sigset_t orig_sigset;
+	int ret = 0;
+
+	/* irqflags and wait_entry are only used on the bufmap wait path below */
+	unsigned long irqflags;
+
+	DECLARE_WAITQUEUE(wait_entry, current);
+
+	op->upcall.tgid = current->tgid;
+	op->upcall.pid = current->pid;
+
+retry_servicing:
+	op->downcall.status = 0;
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "pvfs2: service_operation: %s %p\n",
+		     op_name,
+		     op);
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "pvfs2: operation posted by process: %s, pid: %i\n",
+		     current->comm,
+		     current->pid);
+
+	/* (re)mask signals on every pass; each pass unmasks before retrying */
+	if (!(flags & PVFS2_OP_INTERRUPTIBLE))
+		mask_blocked_signals(&orig_sigset);
+
+	if (!(flags & PVFS2_OP_NO_SEMAPHORE)) {
+		ret = mutex_lock_interruptible(&request_mutex);
+		/*
+		 * the lock attempt failed (ret < 0): restore signals, record
+		 * the error in the downcall status and give up
+		 */
+		if (ret < 0) {
+			if (!(flags & PVFS2_OP_INTERRUPTIBLE))
+				unmask_blocked_signals(&orig_sigset);
+			op->downcall.status = ret;
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "pvfs2: service_operation interrupted.\n");
+			return ret;
+		}
+	}
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s:About to call is_daemon_in_service().\n",
+		     __func__);
+
+	if (is_daemon_in_service() < 0) {
+		/*
+		 * A non-zero attempt count makes wait_for_matching_downcall()
+		 * use schedule_timeout() instead of an unbounded schedule(),
+		 * so the wait is bounded while client core is not in service.
+		 */
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:client core is NOT in service(%d).\n",
+			     __func__,
+			     is_daemon_in_service());
+		op->attempts++;
+	}
+
+	/* queue the op; PVFS2_OP_PRIORITY selects the priority helper */
+	if (flags & PVFS2_OP_PRIORITY) {
+		add_priority_op_to_request_list(op);
+	} else {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:About to call add_op_to_request_list().\n",
+			     __func__);
+		add_op_to_request_list(op);
+	}
+
+	if (!(flags & PVFS2_OP_NO_SEMAPHORE))
+		mutex_unlock(&request_mutex);
+
+	/*
+	 * Async callers do not wait for the downcall.  NOTE(review): signals
+	 * masked above are not unmasked on this return -- confirm intended.
+	 */
+	if (flags & PVFS2_OP_ASYNC)
+		return 0;
+
+	if (flags & PVFS2_OP_CANCELLATION) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:"
+			     "About to call wait_for_cancellation_downcall.\n",
+			     __func__);
+		ret = wait_for_cancellation_downcall(op);
+	} else {
+		ret = wait_for_matching_downcall(op);
+	}
+
+	if (ret < 0) {
+		/* the wait failed: its error becomes the op status */
+		if (ret == -ETIMEDOUT) {
+			gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n",
+				   op_name);
+		}
+		op->downcall.status = ret;
+	} else {
+		/* got matching downcall; make sure status is in errno format */
+		op->downcall.status =
+		    pvfs2_normalize_to_errno(op->downcall.status);
+		ret = op->downcall.status;
+	}
+
+	if (!(flags & PVFS2_OP_INTERRUPTIBLE))
+		unmask_blocked_signals(&orig_sigset);
+
+	BUG_ON(ret != op->downcall.status);
+	/* retry only if the op is unserviced and its status is -EAGAIN */
+	if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "pvfs2: tag %llu (%s)"
+			     " -- operation to be retried (%d attempt)\n",
+			     llu(op->tag),
+			     op_name,
+			     op->attempts + 1);
+
+		if (!op->uses_shared_memory)
+			/*
+			 * this operation doesn't use the shared memory
+			 * system, so it is simply queued again from the top
+			 */
+			goto retry_servicing;
+
+		/* shared memory op: if bufmap is up, fall to the final return */
+		if (get_bufmap_init() == 0) {
+			/*
+			 * This operation uses the shared memory system AND
+			 * the system is not yet ready. This situation occurs
+			 * when the client-core is restarted AND there were
+			 * operations waiting to be processed or were already
+			 * in process.
+			 */
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "uses_shared_memory is true.\n");
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "Client core in-service status(%d).\n",
+				     is_daemon_in_service());
+			gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
+				     get_bufmap_init());
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "operation's status is 0x%0x.\n",
+				     op->op_state);
+
+			/*
+			 * Sleep on pvfs2_bufmap_init_waitq for at most
+			 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS seconds.
+			 */
+			spin_lock_irqsave(&op->lock, irqflags);
+			add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry);
+			spin_unlock_irqrestore(&op->lock, irqflags);
+
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			/*
+			 * Wait for pvfs_bufmap_initialize() to wake me up;
+			 * ret is only logged, readiness is re-checked below.
+			 */
+			ret = schedule_timeout(MSECS_TO_JIFFIES
+				(1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS));
+
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "Value returned from schedule_timeout:"
+				     "%d.\n",
+				     ret);
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "Is shared memory available? (%d).\n",
+				     get_bufmap_init());
+
+			spin_lock_irqsave(&op->lock, irqflags);
+			remove_wait_queue(&pvfs2_bufmap_init_waitq,
+					  &wait_entry);
+			spin_unlock_irqrestore(&op->lock, irqflags);
+
+			if (get_bufmap_init() == 0) {
+				gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
+					   __func__,
+					   PVFS2_BUFMAP_WAIT_TIMEOUT_SECS,
+					   get_opname_string(op));
+				return -EIO;
+			}
+
+			/*
+			 * Return to the calling function and re-populate a
+			 * shared memory buffer.
+			 */
+			return -EAGAIN;
+		}
+	}
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "pvfs2: service_operation %s returning: %d for %p.\n",
+		     op_name,
+		     ret,
+		     op);
+	return ret;
+}
+
+/*
+ * Detach an op whose wait ended early (signal, timeout or purge) from the
+ * list it is on.  The state is sampled under op->lock, and op->lock is
+ * released on every path before any list is touched, so an op lock is
+ * never held together with a list lock here.
+ */
+void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op *op)
+{
+	if (!op) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s: op is null, ignoring\n", __func__);
+		return;
+	}
+
+	/*
+	 * one more sanity check, make sure it's in one of the possible states
+	 * or don't try to cancel it
+	 */
+	if (!(op_state_waiting(op) ||
+	      op_state_in_progress(op) ||
+	      op_state_serviced(op) ||
+	      op_state_purged(op))) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s: op %p not in a valid state (%0x), "
+			     "ignoring\n",
+			     __func__, op, op->op_state);
+		return;
+	}
+
+	spin_lock(&op->lock);
+
+	if (op_state_waiting(op)) {
+		/*
+		 * upcall hasn't been read; remove op from upcall request
+		 * list.
+		 */
+		spin_unlock(&op->lock);
+		remove_op_from_request_list(op);
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "Interrupted: Removed op %p from request_list\n",
+			     op);
+	} else if (op_state_in_progress(op)) {
+		/* op must be removed from the in progress htable */
+		spin_unlock(&op->lock);
+		spin_lock(&htable_ops_in_progress_lock);
+		list_del(&op->list);
+		spin_unlock(&htable_ops_in_progress_lock);
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "Interrupted: Removed op %p"
+			     " from htable_ops_in_progress\n",
+			     op);
+	} else if (!op_state_serviced(op)) {
+		spin_unlock(&op->lock);
+		gossip_err("interrupted operation is in a weird state 0x%x\n",
+			   op->op_state);
+	} else {
+		/*
+		 * Already serviced: no list removal is attempted for this
+		 * state, but op->lock must still be released.
+		 */
+		spin_unlock(&op->lock);
+	}
+}
+
+/*
+ * sleeps on waitqueue waiting for matching downcall.
+ * if client-core finishes servicing, then we are good to go.
+ * else if client-core exits, we get woken up here, and retry with a timeout
+ *
+ * Postcondition: when this call returns to the caller, the specified op
+ * will no longer be on any list or htable.
+ *
+ * Returns 0 on success and -errno on failure
+ * Errors are:
+ * EAGAIN in case we want the caller to requeue and try again.
+ * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
+ * operation since client-core seems to be exiting too often
+ * or if we were interrupted.
+ */
+int wait_for_matching_downcall(struct pvfs2_kernel_op *op)
+{
+	int ret = -EINVAL;
+	DECLARE_WAITQUEUE(wait_entry, current);
+
+	/* get on the op's wait queue before the first state check */
+	spin_lock(&op->lock);
+	add_wait_queue(&op->waitq, &wait_entry);
+	spin_unlock(&op->lock);
+
+	for (;;) {
+		int first_try;
+
+		/* set the sleep state before checking why we might not sleep */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* done as soon as the op has been serviced */
+		spin_lock(&op->lock);
+		if (op_state_serviced(op)) {
+			spin_unlock(&op->lock);
+			ret = 0;
+			break;
+		}
+		spin_unlock(&op->lock);
+
+		/* a pending signal ends the wait with -EINTR */
+		if (signal_pending(current)) {
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "*** %s:"
+				     " operation interrupted by a signal (tag "
+				     "%llu, op %p)\n",
+				     __func__, llu(op->tag), op);
+			pvfs2_clean_up_interrupted_operation(op);
+			ret = -EINTR;
+			break;
+		}
+
+		/*
+		 * An op on its first attempt that client-core has not
+		 * purged may sleep without a deadline; every other wait
+		 * is bounded by op_timeout_secs.
+		 */
+		spin_lock(&op->lock);
+		first_try = (op->attempts == 0 && !op_state_purged(op));
+		spin_unlock(&op->lock);
+
+		if (first_try) {
+			schedule();
+		} else if (!schedule_timeout(MSECS_TO_JIFFIES
+					     (1000 * op_timeout_secs))) {
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "*** %s:"
+				     " operation timed out (tag"
+				     " %llu, %p, att %d)\n",
+				     __func__, llu(op->tag),
+				     op, op->attempts);
+			ret = -ETIMEDOUT;
+			pvfs2_clean_up_interrupted_operation(op);
+			break;
+		}
+
+		/*
+		 * Every return from a sleep counts as an attempt, so after
+		 * the first wake-up all further waits are the bounded kind.
+		 */
+		spin_lock(&op->lock);
+		op->attempts++;
+		if (!op_state_purged(op)) {
+			spin_unlock(&op->lock);
+			continue;
+		}
+
+		/*
+		 * The op was purged in the meantime.  Ask the caller to
+		 * requeue it (-EAGAIN) while fewer than
+		 * PVFS2_PURGE_RETRY_COUNT attempts have been made, and fail
+		 * with -EIO after that, so a client-core that keeps crashing
+		 * cannot make us requeue the same op forever.
+		 */
+		if (op->attempts < PVFS2_PURGE_RETRY_COUNT)
+			ret = -EAGAIN;
+		else
+			ret = -EIO;
+		spin_unlock(&op->lock);
+
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "*** %s:"
+			     " operation purged (tag "
+			     "%llu, %p, att %d)\n",
+			     __func__, llu(op->tag), op, op->attempts);
+		pvfs2_clean_up_interrupted_operation(op);
+		break;
+	}
+
+	/* common exit: back to RUNNING and off the op's wait queue */
+	set_current_state(TASK_RUNNING);
+
+	spin_lock(&op->lock);
+	remove_wait_queue(&op->waitq, &wait_entry);
+	spin_unlock(&op->lock);
+
+	return ret;
+}
+
+/*
+ * similar to wait_for_matching_downcall(), but used in the special case
+ * of I/O cancellations.
+ *
+ * Note we need a special wait function because if this is called we already
+ *      know that a signal is pending in current and need to service the
+ *      cancellation upcall anyway.  the only way to exit this is to either
+ *      timeout or have the cancellation be serviced properly.
+ */
+int wait_for_cancellation_downcall(struct pvfs2_kernel_op *op)
+{
+	int ret = -EINVAL;
+	DECLARE_WAITQUEUE(wait_entry, current);
+
+	spin_lock(&op->lock);
+	add_wait_queue(&op->waitq, &wait_entry);
+	spin_unlock(&op->lock);
+
+	/*
+	 * This is a single pass, not a polling loop: whichever way the two
+	 * checks and the one sleep below turn out, we leave through "out".
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	spin_lock(&op->lock);
+	if (op_state_serviced(op)) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:op-state is SERVICED.\n", __func__);
+		spin_unlock(&op->lock);
+		ret = 0;
+		goto out;
+	}
+	spin_unlock(&op->lock);
+
+	/* a signal that is (still) pending ends the wait with -EINTR */
+	if (signal_pending(current)) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:operation interrupted by a signal (tag"
+			     " %llu, op %p)\n",
+			     __func__, llu(op->tag), op);
+		pvfs2_clean_up_interrupted_operation(op);
+		ret = -EINTR;
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s:About to call schedule_timeout.\n", __func__);
+	ret = schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s:Value returned from schedule_timeout(%d).\n",
+		     __func__, ret);
+
+	/*
+	 * Only a zero return from the sleep cleans the op up; any other
+	 * return is merely logged.  Either way the result is -ETIMEDOUT
+	 * and the op state is not examined again.
+	 */
+	if (ret == 0) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:*** operation timed out: %p\n",
+			     __func__, op);
+		pvfs2_clean_up_interrupted_operation(op);
+	} else {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
+			     __func__);
+	}
+	ret = -ETIMEDOUT;
+
+out:
+	/* shared exit: task state back to RUNNING, then leave the wait queue */
+	set_current_state(TASK_RUNNING);
+
+	spin_lock(&op->lock);
+	remove_wait_queue(&op->waitq, &wait_entry);
+	spin_unlock(&op->lock);
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s:returning ret(%d)\n",
+		     __func__, ret);
+
+	return ret;
+}