123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732 |
- From 1f52075d672a9bdd0069b3ea68be266ef5c229bd Mon Sep 17 00:00:00 2001
- From: Alexey Shvetsov <alexxy@gentoo.org>
- Date: Tue, 17 Jan 2012 21:08:49 +0400
- Subject: [PATCH] [kcopy] Add kcopy driver
- Add kcopy driver from qlogic to implement zero copy for infiniband psm
- userspace driver
- Signed-off-by: Alexey Shvetsov <alexxy@gentoo.org>
- ---
- drivers/char/Kconfig | 2 +
- drivers/char/Makefile | 2 +
- drivers/char/kcopy/Kconfig | 17 ++
- drivers/char/kcopy/Makefile | 4 +
- drivers/char/kcopy/kcopy.c | 646 +++++++++++++++++++++++++++++++++++++++++++
- 5 files changed, 671 insertions(+)
- create mode 100644 drivers/char/kcopy/Kconfig
- create mode 100644 drivers/char/kcopy/Makefile
- create mode 100644 drivers/char/kcopy/kcopy.c
- diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
- index ee94686..5b81449 100644
- --- a/drivers/char/Kconfig
- +++ b/drivers/char/Kconfig
- @@ -6,6 +6,8 @@ menu "Character devices"
-
- source "drivers/tty/Kconfig"
-
- +source "drivers/char/kcopy/Kconfig"
- +
- config DEVKMEM
- bool "/dev/kmem virtual device support"
- default y
- diff --git a/drivers/char/Makefile b/drivers/char/Makefile
- index 0dc5d7c..be519d6 100644
- --- a/drivers/char/Makefile
- +++ b/drivers/char/Makefile
- @@ -62,3 +62,5 @@
- js-rtc-y = rtc.o
- obj-$(CONFIG_TILE_SROM) += tile-srom.o
- +
- +obj-$(CONFIG_KCOPY) += kcopy/
- diff --git a/drivers/char/kcopy/Kconfig b/drivers/char/kcopy/Kconfig
- new file mode 100644
- index 0000000..453ae52
- --- /dev/null
- +++ b/drivers/char/kcopy/Kconfig
- @@ -0,0 +1,17 @@
- +#
- +# KCopy character device configuration
- +#
- +
- +menu "KCopy"
- +
- +config KCOPY
- + tristate "Memory-to-memory copies using kernel assist"
- + default m
- + ---help---
- + High-performance inter-process memory copies. Can often save a
- + memory copy to shared memory in the application. Useful at least
- + for MPI applications where the point-to-point nature of vmsplice
- + and pipes can be a limiting factor in performance.
- +
- +endmenu
- +
- diff --git a/drivers/char/kcopy/Makefile b/drivers/char/kcopy/Makefile
- new file mode 100644
- index 0000000..9cb269b
- --- /dev/null
- +++ b/drivers/char/kcopy/Makefile
- @@ -0,0 +1,4 @@
- +#
- +# Makefile for the kernel character device drivers.
- +#
- +obj-$(CONFIG_KCOPY) += kcopy.o
- diff --git a/drivers/char/kcopy/kcopy.c b/drivers/char/kcopy/kcopy.c
- new file mode 100644
- index 0000000..a9f915c
- --- /dev/null
- +++ b/drivers/char/kcopy/kcopy.c
- @@ -0,0 +1,646 @@
- +#include <linux/module.h>
- +#include <linux/fs.h>
- +#include <linux/cdev.h>
- +#include <linux/device.h>
- +#include <linux/mutex.h>
- +#include <linux/mman.h>
- +#include <linux/highmem.h>
- +#include <linux/spinlock.h>
- +#include <linux/sched.h>
- +#include <linux/rbtree.h>
- +#include <linux/rcupdate.h>
- +#include <linux/uaccess.h>
- +#include <linux/slab.h>
- +
- +MODULE_LICENSE("GPL");
- +MODULE_AUTHOR("Arthur Jones <arthur.jones@qlogic.com>");
- +MODULE_DESCRIPTION("QLogic kcopy driver");
- +
- +#define KCOPY_ABI 1
- +#define KCOPY_MAX_MINORS 64
- +
- +struct kcopy_device {
- + struct cdev cdev;
- + struct class *class;
- + struct device *devp[KCOPY_MAX_MINORS];
- + dev_t dev;
- +
- + struct kcopy_file *kf[KCOPY_MAX_MINORS];
- + struct mutex open_lock;
- +};
- +
- +static struct kcopy_device kcopy_dev;
- +
- +/* per file data / one of these is shared per minor */
- +struct kcopy_file {
- + int count;
- +
- + /* pid indexed */
- + struct rb_root live_map_tree;
- +
- + struct mutex map_lock;
- +};
- +
- +struct kcopy_map_entry {
- + int count;
- + struct task_struct *task;
- + pid_t pid;
- + struct kcopy_file *file; /* file backpointer */
- +
- + struct list_head list; /* free map list */
- + struct rb_node node; /* live map tree */
- +};
- +
- +#define KCOPY_GET_SYSCALL 1
- +#define KCOPY_PUT_SYSCALL 2
- +#define KCOPY_ABI_SYSCALL 3
- +
- +struct kcopy_syscall {
- + __u32 tag;
- + pid_t pid;
- + __u64 n;
- + __u64 src;
- + __u64 dst;
- +};
- +
- +static const void __user *kcopy_syscall_src(const struct kcopy_syscall *ks)
- +{
- + return (const void __user *) (unsigned long) ks->src;
- +}
- +
- +static void __user *kcopy_syscall_dst(const struct kcopy_syscall *ks)
- +{
- + return (void __user *) (unsigned long) ks->dst;
- +}
- +
- +static unsigned long kcopy_syscall_n(const struct kcopy_syscall *ks)
- +{
- + return (unsigned long) ks->n;
- +}
- +
- +static struct kcopy_map_entry *kcopy_create_entry(struct kcopy_file *file)
- +{
- + struct kcopy_map_entry *kme =
- + kmalloc(sizeof(struct kcopy_map_entry), GFP_KERNEL);
- +
- + if (!kme)
- + return NULL;
- +
- + kme->count = 1;
- + kme->file = file;
- + kme->task = current;
- + kme->pid = current->tgid;
- + INIT_LIST_HEAD(&kme->list);
- +
- + return kme;
- +}
- +
- +static struct kcopy_map_entry *
- +kcopy_lookup_pid(struct rb_root *root, pid_t pid)
- +{
- + struct rb_node *node = root->rb_node;
- +
- + while (node) {
- + struct kcopy_map_entry *kme =
- + container_of(node, struct kcopy_map_entry, node);
- +
- + if (pid < kme->pid)
- + node = node->rb_left;
- + else if (pid > kme->pid)
- + node = node->rb_right;
- + else
- + return kme;
- + }
- +
- + return NULL;
- +}
- +
- +static int kcopy_insert(struct rb_root *root, struct kcopy_map_entry *kme)
- +{
- + struct rb_node **new = &(root->rb_node);
- + struct rb_node *parent = NULL;
- +
- + while (*new) {
- + struct kcopy_map_entry *tkme =
- + container_of(*new, struct kcopy_map_entry, node);
- +
- + parent = *new;
- + if (kme->pid < tkme->pid)
- + new = &((*new)->rb_left);
- + else if (kme->pid > tkme->pid)
- + new = &((*new)->rb_right);
- + else {
- + printk(KERN_INFO "!!! debugging: bad rb tree !!!\n");
- + return -EINVAL;
- + }
- + }
- +
- + rb_link_node(&kme->node, parent, new);
- + rb_insert_color(&kme->node, root);
- +
- + return 0;
- +}
- +
- +static int kcopy_open(struct inode *inode, struct file *filp)
- +{
- + int ret;
- + const int minor = iminor(inode);
- + struct kcopy_file *kf = NULL;
- + struct kcopy_map_entry *kme;
- + struct kcopy_map_entry *okme;
- +
- + if (minor < 0 || minor >= KCOPY_MAX_MINORS)
- + return -ENODEV;
- +
- + mutex_lock(&kcopy_dev.open_lock);
- +
- + if (!kcopy_dev.kf[minor]) {
- + kf = kmalloc(sizeof(struct kcopy_file), GFP_KERNEL);
- +
- + if (!kf) {
- + ret = -ENOMEM;
- + goto bail;
- + }
- +
- + kf->count = 1;
- + kf->live_map_tree = RB_ROOT;
- + mutex_init(&kf->map_lock);
- + kcopy_dev.kf[minor] = kf;
- + } else {
- + if (filp->f_flags & O_EXCL) {
- + ret = -EBUSY;
- + goto bail;
- + }
- + kcopy_dev.kf[minor]->count++;
- + }
- +
- + kme = kcopy_create_entry(kcopy_dev.kf[minor]);
- + if (!kme) {
- + ret = -ENOMEM;
- + goto err_free_kf;
- + }
- +
- + kf = kcopy_dev.kf[minor];
- +
- + mutex_lock(&kf->map_lock);
- +
- + okme = kcopy_lookup_pid(&kf->live_map_tree, kme->pid);
- + if (okme) {
- + /* pid already exists... */
- + okme->count++;
- + kfree(kme);
- + kme = okme;
- + } else
- + ret = kcopy_insert(&kf->live_map_tree, kme);
- +
- + mutex_unlock(&kf->map_lock);
- +
- + filp->private_data = kme;
- +
- + ret = 0;
- + goto bail;
- +
- +err_free_kf:
- + if (kf) {
- + kcopy_dev.kf[minor] = NULL;
- + kfree(kf);
- + }
- +bail:
- + mutex_unlock(&kcopy_dev.open_lock);
- + return ret;
- +}
- +
- +static int kcopy_flush(struct file *filp, fl_owner_t id)
- +{
- + struct kcopy_map_entry *kme = filp->private_data;
- + struct kcopy_file *kf = kme->file;
- +
- + if (file_count(filp) == 1) {
- + mutex_lock(&kf->map_lock);
- + kme->count--;
- +
- + if (!kme->count) {
- + rb_erase(&kme->node, &kf->live_map_tree);
- + kfree(kme);
- + }
- + mutex_unlock(&kf->map_lock);
- + }
- +
- + return 0;
- +}
- +
- +static int kcopy_release(struct inode *inode, struct file *filp)
- +{
- + const int minor = iminor(inode);
- +
- + mutex_lock(&kcopy_dev.open_lock);
- + kcopy_dev.kf[minor]->count--;
- + if (!kcopy_dev.kf[minor]->count) {
- + kfree(kcopy_dev.kf[minor]);
- + kcopy_dev.kf[minor] = NULL;
- + }
- + mutex_unlock(&kcopy_dev.open_lock);
- +
- + return 0;
- +}
- +
- +static void kcopy_put_pages(struct page **pages, int npages)
- +{
- + int j;
- +
- + for (j = 0; j < npages; j++)
- + put_page(pages[j]);
- +}
- +
- +static int kcopy_validate_task(struct task_struct *p)
- +{
- + return p && (uid_eq(current_euid(), task_euid(p)) || uid_eq(current_euid(), task_uid(p)));
- +}
- +
- +static int kcopy_get_pages(struct kcopy_file *kf, pid_t pid,
- + struct page **pages, void __user *addr,
- + int write, size_t npages)
- +{
- + int err;
- + struct mm_struct *mm;
- + struct kcopy_map_entry *rkme;
- +
- + mutex_lock(&kf->map_lock);
- +
- + rkme = kcopy_lookup_pid(&kf->live_map_tree, pid);
- + if (!rkme || !kcopy_validate_task(rkme->task)) {
- + err = -EINVAL;
- + goto bail_unlock;
- + }
- +
- + mm = get_task_mm(rkme->task);
- + if (unlikely(!mm)) {
- + err = -ENOMEM;
- + goto bail_unlock;
- + }
- +
- + down_read(&mm->mmap_sem);
- + err = get_user_pages(rkme->task, mm,
- + (unsigned long) addr, npages, write, 0,
- + pages, NULL);
- +
- + if (err < npages && err > 0) {
- + kcopy_put_pages(pages, err);
- + err = -ENOMEM;
- + } else if (err == npages)
- + err = 0;
- +
- + up_read(&mm->mmap_sem);
- +
- + mmput(mm);
- +
- +bail_unlock:
- + mutex_unlock(&kf->map_lock);
- +
- + return err;
- +}
- +
- +static unsigned long kcopy_copy_pages_from_user(void __user *src,
- + struct page **dpages,
- + unsigned doff,
- + unsigned long n)
- +{
- + struct page *dpage = *dpages;
- + char *daddr = kmap(dpage);
- + int ret = 0;
- +
- + while (1) {
- + const unsigned long nleft = PAGE_SIZE - doff;
- + const unsigned long nc = (n < nleft) ? n : nleft;
- +
- + /* if (copy_from_user(daddr + doff, src, nc)) { */
- + if (__copy_from_user_nocache(daddr + doff, src, nc)) {
- + ret = -EFAULT;
- + goto bail;
- + }
- +
- + n -= nc;
- + if (n == 0)
- + break;
- +
- + doff += nc;
- + doff &= ~PAGE_MASK;
- + if (doff == 0) {
- + kunmap(dpage);
- + dpages++;
- + dpage = *dpages;
- + daddr = kmap(dpage);
- + }
- +
- + src += nc;
- + }
- +
- +bail:
- + kunmap(dpage);
- +
- + return ret;
- +}
- +
- +static unsigned long kcopy_copy_pages_to_user(void __user *dst,
- + struct page **spages,
- + unsigned soff,
- + unsigned long n)
- +{
- + struct page *spage = *spages;
- + const char *saddr = kmap(spage);
- + int ret = 0;
- +
- + while (1) {
- + const unsigned long nleft = PAGE_SIZE - soff;
- + const unsigned long nc = (n < nleft) ? n : nleft;
- +
- + if (copy_to_user(dst, saddr + soff, nc)) {
- + ret = -EFAULT;
- + goto bail;
- + }
- +
- + n -= nc;
- + if (n == 0)
- + break;
- +
- + soff += nc;
- + soff &= ~PAGE_MASK;
- + if (soff == 0) {
- + kunmap(spage);
- + spages++;
- + spage = *spages;
- + saddr = kmap(spage);
- + }
- +
- + dst += nc;
- + }
- +
- +bail:
- + kunmap(spage);
- +
- + return ret;
- +}
- +
- +static unsigned long kcopy_copy_to_user(void __user *dst,
- + struct kcopy_file *kf, pid_t pid,
- + void __user *src,
- + unsigned long n)
- +{
- + struct page **pages;
- + const int pages_len = PAGE_SIZE / sizeof(struct page *);
- + int ret = 0;
- +
- + pages = (struct page **) __get_free_page(GFP_KERNEL);
- + if (!pages) {
- + ret = -ENOMEM;
- + goto bail;
- + }
- +
- + while (n) {
- + const unsigned long soff = (unsigned long) src & ~PAGE_MASK;
- + const unsigned long spages_left =
- + (soff + n + PAGE_SIZE - 1) >> PAGE_SHIFT;
- + const unsigned long spages_cp =
- + min_t(unsigned long, spages_left, pages_len);
- + const unsigned long sbytes =
- + PAGE_SIZE - soff + (spages_cp - 1) * PAGE_SIZE;
- + const unsigned long nbytes = min_t(unsigned long, sbytes, n);
- +
- + ret = kcopy_get_pages(kf, pid, pages, src, 0, spages_cp);
- + if (unlikely(ret))
- + goto bail_free;
- +
- + ret = kcopy_copy_pages_to_user(dst, pages, soff, nbytes);
- + kcopy_put_pages(pages, spages_cp);
- + if (ret)
- + goto bail_free;
- + dst = (char *) dst + nbytes;
- + src = (char *) src + nbytes;
- +
- + n -= nbytes;
- + }
- +
- +bail_free:
- + free_page((unsigned long) pages);
- +bail:
- + return ret;
- +}
- +
- +static unsigned long kcopy_copy_from_user(const void __user *src,
- + struct kcopy_file *kf, pid_t pid,
- + void __user *dst,
- + unsigned long n)
- +{
- + struct page **pages;
- + const int pages_len = PAGE_SIZE / sizeof(struct page *);
- + int ret = 0;
- +
- + pages = (struct page **) __get_free_page(GFP_KERNEL);
- + if (!pages) {
- + ret = -ENOMEM;
- + goto bail;
- + }
- +
- + while (n) {
- + const unsigned long doff = (unsigned long) dst & ~PAGE_MASK;
- + const unsigned long dpages_left =
- + (doff + n + PAGE_SIZE - 1) >> PAGE_SHIFT;
- + const unsigned long dpages_cp =
- + min_t(unsigned long, dpages_left, pages_len);
- + const unsigned long dbytes =
- + PAGE_SIZE - doff + (dpages_cp - 1) * PAGE_SIZE;
- + const unsigned long nbytes = min_t(unsigned long, dbytes, n);
- +
- + ret = kcopy_get_pages(kf, pid, pages, dst, 1, dpages_cp);
- + if (unlikely(ret))
- + goto bail_free;
- +
- + ret = kcopy_copy_pages_from_user((void __user *) src,
- + pages, doff, nbytes);
- + kcopy_put_pages(pages, dpages_cp);
- + if (ret)
- + goto bail_free;
- +
- + dst = (char *) dst + nbytes;
- + src = (char *) src + nbytes;
- +
- + n -= nbytes;
- + }
- +
- +bail_free:
- + free_page((unsigned long) pages);
- +bail:
- + return ret;
- +}
- +
- +static int kcopy_do_get(struct kcopy_map_entry *kme, pid_t pid,
- + const void __user *src, void __user *dst,
- + unsigned long n)
- +{
- + struct kcopy_file *kf = kme->file;
- + int ret = 0;
- +
- + if (n == 0) {
- + ret = -EINVAL;
- + goto bail;
- + }
- +
- + ret = kcopy_copy_to_user(dst, kf, pid, (void __user *) src, n);
- +
- +bail:
- + return ret;
- +}
- +
- +static int kcopy_do_put(struct kcopy_map_entry *kme, const void __user *src,
- + pid_t pid, void __user *dst,
- + unsigned long n)
- +{
- + struct kcopy_file *kf = kme->file;
- + int ret = 0;
- +
- + if (n == 0) {
- + ret = -EINVAL;
- + goto bail;
- + }
- +
- + ret = kcopy_copy_from_user(src, kf, pid, (void __user *) dst, n);
- +
- +bail:
- + return ret;
- +}
- +
- +static int kcopy_do_abi(u32 __user *dst)
- +{
- + u32 val = KCOPY_ABI;
- + int err;
- +
- + err = put_user(val, dst);
- + if (err)
- + return -EFAULT;
- +
- + return 0;
- +}
- +
- +ssize_t kcopy_write(struct file *filp, const char __user *data, size_t cnt,
- + loff_t *o)
- +{
- + struct kcopy_map_entry *kme = filp->private_data;
- + struct kcopy_syscall ks;
- + int err = 0;
- + const void __user *src;
- + void __user *dst;
- + unsigned long n;
- +
- + if (cnt != sizeof(struct kcopy_syscall)) {
- + err = -EINVAL;
- + goto bail;
- + }
- +
- + err = copy_from_user(&ks, data, cnt);
- + if (unlikely(err))
- + goto bail;
- +
- + src = kcopy_syscall_src(&ks);
- + dst = kcopy_syscall_dst(&ks);
- + n = kcopy_syscall_n(&ks);
- + if (ks.tag == KCOPY_GET_SYSCALL)
- + err = kcopy_do_get(kme, ks.pid, src, dst, n);
- + else if (ks.tag == KCOPY_PUT_SYSCALL)
- + err = kcopy_do_put(kme, src, ks.pid, dst, n);
- + else if (ks.tag == KCOPY_ABI_SYSCALL)
- + err = kcopy_do_abi(dst);
- + else
- + err = -EINVAL;
- +
- +bail:
- + return err ? err : cnt;
- +}
- +
- +static const struct file_operations kcopy_fops = {
- + .owner = THIS_MODULE,
- + .open = kcopy_open,
- + .release = kcopy_release,
- + .flush = kcopy_flush,
- + .write = kcopy_write,
- +};
- +
- +static int __init kcopy_init(void)
- +{
- + int ret;
- + const char *name = "kcopy";
- + int i;
- + int ninit = 0;
- +
- + mutex_init(&kcopy_dev.open_lock);
- +
- + ret = alloc_chrdev_region(&kcopy_dev.dev, 0, KCOPY_MAX_MINORS, name);
- + if (ret)
- + goto bail;
- +
- + kcopy_dev.class = class_create(THIS_MODULE, (char *) name);
- +
- + if (IS_ERR(kcopy_dev.class)) {
- + ret = PTR_ERR(kcopy_dev.class);
- + printk(KERN_ERR "kcopy: Could not create "
- + "device class (err %d)\n", -ret);
- + goto bail_chrdev;
- + }
- +
- + cdev_init(&kcopy_dev.cdev, &kcopy_fops);
- + ret = cdev_add(&kcopy_dev.cdev, kcopy_dev.dev, KCOPY_MAX_MINORS);
- + if (ret < 0) {
- + printk(KERN_ERR "kcopy: Could not add cdev (err %d)\n",
- + -ret);
- + goto bail_class;
- + }
- +
- + for (i = 0; i < KCOPY_MAX_MINORS; i++) {
- + char devname[8];
- + const int minor = MINOR(kcopy_dev.dev) + i;
- + const dev_t dev = MKDEV(MAJOR(kcopy_dev.dev), minor);
- +
- + snprintf(devname, sizeof(devname), "kcopy%02d", i);
- + kcopy_dev.devp[i] =
- + device_create(kcopy_dev.class, NULL,
- + dev, NULL, devname);
- +
- + if (IS_ERR(kcopy_dev.devp[i])) {
- + ret = PTR_ERR(kcopy_dev.devp[i]);
- + printk(KERN_ERR "kcopy: Could not create "
- + "devp %d (err %d)\n", i, -ret);
- + goto bail_cdev_add;
- + }
- +
- + ninit++;
- + }
- +
- + ret = 0;
- + goto bail;
- +
- +bail_cdev_add:
- + for (i = 0; i < ninit; i++)
- + device_unregister(kcopy_dev.devp[i]);
- +
- + cdev_del(&kcopy_dev.cdev);
- +bail_class:
- + class_destroy(kcopy_dev.class);
- +bail_chrdev:
- + unregister_chrdev_region(kcopy_dev.dev, KCOPY_MAX_MINORS);
- +bail:
- + return ret;
- +}
- +
- +static void __exit kcopy_fini(void)
- +{
- + int i;
- +
- + for (i = 0; i < KCOPY_MAX_MINORS; i++)
- + device_unregister(kcopy_dev.devp[i]);
- +
- + cdev_del(&kcopy_dev.cdev);
- + class_destroy(kcopy_dev.class);
- + unregister_chrdev_region(kcopy_dev.dev, KCOPY_MAX_MINORS);
- +}
- +
- +module_init(kcopy_init);
- +module_exit(kcopy_fini);
- --
- 1.7.10
|