From c66ab6fa705e1b2887a6d9246b798bdc526839e2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Jun 2007 21:17:17 +0200 Subject: splice: abstract out actor data For direct splicing (or private splicing), the output may not be a file. So abstract out the handling into a specified actor function and put the data in the splice_desc structure earlier, so we can build on top of that. This is the first step in better splice handling for drivers, and also for implementing vmsplice _to_ user memory. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index c8884f97122..883ba9b78d3 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -94,13 +94,15 @@ struct splice_desc { typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, struct splice_desc *); +typedef int (splice_direct_actor)(struct pipe_inode_info *, + struct splice_desc *); extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int, splice_actor *); - -extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int, - splice_actor *); +extern ssize_t __splice_from_pipe(struct pipe_inode_info *, + struct splice_desc *, splice_actor *); +extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, + splice_direct_actor *); #endif -- cgit From 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Jun 2007 13:08:55 +0200 Subject: vmsplice: add vmsplice-to-user support A bit of a cheat, it actually just copies the data to userspace. But this makes the interface nice and symmetric and enables people to build on splice, with room for future improvement in performance. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 883ba9b78d3..6e7bfc12542 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); struct splice_desc { unsigned int len, total_len; /* current and remaining length */ unsigned int flags; /* splice flags */ - struct file *file; /* file to read/write */ + /* + * actor() private data + */ + union { + void __user *userptr; /* memory to write to */ + struct file *file; /* file to read/write */ + } u; loff_t pos; /* file position */ }; -- cgit From 0452a4e5d021900b07ebdeecb9ed03b49f164f3f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 Jun 2007 11:55:49 +0200 Subject: sendfile: kill generic_file_sendfile() It's no longer used. Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a41f4cab14..87c1d3e9d6c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1762,7 +1762,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, loff_t *, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); -- cgit From 130610d6f681c5d970340897f5db000d04a7ef78 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Jun 2007 21:20:00 +0200 Subject: splice: add void cookie to the actor data We need that for passing driver private info. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6e7bfc12542..f277a9cdb50 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -94,6 +94,7 @@ struct splice_desc { union { void __user *userptr; /* memory to write to */ struct file *file; /* file to read/write */ + void *data; /* cookie */ } u; loff_t pos; /* file position */ }; -- cgit From cf8208d0eabd1d5d2625ec02a175a294c3f30d36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Jun 2007 21:22:14 +0200 Subject: sendfile: convert nfsd to splice_direct_to_actor() Signed-off-by: Jens Axboe --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4a7ae8ab6eb..129d50f2225 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -253,7 +253,7 @@ struct svc_rqst { * determine what device number * to report (real or virtual) */ - int rq_sendfile_ok; /* turned off in gss privacy + int rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ -- cgit From d6b29d7cee064f28ca097e906de7453541351095 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 Jun 2007 09:59:47 +0200 Subject: splice: divorce the splice structure/function definitions from the pipe header We need to move even more stuff into the header so that folks can use the splice_to_pipe() implementation instead of open-coding a lot of pipe knowledge (see relay implementation), so move to our own header file finally. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 41 --------------------------- include/linux/splice.h | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 41 deletions(-) create mode 100644 include/linux/splice.h (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index f277a9cdb50..7ba228d52f5 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -71,45 +71,4 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); -/* - * splice is tied to pipes as a transport (at least for now), so we'll just - * add the splice flags here. - */ -#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ -#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ - /* we may still block on the fd we splice */ - /* from/to, of course */ -#define SPLICE_F_MORE (0x04) /* expect more data */ -#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */ - -/* - * Passed to the actors - */ -struct splice_desc { - unsigned int len, total_len; /* current and remaining length */ - unsigned int flags; /* splice flags */ - /* - * actor() private data - */ - union { - void __user *userptr; /* memory to write to */ - struct file *file; /* file to read/write */ - void *data; /* cookie */ - } u; - loff_t pos; /* file position */ -}; - -typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, - struct splice_desc *); -typedef int (splice_direct_actor)(struct pipe_inode_info *, - struct splice_desc *); - -extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int, - splice_actor *); -extern ssize_t __splice_from_pipe(struct pipe_inode_info *, - struct splice_desc *, splice_actor *); -extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, - splice_direct_actor *); - #endif diff --git a/include/linux/splice.h b/include/linux/splice.h new file mode 100644 index 00000000000..f8cc97f71cd --- /dev/null +++ b/include/linux/splice.h @@ -0,0 +1,72 @@ +/* + * Function declerations and data structures related to the splice + * implementation. + * + * Copyright (C) 2007 Jens Axboe + * + */ +#ifndef SPLICE_H +#define SPLICE_H + +#include + +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. + */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ +#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ + /* we may still block on the fd we splice */ + /* from/to, of course */ +#define SPLICE_F_MORE (0x04) /* expect more data */ +#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */ + +/* + * Passed to the actors + */ +struct splice_desc { + unsigned int len, total_len; /* current and remaining length */ + unsigned int flags; /* splice flags */ + /* + * actor() private data + */ + union { + void __user *userptr; /* memory to write to */ + struct file *file; /* file to read/write */ + void *data; /* cookie */ + } u; + loff_t pos; /* file position */ +}; + +struct partial_page { + unsigned int offset; + unsigned int len; +}; + +/* + * Passed to splice_to_pipe + */ +struct splice_pipe_desc { + struct page **pages; /* page map */ + struct partial_page *partial; /* pages[] may not be contig */ + int nr_pages; /* number of pages in map */ + unsigned int flags; /* splice flags */ + const struct pipe_buf_operations *ops;/* ops associated with output pipe */ +}; + +typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); +typedef int (splice_direct_actor)(struct pipe_inode_info *, + struct splice_desc *); + +extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int, + splice_actor *); +extern ssize_t __splice_from_pipe(struct pipe_inode_info *, + struct splice_desc *, splice_actor *); +extern ssize_t splice_to_pipe(struct pipe_inode_info *, + struct splice_pipe_desc *); +extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, + splice_direct_actor *); + +#endif -- cgit From 497f9625c2bbd6a8525fb2eedb22a382a6a8253c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Jun 2007 12:00:45 +0200 Subject: pipe: allow passing around of ops private pointer relay needs this for proper consumption handling, and the network receive support needs it as well to lookup the sk_buff on pipe release. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 + include/linux/splice.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 7ba228d52f5..4409167b9eb 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -14,6 +14,7 @@ struct pipe_buffer { unsigned int offset, len; const struct pipe_buf_operations *ops; unsigned int flags; + unsigned long private; }; struct pipe_inode_info { diff --git a/include/linux/splice.h b/include/linux/splice.h index f8cc97f71cd..33e447f98a5 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -41,6 +41,7 @@ struct splice_desc { struct partial_page { unsigned int offset; unsigned int len; + unsigned long private; }; /* -- cgit From d054fe3d10cc1f9aec01378c38caa32dffdd0090 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Fri, 15 Jun 2007 08:16:22 +0200 Subject: xip sendfile removal This patch removes xip_file_sendfile, the sendfile implementation for xip without replacement. Those customers that use xip on s390 are not using sendfile() as far as we know, and so far s390 is the only platform this could potentially be used on so far. Having sendfile is not a popular feature for execute in place file systems, however we have a working implementation of splice_read() based on fs/splice.c if anyone asks for it. At this point in time, it does not seem preferable to merge splice_read() for xip because it causes extra maintenence effort due to code duplication and it requires struct page behind the xip memory segment. We'd like to get rid of that in favor of supporting flash based embedded platforms (Monta Vista work) soon. Signed-off-by: Carsten Otte Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 87c1d3e9d6c..894620e9402 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1791,9 +1791,6 @@ extern int nonseekable_open(struct inode * inode, struct file * filp); #ifdef CONFIG_FS_XIP extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos, - size_t count, read_actor_t actor, - void *target); extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); extern ssize_t xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -- cgit From d96e6e71647846e0dab097efd9b8bf3a3a556dca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Jun 2007 12:18:52 +0200 Subject: Remove remnants of sendfile() There are now zero users of .sendfile() in the kernel, so kill it from the file_operations structure and in do_sendfile(). Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 894620e9402..4f0b3bf5983 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1054,7 +1054,7 @@ struct block_device_operations { }; /* - * "descriptor" for what we're up to with a read for sendfile(). + * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet * have multiple different users of the data that * we read from a file. @@ -1105,7 +1105,6 @@ struct file_operations { int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); -- cgit From cac36bb06efe4880234524e117e0e712b10b1f16 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Jun 2007 13:10:48 +0200 Subject: pipe: change the ->pin() operation to ->confirm() The name 'pin' was badly chosen, it doesn't pin a pipe buffer in the most commonly used sense in the kernel. So change the name to 'confirm', after debating this issue with Hugh Dickins a bit. A good return from ->confirm() means that the buffer is really there, and that the contents are good. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4409167b9eb..cc09fe89bf0 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -35,20 +35,21 @@ struct pipe_inode_info { /* * Note on the nesting of these functions: * - * ->pin() + * ->confirm() * ->steal() * ... * ->map() * ... * ->unmap() * - * That is, ->map() must be called on a pinned buffer, same goes for ->steal(). + * That is, ->map() must be called on a confirmed buffer, + * same goes for ->steal(). */ struct pipe_buf_operations { int can_merge; void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); - int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); + int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); void (*get)(struct pipe_inode_info *, struct pipe_buffer *); @@ -69,7 +70,7 @@ void __free_pipe_info(struct pipe_inode_info *); void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); -int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); #endif -- cgit From 0845718dafea3e16041d270c256e8516acf4e13d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Jun 2007 20:51:32 +0200 Subject: pipe: add documentation and comments As per Andrew Mortons request, here's a set of documentation for the generic pipe_buf_operations hooks, the pipe, and pipe_buffer structures. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index cc09fe89bf0..8e4120285f7 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,15 @@ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ +/** + * struct pipe_buffer - a linux kernel pipe buffer + * @page: the page containing the data for the pipe buffer + * @offset: offset of data inside the @page + * @len: length of data inside the @page + * @ops: operations associated with this buffer. See @pipe_buf_operations. + * @flags: pipe buffer flags. See above. + * @private: private data owned by the ops. + **/ struct pipe_buffer { struct page *page; unsigned int offset, len; @@ -17,6 +26,22 @@ struct pipe_buffer { unsigned long private; }; +/** + * struct pipe_inode_info - a linux kernel pipe + * @wait: reader/writer wait point in case of empty/full pipe + * @nrbufs: the number of non-empty pipe buffers in this pipe + * @curbuf: the current pipe buffer entry + * @tmp_page: cached released page + * @readers: number of current readers of this pipe + * @writers: number of current writers of this pipe + * @waiting_writers: number of writers blocked waiting for room + * @r_counter: reader counter + * @w_counter: writer counter + * @fasync_readers: reader side fasync + * @fasync_writers: writer side fasync + * @inode: inode this pipe is attached to + * @bufs: the circular array of pipe buffers + **/ struct pipe_inode_info { wait_queue_head_t wait; unsigned int nrbufs, curbuf; @@ -43,15 +68,65 @@ struct pipe_inode_info { * ->unmap() * * That is, ->map() must be called on a confirmed buffer, - * same goes for ->steal(). + * same goes for ->steal(). See below for the meaning of each + * operation. Also see kerneldoc in fs/pipe.c for the pipe + * and generic variants of these hooks. */ struct pipe_buf_operations { + /* + * This is set to 1, if the generic pipe read/write may coalesce + * data into an existing buffer. If this is set to 0, a new pipe + * page segment is always used for new data. + */ int can_merge; + + /* + * ->map() returns a virtual address mapping of the pipe buffer. + * The last integer flag reflects whether this should be an atomic + * mapping or not. The atomic map is faster, however you can't take + * page faults before calling ->unmap() again. So if you need to eg + * access user data through copy_to/from_user(), then you must get + * a non-atomic map. ->map() uses the KM_USER0 atomic slot for + * atomic maps, so you can't map more than one pipe_buffer at once + * and you have to be careful if mapping another page as source + * or destination for a copy (IOW, it has to use something else + * than KM_USER0). + */ void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); + + /* + * Undoes ->map(), finishes the virtual mapping of the pipe buffer. + */ void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); + + /* + * ->confirm() verifies that the data in the pipe buffer is there + * and that the contents are good. If the pages in the pipe belong + * to a file system, we may need to wait for IO completion in this + * hook. Returns 0 for good, or a negative error value in case of + * error. + */ int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *); + + /* + * When the contents of this pipe buffer has been completely + * consumed by a reader, ->release() is called. + */ void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + + /* + * Attempt to take ownership of the pipe buffer and its contents. + * ->steal() returns 0 for success, in which case the contents + * of the pipe (the buf->page) is locked and now completely owned + * by the caller. The page may then be transferred to a different + * mapping, the most often used case is insertion into different + * file address space cache. + */ int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); + + /* + * Get a reference to the pipe buffer. + */ void (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; -- cgit