Unix Technical Forum

Re: sendfile(2) of 03-18

This is a discussion of "Re: sendfile(2) of 03-18" within the mailing.openbsd.tech forum, part of the OpenBSD category: On Sat, Mar 17, 2007 at 10:37:52PM -0400, tbert wrote: > After a few days of digging in the ...


Go Back   Unix Technical Forum > Unix Operating Systems > OpenBSD > mailing.openbsd.tech

FAQ Members List Calendar Search Today's Posts Mark Forums Read
  #1 (permalink)  
Old 02-18-2008, 09:15 AM
mickey
 
Posts: n/a
Default Re: sendfile(2) of 03-18

On Sat, Mar 17, 2007 at 10:37:52PM -0400, tbert wrote:
> After a few days of digging in the VFS code, I've used the
> VOP* call that seems appropriate to populate bufs, vice
> the bread call I had before; I've added a quick'n'dirty
> readahead for performance reasons (or so I think...) and
> addressed a major oversight on my part concerning the lack
> of paying attention to the offset.


there is already a function that does the reading -- vn_rdwr.

> I'm still somewhat unsure as to how to deal with the error
> return from copyout vice the error return from sosendfile.
> Which is more important? Of course, this issue could be
> rendered entirely moot, as I'm starting to look towards
> emulating the Linux sendfile interface, as it is simpler
> and more consistent with the other send* functions in appearance
> and use.


well. it's easy -- you take errors as they come.

> I'd much appreciate any feedback on the API issue from the
> community, especially those of you maintaining network software.


why don't you read the existing source before you write anything?
and don't forget to write a test for it, too.

> Index: src/sys/kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.66
> diff -u -r1.66 uipc_syscalls.c
> --- src/sys/kern/uipc_syscalls.c 23 Oct 2006 07:13:56 -0000 1.66
> +++ src/sys/kern/uipc_syscalls.c 19 Mar 2007 02:28:59 -0000
> @@ -47,6 +47,7 @@
> #include <sys/signalvar.h>
> #include <sys/unpcb.h>
> #include <sys/un.h>
> +#include <sys/vnode.h>
> #ifdef KTRACE
> #include <sys/ktrace.h>
> #endif
> @@ -1091,4 +1092,69 @@
> FREF(fp);
>
> return (0);
> +}
> +
> +int
> +sys_sendfile(struct proc *p, void* v, register_t *retval)
> +{
> + struct sys_sendfile_args /* {
> + syscallarg(int) fd;
> + syscallarg(int) s;
> + syscallarg(off_t) off;
> + syscallarg(size_t) nbytes;
> + syscallarg(off_t *) sbytes;
> + syscallarg(int) flags;
> + } */ *uap = v;
> + struct file *fp, *sp;
> + struct socket *so;
> + struct vnode *vp;
> + off_t *sbytes, sbtmp, off;
> + size_t nbytes;
> + int error, flags, fd, s;
> +
> + nbytes = SCARG(uap, nbytes);
> + sbytes = SCARG(uap, sbytes);
> + flags = SCARG(uap, flags);
> + off = SCARG(uap, off);
> + fd = SCARG(uap, fd);
> + s = SCARG(uap, s);
> +
> + /*
> + * Malicious or incorrect values
> + * may cause off + nbytes to wrap.
> + */
> + if (off < 0 || off + nbytes < off) {
> + error = EINVAL;
> + goto out;
> + }
> +
> + /* validate userspace pointer */
> + if (sbytes != NULL)
> + if ((error = copyin(sbytes, &sbtmp, sizeof(off_t))) != 0)
> + goto out;
> +
> + if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
> + goto out;
> + vp = fp->f_data;
> +
> + if ((error = getsock(p->p_fd, s, &sp)) != 0)
> + goto vfail;
> + so = sp->f_data;
> +
> + if (!(so->so_type & SOCK_STREAM)) {
> + error = EINVAL;
> + goto sfail;
> + }
> +
> + error = sosendfile(so, vp, nbytes, &sbtmp, p, off, flags);
> +
> + if (sbytes != NULL)
> + copyout(&sbtmp, sbytes, sizeof(off_t));
> +
> +sfail:
> + FRELE(sp);
> +vfail:
> + FRELE(fp);
> +out:
> + return (error);
> }
> Index: src/sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.66
> diff -u -r1.66 uipc_socket.c
> --- src/sys/kern/uipc_socket.c 26 Feb 2007 23:53:33 -0000 1.66
> +++ src/sys/kern/uipc_socket.c 19 Mar 2007 02:29:11 -0000
> @@ -47,6 +47,9 @@
> #include <sys/signalvar.h>
> #include <sys/resourcevar.h>
> #include <sys/pool.h>
> +#include <sys/buf.h>
> +#include <sys/vnode.h>
> +#include <sys/mount.h>
>
> void filt_sordetach(struct knote *kn);
> int filt_soread(struct knote *kn, long hint);
> @@ -54,6 +57,8 @@
> int filt_sowrite(struct knote *kn, long hint);
> int filt_solisten(struct knote *kn, long hint);
>
> +void sf_free_fbuf(caddr_t, u_int, void *);
> +
> struct filterops solisten_filtops =
> { 1, NULL, filt_sordetach, filt_solisten };
> struct filterops soread_filtops =
> @@ -1295,4 +1300,184 @@
>
> kn->kn_data = so->so_qlen;
> return (so->so_qlen != 0);
> +}
> +
> +/*
> + * Implement zero-copy file transmit over a socket.
> + *
> + * Socket MUST be SOCK_STREAM.
> + *
> + * File bufs are used as external data storage for mbufs,
> + * which are passed to the appropriate socket send routine.
> + *
> + * If a user has specified a number of bytes to send, transmit
> + * that amount of data; otherwise, send the entire file. Return
> + * the number of bytes sent in sbytes.
> + *
> + * Flags:
> + * SF_NODISKIO:
> + * return EBUSY instead of waiting
> + * for disk I/O to complete
> + */
> +#define SFRABLKS 2 /* better value is ??? */
> +int
> +sosendfile(struct socket *so, struct vnode *vp, size_t nbytes, off_t *sbytes,
> + struct proc *p, off_t off, int flags)
> +{
> + struct vattr va;
> + struct mbuf *m, *m0;
> + struct buf *bp, *bbp;
> + ssize_t space;
> + off_t resid, sent, sent0;
> + u_int sblk, eblk, len;
> + int error, i, j, s;
> +
> + sent = sent0 = 0;
> + m = NULL;
> +
> + s = splsoftnet();
> +
> + if ((error = sblock(&so->so_snd, M_WAITOK)) != 0)
> + goto out;
> +
> + if ((vn_lock(vp, (LK_EXCLUSIVE | LK_RETRY), p) != 0) ||
> + (VOP_GETATTR(vp, &va, p->p_ucred, p) != 0)) {
> + error = EIO;
> + goto release;
> + }
> +
> + /*
> + * off + nbytes wraparound validated in sys_sendfile()
> + */
> + if (off + nbytes > va.va_size) {
> + error = EINVAL;
> + goto release;
> + }
> +
> + if (nbytes > 0)
> + resid = nbytes;
> + else
> + resid = va.va_size - off;
> +
> + eblk = (u_int)((off + resid) / va.va_blocksize);
> + sblk = (u_int)(off / va.va_blocksize);
> + off -= sblk * va.va_blocksize;
> +
> + so->so_state |= SS_ISSENDING;
> +
> + for (i = sblk; i <= eblk;) {
> +
> + if (!(so->so_state & SS_ISCONNECTED))
> + error = ENOTCONN;
> + else if (so->so_state & SS_CANTSENDMORE)
> + error = EPIPE;
> + else if (so->so_error) {
> + error = so->so_error;
> + so->so_error = 0;
> + }
> + if (error)
> + goto release;
> +
> +retry:
> + space = sbspace(&so->so_snd);
> + if (space < so->so_snd.sb_lowat && space < resid) {
> + if ((error = sbwait(&so->so_snd)) != 0)
> + goto release;
> +
> + goto retry;
> + }
> + if (space > resid)
> + space = resid;
> +
> + while (space > 0) {
> +
> + bp = getblk(vp, i, va.va_blocksize, 0, 0);
> +
> + /*
> + * If it wasn't found in the cache, then disk I/O
> + * is needed; is that what the caller asked for?
> + */
> + if (!(bp->b_flags & (B_DONE | B_DELWRI))) {
> + if (flags & SF_NODISKIO) {
> + error = EBUSY;
> + brelse(bp);
> + goto release;
> + }
> + bp->b_flags |= B_READ;
> + VOP_STRATEGY(bp);
> + p->p_stats->p_ru.ru_inblock++;
> + }
> +
> + /* async readahead */
> + for (j = 1; j < min(eblk, i + SFRABLKS); j++) {
> + bbp = getblk(vp, i + j, va.va_blocksize, 0, 0);
> + if (!(bbp->b_flags & (B_DONE | B_DELWRI))) {
> + bbp->b_flags |= (B_READ | B_ASYNC);
> + VOP_STRATEGY(bbp);
> + p->p_stats->p_ru.ru_inblock++;
> + } else {
> + brelse(bbp);
> + }
> + }
> +
> + error = biowait(bp);
> + if (error) {
> + brelse(bp);
> + goto release;
> + }
> +
> + len = (u_int)bp->b_bcount - off;
> + if (len > resid)
> + len = (u_int)resid;
> +
> + if (m == NULL) {
> + MGETHDR(m0, M_WAITOK, MT_DATA);
> + m = m0;
> + m->m_pkthdr.rcvif = NULL;
> + m->m_pkthdr.len = 0;
> + } else {
> + MGET(m0, M_WAITOK, MT_DATA);
> + m_cat(m, m0);
> + }
> + MEXTADD(m0, bp->b_data + off, len,
> + M_FILE, sf_free_fbuf, bp);
> + m->m_pkthdr.len += len;
> +
> + sent0 += len;
> + resid -= len;
> + space -= len;
> + off = 0;
> + i++;
> + }
> +
> + error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, NULL, NULL);
> + if (error)
> + goto release;
> +
> + m_freem(m);
> +
> + sent = sent0;
> + }
> +
> +release:
> + VOP_UNLOCK(vp, 0, p);
> + so->so_state &= ~SS_ISSENDING;
> + sbunlock(&so->so_snd);
> +out:
> + splx(s);
> + if (m != NULL)
> + m_freem(m);
> + *sbytes = sent;
> + return (error);
> +}
> +
> +/*
> + * Utility function to free file buffers
> + * as they are consumed in sosendfile.
> + */
> +/* ARGSUSED */
> +void
> +sf_free_fbuf(caddr_t buf, u_int size, void *arg)
> +{
> + brelse(arg);
> }
> Index: src/sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.86
> diff -u -r1.86 syscalls.master
> --- src/sys/kern/syscalls.master 22 Sep 2006 17:35:41 -0000 1.86
> +++ src/sys/kern/syscalls.master 19 Mar 2007 02:29:26 -0000
> @@ -611,3 +611,5 @@
> 304 STD { int sys___getcwd(char *buf, size_t len); }
> 305 STD { int sys_adjfreq(const int64_t *freq, \
> int64_t *oldfreq); }
> +306 STD { int sys_sendfile(int fd, int s, off_t off, \
> + size_t nbytes, off_t *sbytes, int flags); }
> Index: src/sys/sys/socket.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socket.h,v
> retrieving revision 1.53
> diff -u -r1.53 socket.h
> --- src/sys/sys/socket.h 31 Mar 2006 17:30:39 -0000 1.53
> +++ src/sys/sys/socket.h 19 Mar 2007 02:29:47 -0000
> @@ -436,6 +436,11 @@
>
> #define SA_LEN(x) ((x)->sa_len)
>
> +/*
> + * Sendfile flags.
> + */
> +#define SF_NODISKIO 0x0001
> +
> #ifndef _KERNEL
>
> #include <sys/cdefs.h>
> @@ -453,6 +458,7 @@
> ssize_t recvfrom(int, void *, size_t, int, struct sockaddr *, socklen_t *);
> ssize_t recvmsg(int, struct msghdr *, int);
> ssize_t send(int, const void *, size_t, int);
> +int sendfile(int, int, off_t, size_t, off_t *, int);
> ssize_t sendto(int, const void *,
> size_t, int, const struct sockaddr *, socklen_t);
> ssize_t sendmsg(int, const struct msghdr *, int);
> Index: src/sys/sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.39
> diff -u -r1.39 socketvar.h
> --- src/sys/sys/socketvar.h 26 Feb 2007 23:53:33 -0000 1.39
> +++ src/sys/sys/socketvar.h 19 Mar 2007 02:29:56 -0000
> @@ -37,6 +37,8 @@
>
> TAILQ_HEAD(soqhead, socket);
>
> +struct vnode;
> +
> /*
> * Kernel structure per socket.
> * Contains send and receive buffer queues,
> @@ -302,6 +304,8 @@
> void sorflush(struct socket *so);
> int sosend(struct socket *so, struct mbuf *addr, struct uio *uio,
> struct mbuf *top, struct mbuf *control, int flags);
> +int sosendfile(struct socket *so, struct vnode *vp, size_t nbytes,
> + off_t *sbytes, struct proc *p, off_t off, int flags);
> int sosetopt(struct socket *so, int level, int optname,
> struct mbuf *m0);
> int soshutdown(struct socket *so, int how);
> Index: src/lib/libc/sys/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
> retrieving revision 1.80
> diff -u -r1.80 Makefile.inc
> --- src/lib/libc/sys/Makefile.inc 24 Oct 2006 04:40:59 -0000 1.80
> +++ src/lib/libc/sys/Makefile.inc 19 Mar 2007 02:30:52 -0000
> @@ -50,7 +50,8 @@
> munlock.o munlockall.o munmap.o nanosleep.o nfssvc.o \
> open.o pathconf.o pipe.o poll.o profil.o quotactl.o \
> read.o readlink.o readv.o reboot.o recvfrom.o recvmsg.o rename.o \
> - revoke.o rmdir.o select.o semget.o semop.o sendmsg.o sendto.o \
> + revoke.o rmdir.o select.o semget.o semop.o sendfile.o sendmsg.o \
> + sendto.o \
> setegid.o seteuid.o setgid.o setgroups.o setitimer.o setpgid.o \
> setpriority.o setregid.o setreuid.o setresgid.o setresuid.o \
> setrlimit.o setsid.o setsockopt.o settimeofday.o \
> Index: src/libexec/tftpd/tftpd.c
> ===================================================================
> RCS file: /cvs/src/libexec/tftpd/tftpd.c,v
> retrieving revision 1.54
> diff -u -r1.54 tftpd.c
> --- src/libexec/tftpd/tftpd.c 15 Dec 2006 05:52:06 -0000 1.54
> +++ src/libexec/tftpd/tftpd.c 19 Mar 2007 02:31:28 -0000
> @@ -83,7 +83,7 @@
> __dead void usage(void);
> void tftp(struct tftphdr *, int);
> int validate_access(char *, int);
> -int sendfile(struct formats *);
> +int tftpd_sendfile(struct formats *);
> int recvfile(struct formats *);
> void nak(int);
> void oack(int);
> @@ -112,8 +112,8 @@
> int (*f_recv)(struct formats *);
> int f_convert;
> } formats[] = {
> - { "netascii", validate_access, sendfile, recvfile, 1 },
> - { "octet", validate_access, sendfile, recvfile, 0 },
> + { "netascii", validate_access, tftpd_sendfile, recvfile, 1 },
> + { "octet", validate_access, tftpd_sendfile, recvfile, 0 },
> { NULL, NULL, NULL, NULL, 0 }
> };
>
> @@ -603,7 +603,7 @@
> * Send the requested file.
> */
> int
> -sendfile(struct formats *pf)
> +tftpd_sendfile(struct formats *pf)
> {
> struct tftphdr *dp, *r_init(void);
> struct tftphdr *ap; /* ack packet */
> Index: src/usr.bin/tftp/main.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/tftp/main.c,v
> retrieving revision 1.28
> diff -u -r1.28 main.c
> --- src/usr.bin/tftp/main.c 26 Jul 2006 22:43:53 -0000 1.28
> +++ src/usr.bin/tftp/main.c 19 Mar 2007 02:32:32 -0000
> @@ -381,7 +381,7 @@
> printf("putting %s to %s:%s [%s]\n",
> cp, hostname, targ, mode);
> peeraddr.sin_port = port;
> - sendfile(fd, targ, mode);
> + tftp_sendfile(fd, targ, mode);
> return;
> }
>
> @@ -402,7 +402,7 @@
> printf("putting %s to %s:%s [%s]\n",
> argv[n], hostname, cp, mode);
> peeraddr.sin_port = port;
> - sendfile(fd, cp, mode);
> + tftp_sendfile(fd, cp, mode);
> free(cp);
> }
> }
> Index: src/usr.bin/tftp/tftp.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/tftp/tftp.c,v
> retrieving revision 1.20
> diff -u -r1.20 tftp.c
> --- src/usr.bin/tftp/tftp.c 26 Jul 2006 09:10:03 -0000 1.20
> +++ src/usr.bin/tftp/tftp.c 19 Mar 2007 02:34:41 -0000
> @@ -129,7 +129,7 @@
> * Send the requested file.
> */
> void
> -sendfile(int fd, char *name, char *mode)
> +tftp_sendfile(int fd, char *name, char *mode)
> {
> struct tftphdr *dp, *ap; /* data and ack packets */
> struct sockaddr_in from;
> Index: src/usr.bin/tftp/extern.h
> ===================================================================
> RCS file: /cvs/src/usr.bin/tftp/extern.h,v
> retrieving revision 1.5
> diff -u -r1.5 extern.h
> --- src/usr.bin/tftp/extern.h 26 Jul 2006 16:43:31 -0000 1.5
> +++ src/usr.bin/tftp/extern.h 19 Mar 2007 02:35:58 -0000
> @@ -33,7 +33,7 @@
> */
>
> void recvfile(int, char *, char *);
> -void sendfile(int, char *, char *);
> +void tftp_sendfile(int, char *, char *);
>
> #define TIMEOUT 5 /* packet rexmt timeout */
> #define TIMEOUT_MIN 1 /* minimal packet rexmt timeout */
>


--
paranoic mickey (my employers have changed but, the name has remained)

Digg this Post! | Add Post to del.icio.us | Bookmark Post in Technorati | Furl this Post!
Reply With Quote
Reply


Thread Tools
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

vB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are On
Forum Jump


All times are GMT. The time now is 12:39 AM.


Powered by vBulletin® Version 3.6.5
Copyright ©2000 - 2008, Jelsoft Enterprises Ltd.
SEO by vBSEO 3.2.0
www.UnixAdminTalk.com