This is a discussion on Re: sendfile(2) of 03-18 within the mailing.openbsd.tech forums, part of the OpenBSD category; On Sat, Mar 17, 2007 at 10:37:52PM -0400, tbert wrote: > After a few days of digging in the ...
| |||||||
| FAQ | Members List | Calendar | Search | Today's Posts | Mark Forums Read |
| ||||
| On Sat, Mar 17, 2007 at 10:37:52PM -0400, tbert wrote: > After a few days of digging in the VFS code, I've used the > VOP* call that seems appropriate to populate bufs, vice > the bread call I had before; I've added a quick'n'dirty > readahead for performance reasons (or so I think...) and > addressed a major oversight on my part concerning the lack > of paying attention to the offset. There is already a function that does the reading -- vn_rdwr. > I'm still somewhat unsure as to how to deal with the error > return from copyout vice the error return from sosendfile. > Which is more important? Of course, this issue could be > rendered entirely moot, as I'm starting to look towards > emulating the Linux sendfile interface, as it is simpler > and more consistent with the other send* functions in appearance > and use. Well, it's easy -- you take errors as they come. > I'd much appreciate any feedback on the API issue from the > community, especially those of you maintaining network software. Why don't you read the existing source before you write anything? And don't forget to write a test for it, either. 
> Index: src/sys/kern/uipc_syscalls.c > ================================================== ================= > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.66 > diff -u -r1.66 uipc_syscalls.c > --- src/sys/kern/uipc_syscalls.c 23 Oct 2006 07:13:56 -0000 1.66 > +++ src/sys/kern/uipc_syscalls.c 19 Mar 2007 02:28:59 -0000 > @@ -47,6 +47,7 @@ > #include <sys/signalvar.h> > #include <sys/unpcb.h> > #include <sys/un.h> > +#include <sys/vnode.h> > #ifdef KTRACE > #include <sys/ktrace.h> > #endif > @@ -1091,4 +1092,69 @@ > FREF(fp); > > return (0); > +} > + > +int > +sys_sendfile(struct proc *p, void* v, register_t *retval) > +{ > + struct sys_sendfile_args /* { > + syscallarg(int) fd; > + syscallarg(int) s; > + syscallarg(off_t) off; > + syscallarg(size_t) nbytes; > + syscallarg(off_t *) sbytes; > + syscallarg(int) flags; > + } */ *uap = v; > + struct file *fp, *sp; > + struct socket *so; > + struct vnode *vp; > + off_t *sbytes, sbtmp, off; > + size_t nbytes; > + int error, flags, fd, s; > + > + nbytes = SCARG(uap, nbytes); > + sbytes = SCARG(uap, sbytes); > + flags = SCARG(uap, flags); > + off = SCARG(uap, off); > + fd = SCARG(uap, fd); > + s = SCARG(uap, s); > + > + /* > + * Malicious or incorrect values > + * may cause off + nbytes to wrap. 
> + */ > + if (off < 0 || off + nbytes < off) { > + error = EINVAL; > + goto out; > + } > + > + /* validate userspace pointer */ > + if (sbytes != NULL) > + if ((error = copyin(sbytes, &sbtmp, sizeof(off_t))) != 0) > + goto out; > + > + if ((error = getvnode(p->p_fd, fd, &fp)) != 0) > + goto out; > + vp = fp->f_data; > + > + if ((error = getsock(p->p_fd, s, &sp)) != 0) > + goto vfail; > + so = sp->f_data; > + > + if (!(so->so_type & SOCK_STREAM)) { > + error = EINVAL; > + goto sfail; > + } > + > + error = sosendfile(so, vp, nbytes, &sbtmp, p, off, flags); > + > + if (sbytes != NULL) > + copyout(&sbtmp, sbytes, sizeof(off_t)); > + > +sfail: > + FRELE(sp); > +vfail: > + FRELE(fp); > +out: > + return (error); > } > Index: src/sys/kern/uipc_socket.c > ================================================== ================= > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.66 > diff -u -r1.66 uipc_socket.c > --- src/sys/kern/uipc_socket.c 26 Feb 2007 23:53:33 -0000 1.66 > +++ src/sys/kern/uipc_socket.c 19 Mar 2007 02:29:11 -0000 > @@ -47,6 +47,9 @@ > #include <sys/signalvar.h> > #include <sys/resourcevar.h> > #include <sys/pool.h> > +#include <sys/buf.h> > +#include <sys/vnode.h> > +#include <sys/mount.h> > > void filt_sordetach(struct knote *kn); > int filt_soread(struct knote *kn, long hint); > @@ -54,6 +57,8 @@ > int filt_sowrite(struct knote *kn, long hint); > int filt_solisten(struct knote *kn, long hint); > > +void sf_free_fbuf(caddr_t, u_int, void *); > + > struct filterops solisten_filtops = > { 1, NULL, filt_sordetach, filt_solisten }; > struct filterops soread_filtops = > @@ -1295,4 +1300,184 @@ > > kn->kn_data = so->so_qlen; > return (so->so_qlen != 0); > +} > + > +/* > + * Implement zero-copy file transmit over a socket. > + * > + * Socket MUST be SOCK_STREAM. > + * > + * File bufs are used as external data storage for mbufs, > + * which are passed to the appropriate socket send routine. 
> + * > + * If a user has specified a number of bytes to send, transmit > + * that amount of data; otherwise, send the entire file. Return > + * the number of bytes sent in sbytes. > + * > + * Flags: > + * SF_NODISKIO: > + * return EBUSY instead of waiting > + * for disk I/O to complete > + */ > +#define SFRABLKS 2 /* better value is ??? */ > +int > +sosendfile(struct socket *so, struct vnode *vp, size_t nbytes, off_t *sbytes, > + struct proc *p, off_t off, int flags) > +{ > + struct vattr va; > + struct mbuf *m, *m0; > + struct buf *bp, *bbp; > + ssize_t space; > + off_t resid, sent, sent0; > + u_int sblk, eblk, len; > + int error, i, j, s; > + > + sent = sent0 = 0; > + m = NULL; > + > + s = splsoftnet(); > + > + if ((error = sblock(&so->so_snd, M_WAITOK)) != 0) > + goto out; > + > + if ((vn_lock(vp, (LK_EXCLUSIVE | LK_RETRY), p) != 0) || > + (VOP_GETATTR(vp, &va, p->p_ucred, p) != 0)) { > + error = EIO; > + goto release; > + } > + > + /* > + * off + nbytes wraparound validated in sys_sendfile() > + */ > + if (off + nbytes > va.va_size) { > + error = EINVAL; > + goto release; > + } > + > + if (nbytes > 0) > + resid = nbytes; > + else > + resid = va.va_size - off; > + > + eblk = (u_int)((off + resid) / va.va_blocksize); > + sblk = (u_int)(off / va.va_blocksize); > + off -= sblk * va.va_blocksize; > + > + so->so_state |= SS_ISSENDING; > + > + for (i = sblk; i <= eblk > + > + if (!(so->so_state & SS_ISCONNECTED)) > + error = ENOTCONN; > + else if (so->so_state & SS_CANTSENDMORE) > + error = EPIPE; > + else if (so->so_error) { > + error = so->so_error; > + so->so_error = 0; > + } > + if (error) > + goto release; > + > +retry: > + space = sbspace(&so->so_snd); > + if (space < so->so_snd.sb_lowat && space < resid) { > + if ((error = sbwait(&so->so_snd)) != 0) > + goto release; > + > + goto retry; > + } > + if (space > resid) > + space = resid; > + > + while (space > 0) { > + > + bp = getblk(vp, i, va.va_blocksize, 0, 0); > + > + /* > + * If it wasn't found in the cache, 
then disk I/O > + * is needed; is that what the caller asked for? > + */ > + if (!(bp->b_flags & (B_DONE | B_DELWRI))) { > + if (flags & SF_NODISKIO) { > + error = EBUSY; > + brelse(bp); > + goto release; > + } > + bp->b_flags |= B_READ; > + VOP_STRATEGY(bp); > + p->p_stats->p_ru.ru_inblock++; > + } > + > + /* async readahead */ > + for (j = 1; j < min(eblk, i + SFRABLKS); j++) { > + bbp = getblk(vp, i + j, va.va_blocksize, 0, 0); > + if (!(bbp->b_flags & (B_DONE | B_DELWRI))) { > + bbp->b_flags |= (B_READ | B_ASYNC); > + VOP_STRATEGY(bbp); > + p->p_stats->p_ru.ru_inblock++; > + } else { > + brelse(bbp); > + } > + } > + > + error = biowait(bp); > + if (error) { > + brelse(bp); > + goto release; > + } > + > + len = (u_int)bp->b_bcount - off; > + if (len > resid) > + len = (u_int)resid; > + > + if (m == NULL) { > + MGETHDR(m0, M_WAITOK, MT_DATA); > + m = m0; > + m->m_pkthdr.rcvif = NULL; > + m->m_pkthdr.len = 0; > + } else { > + MGET(m0, M_WAITOK, MT_DATA); > + m_cat(m, m0); > + } > + MEXTADD(m0, bp->b_data + off, len, > + M_FILE, sf_free_fbuf, bp); > + m->m_pkthdr.len += len; > + > + sent0 += len; > + resid -= len; > + space -= len; > + off = 0; > + i++; > + } > + > + error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, NULL, NULL); > + if (error) > + goto release; > + > + m_freem(m); > + > + sent = sent0; > + } > + > +release: > + VOP_UNLOCK(vp, 0, p); > + so->so_state &= ~SS_ISSENDING; > + sbunlock(&so->so_snd); > +out: > + splx(s); > + if (m != NULL) > + m_freem(m); > + *sbytes = sent; > + return (error); > +} > + > +/* > + * Utility function to free file buffers > + * as they are consumed in sosendfile. 
> + */ > +/* ARGSUSED */ > +void > +sf_free_fbuf(caddr_t buf, u_int size, void *arg) > +{ > + brelse(arg); > } > Index: src/sys/kern/syscalls.master > ================================================== ================= > RCS file: /cvs/src/sys/kern/syscalls.master,v > retrieving revision 1.86 > diff -u -r1.86 syscalls.master > --- src/sys/kern/syscalls.master 22 Sep 2006 17:35:41 -0000 1.86 > +++ src/sys/kern/syscalls.master 19 Mar 2007 02:29:26 -0000 > @@ -611,3 +611,5 @@ > 304 STD { int sys___getcwd(char *buf, size_t len); } > 305 STD { int sys_adjfreq(const int64_t *freq, \ > int64_t *oldfreq); } > +306 STD { int sys_sendfile(int fd, int s, off_t off, \ > + size_t nbytes, off_t *sbytes, int flags); } > Index: src/sys/sys/socket.h > ================================================== ================= > RCS file: /cvs/src/sys/sys/socket.h,v > retrieving revision 1.53 > diff -u -r1.53 socket.h > --- src/sys/sys/socket.h 31 Mar 2006 17:30:39 -0000 1.53 > +++ src/sys/sys/socket.h 19 Mar 2007 02:29:47 -0000 > @@ -436,6 +436,11 @@ > > #define SA_LEN(x) ((x)->sa_len) > > +/* > + * Sendfile flags. 
> + */ > +#define SF_NODISKIO 0x0001 > + > #ifndef _KERNEL > > #include <sys/cdefs.h> > @@ -453,6 +458,7 @@ > ssize_t recvfrom(int, void *, size_t, int, struct sockaddr *, socklen_t *); > ssize_t recvmsg(int, struct msghdr *, int); > ssize_t send(int, const void *, size_t, int); > +int sendfile(int, int, off_t, size_t, off_t *, int); > ssize_t sendto(int, const void *, > size_t, int, const struct sockaddr *, socklen_t); > ssize_t sendmsg(int, const struct msghdr *, int); > Index: src/sys/sys/socketvar.h > ================================================== ================= > RCS file: /cvs/src/sys/sys/socketvar.h,v > retrieving revision 1.39 > diff -u -r1.39 socketvar.h > --- src/sys/sys/socketvar.h 26 Feb 2007 23:53:33 -0000 1.39 > +++ src/sys/sys/socketvar.h 19 Mar 2007 02:29:56 -0000 > @@ -37,6 +37,8 @@ > > TAILQ_HEAD(soqhead, socket); > > +struct vnode; > + > /* > * Kernel structure per socket. > * Contains send and receive buffer queues, > @@ -302,6 +304,8 @@ > void sorflush(struct socket *so); > int sosend(struct socket *so, struct mbuf *addr, struct uio *uio, > struct mbuf *top, struct mbuf *control, int flags); > +int sosendfile(struct socket *so, struct vnode *vp, size_t nbytes, > + off_t *sbytes, struct proc *p, off_t off, int flags); > int sosetopt(struct socket *so, int level, int optname, > struct mbuf *m0); > int soshutdown(struct socket *so, int how); > Index: src/lib/libc/sys/Makefile.inc > ================================================== ================= > RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v > retrieving revision 1.80 > diff -u -r1.80 Makefile.inc > --- src/lib/libc/sys/Makefile.inc 24 Oct 2006 04:40:59 -0000 1.80 > +++ src/lib/libc/sys/Makefile.inc 19 Mar 2007 02:30:52 -0000 > @@ -50,7 +50,8 @@ > munlock.o munlockall.o munmap.o nanosleep.o nfssvc.o \ > open.o pathconf.o pipe.o poll.o profil.o quotactl.o \ > read.o readlink.o readv.o reboot.o recvfrom.o recvmsg.o rename.o \ > - revoke.o rmdir.o select.o semget.o semop.o sendmsg.o 
sendto.o \ > + revoke.o rmdir.o select.o semget.o semop.o sendfile.o sendmsg.o \ > + sendto.o \ > setegid.o seteuid.o setgid.o setgroups.o setitimer.o setpgid.o \ > setpriority.o setregid.o setreuid.o setresgid.o setresuid.o \ > setrlimit.o setsid.o setsockopt.o settimeofday.o \ > Index: src/libexec/tftpd/tftpd.c > ================================================== ================= > RCS file: /cvs/src/libexec/tftpd/tftpd.c,v > retrieving revision 1.54 > diff -u -r1.54 tftpd.c > --- src/libexec/tftpd/tftpd.c 15 Dec 2006 05:52:06 -0000 1.54 > +++ src/libexec/tftpd/tftpd.c 19 Mar 2007 02:31:28 -0000 > @@ -83,7 +83,7 @@ > __dead void usage(void); > void tftp(struct tftphdr *, int); > int validate_access(char *, int); > -int sendfile(struct formats *); > +int tftpd_sendfile(struct formats *); > int recvfile(struct formats *); > void nak(int); > void oack(int); > @@ -112,8 +112,8 @@ > int (*f_recv)(struct formats *); > int f_convert; > } formats[] = { > - { "netascii", validate_access, sendfile, recvfile, 1 }, > - { "octet", validate_access, sendfile, recvfile, 0 }, > + { "netascii", validate_access, tftpd_sendfile, recvfile, 1 }, > + { "octet", validate_access, tftpd_sendfile, recvfile, 0 }, > { NULL, NULL, NULL, NULL, 0 } > }; > > @@ -603,7 +603,7 @@ > * Send the requested file. 
> */ > int > -sendfile(struct formats *pf) > +tftpd_sendfile(struct formats *pf) > { > struct tftphdr *dp, *r_init(void); > struct tftphdr *ap; /* ack packet */ > Index: src/usr.bin/tftp/main.c > ================================================== ================= > RCS file: /cvs/src/usr.bin/tftp/main.c,v > retrieving revision 1.28 > diff -u -r1.28 main.c > --- src/usr.bin/tftp/main.c 26 Jul 2006 22:43:53 -0000 1.28 > +++ src/usr.bin/tftp/main.c 19 Mar 2007 02:32:32 -0000 > @@ -381,7 +381,7 @@ > printf("putting %s to %s:%s [%s]\n", > cp, hostname, targ, mode); > peeraddr.sin_port = port; > - sendfile(fd, targ, mode); > + tftp_sendfile(fd, targ, mode); > return; > } > > @@ -402,7 +402,7 @@ > printf("putting %s to %s:%s [%s]\n", > argv[n], hostname, cp, mode); > peeraddr.sin_port = port; > - sendfile(fd, cp, mode); > + tftp_sendfile(fd, cp, mode); > free(cp); > } > } > Index: src/usr.bin/tftp/tftp.c > ================================================== ================= > RCS file: /cvs/src/usr.bin/tftp/tftp.c,v > retrieving revision 1.20 > diff -u -r1.20 tftp.c > --- src/usr.bin/tftp/tftp.c 26 Jul 2006 09:10:03 -0000 1.20 > +++ src/usr.bin/tftp/tftp.c 19 Mar 2007 02:34:41 -0000 > @@ -129,7 +129,7 @@ > * Send the requested file. 
> */ > void > -sendfile(int fd, char *name, char *mode) > +tftp_sendfile(int fd, char *name, char *mode) > { > struct tftphdr *dp, *ap; /* data and ack packets */ > struct sockaddr_in from; > Index: src/usr.bin/tftp/extern.h > ================================================== ================= > RCS file: /cvs/src/usr.bin/tftp/extern.h,v > retrieving revision 1.5 > diff -u -r1.5 extern.h > --- src/usr.bin/tftp/extern.h 26 Jul 2006 16:43:31 -0000 1.5 > +++ src/usr.bin/tftp/extern.h 19 Mar 2007 02:35:58 -0000 > @@ -33,7 +33,7 @@ > */ > > void recvfile(int, char *, char *); > -void sendfile(int, char *, char *); > +void tftp_sendfile(int, char *, char *); > > #define TIMEOUT 5 /* packet rexmt timeout */ > #define TIMEOUT_MIN 1 /* minimal packet rexmt timeout */ > -- paranoic mickey (my employers have changed but, the name has remained) |