diff --git a/README.md b/README.md new file mode 100644 index 0000000..0422910 --- /dev/null +++ b/README.md @@ -0,0 +1,465 @@ +## FD passing for DRI.Next + +Using the DMA-BUF interfaces to pass DRI objects between the client +and server, as discussed in my previous blog posting on [[DRI-Next]], +requires that we successfully pass file descriptors over the X +protocol socket. + +Rumor has it that this has been tried and found to be difficult, and +so I decided to do a bit of experimentation to see how this could be +made to work within the existing X implementation. + +(All of the examples shown here are licensed under the GPL, version 2 +and are available from git://keithp.com/git/fdpassing) + +### Basics of FD passing + +The kernel internals that support FD passing are actually quite simple +-- POSIX already require that two processes be able to share the same +underlying reference to a file because of the semantics of the fork(2) +call. Adding some ability to share arbitrary file descriptors between +two processes then is far more about how you ask the kernel than the +actual file descriptor sharing operation. + +In Linux, file descriptors can be passed through local network +sockets. The sender constructs a mystic-looking sendmsg(2) call, +placing the file descriptor in the control field of that +operation. The kernel pulls the file descriptor out of the control +field, allocates a file descriptor in the target process which +references the same file object and then sticks the file descriptor in +a queue for the receiving process to fetch. + +The receiver then constructs a matching call to recvmsg that provides +a place for the kernel to stick the new file descriptor. + +### A helper API for testing + +I first write a stand-alone program that created a socketpair, forked +and then passed an fd from the parent to the child. Once that was +working, I decided that some short helper functions would make further +testing a whole lot easier. + +Here's a function that writes some data and an optional file +descriptor: + + ssize_t + sock_fd_write(int sock, void *buf, ssize_t buflen, int fd) + { + ssize_t size; + struct msghdr msg; + struct iovec iov; + union { + struct cmsghdr cmsghdr; + char control[CMSG_SPACE(sizeof (int))]; + } cmsgu; + struct cmsghdr *cmsg; + + iov.iov_base = buf; + iov.iov_len = buflen; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (fd != -1) { + msg.msg_control = cmsgu.control; + msg.msg_controllen = sizeof(cmsgu.control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + printf ("passing fd %d\n", fd); + *((int *) CMSG_DATA(cmsg)) = fd; + } else { + msg.msg_control = NULL; + msg.msg_controllen = 0; + printf ("not passing fd\n"); + } + + size = sendmsg(sock, &msg, 0); + + if (size < 0) + perror ("sendmsg"); + return size; + } + +And here's the matching receiver function: + + ssize_t + sock_fd_read(int sock, void *buf, ssize_t bufsize, int *fd) + { + ssize_t size; + + if (fd) { + struct msghdr msg; + struct iovec iov; + union { + struct cmsghdr cmsghdr; + char control[CMSG_SPACE(sizeof (int))]; + } cmsgu; + struct cmsghdr *cmsg; + + iov.iov_base = buf; + iov.iov_len = bufsize; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgu.control; + msg.msg_controllen = sizeof(cmsgu.control); + size = recvmsg (sock, &msg, 0); + if (size < 0) { + perror ("recvmsg"); + exit(1); + } + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { + if (cmsg->cmsg_level != SOL_SOCKET) { + fprintf (stderr, "invalid cmsg_level %d\n", + cmsg->cmsg_level); + exit(1); + } + if (cmsg->cmsg_type != SCM_RIGHTS) { + fprintf (stderr, "invalid cmsg_type %d\n", + cmsg->cmsg_type); + exit(1); + } + + *fd = *((int *) CMSG_DATA(cmsg)); + printf ("received fd %d\n", *fd); + } else + *fd = -1; + } else { + size = read (sock, buf, bufsize); + if (size < 0) { + perror("read"); + exit(1); + } + } + return size; + } + +With these two functions, I rewrote the simple example as follows: + + void + child(int sock) + { + int fd; + char buf[16]; + ssize_t size; + + sleep(1); + for (;;) { + size = sock_fd_read(sock, buf, sizeof(buf), &fd); + if (size <= 0) + break; + printf ("read %d\n", size); + if (fd != -1) { + write(fd, "hello, world\n", 13); + close(fd); + } + } + } + + void + parent(int sock) + { + ssize_t size; + int i; + int fd; + + fd = 1; + size = sock_fd_write(sock, "1", 1, 1); + printf ("wrote %d\n", size); + } + + int + main(int argc, char **argv) + { + int sv[2]; + int pid; + + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit(1); + } + switch ((pid = fork())) { + case 0: + close(sv[0]); + child(sv[1]); + break; + case -1: + perror("fork"); + exit(1); + default: + close(sv[1]); + parent(sv[0]); + break; + } + return 0; + } + +### Experimenting with multiple writes + +I wanted to know what would happen if multiple writes were made, some +with file descriptors and some without. So I changed the simple example +parent function to look like: + + void + parent(int sock) + { + ssize_t size; + int i; + int fd; + + fd = 1; + size = sock_fd_write(sock, "1", 1, -1); + printf ("wrote %d without fd\n", size); + size = sock_fd_write(sock, "1", 1, 1); + printf ("wrote %d with fd\n", size); + size = sock_fd_write(sock, "1", 1, -1); + printf ("wrote %d without fd\n", size); + } + +When run, this demonstrates that the reader gets two bytes in the +first read along with a file descriptor followed by one byte in a +second read, without a file descriptor. This demonstrates that +a file descriptor message forms a barrier within the socket; multiple +messages will be merged together, but not past a message containing a +file descriptor. + +### Reading without accepting a file descriptor + +What happens when the reader isn't expecting a file descriptor? Does +it just get lost? Does the reader not get the message until it asks +for the file descriptor? What about the boundary issue described above? + +Here's my test case: + + void + child(int sock) + { + int fd; + char buf[16]; + ssize_t size; + + sleep(1); + size = sock_fd_read(sock, buf, sizeof(buf), NULL); + if (size <= 0) + return; + printf ("read %d\n", size); + size = sock_fd_read(sock, buf, sizeof(buf), &fd); + if (size <= 0) + return; + printf ("read %d\n", size); + if (fd != -1) { + write(fd, "hello, world\n", 13); + close(fd); + } + } + + void + parent(int sock) + { + ssize_t size; + int i; + int fd; + + fd = 1; + size = sock_fd_write(sock, "1", 1, 1); + printf ("wrote %d without fd\n", size); + size = sock_fd_write(sock, "1", 1, 2); + printf ("wrote %d with fd\n", size); + } + +This shows that the first passed file descriptor is picked up by the +first sock_fd_read call, but the file descriptor is closed. The second +file descriptor passed is picked up by the second sock_fd_read call. + +### Zero-length writes + +Can a file descriptor be passed without sending any data? + + void + parent(int sock) + { + ssize_t size; + int i; + int fd; + + fd = 1; + size = sock_fd_write(sock, "1", 1, -1); + printf ("wrote %d without fd\n", size); + size = sock_fd_write(sock, NULL, 0, 1); + printf ("wrote %d with fd\n", size); + size = sock_fd_write(sock, "1", 1, -1); + printf ("wrote %d without fd\n", size); + } + +And the answer is clearly "no" -- the file descriptor is not passed +when no data are included in the write. + +(update, 2019-5-8 from Yicholas Rishel) + +This is true for SOCK_STREAM sockets, but for SOCK_SEQPACKET sockets, +you *can* do zero-length writes and pass an fd. + +### A summary of results + + + 1. read and recvmsg don't merge data across a file descriptor message boundary. + + 2. failing to accept an fd in the receiver results in the fd being + closed by the kernel. + + 3. a file descriptor must be accompanied by some data. + +## Make X pass file descriptors + +I'd like to get X to pass a file descriptor without completely +rewriting the internals of both the library and the X server. Ideally, +without making any changes to the existing code paths for regular +request processing at all. + +On the sending side, this seems pretty straightforward -- we just need +to get the X connection file descriptor and call sendmsg directly, +passing the desired file descriptor along. In XCB, this could be done +by using the xcb_take_socket interface to temporarily hijack the +protocol as Xlib does. + +It's the receiving side where things are messier. Because a bare read +will discard any delivered file descriptor, we must make sure to use +recvmsg whenever we want to actually capture the file descriptor. + +### Kludge X server fd receiving + +Because a passed fd creates a barrier in the bytestream, when the X +server reads requests from a client, the read will stop sending data +after the message with the file descriptor is consumed. + +Of course, this process consumes the passed file descriptor, and if +that call isn't made with recvmsg set up to receive it, the fd will be +lost. + +As a simple kludge, if we pass a meaningless fd with the X request and +then the 'real' fd with a following XNoOperation request, the existing +request reading code will get the request, discard the meaningless fd +and then stop reading at that point due to the barrier. Once into the +request processing code, recvmsg can be called to get the real file +descriptor and the associated XNoOperation request. + +I wrote a test for this that demonstrates how this works: + + static void + child(int sock) + { + uint8_t xreq[1024]; + uint8_t xnop[4]; + uint8_t req; + int i, reqlen; + ssize_t size, fdsize; + int fd = -1, *fdp; + int j; + + sleep (1); + for (j = 0;; j++) { + size = sock_fd_read(sock, xreq, sizeof (xreq), NULL); + printf ("got %d\n", size); + if (size == 0) + break; + i = 0; + while (i < size) { + req = xreq[i]; + reqlen = xreq[i+1]; + i += reqlen; + switch (req) { + case 0: + break; + case 1: + if (i != size) { + fprintf (stderr, "Got fd req, but not at end of input %d < %d\n", + i, size); + } + fdsize = sock_fd_read(sock, xnop, sizeof (xnop), &fd); + if (fd == -1) { + fprintf (stderr, "no fd received\n"); + } else { + FILE *f = fdopen (fd, "w"); + fprintf(f, "hello %d\n", j); + fflush(f); + fclose(f); + close(fd); + fd = -1; + } + break; + case 2: + fprintf (stderr, "Unexpected FD passing req\n"); + break; + } + } + } + } + + int + tmp_file(int j) { + char name[64]; + + sprintf (name, "tmp-file-%d", j); + return creat(name, 0666); + } + + static void + parent(int sock) + { + uint8_t xreq[32]; + uint8_t xnop[4]; + int i, j; + int fd; + + for (j = 0; j < 4; j++) { + /* Write a bunch of regular requests */ + for (i = 0; i < 8; i++) { + xreq[0] = 0; + xreq[1] = sizeof (xreq); + sock_fd_write(sock, xreq, sizeof (xreq), -1); + } + + /* Write our 'pass an fd' request with a 'useless' FD to block the receiver */ + xreq[0] = 1; + xreq[1] = sizeof(xreq); + sock_fd_write(sock, xreq, sizeof (xreq), 1); + + /* Pass an fd */ + xnop[0] = 2; + xnop[1] = sizeof (xnop); + fd = tmp_file(j); + sock_fd_write(sock, xnop, sizeof (xnop), fd); + close(fd); + } + } + +### Fixing XCB to receive file descriptors + +Multiple threads may be trying to get replies and events back from the +X server at the same time, which means the kludge of having the real +fd follow the message will likely lead to the wrong thread getting the +file descriptor. + +Instead, I suspect the best plan will be to fix XCB to internally +capture passed file descriptors and save them with the associated +reply. Because the file descriptor message will form a barrier in the +read stream, xcb can associate any received file descriptor with the +last reply in the read data. The X server would then send the reply +with an explicit sendmsg call to pass both reply and file descriptor +together. + +## Next steps + +The next thing to do is code up a simple fd passing extension and try +to get it working, passing descriptors back and forth to the X +server. Once that works, design of the rest of the DRM-Next extension +should be pretty straightforward.