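The basic approach to writing a TUN/TAP client (such as a VPN) for Linux is: open /dev/net/tun and create the network interface with ioctl(TUNSETIFF); configure the new interface by assigning it an address and bringing it up; then read and write network packets through the file descriptor.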
There's reasonably complete documentation about each step of this process, but I couldn't find a worked example that tied it all together. The following C program is intended to serve as a basic minimal TUN/TAP client.
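Opening a file is straightforward, so the important part of this function is the ioctl(TUNSETIFF) call. It's this call that creates the network interface, and there are two user-configurable fields: ifr_name, the (optional) name of the interface, and ifr_flags, which selects the device type and framing options.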
The set of possible flags and their effects is documented at Linux Networking Documentation » Universal TUN/TAP device driver.
The interface name, if provided, must be less than IFNAMSIZ bytes. After the ioctl call returns, the ifr_name field can be inspected to see what name the interface was created with.
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/*
 * Opens /dev/net/tun and attaches the descriptor to a TUN/TAP interface
 * with ioctl(TUNSETIFF).
 *
 * iface_name:     requested interface name, or NULL to leave ifr_name empty.
 *                 A name that does not fit in IFNAMSIZ bytes (including its
 *                 terminating NUL) is rejected with errno = EINVAL before
 *                 anything is opened.
 * flags:          stored verbatim in ifr_flags.
 * iface_name_out: if non-NULL, receives IFNAMSIZ bytes copied from ifr_name
 *                 after the ioctl succeeds, so the caller's buffer must be
 *                 at least IFNAMSIZ bytes long.
 *
 * Returns the open file descriptor, or -1 with errno set.
 */
int tuntap_connect(const char *iface_name, short flags, char *iface_name_out)
{
    struct ifreq ifr;
    size_t name_size = 0; /* bytes to copy, including the NUL terminator */
    int fd;

    if (iface_name != NULL) {
        name_size = strlen(iface_name) + 1;
        if (name_size > IFNAMSIZ) {
            errno = EINVAL;
            return -1;
        }
    }

    fd = open("/dev/net/tun", O_RDWR | O_CLOEXEC);
    if (fd == -1) {
        return -1;
    }

    memset(&ifr, 0, sizeof ifr);
    ifr.ifr_flags = flags;
    if (iface_name != NULL) {
        memcpy(ifr.ifr_name, iface_name, name_size);
    }

    if (ioctl(fd, TUNSETIFF, &ifr) == -1) {
        /* close() may overwrite errno; report the ioctl failure instead. */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }

    if (iface_name_out != NULL) {
        memcpy(iface_name_out, ifr.ifr_name, IFNAMSIZ);
    }
    return fd;
}
At this point, most TUN/TAP examples I've found tell the user to configure the newly-created network interface by using the command line to run tools from iproute2. In this post I will instead use the Linux kernel's native Netlink subsystem.
Netlink can be thought of as a sort of RPC-ish request/response protocol, where messages are assembled manually from C structs. Besides the kernel docs linked above, the following manpages are useful for writing a Netlink client:
In this example we will be using the NETLINK_ROUTE mode to send RTM_NEWADDR and RTM_NEWLINK requests. Netlink error handling is a bit obtuse since it requires manual response handling, so I'm not going to bother with it for this example.
The first step is to open an AF_NETLINK socket by calling socket(AF_NETLINK). I'm also calling bind(), which isn't strictly necessary but provides metadata useful to strace.
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#include <arpa/inet.h>
#include <errno.h>
#include <linux/if.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Opens a NETLINK_ROUTE socket and binds it to a zeroed sockaddr_nl whose
 * only populated field is nl_family = AF_NETLINK.
 *
 * Returns the socket descriptor, or -1 with errno set. If bind() fails the
 * socket is closed and the errno value from bind() is preserved.
 */
int netlink_connect(void)
{
    struct sockaddr_nl sockaddr;
    int netlink_fd;

    netlink_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
    if (netlink_fd == -1) {
        return -1;
    }

    memset(&sockaddr, 0, sizeof sockaddr);
    sockaddr.nl_family = AF_NETLINK;

    if (bind(netlink_fd, (struct sockaddr *) &sockaddr, sizeof sockaddr) == -1) {
        /* close() may overwrite errno; report the bind() failure instead. */
        int bind_errno = errno;
        close(netlink_fd);
        errno = bind_errno;
        return -1;
    }

    return netlink_fd;
}
The first Netlink command will be RTM_NEWADDR, which sets the address and prefix length (netmask) of the interface. I've only implemented IPv4 support for this example, but IPv6 is similar.
A Netlink request contains a header (struct nlmsghdr), message content (here that's a struct ifaddrmsg), and an optional list of key-value attributes. The set of necessary attributes isn't well documented, so I ran strace ip addr add and replicated its requests.
/*
 * Sends an RTM_NEWADDR request that assigns an IPv4 address and prefix
 * length to the named interface. The address is supplied in both the
 * IFA_LOCAL and IFA_ADDRESS attributes.
 *
 * netlink_fd:          socket the request is sent on.
 * iface_name:          interface name, resolved with if_nametoindex().
 * address:             IPv4 address in dotted-quad text form.
 * network_prefix_bits: prefix length, 0..32.
 *
 * Returns 0 once the request has been handed to send(), or -1 with errno
 * set. errno is EINVAL when the prefix length exceeds 32 or the address
 * cannot be parsed by inet_pton(). A failed interface lookup also returns
 * -1 (errno as left by if_nametoindex()) without sending anything.
 *
 * No reply is read from the socket, so a request the kernel rejects is not
 * detected here.
 */
int netlink_set_addr_ipv4(
    int netlink_fd,
    const char *iface_name,
    const char *address,
    uint8_t network_prefix_bits)
{
    struct {
        struct nlmsghdr header;
        struct ifaddrmsg content;
        char attributes_buf[64];
    } request;
    struct rtattr *request_attr;
    size_t attributes_buf_avail = sizeof request.attributes_buf;
    struct in_addr parsed_address;
    unsigned int iface_index;

    if (network_prefix_bits > 32) {
        errno = EINVAL;
        return -1;
    }

    /* Validate the address up front instead of silently sending 0.0.0.0. */
    if (inet_pton(AF_INET, address, &parsed_address) != 1) {
        errno = EINVAL;
        return -1;
    }

    /* if_nametoindex() returns 0 when the lookup fails. */
    iface_index = if_nametoindex(iface_name);
    if (iface_index == 0) {
        return -1;
    }

    memset(&request, 0, sizeof request);
    request.header.nlmsg_len = NLMSG_LENGTH(sizeof request.content);
    request.header.nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    request.header.nlmsg_type = RTM_NEWADDR;
    request.content.ifa_index = iface_index;
    request.content.ifa_family = AF_INET;
    request.content.ifa_prefixlen = network_prefix_bits;

    /* request.attributes[IFA_LOCAL] = address */
    request_attr = IFA_RTA(&request.content);
    request_attr->rta_type = IFA_LOCAL;
    request_attr->rta_len = RTA_LENGTH(sizeof parsed_address);
    request.header.nlmsg_len += request_attr->rta_len;
    memcpy(RTA_DATA(request_attr), &parsed_address, sizeof parsed_address);

    /* request.attributes[IFA_ADDRESS] = address */
    request_attr = RTA_NEXT(request_attr, attributes_buf_avail);
    request_attr->rta_type = IFA_ADDRESS;
    request_attr->rta_len = RTA_LENGTH(sizeof parsed_address);
    request.header.nlmsg_len += request_attr->rta_len;
    memcpy(RTA_DATA(request_attr), &parsed_address, sizeof parsed_address);

    if (send(netlink_fd, &request, request.header.nlmsg_len, 0) == -1) {
        return -1;
    }
    return 0;
}
The second Netlink command uses RTM_NEWLINK to enable the interface. It's equivalent to running ip link set up.
/*
 * Sends an RTM_NEWLINK request that sets the IFF_UP flag on the named
 * interface.
 *
 * netlink_fd: socket the request is sent on.
 * iface_name: interface name, resolved with if_nametoindex().
 *
 * Returns 0 once the request has been handed to send(), or -1 with errno
 * set. A failed interface lookup returns -1 without sending anything.
 *
 * No reply is read from the socket, so a request the kernel rejects is not
 * detected here.
 */
int netlink_link_up(int netlink_fd, const char *iface_name)
{
    struct {
        struct nlmsghdr header;
        struct ifinfomsg content;
    } request;
    unsigned int iface_index;

    /* if_nametoindex() returns 0 when the lookup fails. */
    iface_index = if_nametoindex(iface_name);
    if (iface_index == 0) {
        return -1;
    }

    memset(&request, 0, sizeof request);
    request.header.nlmsg_len = NLMSG_LENGTH(sizeof request.content);
    request.header.nlmsg_flags = NLM_F_REQUEST;
    request.header.nlmsg_type = RTM_NEWLINK;
    request.content.ifi_index = (int) iface_index;
    request.content.ifi_flags = IFF_UP;
    /* ifi_change is the mask of flag bits to modify: only IFF_UP here. */
    request.content.ifi_change = IFF_UP;

    if (send(netlink_fd, &request, request.header.nlmsg_len, 0) == -1) {
        return -1;
    }
    return 0;
}
At this point the TUN/TAP interface has been fully configured and is just waiting for our process to read/write network data.
For this example I'll be writing a very simple tun2udp binary, which forwards IPv4 packets to/from UDP on localhost. Compile it with GCC or Clang:
# Build the proxy.
gcc -o tun2udp tun2udp.c

# Packets read from the TUN interface are sent to UDP port $send_port on
# localhost; datagrams received on UDP port $recv_port are written back to
# the interface.
send_port=12345
recv_port=12346

# Arguments: <address>[/<prefix-bits>] <send-port> <recv-port>
sudo ./tun2udp 10.11.12.0/24 "$send_port" "$recv_port"
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Forwards data between the TUN/TAP descriptor and two UDP sockets until an
 * error occurs:
 *
 *   - everything read from tuntap_fd is sent on send_fd;
 *   - every datagram received on recv_fd is written to tuntap_fd.
 *
 * Never returns on success. Returns -1 with errno set when poll(), read(),
 * recv() or write() fails. It also returns -1 with errno = EIO when poll()
 * reports POLLERR, POLLHUP or POLLNVAL on a descriptor that has no data to
 * read; without that check the loop would spin forever on such a
 * descriptor. A poll() interrupted by a signal (EINTR) is retried.
 */
int run_proxy(int tuntap_fd, int send_fd, int recv_fd)
{
    const short failure_events = POLLERR | POLLHUP | POLLNVAL;
    struct pollfd poll_fds[2];
    char packet_buf[UINT16_MAX];

    memset(poll_fds, 0, sizeof poll_fds);
    poll_fds[0].fd = tuntap_fd;
    poll_fds[0].events = POLLIN;
    poll_fds[1].fd = recv_fd;
    poll_fds[1].events = POLLIN;

    for (;;) {
        if (poll(poll_fds, 2, -1) == -1) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }

        /* TUN/TAP -> UDP */
        if ((poll_fds[0].revents & POLLIN) != 0) {
            ssize_t count = read(tuntap_fd, packet_buf, sizeof packet_buf);
            if (count < 0) {
                return -1;
            }
            /*
             * The send() result is deliberately ignored, as in the original
             * code. NOTE(review): presumably best-effort so that a missing
             * UDP peer does not stop the proxy -- confirm.
             */
            (void) send(send_fd, packet_buf, (size_t) count, 0);
        } else if ((poll_fds[0].revents & failure_events) != 0) {
            errno = EIO;
            return -1;
        }

        /* UDP -> TUN/TAP */
        if ((poll_fds[1].revents & POLLIN) != 0) {
            ssize_t count = recv(recv_fd, packet_buf, sizeof packet_buf, 0);
            if (count < 0) {
                return -1;
            }
            if (write(tuntap_fd, packet_buf, (size_t) count) == -1) {
                return -1;
            }
        } else if ((poll_fds[1].revents & failure_events) != 0) {
            errno = EIO;
            return -1;
        }
    }
}

/* How open_localhost_udp() attaches the new socket to 127.0.0.1:<port>. */
enum localhost_udp_mode {
    LOCALHOST_UDP_CONNECT,
    LOCALHOST_UDP_BIND
};

/*
 * Creates an IPv4 UDP socket and either connect()s or bind()s it to
 * 127.0.0.1:<port>, depending on mode. Returns the socket, or -1 with errno
 * set; on failure the socket is closed and the original errno is preserved.
 */
static int open_localhost_udp(uint16_t port, enum localhost_udp_mode mode)
{
    struct sockaddr_in addr;
    int fd, rc;

    fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd == -1) {
        return -1;
    }

    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    addr.sin_addr.s_addr = inet_addr("127.0.0.1");

    if (mode == LOCALHOST_UDP_BIND) {
        rc = bind(fd, (struct sockaddr *) &addr, sizeof addr);
    } else {
        rc = connect(fd, (struct sockaddr *) &addr, sizeof addr);
    }
    if (rc == -1) {
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }
    return fd;
}

/*
 * Returns a UDP socket connect()ed to 127.0.0.1:<port>, or -1 with errno
 * set.
 *
 * NOTE: despite its name this function calls connect(), not bind(). main()
 * uses the result as the *sending* socket, which is what a connected socket
 * is suited for, so the behaviour is kept and only the name is misleading.
 */
int bind_localhost_udp(uint16_t port)
{
    return open_localhost_udp(port, LOCALHOST_UDP_CONNECT);
}

/*
 * Returns a UDP socket bind()ed to 127.0.0.1:<port>, or -1 with errno set.
 *
 * NOTE: despite its name this function calls bind(), not connect(). main()
 * uses the result as the *receiving* socket, which is what a bound socket
 * is suited for, so the behaviour is kept and only the name is misleading.
 */
int connect_localhost_udp(uint16_t port)
{
    return open_localhost_udp(port, LOCALHOST_UDP_BIND);
}
The rest of the code is just argument parsing. For the TUN interface address it accepts an IPv4 dotted quad with an optional prefix length such as /24 (defaulting to /32).
/*
 * Parses a non-negative base-10 integer no greater than max_value.
 *
 * The string must start with a digit and consist only of digits, so empty
 * strings, leading whitespace and explicit signs are rejected. Returns 0
 * and stores the value in *value_out, or returns -1 and leaves *value_out
 * untouched.
 */
static int parse_bounded_decimal(const char *str, long max_value, long *value_out)
{
    char *end;
    long value;

    if (str[0] < '0' || str[0] > '9') {
        return -1;
    }
    /* On overflow strtol() returns LONG_MAX, which the range check rejects. */
    value = strtol(str, &end, 10);
    if (*end != '\0' || value > max_value) {
        return -1;
    }
    *value_out = value;
    return 0;
}

/*
 * Splits "<ipv4-address>[/<prefix-bits>]" in place.
 *
 * address_str:         mutable string. On success any '/' is overwritten
 *                      with NUL so the string holds only the address; on
 *                      failure the string is left exactly as it was passed.
 * network_prefix_bits: receives the prefix length (0..32), or 32 when no
 *                      "/<prefix-bits>" suffix is present. Only written on
 *                      success.
 *
 * The address is checked with inet_pton(AF_INET), the same parser that
 * netlink_set_addr_ipv4() applies to it later, so only dotted quads are
 * accepted.
 *
 * Returns 0 on success, -1 if the address or the prefix is invalid.
 */
int split_address(char *address_str, uint8_t *network_prefix_bits)
{
    struct in_addr parsed_address;
    long prefix_raw = 32;
    char *prefix_sep = strchr(address_str, '/');

    /* Validate the prefix before touching the string. */
    if (prefix_sep != NULL) {
        if (parse_bounded_decimal(prefix_sep + 1, 32, &prefix_raw) == -1) {
            return -1;
        }
        *prefix_sep = '\0';
    }

    if (inet_pton(AF_INET, address_str, &parsed_address) != 1) {
        if (prefix_sep != NULL) {
            *prefix_sep = '/';
        }
        return -1;
    }

    *network_prefix_bits = (uint8_t) prefix_raw;
    return 0;
}

/*
 * Parses a decimal port number in the range 0..UINT16_MAX.
 *
 * Returns 0 and stores the value in *port, or returns -1 if port_str is
 * empty, contains anything other than digits, or is out of range.
 */
int parse_port(char *port_str, uint16_t *port)
{
    long raw;

    if (parse_bounded_decimal(port_str, UINT16_MAX, &raw) == -1) {
        return -1;
    }
    *port = (uint16_t) raw;
    return 0;
}
Finally we get to main() and can glue everything together. Copy (or #include) the TUN/TAP and Netlink code from earlier sections. The TUN/TAP flags are hardcoded to IFF_TUN | IFF_NO_PI, which means it will send/receive IP packets with no additional framing. The interface name will be assigned by the kernel.
/*
 * tun2udp entry point.
 *
 * Usage: tun2udp <address>[/<prefix-bits>] <send-port> <recv-port>
 *
 * Parses the arguments, opens the two localhost UDP sockets, creates a TUN
 * interface (IFF_TUN | IFF_NO_PI) with a kernel-chosen name, assigns the
 * address and brings the link up over Netlink, then runs the forwarding
 * loop. Returns 1 after printing a diagnostic to stderr if any step fails.
 */
int main(int argc, char **argv)
{
    int tuntap_fd, netlink_fd, send_fd, recv_fd;
    char iface_name[IFNAMSIZ];
    char *address;
    uint8_t prefix_bits;
    uint16_t send_port, recv_port;

    if (argc < 4) {
        fprintf(stderr, "Usage: %s <address> <send-port> <recv-port>\n", argv[0]);
        return 1;
    }

    /* split_address() strips any "/<prefix>" suffix from argv[1] in place. */
    address = argv[1];
    if (split_address(address, &prefix_bits) == -1) {
        fprintf(stderr, "Invalid address \"%s\"\n", argv[1]);
        return 1;
    }
    if (parse_port(argv[2], &send_port) == -1) {
        fprintf(stderr, "Invalid port \"%s\"\n", argv[2]);
        return 1;
    }
    if (parse_port(argv[3], &recv_port) == -1) {
        fprintf(stderr, "Invalid port \"%s\"\n", argv[3]);
        return 1;
    }

    /* Despite its name, bind_localhost_udp() connect()s the socket. */
    send_fd = bind_localhost_udp(send_port);
    if (send_fd == -1) {
        /* fprintf() may change errno, so preserve it for perror(). */
        int saved_errno = errno;
        fprintf(stderr, "bind_localhost_udp(%u): ", (unsigned int) send_port);
        errno = saved_errno;
        perror(NULL);
        return 1;
    }

    /* Despite its name, connect_localhost_udp() bind()s the socket. */
    recv_fd = connect_localhost_udp(recv_port);
    if (recv_fd == -1) {
        int saved_errno = errno;
        fprintf(stderr, "connect_localhost_udp(%u): ", (unsigned int) recv_port);
        errno = saved_errno;
        perror(NULL);
        return 1;
    }

    /* NULL name: the interface name actually used is returned in iface_name. */
    tuntap_fd = tuntap_connect(NULL, IFF_TUN | IFF_NO_PI, iface_name);
    if (tuntap_fd == -1) {
        perror("tuntap_connect");
        return 1;
    }

    netlink_fd = netlink_connect();
    if (netlink_fd == -1) {
        perror("netlink_connect");
        return 1;
    }
    if (netlink_set_addr_ipv4(netlink_fd, iface_name, address, prefix_bits) == -1) {
        perror("netlink_set_addr_ipv4");
        return 1;
    }
    if (netlink_link_up(netlink_fd, iface_name) == -1) {
        perror("netlink_link_up");
        return 1;
    }
    /* The Netlink socket is only needed for configuration. */
    close(netlink_fd);

    if (run_proxy(tuntap_fd, send_fd, recv_fd) == -1) {
        perror("run_proxy");
        return 1;
    }
    return 0;
}
If the Netlink socket has bind() called on it, then the traced RTM_NEWADDR command is formatted like this:
sendto(6, [ { nlmsg_len=40, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=0, nlmsg_pid=0 }, { ifa_family=AF_INET, ifa_prefixlen=24, ifa_flags=0, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("tun0") }, [ [{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("10.10.0.1")], [{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("10.10.0.1")] ] ], 40, 0, NULL, 0) = 40
If the socket does not have bind() called on it, then the same command is formatted like this:
sendto(6, [ { nlmsg_len=40, nlmsg_type=0x14 /* NLMSG_??? */, nlmsg_flags=NLM_F_REQUEST|0x600, nlmsg_seq=0, nlmsg_pid=0 }, "\x02\x18\x00\x00\x55\x00\x00\x00\x08\x00\x02\x00\x0a\x0a\x00\x01\x08\x00\x01\x00\x0a\x0a\x00\x01" ], 40, 0, NULL, 0) = 40