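The basic approach to writing a TUN/TAP client (such as a VPN) for Linux is: (1) open the /dev/net/tun device file, (2) call ioctl(TUNSETIFF) on the file descriptor to create the network interface, (3) configure the interface's address and bring it up, and (4) read() and write() packets on the file descriptor.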
There's reasonably complete documentation about each step of this process, but I couldn't find a worked example that tied it all together. The following C program is intended to serve as a basic minimal TUN/TAP client.
Opening a file is straightforward, so the important part of this function is the ioctl(TUNSETIFF) call. It's this call that creates the network interface, and there are two user-configurable fields: the interface flags (ifr_flags) and the interface name (ifr_name).
The set of possible flags and their effects are documented at Linux Networking Documentation » Universal TUN/TAP device driver.
The interface name, if provided, must be less than IFNAMSIZ bytes. After the ioctl call returns, the ifr_name field can be inspected to see what name the interface was created with.
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#define _POSIX_C_SOURCE 200809L
#include <errno.h>
#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
/*
 * Open /dev/net/tun and attach the descriptor to a TUN/TAP interface with
 * ioctl(TUNSETIFF).
 *
 * iface_name:     requested interface name, or NULL to leave the name field
 *                 of the request empty. A name of IFNAMSIZ bytes or more is
 *                 rejected with EINVAL before the device is opened.
 * flags:          stored in the request's ifr_flags field.
 * iface_name_out: if non-NULL, receives IFNAMSIZ bytes copied from the
 *                 request's ifr_name field after the ioctl succeeds.
 *
 * Returns the open file descriptor, or -1 with errno set.
 */
int tuntap_connect(const char *iface_name, short flags, char *iface_name_out) {
    struct ifreq req;
    size_t name_len = 0;
    int fd;

    if (iface_name != NULL) {
        name_len = strlen(iface_name);
        if (name_len >= IFNAMSIZ) {
            errno = EINVAL;
            return -1;
        }
    }

    fd = open("/dev/net/tun", O_RDWR | O_CLOEXEC);
    if (fd == -1) {
        return -1;
    }

    memset(&req, 0, sizeof req);
    req.ifr_flags = flags;
    if (iface_name != NULL) {
        /* Length was validated above, so the name plus its NUL fits. */
        memcpy(req.ifr_name, iface_name, name_len + 1);
    }

    if (ioctl(fd, TUNSETIFF, &req) == -1) {
        /* close() may overwrite errno; report the ioctl failure instead. */
        const int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }

    if (iface_name_out != NULL) {
        memcpy(iface_name_out, req.ifr_name, IFNAMSIZ);
    }
    return fd;
}
At this point, most TUN/TAP examples I've found tell the user to configure the newly-created network interface by using the command line to run tools from iproute2. In this post I will instead use the Linux kernel's native Netlink subsystem.
Netlink can be thought of as a sort of RPC-ish request/response protocol, where messages are assembled manually from C structs. Besides the kernel docs linked above, the following manpages are useful for writing a Netlink client: netlink(7), rtnetlink(7), netlink(3), and rtnetlink(3).
In this example we will be using the NETLINK_ROUTE mode to send RTM_NEWADDR and RTM_NEWLINK requests. Netlink error handling is a bit obtuse since it requires manual response handling, so I'm not going to bother with it for this example.
The first step is to open an AF_NETLINK socket by calling socket(AF_NETLINK). I'm also calling bind(), which isn't strictly necessary but provides metadata useful to strace.
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#include <arpa/inet.h>
#include <errno.h>
#include <linux/if.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
/*
 * Create a NETLINK_ROUTE socket (raw, close-on-exec) and bind it to a
 * zeroed AF_NETLINK address.
 *
 * Returns the socket descriptor, or -1 with errno set. If bind() fails the
 * socket is closed and bind()'s errno is preserved.
 */
int netlink_connect() {
    struct sockaddr_nl local_addr;
    int fd;

    fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
    if (fd == -1) {
        return -1;
    }

    memset(&local_addr, 0, sizeof local_addr);
    local_addr.nl_family = AF_NETLINK;

    if (bind(fd, (struct sockaddr*) &local_addr, sizeof local_addr) != -1) {
        return fd;
    }

    {
        /* close() may overwrite errno; keep the bind() failure code. */
        const int saved_errno = errno;
        close(fd);
        errno = saved_errno;
    }
    return -1;
}
The first Netlink command will be RTM_NEWADDR, which sets the address and prefix length (netmask) of the interface. I've only implemented IPv4 support for this example, but IPv6 is similar.
A Netlink request contains a header (struct nlmsghdr), message content (here that's a struct ifaddrmsg), and an optional list of key-value attributes. The set of necessary attributes isn't well documented, so I ran strace ip addr add and replicated its requests.
/*
 * Assign an IPv4 address and prefix length to a network interface by sending
 * an RTM_NEWADDR request on a NETLINK_ROUTE socket.
 *
 * netlink_fd:          socket to send the request on.
 * iface_name:          name of the interface to configure.
 * address:             IPv4 address in dotted-quad text form.
 * network_prefix_bits: prefix length, 0..32.
 *
 * The inputs are validated before anything is sent:
 *   - an unparsable address or a prefix longer than 32 bits fails with EINVAL;
 *   - an interface name that if_nametoindex() cannot resolve fails with the
 *     errno set by if_nametoindex().
 *
 * Returns 0 once the request has been sent, or -1 with errno set. The
 * kernel's Netlink response is not read, so a request the kernel rejects is
 * not reported here.
 */
int netlink_set_addr_ipv4(
    int netlink_fd
    , const char *iface_name
    , const char *address
    , uint8_t network_prefix_bits
) {
    struct {
        struct nlmsghdr header;
        struct ifaddrmsg content;
        char attributes_buf[64];
    } request;
    struct rtattr *request_attr;
    size_t attributes_buf_avail = sizeof request.attributes_buf;
    struct in_addr parsed_address;
    unsigned int iface_index;
    int rc;

    /* inet_pton() returns 0 (without setting errno) for malformed text. */
    rc = inet_pton(AF_INET, address, &parsed_address);
    if (rc != 1) {
        if (rc == 0) {
            errno = EINVAL;
        }
        return -1;
    }
    if (network_prefix_bits > 32) {
        errno = EINVAL;
        return -1;
    }

    /* 0 is never a valid interface index; it signals lookup failure. */
    iface_index = if_nametoindex(iface_name);
    if (iface_index == 0) {
        return -1;
    }

    memset(&request, 0, sizeof request);
    request.header.nlmsg_len = NLMSG_LENGTH(sizeof request.content);
    request.header.nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    request.header.nlmsg_type = RTM_NEWADDR;
    request.content.ifa_index = iface_index;
    request.content.ifa_family = AF_INET;
    request.content.ifa_prefixlen = network_prefix_bits;

    /* request.attributes[IFA_LOCAL] = address */
    request_attr = IFA_RTA(&request.content);
    request_attr->rta_type = IFA_LOCAL;
    request_attr->rta_len = RTA_LENGTH(sizeof parsed_address);
    request.header.nlmsg_len += request_attr->rta_len;
    memcpy(RTA_DATA(request_attr), &parsed_address, sizeof parsed_address);

    /* request.attributes[IFA_ADDRESS] = address */
    request_attr = RTA_NEXT(request_attr, attributes_buf_avail);
    request_attr->rta_type = IFA_ADDRESS;
    request_attr->rta_len = RTA_LENGTH(sizeof parsed_address);
    request.header.nlmsg_len += request_attr->rta_len;
    memcpy(RTA_DATA(request_attr), &parsed_address, sizeof parsed_address);

    if (send(netlink_fd, &request, request.header.nlmsg_len, 0) == -1) {
        return -1;
    }
    return 0;
}
The second Netlink command uses RTM_NEWLINK to enable the interface. It's equivalent to running ip link set up.
/*
 * Bring a network interface up by sending an RTM_NEWLINK request on a
 * NETLINK_ROUTE socket.
 *
 * netlink_fd: socket to send the request on.
 * iface_name: name of the interface to enable.
 *
 * Returns 0 once the request has been sent, or -1 with errno set. An
 * interface name that if_nametoindex() cannot resolve fails before anything
 * is sent. The kernel's Netlink response is not read, so a request the
 * kernel rejects is not reported here.
 */
int netlink_link_up(int netlink_fd, const char *iface_name) {
    struct {
        struct nlmsghdr header;
        struct ifinfomsg content;
    } request;
    unsigned int iface_index;

    /* 0 is never a valid interface index; it signals lookup failure. */
    iface_index = if_nametoindex(iface_name);
    if (iface_index == 0) {
        return -1;
    }

    memset(&request, 0, sizeof request);
    request.header.nlmsg_len = NLMSG_LENGTH(sizeof request.content);
    request.header.nlmsg_flags = NLM_F_REQUEST;
    request.header.nlmsg_type = RTM_NEWLINK;
    request.content.ifi_index = (int) iface_index;
    request.content.ifi_flags = IFF_UP;
    /* ifi_change is the mask of flag bits to modify: only IFF_UP. */
    request.content.ifi_change = IFF_UP;

    if (send(netlink_fd, &request, request.header.nlmsg_len, 0) == -1) {
        return -1;
    }
    return 0;
}
At this point the TUN/TAP interface has been fully configured and is just waiting for our process to read/write network data.
For this example I'll be writing a very simple tun2udp binary, which forwards IPv4 packets to/from UDP on localhost. Compile it with GCC or Clang:
gcc -o tun2udp tun2udp.c
send_port=12345
recv_port=12346
sudo ./tun2udp 10.11.12.0/24 $send_port $recv_port
/* Copyright (c) John Millikin <john@john-millikin.com> */
/* SPDX-License-Identifier: 0BSD */
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Shuttle packets between a TUN/TAP descriptor and two sockets until an
 * error occurs.
 *
 *   tuntap_fd -> send_fd : each packet read from tuntap_fd is passed to
 *                          send(); the result of send() is not checked.
 *   recv_fd -> tuntap_fd : each datagram received on recv_fd is written to
 *                          tuntap_fd.
 *
 * Returns -1 when poll(), read(), recv() or write() fails; there is no
 * other exit from the loop.
 */
int run_proxy(int tuntap_fd, int send_fd, int recv_fd) {
    enum { TUN_SLOT = 0, UDP_SLOT = 1, SLOT_COUNT = 2 };
    struct pollfd watched[SLOT_COUNT];
    char packet[UINT16_MAX];
    ssize_t packet_len;

    watched[TUN_SLOT].fd = tuntap_fd;
    watched[TUN_SLOT].events = POLLIN;
    watched[UDP_SLOT].fd = recv_fd;
    watched[UDP_SLOT].events = POLLIN;

    for (;;) {
        /* Block with no timeout until one of the descriptors has events. */
        if (poll(watched, SLOT_COUNT, -1) == -1) {
            return -1;
        }

        if (watched[TUN_SLOT].revents & POLLIN) {
            packet_len = read(tuntap_fd, packet, sizeof packet);
            if (packet_len < 0) {
                return -1;
            }
            send(send_fd, packet, packet_len, 0);
        }

        if (watched[UDP_SLOT].revents & POLLIN) {
            packet_len = recv(recv_fd, packet, sizeof packet, 0);
            if (packet_len < 0) {
                return -1;
            }
            if (write(tuntap_fd, packet, packet_len) == -1) {
                return -1;
            }
        }
    }
}
/*
 * Create a UDP socket and connect() it to 127.0.0.1:<port>.
 *
 * NOTE(review): despite the "bind" in its name, this function calls
 * connect(), not bind(). main() uses the returned descriptor as the socket
 * it send()s on, which is what a connected socket is needed for.
 *
 * Returns the socket descriptor, or -1 with errno set. If connect() fails
 * the socket is closed and connect()'s errno is preserved.
 */
int bind_localhost_udp(uint16_t port) {
    struct sockaddr_in peer;
    int sock;

    sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock == -1) {
        return -1;
    }

    memset(&peer, 0, sizeof peer);
    peer.sin_family = AF_INET;
    peer.sin_addr.s_addr = inet_addr("127.0.0.1");
    peer.sin_port = htons(port);

    if (connect(sock, (struct sockaddr*) &peer, sizeof peer) != -1) {
        return sock;
    }

    {
        /* close() may overwrite errno; keep the connect() failure code. */
        const int saved_errno = errno;
        close(sock);
        errno = saved_errno;
    }
    return -1;
}
/*
 * Create a UDP socket and bind() it to 127.0.0.1:<port>.
 *
 * NOTE(review): despite the "connect" in its name, this function calls
 * bind(), not connect(). main() uses the returned descriptor as the socket
 * it receives datagrams on, which is what a bound socket is needed for.
 *
 * Returns the socket descriptor, or -1 with errno set. If bind() fails the
 * socket is closed and bind()'s errno is preserved.
 */
int connect_localhost_udp(uint16_t port) {
    struct sockaddr_in local;
    int sock;

    sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock == -1) {
        return -1;
    }

    memset(&local, 0, sizeof local);
    local.sin_family = AF_INET;
    local.sin_addr.s_addr = inet_addr("127.0.0.1");
    local.sin_port = htons(port);

    if (bind(sock, (struct sockaddr*) &local, sizeof local) != -1) {
        return sock;
    }

    {
        /* close() may overwrite errno; keep the bind() failure code. */
        const int saved_errno = errno;
        close(sock);
        errno = saved_errno;
    }
    return -1;
}
The rest of the code is just argument parsing. For the TUN interface address it accepts an IPv4 dotted quad, with an optional netmask (defaulting to /32).
/*
 * Validate an "a.b.c.d" or "a.b.c.d/prefix" string and split it in place.
 *
 * address_str:         NUL-terminated, writable string. On success, if a
 *                      '/' was present it is overwritten with NUL so that
 *                      address_str holds only the dotted-quad address.
 * network_prefix_bits: on success receives the prefix length (0..32), or 32
 *                      when no "/prefix" suffix was given.
 *
 * The address is checked with inet_pton(), the same parser that
 * netlink_set_addr_ipv4() uses, so a string accepted here is also accepted
 * there. The prefix must contain at least one digit and nothing after the
 * number.
 *
 * Returns 0 on success. Returns -1 on invalid input, in which case
 * address_str is left exactly as it was passed in and *network_prefix_bits
 * is not written.
 */
int split_address(char *address_str, uint8_t *network_prefix_bits) {
    struct in_addr parsed;
    char *prefix_sep;
    long prefix_raw = 32;

    prefix_sep = strchr(address_str, '/');
    if (prefix_sep != NULL) {
        const char *prefix_str = prefix_sep + 1;
        char *prefix_extra;

        /* Validate the prefix before modifying the caller's string. */
        prefix_raw = strtol(prefix_str, &prefix_extra, 10);
        if (prefix_extra == prefix_str) {
            /* No digits were consumed, e.g. "10.0.0.1/". */
            return -1;
        }
        if (*prefix_extra != 0) {
            return -1;
        }
        if (prefix_raw < 0 || prefix_raw > 32) {
            return -1;
        }
        *prefix_sep = 0;
    }

    if (inet_pton(AF_INET, address_str, &parsed) != 1) {
        if (prefix_sep != NULL) {
            /* Undo the split so the caller can report the full input. */
            *prefix_sep = '/';
        }
        return -1;
    }

    *network_prefix_bits = (uint8_t) prefix_raw;
    return 0;
}
/*
 * Parse a decimal port number.
 *
 * port_str: NUL-terminated text to parse.
 * port:     on success receives the parsed value (0..UINT16_MAX).
 *
 * Returns 0 on success. Returns -1, leaving *port untouched, when the
 * string contains no digits (including the empty string), has trailing
 * characters after the number, or is outside 0..UINT16_MAX.
 */
int parse_port(char *port_str, uint16_t *port) {
    char *extra;
    long raw = strtol(port_str, &extra, 10);

    if (extra == port_str) {
        /* strtol() consumed nothing; without this "" would parse as 0. */
        return -1;
    }
    if (*extra != 0) {
        return -1;
    }
    if (raw < 0 || raw > UINT16_MAX) {
        return -1;
    }
    *port = (uint16_t) raw;
    return 0;
}
Finally we get to main() and can glue everything together. Copy (or #include) the TUN/TAP and Netlink code from earlier sections. The TUN/TAP flags are hardcoded to IFF_TUN | IFF_NO_PI, which means it will send/receive IP packets with no additional framing. The interface name will be assigned by the kernel.
/*
 * tun2udp entry point: <address> <send-port> <recv-port>.
 *
 * Parses the arguments, opens the two localhost UDP sockets, creates a TUN
 * interface (IFF_TUN | IFF_NO_PI, no requested name), assigns it the
 * address and brings it up over Netlink, then runs the proxy loop.
 *
 * Returns 1 after printing a diagnostic to stderr if any step fails.
 */
int main(int argc, char **argv) {
    char tun_name[IFNAMSIZ];
    uint16_t port_out, port_in;
    uint8_t prefix_len;
    int tun_fd, nl_fd, udp_out_fd, udp_in_fd;

    if (argc < 4) {
        fprintf(stderr, "Usage: %s <address> <send-port> <recv-port>\n", argv[0]);
        return 1;
    }

    /* split_address() edits argv[1] in place: on success any "/prefix"
     * suffix is cut off, leaving just the address for later use. */
    if (split_address(argv[1], &prefix_len) == -1) {
        fprintf(stderr, "Invalid address \"%s\"\n", argv[1]);
        return 1;
    }
    if (parse_port(argv[2], &port_out) == -1) {
        fprintf(stderr, "Invalid port \"%s\"\n", argv[2]);
        return 1;
    }
    if (parse_port(argv[3], &port_in) == -1) {
        fprintf(stderr, "Invalid port \"%s\"\n", argv[3]);
        return 1;
    }

    /* UDP endpoints: one socket for sending, one for receiving. */
    udp_out_fd = bind_localhost_udp(port_out);
    if (udp_out_fd == -1) {
        fprintf(stderr, "bind_localhost_udp(%u): ", port_out);
        perror(NULL);
        return 1;
    }
    udp_in_fd = connect_localhost_udp(port_in);
    if (udp_in_fd == -1) {
        fprintf(stderr, "connect_localhost_udp(%u): ", port_in);
        perror(NULL);
        return 1;
    }

    /* Create the interface; tun_name receives the name it was given. */
    tun_fd = tuntap_connect(NULL, IFF_TUN | IFF_NO_PI, tun_name);
    if (tun_fd == -1) {
        perror("tuntap_connect");
        return 1;
    }

    /* Configure the interface over Netlink, then drop the socket. */
    nl_fd = netlink_connect();
    if (nl_fd == -1) {
        perror("netlink_connect");
        return 1;
    }
    if (netlink_set_addr_ipv4(nl_fd, tun_name, argv[1], prefix_len) == -1) {
        perror("netlink_set_addr_ipv4");
        return 1;
    }
    if (netlink_link_up(nl_fd, tun_name) == -1) {
        perror("netlink_link_up");
        return 1;
    }
    close(nl_fd);

    if (run_proxy(tun_fd, udp_out_fd, udp_in_fd) == -1) {
        perror("run_proxy");
        return 1;
    }
    return 0;
}
If the Netlink socket has bind() called on it, then the traced RTM_NEWADDR command is formatted like this:
sendto(6, [
{
nlmsg_len=40,
nlmsg_type=RTM_NEWADDR,
nlmsg_flags=NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE,
nlmsg_seq=0,
nlmsg_pid=0
}, {
ifa_family=AF_INET,
ifa_prefixlen=24,
ifa_flags=0,
ifa_scope=RT_SCOPE_UNIVERSE,
ifa_index=if_nametoindex("tun0")
}, [
[{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("10.10.0.1")],
[{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("10.10.0.1")]
]
], 40, 0, NULL, 0) = 40
If the socket does not have bind() called on it, then the same command is formatted like this:
sendto(6, [
{
nlmsg_len=40,
nlmsg_type=0x14 /* NLMSG_??? */,
nlmsg_flags=NLM_F_REQUEST|0x600,
nlmsg_seq=0,
nlmsg_pid=0
}, "\x02\x18\x00\x00\x55\x00\x00\x00\x08\x00\x02\x00\x0a\x0a\x00\x01\x08\x00\x01\x00\x0a\x0a\x00\x01"
], 40, 0, NULL, 0) = 40