1NETLINK(7)                 Linux Programmer's Manual                NETLINK(7)
2
3
4

NAME

6       netlink - communication between kernel and user space (AF_NETLINK)
7

SYNOPSIS

9       #include <asm/types.h>
10       #include <sys/socket.h>
11       #include <linux/netlink.h>
12
13       netlink_socket = socket(AF_NETLINK, socket_type, netlink_family);
14

DESCRIPTION

16       Netlink  is  used  to transfer information between the kernel and user-
17       space processes.  It consists of a standard sockets-based interface for
18       user  space  processes  and  an internal kernel API for kernel modules.
19       The internal kernel interface is not documented in  this  manual  page.
20       There  is  also  an  obsolete  netlink  interface via netlink character
21       devices; this interface is not documented here and is provided only for
22       backward compatibility.
23
24       Netlink  is  a datagram-oriented service.  Both SOCK_RAW and SOCK_DGRAM
25       are valid values for socket_type.  However, the netlink  protocol  does
26       not distinguish between datagram and raw sockets.
27
28       netlink_family  selects  the kernel module or netlink group to communi‐
29       cate with.  The currently assigned netlink families are:
30
31       NETLINK_ROUTE
32              Receives routing and link updates and may be used to modify  the
33              routing  tables (both IPv4 and IPv6), IP addresses, link parame‐
34              ters, neighbor setups, queueing disciplines, traffic classes and
35              packet classifiers (see rtnetlink(7)).
36
37       NETLINK_W1 (Linux 2.6.13 to 2.6.17)
38              Messages from 1-wire subsystem.
39
40       NETLINK_USERSOCK
41              Reserved for user-mode socket protocols.
42
43       NETLINK_FIREWALL (up to and including Linux 3.4)
44              Transport  IPv4  packets  from netfilter to user space.  Used by
45              ip_queue kernel module.  After a long period of  being  declared
46              obsolete  (in  favor  of  the more advanced nfnetlink_queue fea‐
47              ture), NETLINK_FIREWALL was removed in Linux 3.5.
48
49       NETLINK_INET_DIAG (since Linux 2.6.14)
50              Query information about sockets  of  various  protocol  families
51              from the kernel (see sock_diag(7)).
52
53       NETLINK_SOCK_DIAG (since Linux 3.3)
54              A synonym for NETLINK_INET_DIAG.
55
56       NETLINK_NFLOG (up to and including Linux 3.16)
57              Netfilter/iptables ULOG.
58
59       NETLINK_XFRM
60              IPsec.
61
62       NETLINK_SELINUX (since Linux 2.6.4)
63              SELinux event notifications.
64
65       NETLINK_ISCSI (since Linux 2.6.15)
66              Open-iSCSI.
67
68       NETLINK_AUDIT (since Linux 2.6.6)
69              Auditing.
70
71       NETLINK_FIB_LOOKUP (since Linux 2.6.13)
72              Access to FIB lookup from user space.
73
74       NETLINK_CONNECTOR (since Linux 2.6.14)
75              Kernel  connector.   See  Documentation/connector/* in the Linux
76              kernel source tree for further information.
77
78       NETLINK_NETFILTER (since Linux 2.6.14)
79              Netfilter subsystem.
80
81       NETLINK_SCSITRANSPORT (since Linux 2.6.19)
82              SCSI Transports.
83
84       NETLINK_RDMA (since Linux 3.0)
85              Infiniband RDMA.
86
87       NETLINK_IP6_FW (up to and including Linux 3.4)
88              Transport IPv6 packets from netfilter to user  space.   Used  by
89              ip6_queue kernel module.
90
91       NETLINK_DNRTMSG
92              DECnet routing messages.
93
94       NETLINK_KOBJECT_UEVENT (since Linux 2.6.10)
95              Kernel messages to user space.
96
97       NETLINK_GENERIC (since Linux 2.6.15)
98              Generic netlink family for simplified netlink usage.
99
100       NETLINK_CRYPTO (since Linux 3.2)
101              Netlink  interface  to  request information about ciphers regis‐
102              tered with the kernel crypto API as well as allow  configuration
103              of the kernel crypto API.
104
105       Netlink messages consist of a byte stream with one or multiple nlmsghdr
106       headers and associated payload.  The byte  stream  should  be  accessed
107       only  with  the  standard  NLMSG_*  macros.  See netlink(3) for further
108       information.
109
110       In multipart messages (multiple nlmsghdr headers with  associated  pay‐
111       load  in  one byte stream) the first and all following headers have the
112       NLM_F_MULTI flag set, except for the last header  which  has  the  type
113       NLMSG_DONE.
114
115       After each nlmsghdr the payload follows.
116
117           struct nlmsghdr {
118               __u32 nlmsg_len;    /* Length of message including header */
119               __u16 nlmsg_type;   /* Type of message content */
120               __u16 nlmsg_flags;  /* Additional flags */
121               __u32 nlmsg_seq;    /* Sequence number */
122               __u32 nlmsg_pid;    /* Sender port ID */
123           };
124
125       nlmsg_type can be one of the standard message types: NLMSG_NOOP message
126       is to be ignored, NLMSG_ERROR message signals an error and the  payload
127       contains  an nlmsgerr structure, NLMSG_DONE message terminates a multi‐
128       part message.
129
130           struct nlmsgerr {
131               int error;        /* Negative errno or 0 for acknowledgements */
132               struct nlmsghdr msg;  /* Message header that caused the error */
133           };
134
135       A netlink family usually specifies more message types, see  the  appro‐
136       priate   manual   pages   for   that,  for  example,  rtnetlink(7)  for
137       NETLINK_ROUTE.
138
139       Standard flag bits in nlmsg_flags
140       ──────────────────────────────────────────────────────────
141       NLM_F_REQUEST   Must be set on all request messages.
142       NLM_F_MULTI     The message is part of a  multipart  mes‐
143                       sage terminated by NLMSG_DONE.
144       NLM_F_ACK       Request for an acknowledgment on success.
145       NLM_F_ECHO      Echo this request.
146
147       Additional flag bits for GET requests
148       ────────────────────────────────────────────────────────────────────
149       NLM_F_ROOT     Return the complete table instead of a single entry.
150       NLM_F_MATCH    Return  all entries matching criteria passed in mes‐
151                      sage content.  Not implemented yet.
152       NLM_F_ATOMIC   Return an atomic snapshot of the table.
153       NLM_F_DUMP     Convenience macro; equivalent to
154                      (NLM_F_ROOT|NLM_F_MATCH).
155
156       Note that NLM_F_ATOMIC requires  the  CAP_NET_ADMIN  capability  or  an
157       effective UID of 0.
158
159       Additional flag bits for NEW requests
160       ────────────────────────────────────────────────────────────
161       NLM_F_REPLACE   Replace existing matching object.
162       NLM_F_EXCL      Don't replace if the object already exists.
163       NLM_F_CREATE    Create object if it doesn't already exist.
164       NLM_F_APPEND    Add to the end of the object list.
165
166       nlmsg_seq  and  nlmsg_pid  are used to track messages.  nlmsg_pid shows
167       the origin of the message.  Note that there isn't  a  1:1  relationship
168       between  nlmsg_pid and the PID of the process if the message originated
169       from a netlink socket.  See the ADDRESS  FORMATS  section  for  further
170       information.
171
172       Both nlmsg_seq and nlmsg_pid are opaque to netlink core.
173
174       Netlink  is  not  a  reliable protocol.  It tries its best to deliver a
175       message to its destination(s), but may drop messages  when  an  out-of-
176       memory  condition  or  other  error  occurs.  For reliable transfer the
177       sender can request an acknowledgement from the receiver by setting  the
178       NLM_F_ACK  flag.   An  acknowledgment is an NLMSG_ERROR packet with the
179       error field set to 0.  The application must  generate  acknowledgements
180       for  received messages itself.  The kernel tries to send an NLMSG_ERROR
181       message for every failed packet.  A user  process  should  follow  this
182       convention too.
183
184       However,  reliable  transmissions from kernel to user are impossible in
185       any case.  The kernel can't send a netlink message if the socket buffer
186       is  full: the message will be dropped and the kernel and the user-space
187       process will no longer have the same view of kernel state.  It is up to
188       the  application  to  detect  when  this happens (via the ENOBUFS error
189       returned by recvmsg(2)) and resynchronize.
190
191   Address formats
192       The sockaddr_nl structure describes a netlink client in user  space  or
193       in  the  kernel.  A sockaddr_nl can be either unicast (only sent to one
194       peer) or sent to netlink multicast groups (nl_groups not equal to 0).
195
196           struct sockaddr_nl {
197               sa_family_t     nl_family;  /* AF_NETLINK */
198               unsigned short  nl_pad;     /* Zero */
199               pid_t           nl_pid;     /* Port ID */
200               __u32           nl_groups;  /* Multicast groups mask */
201           };
202
203       nl_pid is the unicast address of a netlink socket.  It's always 0 if the
204       destination is in the kernel.  For a user-space process, nl_pid is usu‐
205       ally the PID of the process owning the  destination  socket.   However,
206       nl_pid  identifies  a netlink socket, not a process.  If a process owns
207       several netlink sockets, then nl_pid can be equal  to  the  process  ID
208       only  for at most one socket.  There are two ways to assign nl_pid to a
209       netlink socket.  If the application sets nl_pid before calling bind(2),
210       then  it  is  up to the application to make sure that nl_pid is unique.
211       If the application sets it to 0, the kernel takes care of assigning it.
212       The  kernel  assigns  the  process  ID  to the first netlink socket the
213       process opens and assigns a unique nl_pid to every netlink socket  that
214       the process subsequently creates.
215
216       nl_groups  is  a  bit  mask with every bit representing a netlink group
217       number.  Each netlink family has a set of 32  multicast  groups.   When
218       bind(2) is called on the socket, the nl_groups field in the sockaddr_nl
219       should be set to a bit mask of the groups which it wishes to listen to.
220       The default value for this field is zero which means that no multicasts
221       will be received.  A socket may multicast messages to any of the multi‐
222       cast  groups by setting nl_groups to a bit mask of the groups it wishes
223       to send to when it calls sendmsg(2) or does a  connect(2).   Only  pro‐
224       cesses  with  an effective UID of 0 or the CAP_NET_ADMIN capability may
225       send or listen to a netlink multicast group.  Since Linux 2.6.13,  mes‐
226       sages  can't be broadcast to multiple groups.  Any replies to a message
227       received for a multicast group should be sent back to the  sending  PID
228       and the multicast group.  Some Linux kernel subsystems may additionally
229       allow other users to send and/or receive messages.  As  at  Linux  3.0,
230       the   NETLINK_KOBJECT_UEVENT,   NETLINK_GENERIC,   NETLINK_ROUTE,   and
231       NETLINK_SELINUX groups allow  other  users  to  receive  messages.   No
232       groups allow other users to send messages.
233
234   Socket options
235       To  set  or  get a netlink socket option, call getsockopt(2) to read or
236       setsockopt(2) to write the option with the option level argument set to
237       SOL_NETLINK.  Unless otherwise noted, optval is a pointer to an int.
238
239       NETLINK_PKTINFO (since Linux 2.6.14)
240              Enable  nl_pktinfo  control messages for received packets to get
241              the extended destination group number.
242
243       NETLINK_ADD_MEMBERSHIP, NETLINK_DROP_MEMBERSHIP (since Linux 2.6.14)
244              Join/leave a group specified by optval.
245
246       NETLINK_LIST_MEMBERSHIPS (since Linux 4.2)
247              Retrieve all groups a socket  is  a  member  of.   optval  is  a
248              pointer to __u32 and optlen is the size of the array.  The array
249              is filled with the full membership set of the  socket,  and  the
250              required array size is returned in optlen.
251
252       NETLINK_BROADCAST_ERROR (since Linux 2.6.30)
253              When  not set, netlink_broadcast() only reports ESRCH errors and
254              silently ignores ENOBUFS errors.
255
256       NETLINK_NO_ENOBUFS (since Linux 2.6.30)
257              This flag can be used by  unicast  and  broadcast  listeners  to
258              avoid receiving ENOBUFS errors.
259
260       NETLINK_LISTEN_ALL_NSID (since Linux 4.2)
261              When  set,  this  socket will receive netlink notifications from
262              all network namespaces that have an nsid assigned into the  net‐
263              work  namespace  where  the socket has been opened.  The nsid is
264              sent to user space via ancillary data.
265
266       NETLINK_CAP_ACK (since Linux 4.2)
267              The kernel may fail to  allocate  the  necessary  room  for  the
268              acknowledgment  message  back  to user space.  This option trims
269              off the payload of the original netlink  message.   The  netlink
270              message header is still included, so the user can guess from the
271              sequence number which message triggered the acknowledgment.
272

VERSIONS

274       The socket interface to netlink first appeared in Linux 2.2.
275
276       Linux 2.0 supported a more  primitive  device-based  netlink  interface
277       (which  is  still  available as a compatibility option).  This obsolete
278       interface is not described here.
279

NOTES

281       It is often better to use netlink via libnetlink or libnl than via  the
282       low-level kernel interface.
283

BUGS

285       This manual page is not complete.
286

EXAMPLE

288       The following example creates a NETLINK_ROUTE netlink socket which will
289       listen to  the  RTMGRP_LINK  (network  interface  create/delete/up/down
290       events)  and RTMGRP_IPV4_IFADDR (IPv4 addresses add/delete events) mul‐
291       ticast groups.
292
293           struct sockaddr_nl sa;
294
295           memset(&sa, 0, sizeof(sa));
296           sa.nl_family = AF_NETLINK;
297           sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR;
298
299           fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
300           bind(fd, (struct sockaddr *) &sa, sizeof(sa));
301
302       The next example demonstrates how to send a netlink message to the ker‐
303       nel  (pid  0).   Note  that  the  application must take care of message
304       sequence numbers in order to reliably track acknowledgements.
305
306           struct nlmsghdr *nh;    /* The nlmsghdr with payload to send */
307           struct sockaddr_nl sa;
308           struct iovec iov = { nh, nh->nlmsg_len };
309           struct msghdr msg;
310
311           msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
312           memset(&sa, 0, sizeof(sa));
313           sa.nl_family = AF_NETLINK;
314           nh->nlmsg_pid = 0;
315           nh->nlmsg_seq = ++sequence_number;
316           /* Request an ack from kernel by setting NLM_F_ACK */
317           nh->nlmsg_flags |= NLM_F_ACK;
318
319           sendmsg(fd, &msg, 0);
320
321       And the last example is about reading a netlink message.
322
323           int len;
324           char buf[8192];     /* 8192 to avoid message truncation on
325                                  platforms with page size > 4096 */
326           struct iovec iov = { buf, sizeof(buf) };
327           struct sockaddr_nl sa;
328           struct msghdr msg;
329           struct nlmsghdr *nh;
330
331           msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
332           len = recvmsg(fd, &msg, 0);
333
334           for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len);
335                nh = NLMSG_NEXT (nh, len)) {
336               /* The end of multipart message */
337               if (nh->nlmsg_type == NLMSG_DONE)
338                   return;
339
340               if (nh->nlmsg_type == NLMSG_ERROR)
341                   /* Do some error handling */
342               ...
343
344               /* Continue with parsing payload */
345               ...
346           }
347

SEE ALSO

349       cmsg(3), netlink(3), capabilities(7), rtnetlink(7), sock_diag(7)
350
351       information about libnetlink ⟨ftp://ftp.inr.ac.ru/ip-routing/iproute2*⟩
352
353       information about libnl ⟨http://www.infradead.org/~tgr/libnl/⟩
354
355       RFC 3549 "Linux Netlink as an IP Services Protocol"
356

COLOPHON

358       This page is part of release 4.15 of the Linux  man-pages  project.   A
359       description  of  the project, information about reporting bugs, and the
360       latest    version    of    this    page,    can     be     found     at
361       https://www.kernel.org/doc/man-pages/.
362
363
364
365Linux                             2017-09-15                        NETLINK(7)
Impressum