Annotation of src/sys/net/npf/npf_conn.c, Revision 1.8
1.8 ! rmind 1: /* $NetBSD: npf_conn.c,v 1.7 2014/07/25 23:07:21 rmind Exp $ */
1.1 rmind 2:
3: /*-
4: * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org>
5: * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This material is based upon work partially supported by The
9: * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
32:
33: /*
34: * NPF connection tracking for stateful filtering and translation.
35: *
36: * Overview
37: *
38: * Connection direction is identified by the direction of its first
39: * packet. Packets can be incoming or outgoing with respect to an
40: * interface. To describe the packet in the context of connection
41: * direction we will use the terms "forwards stream" and "backwards
42: * stream". All connections have two keys and thus two entries:
43: *
44: * npf_conn_t::c_forw_entry for the forwards stream and
45: * npf_conn_t::c_back_entry for the backwards stream.
46: *
47: * The keys are formed from the 5-tuple (source/destination address,
48: * source/destination port and the protocol). Additional matching
49: * is performed for the interface (a common behaviour is equivalent
50: * to the 6-tuple lookup including the interface ID). Note that the
51: * key may be formed using translated values in a case of NAT.
52: *
53: * Connections can serve two purposes: for the implicit passing or
54: * to accommodate the dynamic NAT. Connections for the former purpose
55: * are created by the rules with "stateful" attribute and are used for
56: * stateful filtering. Such connections indicate that the packet of
57: * the backwards stream should be passed without inspection of the
58: * ruleset. The other purpose is to associate a dynamic NAT mechanism
59: * with a connection. Such connections are created by the NAT policies
60: * and they have a relationship with NAT translation structure via
61: * npf_conn_t::c_nat. A single connection can serve both purposes,
62: * which is a common case.
63: *
64: * Connection life-cycle
65: *
66: * Connections are established when a packet matches said rule or
67: * NAT policy. Both keys of the established connection are inserted
68: * into the connection database. A garbage collection thread
69: * periodically scans all connections and depending on connection
70: * properties (e.g. last activity time, protocol) removes connection
71: * entries and expires the actual connections.
72: *
73: * Each connection has a reference count. The reference is acquired
74: * on lookup and should be released by the caller. It guarantees that
75: * the connection will not be destroyed, although it may be expired.
76: *
77: * Synchronisation
78: *
79: * Connection database is accessed in a lock-less manner by the main
80: * routines: npf_conn_inspect() and npf_conn_establish(). Since they
81: * are always called from a software interrupt, the database is
82: * protected using passive serialisation. The main place which can
83: * destroy a connection is npf_conn_worker(). The database itself
84: * can be replaced and destroyed in npf_conn_load().
85: *
86: * ALG support
87: *
88: * Application-level gateways (ALGs) can override generic connection
89: * inspection (npf_alg_conn() call in npf_conn_inspect() function) by
90: * performing their own lookup using different key. Recursive call
91: * to npf_conn_inspect() is not allowed. The ALGs ought to use the
92: * npf_conn_lookup() function for this purpose.
93: *
94: * Lock order
95: *
1.6 rmind 96: * npf_config_lock ->
97: * conn_lock ->
98: * npf_conn_t::c_lock
1.1 rmind 99: */
100:
101: #include <sys/cdefs.h>
1.8 ! rmind 102: __KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.7 2014/07/25 23:07:21 rmind Exp $");
1.1 rmind 103:
104: #include <sys/param.h>
105: #include <sys/types.h>
106:
107: #include <netinet/in.h>
108: #include <netinet/tcp.h>
109:
110: #include <sys/atomic.h>
111: #include <sys/condvar.h>
112: #include <sys/kmem.h>
113: #include <sys/kthread.h>
114: #include <sys/mutex.h>
115: #include <net/pfil.h>
116: #include <sys/pool.h>
117: #include <sys/queue.h>
118: #include <sys/systm.h>
119:
120: #define __NPF_CONN_PRIVATE
121: #include "npf_conn.h"
122: #include "npf_impl.h"
123:
124: /*
125: * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction.
 *
 * npf_conn_establish() stores (di & PFIL_ALL) in npf_conn_t::c_flags, so
 * the flags defined below must not overlap with the PFIL_ALL bits; the
 * compile-time assertion pins the values this relies on.
126: */
127: CTASSERT(PFIL_ALL == (0x001 | 0x002));
128: #define CONN_ACTIVE 0x004 /* visible on inspection */
129: #define CONN_PASS 0x008 /* perform implicit passing */
130: #define CONN_EXPIRE 0x010 /* explicitly expire */
131: #define CONN_REMOVED 0x020 /* "forw/back" entries removed */
132:

133: /*
1.6 rmind 134: * Connection tracking state: disabled (off) or enabled (on).
 *
 * The state is read without any lock by npf_conn_trackable_p() and is
 * written by npf_conn_sysinit(), npf_conn_load() and npf_conn_tracking().
1.1 rmind 135: */
1.6 rmind 136: enum { CONN_TRACKING_OFF, CONN_TRACKING_ON };
1.1 rmind 137: static volatile int conn_tracking __cacheline_aligned;
138:

139: /* Connection tracking database, connection cache and the lock. */
/*
 * conn_lock is taken by npf_conn_load(), npf_conn_worker() and
 * npf_conn_export(); conn_cache backs all npf_conn_t allocations.
 */
140: static npf_conndb_t * conn_db __read_mostly;
141: static pool_cache_t conn_cache __read_mostly;
142: static kmutex_t conn_lock __cacheline_aligned;
143:

/* Forward declarations of routines defined later in this file. */
144: static void npf_conn_worker(void);
145: static void npf_conn_destroy(npf_conn_t *);
146:
147: /*
148: * npf_conn_sys{init,fini}: initialise/destroy connection tracking.
149: */
150:
151: void
152: npf_conn_sysinit(void)
153: {
154: conn_cache = pool_cache_init(sizeof(npf_conn_t), coherency_unit,
155: 0, 0, "npfconpl", NULL, IPL_NET, NULL, NULL, NULL);
156: mutex_init(&conn_lock, MUTEX_DEFAULT, IPL_NONE);
157: conn_tracking = CONN_TRACKING_OFF;
1.6 rmind 158: conn_db = npf_conndb_create();
1.1 rmind 159:
160: npf_worker_register(npf_conn_worker);
161: }
162:
163: void
164: npf_conn_sysfini(void)
165: {
1.6 rmind 166: /* Note: the caller should have flushed the connections. */
167: KASSERT(conn_tracking == CONN_TRACKING_OFF);
1.1 rmind 168: npf_worker_unregister(npf_conn_worker);
169:
1.6 rmind 170: npf_conndb_destroy(conn_db);
1.1 rmind 171: pool_cache_destroy(conn_cache);
172: mutex_destroy(&conn_lock);
173: }
174:
175: /*
1.6 rmind 176: * npf_conn_load: perform the load by flushing the current connection
177: * database and replacing it with the new one or just destroying.
1.1 rmind 178: *
1.6 rmind 179: * => The caller must disable the connection tracking and ensure that
180: * there are no connection database lookups or references in-flight.
1.1 rmind 181: */
1.6 rmind 182: void
183: npf_conn_load(npf_conndb_t *ndb, bool track)
1.1 rmind 184: {
1.6 rmind 185: npf_conndb_t *odb = NULL;
1.1 rmind 186:
1.6 rmind 187: KASSERT(npf_config_locked_p());
1.1 rmind 188:
189: /*
1.6 rmind 190: * The connection database is in the quiescent state.
191: * Prevent G/C thread from running and install a new database.
1.1 rmind 192: */
1.6 rmind 193: mutex_enter(&conn_lock);
194: if (ndb) {
195: KASSERT(conn_tracking == CONN_TRACKING_OFF);
196: odb = conn_db;
197: conn_db = ndb;
198: membar_sync();
199: }
200: if (track) {
201: /* After this point lookups start flying in. */
202: conn_tracking = CONN_TRACKING_ON;
1.1 rmind 203: }
1.6 rmind 204: mutex_exit(&conn_lock);
1.1 rmind 205:
206: if (odb) {
1.6 rmind 207: /*
208: * Flush all, no sync since the caller did it for us.
209: * Also, release the pool cache memory.
210: */
211: npf_conn_gc(odb, true, false);
1.1 rmind 212: npf_conndb_destroy(odb);
1.6 rmind 213: pool_cache_invalidate(conn_cache);
1.1 rmind 214: }
215: }
216:
217: /*
218: * npf_conn_tracking: enable/disable connection tracking.
219: */
220: void
221: npf_conn_tracking(bool track)
222: {
1.6 rmind 223: KASSERT(npf_config_locked_p());
224: conn_tracking = track ? CONN_TRACKING_ON : CONN_TRACKING_OFF;
1.1 rmind 225: }
226:
1.6 rmind 227: static inline bool
1.1 rmind 228: npf_conn_trackable_p(const npf_cache_t *npc)
229: {
230: /*
231: * Check if connection tracking is on. Also, if layer 3 and 4 are
232: * not cached - protocol is not supported or packet is invalid.
233: */
234: if (conn_tracking != CONN_TRACKING_ON) {
235: return false;
236: }
237: if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
238: return false;
239: }
240: return true;
241: }
242:
243: /*
244: * npf_conn_conkey: construct a key for the connection lookup.
1.8 ! rmind 245: *
! 246: * => Returns the key length in bytes or zero on failure.
1.1 rmind 247: */
1.8 ! rmind 248: unsigned
1.1 rmind 249: npf_conn_conkey(const npf_cache_t *npc, npf_connkey_t *key, const bool forw)
250: {
251: const u_int alen = npc->npc_alen;
252: const struct tcphdr *th;
253: const struct udphdr *uh;
254: u_int keylen, isrc, idst;
255: uint16_t id[2];
256:
257: switch (npc->npc_proto) {
258: case IPPROTO_TCP:
259: KASSERT(npf_iscached(npc, NPC_TCP));
260: th = npc->npc_l4.tcp;
261: id[NPF_SRC] = th->th_sport;
262: id[NPF_DST] = th->th_dport;
263: break;
264: case IPPROTO_UDP:
265: KASSERT(npf_iscached(npc, NPC_UDP));
266: uh = npc->npc_l4.udp;
267: id[NPF_SRC] = uh->uh_sport;
268: id[NPF_DST] = uh->uh_dport;
269: break;
270: case IPPROTO_ICMP:
271: if (npf_iscached(npc, NPC_ICMP_ID)) {
272: const struct icmp *ic = npc->npc_l4.icmp;
273: id[NPF_SRC] = ic->icmp_id;
274: id[NPF_DST] = ic->icmp_id;
275: break;
276: }
1.8 ! rmind 277: return 0;
1.1 rmind 278: case IPPROTO_ICMPV6:
279: if (npf_iscached(npc, NPC_ICMP_ID)) {
280: const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
281: id[NPF_SRC] = ic6->icmp6_id;
282: id[NPF_DST] = ic6->icmp6_id;
283: break;
284: }
1.8 ! rmind 285: return 0;
1.1 rmind 286: default:
287: /* Unsupported protocol. */
1.8 ! rmind 288: return 0;
1.1 rmind 289: }
290:
291: if (__predict_true(forw)) {
292: isrc = NPF_SRC, idst = NPF_DST;
293: } else {
294: isrc = NPF_DST, idst = NPF_SRC;
295: }
296:
1.8 ! rmind 297: /*
! 298: * Construct a key formed out of 32-bit integers. The key layout:
! 299: *
! 300: * Field: | proto | alen | src-id | dst-id | src-addr | dst-addr |
! 301: * +-------+-------+--------+--------+----------+----------+
! 302: * Bits: | 8 | 8 | 16 | 16 | 32-128 | 32-128 |
! 303: *
! 304: * The source and destination are inverted if they key is for the
! 305: * backwards stream (forw == false). The address length depends
! 306: * on the 'alen' field; it is a length in bytes, either 4 or 16.
! 307: */
! 308:
1.1 rmind 309: key->ck_key[0] = ((uint32_t)npc->npc_proto << 16) | (alen & 0xffff);
310: key->ck_key[1] = ((uint32_t)id[isrc] << 16) | id[idst];
311:
312: if (__predict_true(alen == sizeof(in_addr_t))) {
313: key->ck_key[2] = npc->npc_ips[isrc]->s6_addr32[0];
314: key->ck_key[3] = npc->npc_ips[idst]->s6_addr32[0];
315: keylen = 4 * sizeof(uint32_t);
316: } else {
317: const u_int nwords = alen >> 2;
318: memcpy(&key->ck_key[2], npc->npc_ips[isrc], alen);
319: memcpy(&key->ck_key[2 + nwords], npc->npc_ips[idst], alen);
320: keylen = (2 + (nwords * 2)) * sizeof(uint32_t);
321: }
1.8 ! rmind 322: return keylen;
1.1 rmind 323: }
324:
1.3 christos 325: static __inline void
1.1 rmind 326: connkey_set_addr(npf_connkey_t *key, const npf_addr_t *naddr, const int di)
327: {
328: const u_int alen = key->ck_key[0] & 0xffff;
329: uint32_t *addr = &key->ck_key[2 + ((alen >> 2) * di)];
330:
331: KASSERT(alen > 0);
332: memcpy(addr, naddr, alen);
333: }
334:
1.3 christos 335: static __inline void
1.1 rmind 336: connkey_set_id(npf_connkey_t *key, const uint16_t id, const int di)
337: {
338: const uint32_t oid = key->ck_key[1];
339: const u_int shift = 16 * !di;
340: const uint32_t mask = 0xffff0000 >> shift;
341:
342: key->ck_key[1] = ((uint32_t)id << shift) | (oid & mask);
343: }
344:
345: /*
346: * npf_conn_lookup: lookup if there is an established connection.
347: *
348: * => If found, we will hold a reference for the caller.
349: */
350: npf_conn_t *
1.4 rmind 351: npf_conn_lookup(const npf_cache_t *npc, const int di, bool *forw)
1.1 rmind 352: {
1.4 rmind 353: const nbuf_t *nbuf = npc->npc_nbuf;
1.1 rmind 354: npf_conn_t *con;
355: npf_connkey_t key;
356: u_int flags, cifid;
357: bool ok, pforw;
358:
359: /* Construct a key and lookup for a connection in the store. */
360: if (!npf_conn_conkey(npc, &key, true)) {
361: return NULL;
362: }
363: con = npf_conndb_lookup(conn_db, &key, forw);
364: if (con == NULL) {
365: return NULL;
366: }
367: KASSERT(npc->npc_proto == con->c_proto);
368:
369: /* Check if connection is active and not expired. */
370: flags = con->c_flags;
371: ok = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE;
372:
373: if (__predict_false(!ok)) {
374: atomic_dec_uint(&con->c_refcnt);
375: return NULL;
376: }
377:
378: /*
379: * Match the interface and the direction of the connection entry
380: * and the packet.
381: */
382: cifid = con->c_ifid;
383: if (__predict_false(cifid && cifid != nbuf->nb_ifid)) {
384: atomic_dec_uint(&con->c_refcnt);
385: return NULL;
386: }
387: pforw = (flags & PFIL_ALL) == di;
388: if (__predict_false(*forw != pforw)) {
389: atomic_dec_uint(&con->c_refcnt);
390: return NULL;
391: }
392:
393: /* Update the last activity time. */
394: getnanouptime(&con->c_atime);
395: return con;
396: }
397:
398: /*
399: * npf_conn_inspect: lookup a connection and inspecting the protocol data.
400: *
401: * => If found, we will hold a reference for the caller.
402: */
403: npf_conn_t *
1.4 rmind 404: npf_conn_inspect(npf_cache_t *npc, const int di, int *error)
1.1 rmind 405: {
1.4 rmind 406: nbuf_t *nbuf = npc->npc_nbuf;
1.1 rmind 407: npf_conn_t *con;
408: bool forw, ok;
409:
410: KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
411: if (!npf_conn_trackable_p(npc)) {
412: return NULL;
413: }
414:
415: /* Query ALG which may lookup connection for us. */
1.4 rmind 416: if ((con = npf_alg_conn(npc, di)) != NULL) {
1.1 rmind 417: /* Note: reference is held. */
418: return con;
419: }
420: if (nbuf_head_mbuf(nbuf) == NULL) {
421: *error = ENOMEM;
422: return NULL;
423: }
424: KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
425:
426: /* Main lookup of the connection. */
1.4 rmind 427: if ((con = npf_conn_lookup(npc, di, &forw)) == NULL) {
1.1 rmind 428: return NULL;
429: }
430:
431: /* Inspect the protocol data and handle state changes. */
432: mutex_enter(&con->c_lock);
1.4 rmind 433: ok = npf_state_inspect(npc, &con->c_state, forw);
1.1 rmind 434: mutex_exit(&con->c_lock);
435:
436: if (__predict_false(!ok)) {
437: /* Invalid: let the rules deal with it. */
438: npf_conn_release(con);
439: npf_stats_inc(NPF_STAT_INVALID_STATE);
440: con = NULL;
441: }
442: return con;
443: }
444:
445: /*
446: * npf_conn_establish: create a new connection, insert into the global list.
447: *
448: * => Connection is created with the reference held for the caller.
449: * => Connection will be activated on the first reference release.
450: */
451: npf_conn_t *
1.4 rmind 452: npf_conn_establish(npf_cache_t *npc, int di, bool per_if)
1.1 rmind 453: {
1.4 rmind 454: const nbuf_t *nbuf = npc->npc_nbuf;
1.1 rmind 455: npf_conn_t *con;
456:
457: KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
458:
459: if (!npf_conn_trackable_p(npc)) {
460: return NULL;
461: }
462:
463: /* Allocate and initialise the new connection. */
464: con = pool_cache_get(conn_cache, PR_NOWAIT);
465: if (__predict_false(!con)) {
466: return NULL;
467: }
468: NPF_PRINTF(("NPF: create conn %p\n", con));
1.6 rmind 469: npf_stats_inc(NPF_STAT_CONN_CREATE);
1.1 rmind 470:
471: /* Reference count and flags (indicate direction). */
472: mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
473: con->c_flags = (di & PFIL_ALL);
474: con->c_refcnt = 1;
475: con->c_rproc = NULL;
476: con->c_nat = NULL;
477:
478: /* Initialize protocol state. */
1.4 rmind 479: if (!npf_state_init(npc, &con->c_state)) {
1.1 rmind 480: goto err;
481: }
482:
483: KASSERT(npf_iscached(npc, NPC_IP46));
484: npf_connkey_t *fw = &con->c_forw_entry;
485: npf_connkey_t *bk = &con->c_back_entry;
486:
487: /*
488: * Construct "forwards" and "backwards" keys. Also, set the
489: * interface ID for this connection (unless it is global).
490: */
491: if (!npf_conn_conkey(npc, fw, true)) {
492: goto err;
493: }
494: if (!npf_conn_conkey(npc, bk, false)) {
495: goto err;
496: }
497: fw->ck_backptr = bk->ck_backptr = con;
498: con->c_ifid = per_if ? nbuf->nb_ifid : 0;
499: con->c_proto = npc->npc_proto;
500:
501: /* Set last activity time for a new connection. */
502: getnanouptime(&con->c_atime);
503:
504: /*
505: * Insert both keys (entries representing directions) of the
506: * connection. At this point, it becomes visible.
507: */
508: if (!npf_conndb_insert(conn_db, fw, con)) {
509: goto err;
510: }
511: if (!npf_conndb_insert(conn_db, bk, con)) {
512: /* We have hit the duplicate. */
513: npf_conndb_remove(conn_db, fw);
1.6 rmind 514: npf_stats_inc(NPF_STAT_RACE_CONN);
1.1 rmind 515: goto err;
516: }
517:
518: /* Finally, insert into the connection list. */
519: NPF_PRINTF(("NPF: establish conn %p\n", con));
520: npf_conndb_enqueue(conn_db, con);
521: return con;
522: err:
523: npf_conn_destroy(con);
524: return NULL;
525: }
526:
527: static void
528: npf_conn_destroy(npf_conn_t *con)
529: {
530: if (con->c_nat) {
531: /* Release any NAT structures. */
532: npf_nat_destroy(con->c_nat);
533: }
534: if (con->c_rproc) {
535: /* Release the rule procedure. */
536: npf_rproc_release(con->c_rproc);
537: }
538:
539: /* Destroy the state. */
540: npf_state_destroy(&con->c_state);
541: mutex_destroy(&con->c_lock);
542:
543: /* Free the structure, increase the counter. */
544: pool_cache_put(conn_cache, con);
1.6 rmind 545: npf_stats_inc(NPF_STAT_CONN_DESTROY);
1.1 rmind 546: NPF_PRINTF(("NPF: conn %p destroyed\n", con));
547: }
548:
549: /*
550: * npf_conn_setnat: associate NAT entry with the connection, update and
551: * re-insert connection entry using the translation values.
552: */
553: int
554: npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con,
555: npf_nat_t *nt, u_int ntype)
556: {
557: static const u_int nat_type_dimap[] = {
558: [NPF_NATOUT] = NPF_DST,
559: [NPF_NATIN] = NPF_SRC,
560: };
561: npf_connkey_t key, *bk;
1.2 rmind 562: npf_conn_t *ret __diagused;
1.1 rmind 563: npf_addr_t *taddr;
564: in_port_t tport;
565: u_int tidx;
566:
567: KASSERT(con->c_refcnt > 0);
568:
569: npf_nat_gettrans(nt, &taddr, &tport);
570: KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN);
571: tidx = nat_type_dimap[ntype];
572:
573: /* Construct a "backwards" key. */
574: if (!npf_conn_conkey(npc, &key, false)) {
575: return EINVAL;
576: }
577:
578: /* Acquire the lock and check for the races. */
579: mutex_enter(&con->c_lock);
580: if (__predict_false(con->c_flags & CONN_EXPIRE)) {
581: /* The connection got expired. */
582: mutex_exit(&con->c_lock);
583: return EINVAL;
584: }
585: if (__predict_false(con->c_nat != NULL)) {
586: /* Race with a duplicate packet. */
587: mutex_exit(&con->c_lock);
588: npf_stats_inc(NPF_STAT_RACE_NAT);
589: return EISCONN;
590: }
591:
592: /* Remove the "backwards" entry. */
593: ret = npf_conndb_remove(conn_db, &key);
594: KASSERT(ret == con);
595:
596: /* Set the source/destination IDs to the translation values. */
597: bk = &con->c_back_entry;
598: connkey_set_addr(bk, taddr, tidx);
599: if (tport) {
600: connkey_set_id(bk, tport, tidx);
601: }
602:
603: /* Finally, re-insert the "backwards" entry. */
604: if (!npf_conndb_insert(conn_db, bk, con)) {
605: /*
606: * Race: we have hit the duplicate, remove the "forwards"
607: * entry and expire our connection; it is no longer valid.
608: */
609: (void)npf_conndb_remove(conn_db, &con->c_forw_entry);
610: atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
611: mutex_exit(&con->c_lock);
612:
613: npf_stats_inc(NPF_STAT_RACE_NAT);
614: return EISCONN;
615: }
616:
617: /* Associate the NAT entry and release the lock. */
618: con->c_nat = nt;
619: mutex_exit(&con->c_lock);
620: return 0;
621: }
622:
623: /*
624: * npf_conn_expire: explicitly mark connection as expired.
625: */
626: void
627: npf_conn_expire(npf_conn_t *con)
628: {
629: /* KASSERT(con->c_refcnt > 0); XXX: npf_nat_freepolicy() */
630: atomic_or_uint(&con->c_flags, CONN_EXPIRE);
631: }
632:
633: /*
634: * npf_conn_pass: return true if connection is "pass" one, otherwise false.
635: */
636: bool
637: npf_conn_pass(const npf_conn_t *con, npf_rproc_t **rp)
638: {
639: KASSERT(con->c_refcnt > 0);
640: if (__predict_true(con->c_flags & CONN_PASS)) {
641: *rp = con->c_rproc;
642: return true;
643: }
644: return false;
645: }
646:
647: /*
648: * npf_conn_setpass: mark connection as a "pass" one and associate the
649: * rule procedure with it.
650: */
651: void
652: npf_conn_setpass(npf_conn_t *con, npf_rproc_t *rp)
653: {
654: KASSERT((con->c_flags & CONN_ACTIVE) == 0);
655: KASSERT(con->c_refcnt > 0);
656: KASSERT(con->c_rproc == NULL);
657:
658: /*
659: * No need for atomic since the connection is not yet active.
660: * If rproc is set, the caller transfers its reference to us,
661: * which will be released on npf_conn_destroy().
662: */
663: con->c_flags |= CONN_PASS;
664: con->c_rproc = rp;
665: }
666:
667: /*
668: * npf_conn_release: release a reference, which might allow G/C thread
669: * to destroy this connection.
670: */
671: void
672: npf_conn_release(npf_conn_t *con)
673: {
674: if ((con->c_flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
675: /* Activate: after this, connection is globally visible. */
676: con->c_flags |= CONN_ACTIVE;
677: }
678: KASSERT(con->c_refcnt > 0);
679: atomic_dec_uint(&con->c_refcnt);
680: }
681:
682: /*
683: * npf_conn_retnat: return associated NAT data entry and indicate
684: * whether it is a "forwards" or "backwards" stream.
685: */
686: npf_nat_t *
687: npf_conn_retnat(npf_conn_t *con, const int di, bool *forw)
688: {
689: KASSERT(con->c_refcnt > 0);
690: *forw = (con->c_flags & PFIL_ALL) == di;
691: return con->c_nat;
692: }
693:
694: /*
695: * npf_conn_expired: criterion to check if connection is expired.
696: */
697: static inline bool
698: npf_conn_expired(const npf_conn_t *con, const struct timespec *tsnow)
699: {
700: const int etime = npf_state_etime(&con->c_state, con->c_proto);
701: struct timespec tsdiff;
702:
703: if (__predict_false(con->c_flags & CONN_EXPIRE)) {
704: /* Explicitly marked to be expired. */
705: return true;
706: }
707: timespecsub(tsnow, &con->c_atime, &tsdiff);
708: return tsdiff.tv_sec > etime;
709: }
710:
711: /*
1.6 rmind 712: * npf_conn_gc: garbage collect the expired connections.
713: *
714: * => Must run in a single-threaded manner.
715: * => If it is a flush request, then destroy all connections.
716: * => If 'sync' is true, then perform passive serialisation.
1.1 rmind 717: */
1.7 rmind 718: void
1.6 rmind 719: npf_conn_gc(npf_conndb_t *cd, bool flush, bool sync)
1.1 rmind 720: {
721: npf_conn_t *con, *prev, *gclist = NULL;
722: struct timespec tsnow;
723:
724: getnanouptime(&tsnow);
725:
726: /*
727: * Scan all connections and check them for expiration.
728: */
729: prev = NULL;
730: con = npf_conndb_getlist(cd);
731: while (con) {
732: npf_conn_t *next = con->c_next;
733:
734: /* Expired? Flushing all? */
1.6 rmind 735: if (!npf_conn_expired(con, &tsnow) && !flush) {
1.1 rmind 736: prev = con;
737: con = next;
738: continue;
739: }
740:
741: /* Remove both entries of the connection. */
742: mutex_enter(&con->c_lock);
743: if ((con->c_flags & CONN_REMOVED) == 0) {
744: npf_conn_t *ret __diagused;
745:
746: ret = npf_conndb_remove(cd, &con->c_forw_entry);
747: KASSERT(ret == con);
748: ret = npf_conndb_remove(cd, &con->c_back_entry);
749: KASSERT(ret == con);
750: }
751:
752: /* Flag the removal and expiration. */
753: atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
754: mutex_exit(&con->c_lock);
755:
756: /* Move to the G/C list. */
757: npf_conndb_dequeue(cd, con, prev);
758: con->c_next = gclist;
759: gclist = con;
760:
761: /* Next.. */
762: con = next;
763: }
764: npf_conndb_settail(cd, prev);
1.6 rmind 765:
766: /*
767: * Ensure it is safe to destroy the connections.
768: * Note: drop the conn_lock (see the lock order).
769: */
770: if (sync) {
771: mutex_exit(&conn_lock);
772: if (gclist) {
773: npf_config_enter();
774: npf_config_sync();
775: npf_config_exit();
776: }
1.1 rmind 777: }
778:
779: /*
780: * Garbage collect all expired connections.
781: * May need to wait for the references to drain.
782: */
783: con = gclist;
784: while (con) {
785: npf_conn_t *next = con->c_next;
786:
787: /*
788: * Destroy only if removed and no references.
789: * Otherwise, wait for a tiny moment.
790: */
791: if (__predict_false(con->c_refcnt)) {
792: kpause("npfcongc", false, 1, NULL);
793: continue;
794: }
795: npf_conn_destroy(con);
796: con = next;
797: }
798: }
799:
1.6 rmind 800: /*
801: * npf_conn_worker: G/C to run from a worker thread.
802: */
803: static void
804: npf_conn_worker(void)
1.1 rmind 805: {
1.6 rmind 806: mutex_enter(&conn_lock);
807: /* Note: the conn_lock will be released (sync == true). */
808: npf_conn_gc(conn_db, false, true);
1.1 rmind 809: }
810:
811: /*
1.6 rmind 812: * npf_conn_export: construct a list of connections prepared for saving.
1.1 rmind 813: * Note: this is expected to be an expensive operation.
814: */
815: int
1.6 rmind 816: npf_conn_export(prop_array_t conlist)
1.1 rmind 817: {
818: npf_conn_t *con, *prev;
819:
820: /*
821: * Note: acquire conn_lock to prevent from the database
822: * destruction and G/C thread.
823: */
824: mutex_enter(&conn_lock);
1.6 rmind 825: if (conn_tracking != CONN_TRACKING_ON) {
1.1 rmind 826: mutex_exit(&conn_lock);
827: return 0;
828: }
829: prev = NULL;
830: con = npf_conndb_getlist(conn_db);
831: while (con) {
832: npf_conn_t *next = con->c_next;
833: prop_data_t d;
834:
835: if ((con->c_flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE)
836: goto skip;
837:
838: prop_dictionary_t cdict = prop_dictionary_create();
839: prop_dictionary_set_uint32(cdict, "flags", con->c_flags);
840: prop_dictionary_set_uint32(cdict, "proto", con->c_proto);
841: /* FIXME: interface-id */
842:
843: d = prop_data_create_data(&con->c_state, sizeof(npf_state_t));
844: prop_dictionary_set_and_rel(cdict, "state", d);
845:
846: const uint32_t *fkey = con->c_forw_entry.ck_key;
847: d = prop_data_create_data(fkey, NPF_CONN_MAXKEYLEN);
848: prop_dictionary_set_and_rel(cdict, "forw-key", d);
849:
850: const uint32_t *bkey = con->c_back_entry.ck_key;
851: d = prop_data_create_data(bkey, NPF_CONN_MAXKEYLEN);
852: prop_dictionary_set_and_rel(cdict, "back-key", d);
853:
854: if (con->c_nat) {
1.6 rmind 855: npf_nat_export(cdict, con->c_nat);
1.1 rmind 856: }
857: prop_array_add(conlist, cdict);
858: prop_object_release(cdict);
859: skip:
860: prev = con;
861: con = next;
862: }
863: npf_conndb_settail(conn_db, prev);
864: mutex_exit(&conn_lock);
1.5 joerg 865: return 0;
1.1 rmind 866: }
867:
868: /*
1.6 rmind 869: * npf_conn_import: fully reconstruct a single connection from a
870: * directory and insert into the given database.
1.1 rmind 871: */
872: int
1.6 rmind 873: npf_conn_import(npf_conndb_t *cd, prop_dictionary_t cdict,
874: npf_ruleset_t *natlist)
1.1 rmind 875: {
876: npf_conn_t *con;
877: npf_connkey_t *fw, *bk;
878: prop_object_t obj;
879: const void *d;
880:
881: /* Allocate a connection and initialise it (clear first). */
882: con = pool_cache_get(conn_cache, PR_WAITOK);
883: memset(con, 0, sizeof(npf_conn_t));
884: mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
885:
886: prop_dictionary_get_uint32(cdict, "proto", &con->c_proto);
887: prop_dictionary_get_uint32(cdict, "flags", &con->c_flags);
888: con->c_flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
889: getnanouptime(&con->c_atime);
890:
891: obj = prop_dictionary_get(cdict, "state");
892: if ((d = prop_data_data_nocopy(obj)) == NULL ||
893: prop_data_size(obj) != sizeof(npf_state_t)) {
894: goto err;
895: }
896: memcpy(&con->c_state, d, sizeof(npf_state_t));
897:
898: /* Reconstruct NAT association, if any, or return NULL. */
1.6 rmind 899: con->c_nat = npf_nat_import(cdict, natlist, con);
1.1 rmind 900:
901: /*
902: * Fetch and copy the keys for each direction.
903: */
904: obj = prop_dictionary_get(cdict, "forw-key");
905: if ((d = prop_data_data_nocopy(obj)) == NULL ||
906: prop_data_size(obj) != NPF_CONN_MAXKEYLEN) {
907: goto err;
908: }
909: fw = &con->c_forw_entry;
910: memcpy(&fw->ck_key, d, NPF_CONN_MAXKEYLEN);
911:
912: obj = prop_dictionary_get(cdict, "back-key");
913: if ((d = prop_data_data_nocopy(obj)) == NULL ||
914: prop_data_size(obj) != NPF_CONN_MAXKEYLEN) {
915: goto err;
916: }
917: bk = &con->c_back_entry;
918: memcpy(&bk->ck_key, d, NPF_CONN_MAXKEYLEN);
919:
920: fw->ck_backptr = bk->ck_backptr = con;
921:
922: /* Insert the entries and the connection itself. */
923: if (!npf_conndb_insert(cd, fw, con)) {
924: goto err;
925: }
926: if (!npf_conndb_insert(cd, bk, con)) {
927: npf_conndb_remove(cd, fw);
928: goto err;
929: }
930: npf_conndb_enqueue(cd, con);
931: return 0;
932: err:
933: npf_conn_destroy(con);
934: return EINVAL;
935: }
936:
937: #if defined(DDB) || defined(_NPF_TESTING)
938:
939: void
940: npf_conn_print(const npf_conn_t *con)
941: {
942: const u_int alen = NPF_CONN_GETALEN(&con->c_forw_entry);
943: const uint32_t *fkey = con->c_forw_entry.ck_key;
944: const uint32_t *bkey = con->c_back_entry.ck_key;
945: const u_int proto = con->c_proto;
946: struct timespec tsnow, tsdiff;
947: const void *src, *dst;
948: int etime;
949:
950: getnanouptime(&tsnow);
951: timespecsub(&tsnow, &con->c_atime, &tsdiff);
952: etime = npf_state_etime(&con->c_state, proto);
953:
954: printf("%p:\n\tproto %d flags 0x%x tsdiff %d etime %d\n",
955: con, proto, con->c_flags, (int)tsdiff.tv_sec, etime);
956:
957: src = &fkey[2], dst = &fkey[2 + (alen >> 2)];
958: printf("\tforw %s:%d", npf_addr_dump(src, alen), ntohs(fkey[1] >> 16));
959: printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(fkey[1] & 0xffff));
960:
961: src = &bkey[2], dst = &bkey[2 + (alen >> 2)];
962: printf("\tback %s:%d", npf_addr_dump(src, alen), ntohs(bkey[1] >> 16));
963: printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(bkey[1] & 0xffff));
964:
965: npf_state_dump(&con->c_state);
966: if (con->c_nat) {
967: npf_nat_dump(con->c_nat);
968: }
969: }
970:
971: #endif
CVSweb <webmaster@jp.NetBSD.org>