Annotation of src/sys/netinet/ip_encap.c, Revision 1.69.2.2
1.69.2.2! martin 1: /* $NetBSD$ */
1.7 itojun 2: /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
1.1 itojun 3:
4: /*
5: * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the project nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32: /*
33: * My grandfather said that there's a devil inside tunnelling technology...
34: *
35: * We have surprisingly many protocols that want packets with IP protocol
36: * #4 or #41. Here's a list of protocols that want protocol #41:
37: * RFC1933 configured tunnel
38: * RFC1933 automatic tunnel
39: * RFC2401 IPsec tunnel
40: * RFC2473 IPv6 generic packet tunnelling
41: * RFC2529 6over4 tunnel
1.7 itojun 42: * RFC3056 6to4 tunnel
43: * isatap tunnel
1.1 itojun 44: * mobile-ip6 (uses RFC2473)
45: * Here's a list of protocols that want protocol #4:
46: * RFC1853 IPv4-in-IPv4 tunnelling
47: * RFC2003 IPv4 encapsulation within IPv4
48: * RFC2344 reverse tunnelling for mobile-ip4
49: * RFC2401 IPsec tunnel
50: * Well, what can I say. They impose different en/decapsulation mechanisms
51: * from each other, so they need separate protocol handlers. The only one
52: * we can easily determine by protocol # is IPsec, which always has
53: * AH/ESP/IPComp header right after outer IP header.
54: *
55: * So, clearly good old protosw does not work for protocol #4 and #41.
56: * The code will let you match protocol via src/dst address pair.
57: */
58: /* XXX is M_NETADDR correct? */
1.6 lukem 59:
1.7 itojun 60: /*
1.55 knakahar 61: * With USE_RADIX the code will use radix table for tunnel lookup, for
1.7 itojun 62: * tunnels registered with encap_attach() with an addr/mask pair.
63: * Faster on machines with thousands of tunnel registrations (= interfaces).
64: *
65: * The code assumes that radix table code can handle non-continuous netmask,
66: * as it will pass radix table memory region with (src + dst) sockaddr pair.
67: */
1.55 knakahar 68: #define USE_RADIX
1.7 itojun 69:
1.6 lukem 70: #include <sys/cdefs.h>
1.69.2.2! martin 71: __KERNEL_RCSID(0, "$NetBSD$");
1.1 itojun 72:
1.46 pooka 73: #ifdef _KERNEL_OPT
1.4 itojun 74: #include "opt_mrouting.h"
75: #include "opt_inet.h"
1.61 knakahar 76: #include "opt_net_mpsafe.h"
1.46 pooka 77: #endif
1.1 itojun 78:
79: #include <sys/param.h>
80: #include <sys/systm.h>
81: #include <sys/socket.h>
1.69.2.1 christos 82: #include <sys/socketvar.h> /* for softnet_lock */
1.1 itojun 83: #include <sys/sockio.h>
84: #include <sys/mbuf.h>
85: #include <sys/errno.h>
1.4 itojun 86: #include <sys/queue.h>
1.47 knakahar 87: #include <sys/kmem.h>
1.56 knakahar 88: #include <sys/mutex.h>
1.59 knakahar 89: #include <sys/condvar.h>
1.56 knakahar 90: #include <sys/psref.h>
91: #include <sys/pslist.h>
1.1 itojun 92:
93: #include <net/if.h>
94:
95: #include <netinet/in.h>
96: #include <netinet/in_systm.h>
97: #include <netinet/ip.h>
98: #include <netinet/ip_var.h>
99: #include <netinet/ip_encap.h>
100: #ifdef MROUTING
101: #include <netinet/ip_mroute.h>
102: #endif /* MROUTING */
103:
104: #ifdef INET6
105: #include <netinet/ip6.h>
106: #include <netinet6/ip6_var.h>
1.51 knakahar 107: #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
1.7 itojun 108: #include <netinet6/in6_var.h>
109: #include <netinet6/in6_pcb.h>
110: #include <netinet/icmp6.h>
1.1 itojun 111: #endif
112:
/*
 * ENCAP_MPSAFE is only defined for NET_MPSAFE kernels.  When it is not
 * defined, the attach routines in this file run at splsoftnet().
 */
#ifdef NET_MPSAFE
#define ENCAP_MPSAFE	1
#endif

/*
 * Direction of the packet handed to encap[46]_lookup(); it decides which
 * of the header's addresses is treated as "mine" and which as "yours".
 */
enum direction { INBOUND, OUTBOUND };

/* Per-family lookup of the best matching encaptab entry. */
#ifdef INET
static struct encaptab *encap4_lookup(struct mbuf *, int, int,
    enum direction, struct psref *);
#endif
#ifdef INET6
static struct encaptab *encap6_lookup(struct mbuf *, int, int,
    enum direction, struct psref *);
#endif

/* Table maintenance and argument validation. */
static int encap_add(struct encaptab *);
static int encap_remove(struct encaptab *);
static int encap_afcheck(int, const struct sockaddr *,
    const struct sockaddr *);

/* Address/mask matching helpers; which set exists depends on USE_RADIX. */
#ifdef USE_RADIX
static struct radix_node_head *encap_rnh(int);
static int mask_matchlen(const struct sockaddr *);
#else
static int mask_match(const struct encaptab *, const struct sockaddr *,
    const struct sockaddr *);
#endif
1.1 itojun 137:
1.56 knakahar 138: /*
139: * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
140: * encap_table. So, it cannot use pserialize_read_enter()
141: */
142: static struct {
143: struct pslist_head list;
144: pserialize_t psz;
145: struct psref_class *elem_class; /* for the element of et_list */
146: } encaptab __cacheline_aligned = {
147: .list = PSLIST_INITIALIZER,
148: };
149: #define encap_table encaptab.list
1.1 itojun 150:
1.59 knakahar 151: static struct {
152: kmutex_t lock;
153: kcondvar_t cv;
154: struct lwp *busy;
155: } encap_whole __cacheline_aligned;
156:
1.55 knakahar 157: #ifdef USE_RADIX
1.7 itojun 158: struct radix_node_head *encap_head[2]; /* 0 for AF_INET, 1 for AF_INET6 */
1.57 knakahar 159: static bool encap_head_updating = false;
1.55 knakahar 160: #endif
1.7 itojun 161:
1.63 ozaki-r 162: static bool encap_initialized = false;
1.59 knakahar 163: /*
164: * must be done before other encap interfaces initialization.
165: */
166: void
167: encapinit(void)
168: {
169:
1.63 ozaki-r 170: if (encap_initialized)
171: return;
172:
1.60 knakahar 173: encaptab.psz = pserialize_create();
174: encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET);
175:
1.59 knakahar 176: mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE);
177: cv_init(&encap_whole.cv, "ip_encap cv");
178: encap_whole.busy = NULL;
1.63 ozaki-r 179:
180: encap_initialized = true;
1.59 knakahar 181: }
182:
/*
 * One-time initialization of the radix lookup tables.  Repeated calls are
 * harmless: everything after the first call is skipped.
 */
void
encap_init(void)
{
	static int done = 0;

	if (done != 0)
		return;
	done = 1;

	/*
	 * The encap table itself is intentionally NOT initialized here.
	 * Drivers may call encap_attach() on driver attach, and
	 * encap_init() is called on AF_INET{,6} initialization, which
	 * happens after driver initialization - initializing the list here
	 * could nuke those earlier attachments.  The list relies on its
	 * static initializer instead.
	 */

#ifdef USE_RADIX
	/*
	 * Have the radix tables set up when the radix subsystem is inited.
	 * The key length argument is the size of sockaddr_pack in bits.
	 */
	rn_delayedinit((void *)&encap_head[0],
	    sizeof(struct sockaddr_pack) << 3);
#ifdef INET6
	rn_delayedinit((void *)&encap_head[1],
	    sizeof(struct sockaddr_pack) << 3);
#endif
#endif
}
214:
1.4 itojun 215: #ifdef INET
1.7 itojun 216: static struct encaptab *
1.56 knakahar 217: encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir,
218: struct psref *match_psref)
1.1 itojun 219: {
220: struct ip *ip;
1.33 pooka 221: struct ip_pack4 pack;
1.1 itojun 222: struct encaptab *ep, *match;
223: int prio, matchprio;
1.56 knakahar 224: int s;
1.55 knakahar 225: #ifdef USE_RADIX
1.7 itojun 226: struct radix_node_head *rnh = encap_rnh(AF_INET);
227: struct radix_node *rn;
1.55 knakahar 228: #endif
1.1 itojun 229:
1.41 ozaki-r 230: KASSERT(m->m_len >= sizeof(*ip));
231:
1.1 itojun 232: ip = mtod(m, struct ip *);
233:
1.35 cegger 234: memset(&pack, 0, sizeof(pack));
1.7 itojun 235: pack.p.sp_len = sizeof(pack);
236: pack.mine.sin_family = pack.yours.sin_family = AF_INET;
237: pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in);
238: if (dir == INBOUND) {
239: pack.mine.sin_addr = ip->ip_dst;
240: pack.yours.sin_addr = ip->ip_src;
241: } else {
242: pack.mine.sin_addr = ip->ip_src;
243: pack.yours.sin_addr = ip->ip_dst;
244: }
1.1 itojun 245:
246: match = NULL;
247: matchprio = 0;
1.7 itojun 248:
1.56 knakahar 249: s = pserialize_read_enter();
1.55 knakahar 250: #ifdef USE_RADIX
1.57 knakahar 251: if (encap_head_updating) {
252: /*
253: * Update in progress. Do nothing.
254: */
255: pserialize_read_exit(s);
256: return NULL;
257: }
258:
1.30 christos 259: rn = rnh->rnh_matchaddr((void *)&pack, rnh);
1.7 itojun 260: if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
1.56 knakahar 261: struct encaptab *encapp = (struct encaptab *)rn;
262:
263: psref_acquire(match_psref, &encapp->psref,
264: encaptab.elem_class);
265: match = encapp;
1.7 itojun 266: matchprio = mask_matchlen(match->srcmask) +
267: mask_matchlen(match->dstmask);
268: }
1.55 knakahar 269: #endif
1.56 knakahar 270: PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
271: struct psref elem_psref;
272:
1.1 itojun 273: if (ep->af != AF_INET)
274: continue;
275: if (ep->proto >= 0 && ep->proto != proto)
276: continue;
1.56 knakahar 277:
278: psref_acquire(&elem_psref, &ep->psref,
279: encaptab.elem_class);
280: if (ep->func) {
281: pserialize_read_exit(s);
282: /* ep->func is sleepable. e.g. rtalloc1 */
1.1 itojun 283: prio = (*ep->func)(m, off, proto, ep->arg);
1.56 knakahar 284: s = pserialize_read_enter();
285: } else {
1.55 knakahar 286: #ifdef USE_RADIX
1.56 knakahar 287: psref_release(&elem_psref, &ep->psref,
288: encaptab.elem_class);
1.7 itojun 289: continue;
1.55 knakahar 290: #else
291: prio = mask_match(ep, (struct sockaddr *)&pack.mine,
292: (struct sockaddr *)&pack.yours);
293: #endif
294: }
1.1 itojun 295:
296: /*
297: * We prioritize the matches by using bit length of the
298: * matches. mask_match() and user-supplied matching function
299: * should return the bit length of the matches (for example,
300: * if both src/dst are matched for IPv4, 64 should be returned).
301: * 0 or negative return value means "it did not match".
302: *
303: * The question is, since we have two "mask" portion, we
304: * cannot really define total order between entries.
305: * For example, which of these should be preferred?
306: * mask_match() returns 48 (32 + 16) for both of them.
307: * src=3ffe::/16, dst=3ffe:501::/32
308: * src=3ffe:501::/32, dst=3ffe::/16
309: *
310: * We need to loop through all the possible candidates
311: * to get the best match - the search takes O(n) for
312: * n attachments (i.e. interfaces).
1.7 itojun 313: *
314: * For radix-based lookup, I guess source takes precedence.
315: * See rn_{refines,lexobetter} for the correct answer.
1.1 itojun 316: */
1.56 knakahar 317: if (prio <= 0) {
318: psref_release(&elem_psref, &ep->psref,
319: encaptab.elem_class);
1.1 itojun 320: continue;
1.56 knakahar 321: }
1.1 itojun 322: if (prio > matchprio) {
1.56 knakahar 323: /* release last matched ep */
324: if (match != NULL)
325: psref_release(match_psref, &match->psref,
326: encaptab.elem_class);
327:
328: psref_copy(match_psref, &elem_psref,
329: encaptab.elem_class);
1.1 itojun 330: matchprio = prio;
331: match = ep;
332: }
1.56 knakahar 333: KASSERTMSG((match == NULL) || psref_held(&match->psref,
334: encaptab.elem_class),
335: "current match = %p, but not hold its psref", match);
336:
337: psref_release(&elem_psref, &ep->psref,
338: encaptab.elem_class);
1.1 itojun 339: }
1.56 knakahar 340: pserialize_read_exit(s);
1.1 itojun 341:
1.7 itojun 342: return match;
343: }
344:
345: void
1.69.2.1 christos 346: encap4_input(struct mbuf *m, int off, int proto)
1.7 itojun 347: {
1.51 knakahar 348: const struct encapsw *esw;
1.7 itojun 349: struct encaptab *match;
1.56 knakahar 350: struct psref match_psref;
1.7 itojun 351:
1.56 knakahar 352: match = encap4_lookup(m, off, proto, INBOUND, &match_psref);
1.1 itojun 353: if (match) {
354: /* found a match, "match" has the best one */
1.51 knakahar 355: esw = match->esw;
356: if (esw && esw->encapsw4.pr_input) {
1.66 knakahar 357: (*esw->encapsw4.pr_input)(m, off, proto, match->arg);
1.56 knakahar 358: psref_release(&match_psref, &match->psref,
359: encaptab.elem_class);
1.54 knakahar 360: } else {
1.56 knakahar 361: psref_release(&match_psref, &match->psref,
362: encaptab.elem_class);
1.1 itojun 363: m_freem(m);
1.54 knakahar 364: }
1.1 itojun 365: return;
366: }
367:
368: /* last resort: inject to raw socket */
1.69 knakahar 369: SOFTNET_LOCK_IF_NET_MPSAFE();
1.1 itojun 370: rip_input(m, off, proto);
1.69 knakahar 371: SOFTNET_UNLOCK_IF_NET_MPSAFE();
1.1 itojun 372: }
373: #endif
374:
375: #ifdef INET6
1.7 itojun 376: static struct encaptab *
1.56 knakahar 377: encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir,
378: struct psref *match_psref)
1.1 itojun 379: {
380: struct ip6_hdr *ip6;
1.33 pooka 381: struct ip_pack6 pack;
1.7 itojun 382: int prio, matchprio;
1.56 knakahar 383: int s;
1.1 itojun 384: struct encaptab *ep, *match;
1.55 knakahar 385: #ifdef USE_RADIX
1.7 itojun 386: struct radix_node_head *rnh = encap_rnh(AF_INET6);
387: struct radix_node *rn;
1.55 knakahar 388: #endif
1.1 itojun 389:
1.41 ozaki-r 390: KASSERT(m->m_len >= sizeof(*ip6));
391:
1.1 itojun 392: ip6 = mtod(m, struct ip6_hdr *);
393:
1.35 cegger 394: memset(&pack, 0, sizeof(pack));
1.7 itojun 395: pack.p.sp_len = sizeof(pack);
396: pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6;
397: pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6);
398: if (dir == INBOUND) {
399: pack.mine.sin6_addr = ip6->ip6_dst;
400: pack.yours.sin6_addr = ip6->ip6_src;
401: } else {
402: pack.mine.sin6_addr = ip6->ip6_src;
403: pack.yours.sin6_addr = ip6->ip6_dst;
404: }
1.1 itojun 405:
406: match = NULL;
407: matchprio = 0;
1.7 itojun 408:
1.56 knakahar 409: s = pserialize_read_enter();
1.55 knakahar 410: #ifdef USE_RADIX
1.57 knakahar 411: if (encap_head_updating) {
412: /*
413: * Update in progress. Do nothing.
414: */
415: pserialize_read_exit(s);
416: return NULL;
417: }
418:
1.30 christos 419: rn = rnh->rnh_matchaddr((void *)&pack, rnh);
1.7 itojun 420: if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
1.56 knakahar 421: struct encaptab *encapp = (struct encaptab *)rn;
422:
423: psref_acquire(match_psref, &encapp->psref,
424: encaptab.elem_class);
425: match = encapp;
1.7 itojun 426: matchprio = mask_matchlen(match->srcmask) +
427: mask_matchlen(match->dstmask);
428: }
1.55 knakahar 429: #endif
1.56 knakahar 430: PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
431: struct psref elem_psref;
432:
1.1 itojun 433: if (ep->af != AF_INET6)
434: continue;
435: if (ep->proto >= 0 && ep->proto != proto)
436: continue;
1.56 knakahar 437:
438: psref_acquire(&elem_psref, &ep->psref,
439: encaptab.elem_class);
440:
441: if (ep->func) {
442: pserialize_read_exit(s);
443: /* ep->func is sleepable. e.g. rtalloc1 */
1.7 itojun 444: prio = (*ep->func)(m, off, proto, ep->arg);
1.56 knakahar 445: s = pserialize_read_enter();
446: } else {
1.55 knakahar 447: #ifdef USE_RADIX
1.56 knakahar 448: psref_release(&elem_psref, &ep->psref,
449: encaptab.elem_class);
1.7 itojun 450: continue;
1.55 knakahar 451: #else
452: prio = mask_match(ep, (struct sockaddr *)&pack.mine,
453: (struct sockaddr *)&pack.yours);
454: #endif
455: }
1.1 itojun 456:
1.7 itojun 457: /* see encap4_lookup() for issues here */
1.56 knakahar 458: if (prio <= 0) {
459: psref_release(&elem_psref, &ep->psref,
460: encaptab.elem_class);
1.1 itojun 461: continue;
1.56 knakahar 462: }
1.1 itojun 463: if (prio > matchprio) {
1.56 knakahar 464: /* release last matched ep */
465: if (match != NULL)
466: psref_release(match_psref, &match->psref,
467: encaptab.elem_class);
468:
469: psref_copy(match_psref, &elem_psref,
470: encaptab.elem_class);
1.1 itojun 471: matchprio = prio;
472: match = ep;
473: }
1.56 knakahar 474: KASSERTMSG((match == NULL) || psref_held(&match->psref,
475: encaptab.elem_class),
476: "current match = %p, but not hold its psref", match);
477:
478: psref_release(&elem_psref, &ep->psref,
479: encaptab.elem_class);
1.1 itojun 480: }
1.56 knakahar 481: pserialize_read_exit(s);
1.1 itojun 482:
1.7 itojun 483: return match;
484: }
485:
486: int
1.23 perry 487: encap6_input(struct mbuf **mp, int *offp, int proto)
1.7 itojun 488: {
489: struct mbuf *m = *mp;
1.51 knakahar 490: const struct encapsw *esw;
1.7 itojun 491: struct encaptab *match;
1.56 knakahar 492: struct psref match_psref;
1.69 knakahar 493: int rv;
1.7 itojun 494:
1.56 knakahar 495: match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref);
1.7 itojun 496:
1.1 itojun 497: if (match) {
498: /* found a match */
1.51 knakahar 499: esw = match->esw;
500: if (esw && esw->encapsw6.pr_input) {
1.56 knakahar 501: int ret;
1.66 knakahar 502: ret = (*esw->encapsw6.pr_input)(mp, offp, proto,
503: match->arg);
1.56 knakahar 504: psref_release(&match_psref, &match->psref,
505: encaptab.elem_class);
506: return ret;
1.1 itojun 507: } else {
1.56 knakahar 508: psref_release(&match_psref, &match->psref,
509: encaptab.elem_class);
1.1 itojun 510: m_freem(m);
511: return IPPROTO_DONE;
512: }
513: }
514:
515: /* last resort: inject to raw socket */
1.69 knakahar 516: SOFTNET_LOCK_IF_NET_MPSAFE();
517: rv = rip6_input(mp, offp, proto);
518: SOFTNET_UNLOCK_IF_NET_MPSAFE();
519: return rv;
1.1 itojun 520: }
521: #endif
522:
1.54 knakahar 523: /*
524: * XXX
525: * The encaptab list and the rnh radix tree must be manipulated atomically.
526: */
1.7 itojun 527: static int
1.23 perry 528: encap_add(struct encaptab *ep)
1.1 itojun 529: {
1.55 knakahar 530: #ifdef USE_RADIX
1.7 itojun 531: struct radix_node_head *rnh = encap_rnh(ep->af);
1.55 knakahar 532: #endif
1.1 itojun 533:
1.56 knakahar 534: KASSERT(encap_lock_held());
1.54 knakahar 535:
1.55 knakahar 536: #ifdef USE_RADIX
1.7 itojun 537: if (!ep->func && rnh) {
1.57 knakahar 538: /* Disable access to the radix tree for reader. */
539: encap_head_updating = true;
1.56 knakahar 540: /* Wait for all readers to drain. */
541: pserialize_perform(encaptab.psz);
542:
1.30 christos 543: if (!rnh->rnh_addaddr((void *)ep->addrpack,
544: (void *)ep->maskpack, rnh, ep->nodes)) {
1.57 knakahar 545: encap_head_updating = false;
1.56 knakahar 546: return EEXIST;
1.7 itojun 547: }
1.57 knakahar 548:
549: /*
550: * The ep added to the radix tree must be skipped while
551: * encap[46]_lookup walks encaptab list. In other words,
552: * encap_add() does not need to care whether the ep has
553: * been added encaptab list or not yet.
554: * So, we can re-enable access to the radix tree for now.
555: */
556: encap_head_updating = false;
1.7 itojun 557: }
1.55 knakahar 558: #endif
1.56 knakahar 559: PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain);
1.7 itojun 560:
1.56 knakahar 561: return 0;
1.7 itojun 562: }
563:
1.54 knakahar 564: /*
565: * XXX
566: * The encaptab list and the rnh radix tree must be manipulated atomically.
567: */
1.7 itojun 568: static int
1.23 perry 569: encap_remove(struct encaptab *ep)
1.7 itojun 570: {
1.55 knakahar 571: #ifdef USE_RADIX
1.7 itojun 572: struct radix_node_head *rnh = encap_rnh(ep->af);
1.55 knakahar 573: #endif
1.7 itojun 574: int error = 0;
575:
1.56 knakahar 576: KASSERT(encap_lock_held());
1.54 knakahar 577:
1.55 knakahar 578: #ifdef USE_RADIX
1.7 itojun 579: if (!ep->func && rnh) {
1.57 knakahar 580: /* Disable access to the radix tree for reader. */
581: encap_head_updating = true;
1.56 knakahar 582: /* Wait for all readers to drain. */
583: pserialize_perform(encaptab.psz);
584:
1.30 christos 585: if (!rnh->rnh_deladdr((void *)ep->addrpack,
586: (void *)ep->maskpack, rnh))
1.7 itojun 587: error = ESRCH;
1.57 knakahar 588:
589: /*
590: * The ep added to the radix tree must be skipped while
591: * encap[46]_lookup walks encaptab list. In other words,
592: * encap_add() does not need to care whether the ep has
593: * been added encaptab list or not yet.
594: * So, we can re-enable access to the radix tree for now.
595: */
596: encap_head_updating = false;
1.7 itojun 597: }
1.55 knakahar 598: #endif
1.56 knakahar 599: PSLIST_WRITER_REMOVE(ep, chain);
600:
1.7 itojun 601: return error;
602: }
603:
604: static int
1.23 perry 605: encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp)
1.7 itojun 606: {
607: if (sp && dp) {
608: if (sp->sa_len != dp->sa_len)
609: return EINVAL;
610: if (af != sp->sa_family || af != dp->sa_family)
611: return EINVAL;
612: } else if (!sp && !dp)
613: ;
614: else
615: return EINVAL;
616:
617: switch (af) {
618: case AF_INET:
619: if (sp && sp->sa_len != sizeof(struct sockaddr_in))
620: return EINVAL;
621: if (dp && dp->sa_len != sizeof(struct sockaddr_in))
622: return EINVAL;
623: break;
624: #ifdef INET6
625: case AF_INET6:
626: if (sp && sp->sa_len != sizeof(struct sockaddr_in6))
627: return EINVAL;
628: if (dp && dp->sa_len != sizeof(struct sockaddr_in6))
629: return EINVAL;
630: break;
631: #endif
632: default:
633: return EAFNOSUPPORT;
634: }
635:
636: return 0;
1.1 itojun 637: }
638:
639: /*
640: * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
641: * length of mask (sm and dm) is assumed to be same as sp/dp.
642: * Return value will be necessary as input (cookie) for encap_detach().
643: */
644: const struct encaptab *
1.23 perry 645: encap_attach(int af, int proto,
646: const struct sockaddr *sp, const struct sockaddr *sm,
647: const struct sockaddr *dp, const struct sockaddr *dm,
1.51 knakahar 648: const struct encapsw *esw, void *arg)
1.1 itojun 649: {
650: struct encaptab *ep;
651: int error;
1.61 knakahar 652: int pss;
1.7 itojun 653: size_t l;
1.33 pooka 654: struct ip_pack4 *pack4;
1.7 itojun 655: #ifdef INET6
1.33 pooka 656: struct ip_pack6 *pack6;
1.7 itojun 657: #endif
1.61 knakahar 658: #ifndef ENCAP_MPSAFE
659: int s;
1.1 itojun 660:
661: s = splsoftnet();
1.61 knakahar 662: #endif
1.1 itojun 663: /* sanity check on args */
1.7 itojun 664: error = encap_afcheck(af, sp, dp);
665: if (error)
1.1 itojun 666: goto fail;
667:
668: /* check if anyone have already attached with exactly same config */
1.56 knakahar 669: pss = pserialize_read_enter();
670: PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
1.1 itojun 671: if (ep->af != af)
672: continue;
673: if (ep->proto != proto)
674: continue;
1.7 itojun 675: if (ep->func)
676: continue;
1.41 ozaki-r 677:
1.43 riastrad 678: KASSERT(ep->src != NULL);
679: KASSERT(ep->dst != NULL);
680: KASSERT(ep->srcmask != NULL);
681: KASSERT(ep->dstmask != NULL);
1.41 ozaki-r 682:
1.7 itojun 683: if (ep->src->sa_len != sp->sa_len ||
1.34 cegger 684: memcmp(ep->src, sp, sp->sa_len) != 0 ||
685: memcmp(ep->srcmask, sm, sp->sa_len) != 0)
1.7 itojun 686: continue;
687: if (ep->dst->sa_len != dp->sa_len ||
1.34 cegger 688: memcmp(ep->dst, dp, dp->sa_len) != 0 ||
689: memcmp(ep->dstmask, dm, dp->sa_len) != 0)
1.1 itojun 690: continue;
691:
692: error = EEXIST;
1.56 knakahar 693: pserialize_read_exit(pss);
1.1 itojun 694: goto fail;
695: }
1.56 knakahar 696: pserialize_read_exit(pss);
1.3 thorpej 697:
1.7 itojun 698: switch (af) {
699: case AF_INET:
700: l = sizeof(*pack4);
701: break;
702: #ifdef INET6
703: case AF_INET6:
704: l = sizeof(*pack6);
705: break;
706: #endif
707: default:
708: goto fail;
709: }
710:
1.20 itojun 711: /* M_NETADDR ok? */
1.47 knakahar 712: ep = kmem_zalloc(sizeof(*ep), KM_NOSLEEP);
1.1 itojun 713: if (ep == NULL) {
714: error = ENOBUFS;
715: goto fail;
716: }
1.47 knakahar 717: ep->addrpack = kmem_zalloc(l, KM_NOSLEEP);
1.7 itojun 718: if (ep->addrpack == NULL) {
719: error = ENOBUFS;
720: goto gc;
721: }
1.47 knakahar 722: ep->maskpack = kmem_zalloc(l, KM_NOSLEEP);
1.7 itojun 723: if (ep->maskpack == NULL) {
724: error = ENOBUFS;
725: goto gc;
726: }
1.1 itojun 727:
728: ep->af = af;
729: ep->proto = proto;
1.7 itojun 730: ep->addrpack->sa_len = l & 0xff;
731: ep->maskpack->sa_len = l & 0xff;
732: switch (af) {
733: case AF_INET:
1.33 pooka 734: pack4 = (struct ip_pack4 *)ep->addrpack;
1.7 itojun 735: ep->src = (struct sockaddr *)&pack4->mine;
736: ep->dst = (struct sockaddr *)&pack4->yours;
1.33 pooka 737: pack4 = (struct ip_pack4 *)ep->maskpack;
1.7 itojun 738: ep->srcmask = (struct sockaddr *)&pack4->mine;
739: ep->dstmask = (struct sockaddr *)&pack4->yours;
740: break;
741: #ifdef INET6
742: case AF_INET6:
1.33 pooka 743: pack6 = (struct ip_pack6 *)ep->addrpack;
1.7 itojun 744: ep->src = (struct sockaddr *)&pack6->mine;
745: ep->dst = (struct sockaddr *)&pack6->yours;
1.33 pooka 746: pack6 = (struct ip_pack6 *)ep->maskpack;
1.7 itojun 747: ep->srcmask = (struct sockaddr *)&pack6->mine;
748: ep->dstmask = (struct sockaddr *)&pack6->yours;
749: break;
750: #endif
751: }
752:
1.37 tsutsui 753: memcpy(ep->src, sp, sp->sa_len);
754: memcpy(ep->srcmask, sm, sp->sa_len);
755: memcpy(ep->dst, dp, dp->sa_len);
756: memcpy(ep->dstmask, dm, dp->sa_len);
1.51 knakahar 757: ep->esw = esw;
1.1 itojun 758: ep->arg = arg;
1.56 knakahar 759: psref_target_init(&ep->psref, encaptab.elem_class);
1.1 itojun 760:
1.7 itojun 761: error = encap_add(ep);
762: if (error)
763: goto gc;
1.1 itojun 764:
765: error = 0;
1.61 knakahar 766: #ifndef ENCAP_MPSAFE
1.1 itojun 767: splx(s);
1.61 knakahar 768: #endif
1.1 itojun 769: return ep;
770:
1.7 itojun 771: gc:
772: if (ep->addrpack)
1.47 knakahar 773: kmem_free(ep->addrpack, l);
1.7 itojun 774: if (ep->maskpack)
1.47 knakahar 775: kmem_free(ep->maskpack, l);
1.7 itojun 776: if (ep)
1.47 knakahar 777: kmem_free(ep, sizeof(*ep));
1.1 itojun 778: fail:
1.61 knakahar 779: #ifndef ENCAP_MPSAFE
1.1 itojun 780: splx(s);
1.61 knakahar 781: #endif
1.1 itojun 782: return NULL;
783: }
784:
785: const struct encaptab *
1.23 perry 786: encap_attach_func(int af, int proto,
1.26 martin 787: int (*func)(struct mbuf *, int, int, void *),
1.51 knakahar 788: const struct encapsw *esw, void *arg)
1.1 itojun 789: {
790: struct encaptab *ep;
791: int error;
1.61 knakahar 792: #ifndef ENCAP_MPSAFE
1.1 itojun 793: int s;
794:
795: s = splsoftnet();
1.61 knakahar 796: #endif
1.1 itojun 797: /* sanity check on args */
798: if (!func) {
799: error = EINVAL;
800: goto fail;
801: }
802:
1.7 itojun 803: error = encap_afcheck(af, NULL, NULL);
804: if (error)
805: goto fail;
806:
1.47 knakahar 807: ep = kmem_alloc(sizeof(*ep), KM_NOSLEEP); /*XXX*/
1.1 itojun 808: if (ep == NULL) {
809: error = ENOBUFS;
810: goto fail;
811: }
1.35 cegger 812: memset(ep, 0, sizeof(*ep));
1.1 itojun 813:
814: ep->af = af;
815: ep->proto = proto;
816: ep->func = func;
1.51 knakahar 817: ep->esw = esw;
1.1 itojun 818: ep->arg = arg;
1.56 knakahar 819: psref_target_init(&ep->psref, encaptab.elem_class);
1.1 itojun 820:
1.7 itojun 821: error = encap_add(ep);
822: if (error)
1.67 maxv 823: goto gc;
1.1 itojun 824:
825: error = 0;
1.61 knakahar 826: #ifndef ENCAP_MPSAFE
1.1 itojun 827: splx(s);
1.61 knakahar 828: #endif
1.1 itojun 829: return ep;
830:
1.67 maxv 831: gc:
832: kmem_free(ep, sizeof(*ep));
1.1 itojun 833: fail:
1.61 knakahar 834: #ifndef ENCAP_MPSAFE
1.1 itojun 835: splx(s);
1.61 knakahar 836: #endif
1.1 itojun 837: return NULL;
838: }
839:
1.7 itojun 840: /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
841:
842: #ifdef INET6
1.32 ad 843: void *
1.29 dyoung 844: encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
1.7 itojun 845: {
846: void *d = d0;
847: struct ip6_hdr *ip6;
848: struct mbuf *m;
849: int off;
850: struct ip6ctlparam *ip6cp = NULL;
851: int nxt;
1.56 knakahar 852: int s;
1.7 itojun 853: struct encaptab *ep;
1.51 knakahar 854: const struct encapsw *esw;
1.7 itojun 855:
856: if (sa->sa_family != AF_INET6 ||
857: sa->sa_len != sizeof(struct sockaddr_in6))
1.32 ad 858: return NULL;
1.7 itojun 859:
860: if ((unsigned)cmd >= PRC_NCMDS)
1.32 ad 861: return NULL;
1.7 itojun 862: if (cmd == PRC_HOSTDEAD)
863: d = NULL;
864: else if (cmd == PRC_MSGSIZE)
865: ; /* special code is present, see below */
866: else if (inet6ctlerrmap[cmd] == 0)
1.32 ad 867: return NULL;
1.7 itojun 868:
869: /* if the parameter is from icmp6, decode it. */
870: if (d != NULL) {
871: ip6cp = (struct ip6ctlparam *)d;
872: m = ip6cp->ip6c_m;
873: ip6 = ip6cp->ip6c_ip6;
874: off = ip6cp->ip6c_off;
875: nxt = ip6cp->ip6c_nxt;
1.15 mycroft 876:
877: if (ip6 && cmd == PRC_MSGSIZE) {
878: int valid = 0;
879: struct encaptab *match;
1.56 knakahar 880: struct psref elem_psref;
1.15 mycroft 881:
882: /*
883: * Check to see if we have a valid encap configuration.
884: */
1.56 knakahar 885: match = encap6_lookup(m, off, nxt, OUTBOUND,
886: &elem_psref);
1.69.2.2! martin 887: if (match) {
1.15 mycroft 888: valid++;
1.69.2.2! martin 889: psref_release(&elem_psref, &match->psref,
! 890: encaptab.elem_class);
! 891: }
1.15 mycroft 892:
893: /*
894: * Depending on the value of "valid" and routing table
895: * size (mtudisc_{hi,lo}wat), we will:
896: * - recalcurate the new MTU and create the
897: * corresponding routing entry, or
898: * - ignore the MTU change notification.
899: */
900: icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
901: }
1.7 itojun 902: } else {
903: m = NULL;
904: ip6 = NULL;
905: nxt = -1;
906: }
907:
908: /* inform all listeners */
1.56 knakahar 909:
910: s = pserialize_read_enter();
911: PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
912: struct psref elem_psref;
913:
1.7 itojun 914: if (ep->af != AF_INET6)
915: continue;
916: if (ep->proto >= 0 && ep->proto != nxt)
917: continue;
918:
919: /* should optimize by looking at address pairs */
920:
921: /* XXX need to pass ep->arg or ep itself to listeners */
1.56 knakahar 922: psref_acquire(&elem_psref, &ep->psref,
923: encaptab.elem_class);
1.51 knakahar 924: esw = ep->esw;
925: if (esw && esw->encapsw6.pr_ctlinput) {
1.56 knakahar 926: pserialize_read_exit(s);
927: /* pr_ctlinput is sleepable. e.g. rtcache_free */
1.52 knakahar 928: (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg);
1.56 knakahar 929: s = pserialize_read_enter();
1.51 knakahar 930: }
1.56 knakahar 931: psref_release(&elem_psref, &ep->psref,
932: encaptab.elem_class);
1.7 itojun 933: }
1.56 knakahar 934: pserialize_read_exit(s);
1.7 itojun 935:
936: rip6_ctlinput(cmd, sa, d0);
1.32 ad 937: return NULL;
1.7 itojun 938: }
939: #endif
940:
1.1 itojun 941: int
1.23 perry 942: encap_detach(const struct encaptab *cookie)
1.1 itojun 943: {
944: const struct encaptab *ep = cookie;
1.56 knakahar 945: struct encaptab *p;
1.7 itojun 946: int error;
1.1 itojun 947:
1.56 knakahar 948: KASSERT(encap_lock_held());
949:
950: PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
1.1 itojun 951: if (p == ep) {
1.7 itojun 952: error = encap_remove(p);
953: if (error)
954: return error;
1.56 knakahar 955: else
956: break;
957: }
958: }
959: if (p == NULL)
960: return ENOENT;
961:
962: pserialize_perform(encaptab.psz);
963: psref_target_destroy(&p->psref,
964: encaptab.elem_class);
965: if (!ep->func) {
966: kmem_free(p->addrpack, ep->addrpack->sa_len);
967: kmem_free(p->maskpack, ep->maskpack->sa_len);
1.1 itojun 968: }
1.56 knakahar 969: kmem_free(p, sizeof(*p));
1.1 itojun 970:
1.56 knakahar 971: return 0;
1.7 itojun 972: }
973:
1.55 knakahar 974: #ifdef USE_RADIX
1.7 itojun 975: static struct radix_node_head *
1.23 perry 976: encap_rnh(int af)
1.7 itojun 977: {
978:
979: switch (af) {
980: case AF_INET:
981: return encap_head[0];
982: #ifdef INET6
983: case AF_INET6:
984: return encap_head[1];
985: #endif
986: default:
987: return NULL;
988: }
989: }
990:
991: static int
1.23 perry 992: mask_matchlen(const struct sockaddr *sa)
1.7 itojun 993: {
994: const char *p, *ep;
995: int l;
996:
997: p = (const char *)sa;
998: ep = p + sa->sa_len;
999: p += 2; /* sa_len + sa_family */
1000:
1001: l = 0;
1002: while (p < ep) {
1003: l += (*p ? 8 : 0); /* estimate */
1004: p++;
1005: }
1006: return l;
1.1 itojun 1007: }
1.55 knakahar 1008: #endif
1009:
1010: #ifndef USE_RADIX
1011: static int
1012: mask_match(const struct encaptab *ep,
1013: const struct sockaddr *sp,
1014: const struct sockaddr *dp)
1015: {
1016: struct sockaddr_storage s;
1017: struct sockaddr_storage d;
1018: int i;
1019: const u_int8_t *p, *q;
1020: u_int8_t *r;
1021: int matchlen;
1022:
1023: KASSERTMSG(ep->func == NULL, "wrong encaptab passed to mask_match");
1024:
1025: if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
1026: return 0;
1027: if (sp->sa_family != ep->af || dp->sa_family != ep->af)
1028: return 0;
1029: if (sp->sa_len != ep->src->sa_len || dp->sa_len != ep->dst->sa_len)
1030: return 0;
1031:
1032: matchlen = 0;
1033:
1034: p = (const u_int8_t *)sp;
1035: q = (const u_int8_t *)ep->srcmask;
1036: r = (u_int8_t *)&s;
1037: for (i = 0 ; i < sp->sa_len; i++) {
1038: r[i] = p[i] & q[i];
1039: /* XXX estimate */
1040: matchlen += (q[i] ? 8 : 0);
1041: }
1042:
1043: p = (const u_int8_t *)dp;
1044: q = (const u_int8_t *)ep->dstmask;
1045: r = (u_int8_t *)&d;
1046: for (i = 0 ; i < dp->sa_len; i++) {
1047: r[i] = p[i] & q[i];
1048: /* XXX rough estimate */
1049: matchlen += (q[i] ? 8 : 0);
1050: }
1051:
1052: /* need to overwrite len/family portion as we don't compare them */
1053: s.ss_len = sp->sa_len;
1054: s.ss_family = sp->sa_family;
1055: d.ss_len = dp->sa_len;
1056: d.ss_family = dp->sa_family;
1057:
1058: if (memcmp(&s, ep->src, ep->src->sa_len) == 0 &&
1059: memcmp(&d, ep->dst, ep->dst->sa_len) == 0) {
1060: return matchlen;
1061: } else
1062: return 0;
1063: }
1064: #endif
1.1 itojun 1065:
1.59 knakahar 1066: int
1.54 knakahar 1067: encap_lock_enter(void)
1068: {
1.59 knakahar 1069: int error;
1070:
1071: mutex_enter(&encap_whole.lock);
1072: while (encap_whole.busy != NULL) {
1073: error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock);
1074: if (error) {
1075: mutex_exit(&encap_whole.lock);
1076: return error;
1077: }
1078: }
1079: KASSERT(encap_whole.busy == NULL);
1080: encap_whole.busy = curlwp;
1081: mutex_exit(&encap_whole.lock);
1.54 knakahar 1082:
1.59 knakahar 1083: return 0;
1.54 knakahar 1084: }
1085:
1086: void
1087: encap_lock_exit(void)
1088: {
1089:
1.59 knakahar 1090: mutex_enter(&encap_whole.lock);
1091: KASSERT(encap_whole.busy == curlwp);
1092: encap_whole.busy = NULL;
1093: cv_broadcast(&encap_whole.cv);
1094: mutex_exit(&encap_whole.lock);
1.54 knakahar 1095: }
1.56 knakahar 1096:
1097: bool
1098: encap_lock_held(void)
1099: {
1100:
1.59 knakahar 1101: return (encap_whole.busy == curlwp);
1.56 knakahar 1102: }
CVSweb <webmaster@jp.NetBSD.org>