Annotation of src/lib/libintl/gettext.c, Revision 1.29
1.29 ! christos 1: /* $NetBSD: gettext.c,v 1.28 2012/07/30 23:04:42 yamt Exp $ */
1.1 itojun 2:
3: /*-
1.9 minoura 4: * Copyright (c) 2000, 2001 Citrus Project,
1.1 itojun 5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
1.10 yamt 27: *
28: * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
1.1 itojun 29: */
30:
31: #include <sys/cdefs.h>
1.29 ! christos 32: __RCSID("$NetBSD: gettext.c,v 1.28 2012/07/30 23:04:42 yamt Exp $");
1.1 itojun 33:
34: #include <sys/param.h>
35: #include <sys/stat.h>
36: #include <sys/mman.h>
37: #include <sys/uio.h>
38:
1.19 tshiozak 39: #include <assert.h>
1.1 itojun 40: #include <fcntl.h>
41: #include <stdio.h>
42: #include <stdlib.h>
43: #include <unistd.h>
44: #include <string.h>
45: #if 0
46: #include <util.h>
47: #endif
48: #include <libintl.h>
49: #include <locale.h>
50: #include "libintl_local.h"
1.22 tshiozak 51: #include "plural_parser.h"
1.1 itojun 52: #include "pathnames.h"
53:
/*
 * GNU gettext attaches a context string to messages that are used in more
 * than one place, so translators can tell them apart.  Rather than changing
 * the catalog format, it builds the lookup key by concatenating the context,
 * the byte \004 and the message id.
 */
#define MSGCTXT_ID_SEPARATOR	'\004'

/* forward declarations of the file-local helpers */
static const char *pgettext_impl(const char *domainname, const char *msgctxt,
    const char *msgid1, const char *msgid2, unsigned long int n,
    int category);
static char *concatenate_ctxt_id(const char *msgctxt, const char *msgid);
static const char *lookup_category(int category);
static const char *split_locale(const char *lname);
static const char *lookup_mofile(char *buf, size_t len, const char *dir,
    const char *lpath, const char *category, const char *domainname,
    struct domainbinding *db);
static uint32_t flip(uint32_t v, uint32_t magic);
static int validate(void *arg, struct mohandle *mohandle);
static int mapit(const char *path, struct domainbinding *db);
static int unmapit(struct domainbinding *db);
static const char *lookup_hash(const char *msgid, struct domainbinding *db,
    size_t *rlen);
static const char *lookup_bsearch(const char *msgid, struct domainbinding *db,
    size_t *rlen);
static const char *lookup(const char *msgid, struct domainbinding *db,
    size_t *rlen);
static const char *get_lang_env(const char *category_name);
1.1 itojun 78:
79: /*
80: * shortcut functions. the main implementation resides in dcngettext().
81: */
/*
 * gettext() -
 *	translate msgid using the default domain and the LC_MESSAGES
 *	category.  Singular-only shortcut for dcngettext().
 */
char *
gettext(const char *msgid)
{
	char *translated;

	translated = dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
88:
/*
 * dgettext() -
 *	translate msgid in the given domain for the LC_MESSAGES category.
 *	Singular-only shortcut for dcngettext().
 */
char *
dgettext(const char *domainname, const char *msgid)
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
95:
/*
 * dcgettext() -
 *	translate msgid in the given domain and locale category.
 *	Singular-only shortcut for dcngettext().
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, category);
	return translated;
}
102:
/*
 * ngettext() -
 *	plural-aware lookup in the default domain for the LC_MESSAGES
 *	category.  Shortcut for dcngettext().
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{
	char *translated;

	translated = dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
109:
/*
 * dngettext() -
 *	plural-aware lookup in the given domain for the LC_MESSAGES
 *	category.  Shortcut for dcngettext().
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	char *translated;

	translated = dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
117:
/*
 * pgettext() -
 *	context-qualified lookup of msgid in the default domain for the
 *	LC_MESSAGES category.  Shortcut for pgettext_impl().
 */
const char *
pgettext(const char *msgctxt, const char *msgid)
{
	const char *translated;

	translated = pgettext_impl(NULL, msgctxt, msgid, NULL, 1UL,
	    LC_MESSAGES);
	return translated;
}
! 124:
/*
 * dpgettext() -
 *	context-qualified lookup of msgid in the given domain for the
 *	LC_MESSAGES category.  Shortcut for pgettext_impl().
 */
const char *
dpgettext(const char *domainname, const char *msgctxt, const char *msgid)
{
	const char *translated;

	translated = pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL,
	    LC_MESSAGES);
	return translated;
}
! 131:
/*
 * dcpgettext() -
 *	context-qualified lookup of msgid in the given domain and locale
 *	category.  Shortcut for pgettext_impl().
 */
const char *
dcpgettext(const char *domainname, const char *msgctxt, const char *msgid,
    int category)
{
	const char *translated;

	translated = pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL,
	    category);
	return translated;
}
! 139:
/*
 * npgettext() -
 *	context-qualified, plural-aware lookup in the default domain for the
 *	LC_MESSAGES category.  Shortcut for pgettext_impl().
 */
const char *
npgettext(const char *msgctxt, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	const char *translated;

	translated = pgettext_impl(NULL, msgctxt, msgid1, msgid2, n,
	    LC_MESSAGES);
	return translated;
}
! 147:
/*
 * dnpgettext() -
 *	context-qualified, plural-aware lookup in the given domain for the
 *	LC_MESSAGES category.  Shortcut for pgettext_impl().
 */
const char *
dnpgettext(const char *domainname, const char *msgctxt, const char *msgid1,
    const char *msgid2, unsigned long int n)
{
	const char *translated;

	translated = pgettext_impl(domainname, msgctxt, msgid1, msgid2, n,
	    LC_MESSAGES);
	return translated;
}
! 155:
/*
 * dcnpgettext() -
 *	context-qualified, plural-aware lookup in the given domain and
 *	locale category.  Shortcut for pgettext_impl().
 */
const char *
dcnpgettext(const char *domainname, const char *msgctxt, const char *msgid1,
    const char *msgid2, unsigned long int n, int category)
{
	const char *translated;

	translated = pgettext_impl(domainname, msgctxt, msgid1, msgid2, n,
	    category);
	return translated;
}
! 163:
/*
 * pgettext_impl() -
 *	common back end of the *pgettext() family.  Builds the key
 *	"msgctxt \004 msgid1" with concatenate_ctxt_id(), passes it to
 *	dcngettext() in place of msgid1, and releases the key again.
 *
 *	Returns the translation on success.  Returns msgid1 when the key
 *	cannot be allocated or when dcngettext() hands the key back
 *	untranslated, and NULL when dcngettext() returns NULL.
 */
static const char *
pgettext_impl(const char *domainname, const char *msgctxt, const char *msgid1,
    const char *msgid2, unsigned long int n, int category)
{
	char *msgctxt_id;
	char *translation;
	const char *p;

	if ((msgctxt_id = concatenate_ctxt_id(msgctxt, msgid1)) == NULL)
		return msgid1;

	translation = dcngettext(domainname, msgctxt_id, msgid2, n, category);

	/*
	 * When the lookup fails dcngettext() returns one of the msgid
	 * pointers it was given.  If that is our temporary key it must be
	 * neither scanned nor returned once it has been freed; the caller's
	 * untranslated msgid1 is the right answer.
	 */
	if (translation == msgctxt_id) {
		free(msgctxt_id);
		return msgid1;
	}
	free(msgctxt_id);

	/* dcngettext() returns NULL when the msgid it selected is NULL */
	if (translation == NULL)
		return NULL;

	/* strip a context prefix, should the result still carry one */
	p = strchr(translation, '\004');
	return p != NULL ? p + 1 : translation;
}
! 184:
/*
 * dcngettext() -
 * look up an internationalized message in the database for
 * locale/category/domainname (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n is equal to 1, the internationalized message is looked up for msgid1.
 * otherwise, the message is looked up for msgid2.
 * if the lookup fails, the function returns msgid1 or msgid2 as is.
 *
 * Even though the return type is "char *", the caller should not modify the
 * region pointed to by the return value (it should be "const char *", but
 * that can't be changed for compatibility with other implementations).
 *
 * by default (if domainname == NULL), domainname is taken from the value set
 * by textdomain(). usually the name of the application (like "ls") is used as
 * domainname. category is usually LC_MESSAGES.
 *
 * the code reads in *.mo files generated by GNU gettext. a *.mo file is
 * encoded in the byte order of the host that generated it. both byte orders
 * are supported here, as the files are in /usr/share/locale! (or we should
 * move those files into /usr/libdata)
 */
205:
1.29 ! christos 206: static char *
! 207: concatenate_ctxt_id(const char *msgctxt, const char *msgid)
! 208: {
! 209: char *ret;
! 210:
! 211: if (asprintf(&ret, "%s%c%s", msgctxt, MSGCTXT_ID_SEPARATOR, msgid) == -1)
! 212: return NULL;
! 213:
! 214: return ret;
! 215: }
! 216:
/*
 * lookup_category() -
 *	map a locale category constant to its name ("LC_MESSAGES", ...).
 *	Returns NULL for any value not listed in the table below.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		id;
		const char	*name;
	} known[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
		if (known[k].id == category)
			return known[k].name;
	}
	return NULL;
}
231:
232: /*
233: * XPG syntax: language[_territory[.codeset]][@modifier]
234: * XXX boundary check on "result" is lacking
235: */
236: static const char *
1.25 junyoung 237: split_locale(const char *lname)
1.1 itojun 238: {
239: char buf[BUFSIZ], tmp[BUFSIZ];
240: char *l, *t, *c, *m;
241: static char result[BUFSIZ];
242:
243: memset(result, 0, sizeof(result));
244:
245: if (strlen(lname) + 1 > sizeof(buf)) {
246: fail:
247: return lname;
248: }
249:
250: strlcpy(buf, lname, sizeof(buf));
251: m = strrchr(buf, '@');
252: if (m)
253: *m++ = '\0';
254: c = strrchr(buf, '.');
255: if (c)
256: *c++ = '\0';
257: t = strrchr(buf, '_');
258: if (t)
259: *t++ = '\0';
260: l = buf;
261: if (strlen(l) == 0)
262: goto fail;
263: if (c && !t)
264: goto fail;
265:
266: if (m) {
267: if (t) {
268: if (c) {
269: snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
1.19 tshiozak 270: l, t, c, m);
1.1 itojun 271: strlcat(result, tmp, sizeof(result));
272: strlcat(result, ":", sizeof(result));
273: }
1.19 tshiozak 274: snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
1.1 itojun 275: strlcat(result, tmp, sizeof(result));
276: strlcat(result, ":", sizeof(result));
277: }
1.19 tshiozak 278: snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
1.1 itojun 279: strlcat(result, tmp, sizeof(result));
280: strlcat(result, ":", sizeof(result));
281: }
282: if (t) {
283: if (c) {
1.19 tshiozak 284: snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
1.1 itojun 285: strlcat(result, tmp, sizeof(result));
286: strlcat(result, ":", sizeof(result));
287: }
1.19 tshiozak 288: snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
1.1 itojun 289: strlcat(result, tmp, sizeof(result));
290: strlcat(result, ":", sizeof(result));
291: }
292: strlcat(result, l, sizeof(result));
293:
294: return result;
295: }
296:
297: static const char *
1.25 junyoung 298: lookup_mofile(char *buf, size_t len, const char *dir, const char *lpath,
299: const char *category, const char *domainname,
300: struct domainbinding *db)
1.1 itojun 301: {
302: struct stat st;
303: char *p, *q;
1.10 yamt 304: char lpath_tmp[BUFSIZ];
1.1 itojun 305:
1.28 yamt 306: /*
307: * LANGUAGE is a colon separated list of locale names.
308: */
309:
1.10 yamt 310: strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
311: q = lpath_tmp;
1.9 minoura 312: /* CONSTCOND */
1.1 itojun 313: while (1) {
314: p = strsep(&q, ":");
315: if (!p)
316: break;
317: if (!*p)
318: continue;
319:
320: /* don't mess with default locales */
321: if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
322: return NULL;
323:
324: /* validate pathname */
325: if (strchr(p, '/') || strchr(category, '/'))
326: continue;
327: #if 1 /*?*/
328: if (strchr(domainname, '/'))
329: continue;
330: #endif
331:
332: snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
333: category, domainname);
334: if (stat(buf, &st) < 0)
335: continue;
336: if ((st.st_mode & S_IFMT) != S_IFREG)
337: continue;
338:
1.9 minoura 339: if (mapit(buf, db) == 0)
1.1 itojun 340: return buf;
341: }
342:
343: return NULL;
344: }
345:
1.25 junyoung 346: static uint32_t
347: flip(uint32_t v, uint32_t magic)
1.1 itojun 348: {
349:
350: if (magic == MO_MAGIC)
351: return v;
352: else if (magic == MO_MAGIC_SWAPPED) {
353: v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
354: ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
355: return v;
356: } else {
357: abort();
358: /*NOTREACHED*/
359: }
360: }
361:
362: static int
1.25 junyoung 363: validate(void *arg, struct mohandle *mohandle)
1.1 itojun 364: {
365: char *p;
366:
367: p = (char *)arg;
1.9 minoura 368: if (p < (char *)mohandle->addr ||
369: p > (char *)mohandle->addr + mohandle->len)
1.1 itojun 370: return 0;
371: else
372: return 1;
373: }
374:
/*
 * calc_collision_step() -
 *	derive the probe increment used when a hash slot is already taken.
 *	The result lies in 1 .. hashsize - 2; hashsize must exceed 2.
 */
static __inline uint32_t
calc_collision_step(uint32_t hashval, uint32_t hashsize)
{
	uint32_t span;

	_DIAGASSERT(hashsize>2);
	span = hashsize - 2;
	return 1 + hashval % span;
}
384:
/*
 * calc_next_index() -
 *	advance curidx by step, subtracting hashsize when curidx has reached
 *	hashsize - step.
 */
static __inline uint32_t
calc_next_index(uint32_t curidx, uint32_t hashsize, uint32_t step)
{
	uint32_t next = curidx + step;

	if (curidx >= hashsize - step)
		next -= hashsize;
	return next;
}
393:
394: static int
1.25 junyoung 395: get_sysdep_string_table(struct mosysdepstr_h **table_h, uint32_t *ofstable,
396: uint32_t nstrings, uint32_t magic, char *base)
1.20 tshiozak 397: {
1.27 matt 398: unsigned int i;
399: int j, count;
1.20 tshiozak 400: size_t l;
401: struct mosysdepstr *table;
402:
403: for (i=0; i<nstrings; i++) {
404: /* get mosysdepstr record */
405: /* LINTED: ignore the alignment problem. */
406: table = (struct mosysdepstr *)(base + flip(ofstable[i], magic));
407: /* count number of segments */
408: count = 0;
409: while (flip(table->segs[count++].ref, magic) != MO_LASTSEG)
410: ;
411: /* get table */
412: l = sizeof(struct mosysdepstr_h) +
413: sizeof(struct mosysdepsegentry_h) * (count-1);
414: table_h[i] = (struct mosysdepstr_h *)malloc(l);
415: if (!table_h[i])
416: return -1;
417: memset(table_h[i], 0, l);
418: table_h[i]->off = (const char *)(base + flip(table->off, magic));
419: for (j=0; j<count; j++) {
420: table_h[i]->segs[j].len =
421: flip(table->segs[j].len, magic);
422: table_h[i]->segs[j].ref =
423: flip(table->segs[j].ref, magic);
424: }
425: /* LINTED: ignore the alignment problem. */
426: table = (struct mosysdepstr *)&table->segs[count];
427: }
428: return 0;
429: }
430:
431: static int
432: expand_sysdep(struct mohandle *mohandle, struct mosysdepstr_h *str)
433: {
434: int i;
435: const char *src;
436: char *dst;
437:
438: /* check whether already expanded */
439: if (str->expanded)
440: return 0;
441:
442: /* calc total length */
443: str->expanded_len = 1;
444: for (i=0; /*CONSTCOND*/1; i++) {
445: str->expanded_len += str->segs[i].len;
446: if (str->segs[i].ref == MO_LASTSEG)
447: break;
448: str->expanded_len +=
449: mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len;
450: }
451: /* expand */
452: str->expanded = malloc(str->expanded_len);
453: if (!str->expanded)
454: return -1;
455: src = str->off;
456: dst = str->expanded;
457: for (i=0; /*CONSTCOND*/1; i++) {
458: memcpy(dst, src, str->segs[i].len);
459: src += str->segs[i].len;
460: dst += str->segs[i].len;
461: if (str->segs[i].ref == MO_LASTSEG)
462: break;
463: memcpy(dst, mohandle->mo.mo_sysdep_segs[str->segs[i].ref].str,
464: mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len);
465: dst += mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len;
466: }
467: *dst = '\0';
468:
469: return 0;
470: }
471:
/*
 * insert_to_hash() -
 *	store ref in the first free (zero) slot of htable on the probe
 *	sequence of str.  The caller must make sure a free slot exists.
 */
static void
insert_to_hash(uint32_t *htable, uint32_t hsize, const char *str, uint32_t ref)
{
	uint32_t hash, stride, slot;

	hash = __intl_string_hash(str);
	stride = calc_collision_step(hash, hsize);

	for (slot = hash % hsize; htable[slot] != 0;
	    slot = calc_next_index(slot, hsize, stride))
		continue;

	htable[slot] = ref;
}
486:
487: static int
488: setup_sysdep_stuffs(struct mo *mo, struct mohandle *mohandle, char *base)
489: {
1.25 junyoung 490: uint32_t magic;
1.20 tshiozak 491: struct moentry *stable;
492: size_t l;
1.27 matt 493: unsigned int i;
1.20 tshiozak 494: char *v;
1.25 junyoung 495: uint32_t *ofstable;
1.20 tshiozak 496:
497: magic = mo->mo_magic;
498:
499: mohandle->mo.mo_sysdep_nsegs = flip(mo->mo_sysdep_nsegs, magic);
500: mohandle->mo.mo_sysdep_nstring = flip(mo->mo_sysdep_nstring, magic);
501:
502: if (mohandle->mo.mo_sysdep_nstring == 0)
503: return 0;
504:
505: /* check hash size */
506: if (mohandle->mo.mo_hsize <= 2 ||
507: mohandle->mo.mo_hsize <
508: (mohandle->mo.mo_nstring + mohandle->mo.mo_sysdep_nstring))
509: return -1;
510:
511: /* get sysdep segments */
1.21 yamt 512: l = sizeof(struct mosysdepsegs_h) * mohandle->mo.mo_sysdep_nsegs;
1.20 tshiozak 513: mohandle->mo.mo_sysdep_segs = (struct mosysdepsegs_h *)malloc(l);
514: if (!mohandle->mo.mo_sysdep_segs)
515: return -1;
516: /* LINTED: ignore the alignment problem. */
517: stable = (struct moentry *)(base + flip(mo->mo_sysdep_segoff, magic));
518: for (i=0; i<mohandle->mo.mo_sysdep_nsegs; i++) {
519: v = base + flip(stable[i].off, magic);
520: mohandle->mo.mo_sysdep_segs[i].str =
521: __intl_sysdep_get_string_by_tag(
522: v,
523: &mohandle->mo.mo_sysdep_segs[i].len);
524: }
525:
526: /* get sysdep string table */
527: mohandle->mo.mo_sysdep_otable =
528: (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring,
529: sizeof(struct mosysdepstr_h *));
530: if (!mohandle->mo.mo_sysdep_otable)
531: return -1;
532: /* LINTED: ignore the alignment problem. */
1.25 junyoung 533: ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_otable, magic));
1.20 tshiozak 534: if (get_sysdep_string_table(mohandle->mo.mo_sysdep_otable, ofstable,
535: mohandle->mo.mo_sysdep_nstring, magic,
536: base))
537: return -1;
538: mohandle->mo.mo_sysdep_ttable =
539: (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring,
540: sizeof(struct mosysdepstr_h *));
541: if (!mohandle->mo.mo_sysdep_ttable)
542: return -1;
543: /* LINTED: ignore the alignment problem. */
1.25 junyoung 544: ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_ttable, magic));
1.20 tshiozak 545: if (get_sysdep_string_table(mohandle->mo.mo_sysdep_ttable, ofstable,
546: mohandle->mo.mo_sysdep_nstring, magic,
547: base))
548: return -1;
549:
550: /* update hash */
551: for (i=0; i<mohandle->mo.mo_sysdep_nstring; i++) {
552: if (expand_sysdep(mohandle, mohandle->mo.mo_sysdep_otable[i]))
553: return -1;
554: insert_to_hash(mohandle->mo.mo_htable,
555: mohandle->mo.mo_hsize,
556: mohandle->mo.mo_sysdep_otable[i]->expanded,
557: (i+1) | MO_HASH_SYSDEP_MASK);
558: }
559:
560: return 0;
561: }
562:
1.1 itojun 563: int
1.25 junyoung 564: mapit(const char *path, struct domainbinding *db)
1.1 itojun 565: {
566: int fd;
567: struct stat st;
568: char *base;
1.25 junyoung 569: uint32_t magic, revision, flags = 0;
1.1 itojun 570: struct moentry *otable, *ttable;
1.25 junyoung 571: const uint32_t *htable;
1.1 itojun 572: struct moentry_h *p;
573: struct mo *mo;
1.22 tshiozak 574: size_t l, headerlen;
1.27 matt 575: unsigned int i;
1.1 itojun 576: char *v;
1.9 minoura 577: struct mohandle *mohandle = &db->mohandle;
1.1 itojun 578:
1.9 minoura 579: if (mohandle->addr && mohandle->addr != MAP_FAILED &&
580: mohandle->mo.mo_magic)
1.1 itojun 581: return 0; /*already opened*/
582:
1.9 minoura 583: unmapit(db);
1.1 itojun 584:
585: #if 0
586: if (secure_path(path) != 0)
587: goto fail;
588: #endif
589: if (stat(path, &st) < 0)
590: goto fail;
591: if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
592: goto fail;
593: fd = open(path, O_RDONLY);
594: if (fd < 0)
595: goto fail;
1.2 itojun 596: if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
1.1 itojun 597: (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
598: close(fd);
599: goto fail;
600: }
1.19 tshiozak 601: if (read(fd, &revision, sizeof(revision)) != sizeof(revision)) {
602: close(fd);
603: goto fail;
604: }
605: switch (flip(revision, magic)) {
606: case MO_MAKE_REV(0, 0):
1.20 tshiozak 607: break;
1.19 tshiozak 608: case MO_MAKE_REV(0, 1):
609: case MO_MAKE_REV(1, 1):
1.20 tshiozak 610: flags |= MO_F_SYSDEP;
1.19 tshiozak 611: break;
612: default:
1.1 itojun 613: close(fd);
614: goto fail;
615: }
1.9 minoura 616: mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
1.4 itojun 617: MAP_FILE | MAP_SHARED, fd, (off_t)0);
1.9 minoura 618: if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
1.1 itojun 619: close(fd);
620: goto fail;
621: }
622: close(fd);
1.9 minoura 623: mohandle->len = (size_t)st.st_size;
1.1 itojun 624:
1.9 minoura 625: base = mohandle->addr;
626: mo = (struct mo *)mohandle->addr;
1.1 itojun 627:
628: /* flip endian. do not flip magic number! */
1.9 minoura 629: mohandle->mo.mo_magic = mo->mo_magic;
630: mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
631: mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
1.19 tshiozak 632: mohandle->mo.mo_hsize = flip(mo->mo_hsize, magic);
1.20 tshiozak 633: mohandle->mo.mo_flags = flags;
1.1 itojun 634:
635: /* validate otable/ttable */
1.19 tshiozak 636: /* LINTED: ignore the alignment problem. */
1.1 itojun 637: otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
1.19 tshiozak 638: /* LINTED: ignore the alignment problem. */
1.1 itojun 639: ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
1.9 minoura 640: if (!validate(otable, mohandle) ||
641: !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
642: unmapit(db);
1.1 itojun 643: goto fail;
644: }
1.9 minoura 645: if (!validate(ttable, mohandle) ||
646: !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
647: unmapit(db);
1.1 itojun 648: goto fail;
649: }
650:
651: /* allocate [ot]table, and convert to normal pointer representation. */
1.9 minoura 652: l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
653: mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
654: if (!mohandle->mo.mo_otable) {
655: unmapit(db);
1.1 itojun 656: goto fail;
657: }
1.9 minoura 658: mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
659: if (!mohandle->mo.mo_ttable) {
660: unmapit(db);
1.1 itojun 661: goto fail;
662: }
1.9 minoura 663: p = mohandle->mo.mo_otable;
664: for (i = 0; i < mohandle->mo.mo_nstring; i++) {
1.1 itojun 665: p[i].len = flip(otable[i].len, magic);
666: p[i].off = base + flip(otable[i].off, magic);
667:
1.9 minoura 668: if (!validate(p[i].off, mohandle) ||
669: !validate(p[i].off + p[i].len + 1, mohandle)) {
670: unmapit(db);
1.1 itojun 671: goto fail;
672: }
673: }
1.9 minoura 674: p = mohandle->mo.mo_ttable;
675: for (i = 0; i < mohandle->mo.mo_nstring; i++) {
1.1 itojun 676: p[i].len = flip(ttable[i].len, magic);
677: p[i].off = base + flip(ttable[i].off, magic);
678:
1.9 minoura 679: if (!validate(p[i].off, mohandle) ||
680: !validate(p[i].off + p[i].len + 1, mohandle)) {
681: unmapit(db);
1.1 itojun 682: goto fail;
683: }
684: }
1.19 tshiozak 685: /* allocate htable, and convert it to the host order. */
686: if (mohandle->mo.mo_hsize > 2) {
1.25 junyoung 687: l = sizeof(uint32_t) * mohandle->mo.mo_hsize;
688: mohandle->mo.mo_htable = (uint32_t *)malloc(l);
1.19 tshiozak 689: if (!mohandle->mo.mo_htable) {
690: unmapit(db);
691: goto fail;
692: }
693: /* LINTED: ignore the alignment problem. */
1.25 junyoung 694: htable = (const uint32_t *)(base+flip(mo->mo_hoffset, magic));
1.19 tshiozak 695: for (i=0; i < mohandle->mo.mo_hsize; i++) {
696: mohandle->mo.mo_htable[i] = flip(htable[i], magic);
697: if (mohandle->mo.mo_htable[i] >=
698: mohandle->mo.mo_nstring+1) {
699: /* illegal string number. */
700: unmapit(db);
701: goto fail;
702: }
703: }
704: }
1.1 itojun 705: /* grab MIME-header and charset field */
1.22 tshiozak 706: mohandle->mo.mo_header = lookup("", db, &headerlen);
1.9 minoura 707: if (mohandle->mo.mo_header)
708: v = strstr(mohandle->mo.mo_header, "charset=");
1.1 itojun 709: else
710: v = NULL;
711: if (v) {
1.9 minoura 712: mohandle->mo.mo_charset = strdup(v + 8);
713: if (!mohandle->mo.mo_charset)
1.6 itojun 714: goto fail;
1.9 minoura 715: v = strchr(mohandle->mo.mo_charset, '\n');
1.1 itojun 716: if (v)
717: *v = '\0';
718: }
1.26 tnozaki 719: if (!mohandle->mo.mo_header ||
720: _gettext_parse_plural(&mohandle->mo.mo_plural,
1.22 tshiozak 721: &mohandle->mo.mo_nplurals,
722: mohandle->mo.mo_header, headerlen))
723: mohandle->mo.mo_plural = NULL;
1.1 itojun 724:
725: /*
726: * XXX check charset, reject it if we are unable to support the charset
727: * with the current locale.
728: * for example, if we are using euc-jp locale and we are looking at
729: * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
730: * the *.mo file as we cannot support it.
731: */
732:
1.20 tshiozak 733: /* system dependent string support */
734: if ((mohandle->mo.mo_flags & MO_F_SYSDEP) != 0) {
735: if (setup_sysdep_stuffs(mo, mohandle, base)) {
736: unmapit(db);
737: goto fail;
738: }
739: }
740:
1.1 itojun 741: return 0;
742:
743: fail:
744: return -1;
745: }
746:
1.20 tshiozak 747: static void
1.25 junyoung 748: free_sysdep_table(struct mosysdepstr_h **table, uint32_t nstring)
1.20 tshiozak 749: {
750:
1.29 ! christos 751: if (! table)
! 752: return;
! 753:
! 754: for (uint32_t i = 0; i < nstring; i++) {
1.20 tshiozak 755: if (table[i]) {
1.29 ! christos 756: free(table[i]->expanded);
1.20 tshiozak 757: free(table[i]);
758: }
759: }
760: free(table);
761: }
762:
1.1 itojun 763: static int
1.25 junyoung 764: unmapit(struct domainbinding *db)
1.1 itojun 765: {
1.9 minoura 766: struct mohandle *mohandle = &db->mohandle;
1.1 itojun 767:
768: /* unmap if there's already mapped region */
1.9 minoura 769: if (mohandle->addr && mohandle->addr != MAP_FAILED)
770: munmap(mohandle->addr, mohandle->len);
771: mohandle->addr = NULL;
1.29 ! christos 772: free(mohandle->mo.mo_otable);
! 773: free(mohandle->mo.mo_ttable);
! 774: free(mohandle->mo.mo_charset);
! 775: free(mohandle->mo.mo_htable);
! 776: free(mohandle->mo.mo_sysdep_segs);
! 777: free_sysdep_table(mohandle->mo.mo_sysdep_otable,
! 778: mohandle->mo.mo_sysdep_nstring);
! 779: free_sysdep_table(mohandle->mo.mo_sysdep_ttable,
! 780: mohandle->mo.mo_sysdep_nstring);
! 781: _gettext_free_plural(mohandle->mo.mo_plural);
1.9 minoura 782: memset(&mohandle->mo, 0, sizeof(mohandle->mo));
1.1 itojun 783: return 0;
784: }
785:
1.9 minoura 786: /* ARGSUSED */
1.1 itojun 787: static const char *
1.25 junyoung 788: lookup_hash(const char *msgid, struct domainbinding *db, size_t *rlen)
1.1 itojun 789: {
1.19 tshiozak 790: struct mohandle *mohandle = &db->mohandle;
1.25 junyoung 791: uint32_t idx, hashval, step, strno;
1.19 tshiozak 792: size_t len;
1.20 tshiozak 793: struct mosysdepstr_h *sysdep_otable, *sysdep_ttable;
1.19 tshiozak 794:
795: if (mohandle->mo.mo_hsize <= 2 || mohandle->mo.mo_htable == NULL)
796: return NULL;
1.1 itojun 797:
1.19 tshiozak 798: hashval = __intl_string_hash(msgid);
799: step = calc_collision_step(hashval, mohandle->mo.mo_hsize);
800: idx = hashval % mohandle->mo.mo_hsize;
801: len = strlen(msgid);
802: while (/*CONSTCOND*/1) {
803: strno = mohandle->mo.mo_htable[idx];
804: if (strno == 0) {
805: /* unexpected miss */
806: return NULL;
807: }
808: strno--;
1.20 tshiozak 809: if ((strno & MO_HASH_SYSDEP_MASK) == 0) {
810: /* system independent strings */
811: if (len <= mohandle->mo.mo_otable[strno].len &&
812: !strcmp(msgid, mohandle->mo.mo_otable[strno].off)) {
813: /* hit */
1.22 tshiozak 814: if (rlen)
815: *rlen =
816: mohandle->mo.mo_ttable[strno].len;
1.20 tshiozak 817: return mohandle->mo.mo_ttable[strno].off;
818: }
819: } else {
820: /* system dependent strings */
821: strno &= ~MO_HASH_SYSDEP_MASK;
822: sysdep_otable = mohandle->mo.mo_sysdep_otable[strno];
823: sysdep_ttable = mohandle->mo.mo_sysdep_ttable[strno];
824: if (len <= sysdep_otable->expanded_len &&
825: !strcmp(msgid, sysdep_otable->expanded)) {
826: /* hit */
827: if (expand_sysdep(mohandle, sysdep_ttable))
828: /* memory exhausted */
829: return NULL;
1.22 tshiozak 830: if (rlen)
831: *rlen = sysdep_ttable->expanded_len;
1.20 tshiozak 832: return sysdep_ttable->expanded;
833: }
1.19 tshiozak 834: }
835: idx = calc_next_index(idx, mohandle->mo.mo_hsize, step);
836: }
837: /*NOTREACHED*/
1.1 itojun 838: }
839:
840: static const char *
1.25 junyoung 841: lookup_bsearch(const char *msgid, struct domainbinding *db, size_t *rlen)
1.1 itojun 842: {
843: int top, bottom, middle, omiddle;
844: int n;
1.9 minoura 845: struct mohandle *mohandle = &db->mohandle;
1.1 itojun 846:
847: top = 0;
1.9 minoura 848: bottom = mohandle->mo.mo_nstring;
1.1 itojun 849: omiddle = -1;
1.9 minoura 850: /* CONSTCOND */
1.1 itojun 851: while (1) {
852: if (top > bottom)
1.4 itojun 853: break;
1.1 itojun 854: middle = (top + bottom) / 2;
855: /* avoid possible infinite loop, when the data is not sorted */
856: if (omiddle == middle)
1.4 itojun 857: break;
1.27 matt 858: if ((size_t)middle >= mohandle->mo.mo_nstring)
1.4 itojun 859: break;
1.1 itojun 860:
1.9 minoura 861: n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
1.22 tshiozak 862: if (n == 0) {
863: if (rlen)
864: *rlen = mohandle->mo.mo_ttable[middle].len;
1.9 minoura 865: return (const char *)mohandle->mo.mo_ttable[middle].off;
1.22 tshiozak 866: }
1.1 itojun 867: else if (n < 0)
868: bottom = middle;
869: else
870: top = middle;
871: omiddle = middle;
872: }
873:
874: return NULL;
875: }
876:
/*
 * lookup() -
 *	find the translation of msgid in the catalog of db: try the hash
 *	table first and fall back to bisection when that yields nothing.
 *	Returns NULL if both fail.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db, size_t *rlen)
{
	const char *hit = lookup_hash(msgid, db, rlen);

	return hit != NULL ? hit : lookup_bsearch(msgid, db, rlen);
}
888:
/*
 * get_lang_env() -
 *	determine the list of locale names to search for a catalog.
 *
 *	category_name is the name of the locale category ("LC_MESSAGES",
 *	...), used as the name of the matching environment variable.
 *	Returns a colon separated list of locale names, or NULL if none of
 *	the variables is set to a non-empty value.  A variable set to the
 *	empty string is treated as unset, so it does not hide the variables
 *	consulted after it.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/*
	 * 1. see LANGUAGE variable first.
	 *
	 * LANGUAGE is a GNU extension.
	 * It's a colon separated list of locale names.
	 */
	lang = getenv("LANGUAGE");
	if (lang != NULL && *lang != '\0')
		return lang;

	/*
	 * 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG.
	 *
	 * It's essentially setlocale(LC_xxx, NULL).
	 */
	lang = getenv("LC_ALL");
	if (lang == NULL || *lang == '\0')
		lang = getenv(category_name);
	if (lang == NULL || *lang == '\0')
		lang = getenv("LANG");

	if (lang == NULL || *lang == '\0')
		return NULL;	/* error */

	return split_locale(lang);
}
920:
/*
 * get_indexed_string() -
 *	pick one plural form out of a translation.
 *
 *	str holds the forms back to back, separated by NUL bytes; len is
 *	the total length in bytes, not counting the NUL that terminates the
 *	last form.  Returns the form with index idx (0 is the first).  If
 *	the translation holds fewer than idx + 1 forms, the first form is
 *	returned rather than a pointer into the middle of the last one.
 */
static const char *
get_indexed_string(const char *str, size_t len, unsigned long idx)
{
	const char *cur = str;
	const char *end = str + len;
	const char *nul;

	while (idx > 0) {
		/* the separator that ends the current form */
		nul = memchr(cur, '\0', (size_t)(end - cur));
		if (nul == NULL)
			return str;	/* not that many forms */
		cur = nul + 1;
		idx--;
	}
	return cur;
}
936:
1.23 yamt 937: #define _NGETTEXT_DEFAULT(msgid1, msgid2, n) \
938: ((char *)__UNCONST((n) == 1 ? (msgid1) : (msgid2)))
939:
1.1 itojun 940: char *
1.25 junyoung 941: dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
942: unsigned long int n, int category)
1.1 itojun 943: {
944: const char *msgid;
945: char path[PATH_MAX];
1.10 yamt 946: const char *lpath;
1.1 itojun 947: static char olpath[PATH_MAX];
1.6 itojun 948: const char *cname = NULL;
1.1 itojun 949: const char *v;
1.6 itojun 950: static char *ocname = NULL;
951: static char *odomainname = NULL;
1.5 itojun 952: struct domainbinding *db;
1.24 lukem 953: unsigned long plural_index = 0;
1.22 tshiozak 954: size_t len;
1.1 itojun 955:
956: if (!domainname)
1.9 minoura 957: domainname = __current_domainname;
1.1 itojun 958: cname = lookup_category(category);
959: if (!domainname || !cname)
960: goto fail;
961:
1.10 yamt 962: lpath = get_lang_env(cname);
963: if (!lpath)
1.1 itojun 964: goto fail;
1.19 tshiozak 965:
1.9 minoura 966: for (db = __bindings; db; db = db->next)
1.5 itojun 967: if (strcmp(db->domainname, domainname) == 0)
968: break;
1.9 minoura 969: if (!db) {
970: if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
971: goto fail;
972: db = __bindings;
1.11 yamt 973: }
974:
975: /* resolve relative path */
976: /* XXX not necessary? */
977: if (db->path[0] != '/') {
978: char buf[PATH_MAX];
979:
980: if (getcwd(buf, sizeof(buf)) == 0)
981: goto fail;
982: if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
983: goto fail;
984: if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
985: goto fail;
1.15 itojun 986: strlcpy(db->path, buf, sizeof(db->path));
1.9 minoura 987: }
1.5 itojun 988:
1.1 itojun 989: /* don't bother looking it up if the values are the same */
1.5 itojun 990: if (odomainname && strcmp(domainname, odomainname) == 0 &&
1.9 minoura 991: ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
992: db->mohandle.mo.mo_magic)
1.1 itojun 993: goto found;
994:
995: /* try to find appropriate file, from $LANGUAGE */
1.5 itojun 996: if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
1.9 minoura 997: domainname, db) == NULL)
1.3 itojun 998: goto fail;
1.5 itojun 999:
1.29 ! christos 1000: free(odomainname);
! 1001: free(ocname);
! 1002:
1.6 itojun 1003: odomainname = strdup(domainname);
1.5 itojun 1004: ocname = strdup(cname);
1.6 itojun 1005: if (!odomainname || !ocname) {
1.29 ! christos 1006: free(odomainname);
! 1007: free(ocname);
! 1008:
1.6 itojun 1009: odomainname = ocname = NULL;
1010: }
1.10 yamt 1011: else
1012: strlcpy(olpath, lpath, sizeof(olpath));
1.1 itojun 1013:
1014: found:
1.22 tshiozak 1015: if (db->mohandle.mo.mo_plural) {
1016: plural_index =
1017: _gettext_calculate_plural(db->mohandle.mo.mo_plural, n);
1018: if (plural_index >= db->mohandle.mo.mo_nplurals)
1019: plural_index = 0;
1020: msgid = msgid1;
1021: } else
1.23 yamt 1022: msgid = _NGETTEXT_DEFAULT(msgid1, msgid2, n);
1.22 tshiozak 1023:
1024: if (msgid == NULL)
1025: return NULL;
1026:
1027: v = lookup(msgid, db, &len);
1.1 itojun 1028: if (v) {
1.22 tshiozak 1029: if (db->mohandle.mo.mo_plural)
1030: v = get_indexed_string(v, len, plural_index);
1.1 itojun 1031: /*
1.18 yamt 1032: * convert the translated message's encoding.
1033: *
1034: * special case:
1035: * a result of gettext("") shouldn't need any conversion.
1.1 itojun 1036: */
1.18 yamt 1037: if (msgid[0])
1038: v = __gettext_iconv(v, db);
1.1 itojun 1039:
1040: /*
1041: * Given the amount of printf-format security issues, it may
1042: * be a good idea to validate if the original msgid and the
1043: * translated message format string carry the same printf-like
1044: * format identifiers.
1045: */
1046:
1047: msgid = v;
1048: }
1049:
1.23 yamt 1050: return (char *)__UNCONST(msgid);
1051:
1.1 itojun 1052: fail:
1.23 yamt 1053: return _NGETTEXT_DEFAULT(msgid1, msgid2, n);
1.1 itojun 1054: }
CVSweb <webmaster@jp.NetBSD.org>