[BACK]Return to unicode.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / fs / hfs

File: [cvs.NetBSD.org] / src / sys / fs / hfs / unicode.c (download)

Revision 1.2, Tue Dec 11 12:04:24 2007 UTC (13 years, 4 months ago) by lukem
Branch: MAIN
CVS Tags: yamt-pf42-baseX, yamt-pf42-base4, yamt-pf42-base3, yamt-pf42-base2, yamt-pf42-base, yamt-pf42, yamt-pagecache-tag8, yamt-pagecache-base9, yamt-pagecache-base8, yamt-pagecache-base7, yamt-pagecache-base6, yamt-pagecache-base5, yamt-pagecache-base4, yamt-pagecache-base3, yamt-pagecache-base2, yamt-pagecache-base, yamt-pagecache, yamt-nfs-mp-base9, yamt-nfs-mp-base8, yamt-nfs-mp-base7, yamt-nfs-mp-base6, yamt-nfs-mp-base5, yamt-nfs-mp-base4, yamt-nfs-mp-base3, yamt-nfs-mp-base2, yamt-nfs-mp-base11, yamt-nfs-mp-base10, yamt-nfs-mp-base, yamt-nfs-mp, yamt-lazymbuf-base15, yamt-lazymbuf-base14, yamt-kmem-base3, yamt-kmem-base2, wrstuden-revivesa-base-4, wrstuden-revivesa-base-3, wrstuden-revivesa-base-2, wrstuden-revivesa-base-1, wrstuden-revivesa-base, wrstuden-revivesa, vmlocking2-base3, uebayasi-xip-base4, uebayasi-xip-base3, uebayasi-xip-base2, uebayasi-xip-base1, uebayasi-xip-base, uebayasi-xip, tls-maxphys-base, tls-earlyentropy-base, tls-earlyentropy, simonb-wapbl-nbase, simonb-wapbl-base, simonb-wapbl, rmind-uvmplock-nbase, rmind-uvmplock-base, rmind-uvmplock, rmind-smpnet-nbase, rmind-smpnet-base, rmind-smpnet, riastradh-xf86-video-intel-2-7-1-pre-2-21-15, riastradh-drm2-base3, riastradh-drm2-base2, riastradh-drm2-base1, riastradh-drm2-base, riastradh-drm2, nick-nhusb-base-20150606, nick-nhusb-base-20150406, nick-nhusb-base, nick-net80211-sync-base, nick-net80211-sync, nick-hppapmap-base4, nick-hppapmap-base3, nick-hppapmap-base2, nick-hppapmap-base, nick-hppapmap, netbsd-7-nhusb-base-20170116, netbsd-7-nhusb-base, netbsd-7-nhusb, netbsd-7-base, netbsd-7-2-RELEASE, netbsd-7-1-RELEASE, netbsd-7-1-RC2, netbsd-7-1-RC1, netbsd-7-1-2-RELEASE, netbsd-7-1-1-RELEASE, netbsd-7-1, netbsd-7-0-RELEASE, netbsd-7-0-RC3, netbsd-7-0-RC2, netbsd-7-0-RC1, netbsd-7-0-2-RELEASE, netbsd-7-0-1-RELEASE, netbsd-7-0, netbsd-7, netbsd-6-base, netbsd-6-1-RELEASE, netbsd-6-1-RC4, netbsd-6-1-RC3, netbsd-6-1-RC2, netbsd-6-1-RC1, netbsd-6-1-5-RELEASE, netbsd-6-1-4-RELEASE, 
netbsd-6-1-3-RELEASE, netbsd-6-1-2-RELEASE, netbsd-6-1-1-RELEASE, netbsd-6-1, netbsd-6-0-RELEASE, netbsd-6-0-RC2, netbsd-6-0-RC1, netbsd-6-0-6-RELEASE, netbsd-6-0-5-RELEASE, netbsd-6-0-4-RELEASE, netbsd-6-0-3-RELEASE, netbsd-6-0-2-RELEASE, netbsd-6-0-1-RELEASE, netbsd-6-0, netbsd-6, netbsd-5-base, netbsd-5-2-RELEASE, netbsd-5-2-RC1, netbsd-5-2-3-RELEASE, netbsd-5-2-2-RELEASE, netbsd-5-2-1-RELEASE, netbsd-5-2, netbsd-5-1-RELEASE, netbsd-5-1-RC4, netbsd-5-1-RC3, netbsd-5-1-RC2, netbsd-5-1-RC1, netbsd-5-1-5-RELEASE, netbsd-5-1-4-RELEASE, netbsd-5-1-3-RELEASE, netbsd-5-1-2-RELEASE, netbsd-5-1-1-RELEASE, netbsd-5-1, netbsd-5-0-RELEASE, netbsd-5-0-RC4, netbsd-5-0-RC3, netbsd-5-0-RC2, netbsd-5-0-RC1, netbsd-5-0-2-RELEASE, netbsd-5-0-1-RELEASE, netbsd-5-0, netbsd-5, mjf-devfs2-base, mjf-devfs2, mjf-devfs-base, matt-premerge-20091211, matt-nb6-plus-nbase, matt-nb6-plus-base, matt-nb6-plus, matt-nb5-pq3-base, matt-nb5-pq3, matt-nb5-mips64-u2-k2-k4-k7-k8-k9, matt-nb5-mips64-u1-k1-k5, matt-nb5-mips64-premerge-20101231, matt-nb5-mips64-premerge-20091211, matt-nb5-mips64-k15, matt-nb5-mips64, matt-nb4-mips64-k7-u2a-k9b, matt-mips64-premerge-20101231, matt-mips64-base2, matt-armv6-nbase, matt-armv6-base, khorben-n900, keiichi-mipv6-nbase, keiichi-mipv6-base, keiichi-mipv6, jymxensuspend-base, jym-xensuspend-nbase, jym-xensuspend-base, jym-xensuspend, jruoho-x86intr-base, jruoho-x86intr, jmcneill-usbmp-pre-base2, jmcneill-usbmp-base9, jmcneill-usbmp-base8, jmcneill-usbmp-base7, jmcneill-usbmp-base6, jmcneill-usbmp-base5, jmcneill-usbmp-base4, jmcneill-usbmp-base3, jmcneill-usbmp-base2, jmcneill-usbmp-base10, jmcneill-usbmp-base, jmcneill-usbmp, jmcneill-audiomp3-base, jmcneill-audiomp3, hpcarm-cleanup-nbase, hpcarm-cleanup-base, haad-nbase2, haad-dm-base2, haad-dm-base1, haad-dm-base, haad-dm, cube-autoconf-base, cube-autoconf, cherry-xenmp-base, cherry-xenmp, bouyer-xeni386-nbase, bouyer-xeni386-base, bouyer-quota2-nbase, bouyer-quota2-base, bouyer-quota2, agc-symver-base, 
agc-symver, ad-socklock-base1, ad-audiomp2-base, ad-audiomp2
Branch point for: tls-maxphys, nick-nhusb
Changes since 1.1: +4 -1 lines

use __KERNEL_RCSID()

/* $NetBSD: unicode.c,v 1.2 2007/12/11 12:04:24 lukem Exp $ */

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Dieter Baron.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: unicode.c,v 1.2 2007/12/11 12:04:24 lukem Exp $");

#include <sys/null.h>

#include "unicode.h"

/*
 * Convert the UTF-8 byte string src (src_len bytes, not necessarily
 * NUL-terminated) to UTF-16 code units.
 *
 * At most dst_len code units are stored in dst; dst may be NULL, in which
 * case nothing is stored.  The return value is the number of code units
 * the complete conversion produces, which may exceed dst_len.
 *
 * Malformed input (stray continuation bytes, truncated or overlong
 * sequences, encoded surrogates, code points above U+10FFFF) is skipped
 * and counted; the count is stored in *errp when errp is not NULL.
 *
 * If UNICODE_UTF8_LATIN1_FALLBACK is set in flags, a byte >= 0xa0 that is
 * not followed by a continuation byte is taken as an ISO 8859-1 character.
 */
size_t
utf8_to_utf16(uint16_t *dst, size_t dst_len,
	      const char *src, size_t src_len,
	      int flags, int *errp)
{
    const unsigned char *s;
    size_t spos, dpos, left;
    int error;
    uint16_t c;

#define IS_CONT(c)	(((c)&0xc0) == 0x80)

    error = 0;
    s = (const unsigned char *)src;
    spos = dpos = 0;
    while (spos<src_len) {
	/*
	 * Bytes available from spos on, lead byte included.  Comparing
	 * against this (instead of src_len-N) cannot underflow and keeps
	 * every s[spos+k] access inside the input.
	 */
	left = src_len - spos;

	if (s[spos] < 0x80)
	    c = s[spos++];
	else if ((flags & UNICODE_UTF8_LATIN1_FALLBACK)
		 && (left < 2 || !IS_CONT(s[spos+1]))
		 && s[spos]>=0xa0) {
	    /* not valid UTF-8, assume ISO 8859-1 */
	    c = s[spos++];
	}
	else if (s[spos] < 0xc0 || s[spos] >= 0xf5) {
	    /* continuation byte without lead byte
	       or lead byte for codepoint above 0x10ffff */
	    error++;
	    spos++;
	    continue;
	}
	else if (s[spos] < 0xe0) {
	    /* two byte sequence */
	    if (left < 2 || !IS_CONT(s[spos+1])) {
		spos++;
		error++;
		continue;
	    }
	    c = ((s[spos] & 0x1f) << 6) | (s[spos+1] & 0x3f);
	    spos += 2;
	    if (c < 0x80) {
		/* overlong encoding */
		error++;
		continue;
	    }
	}
	else if (s[spos] < 0xf0) {
	    /* three byte sequence */
	    if (left < 3
		|| !IS_CONT(s[spos+1]) || !IS_CONT(s[spos+2])) {
		spos++;
		error++;
		continue;
	    }
	    c = ((s[spos] & 0x0f) << 12) | ((s[spos+1] & 0x3f) << 6)
		| (s[spos+2] & 0x3f);
	    spos += 3;
	    /* surrogates are exactly 0xd800..0xdfff: top five bits 11011 */
	    if (c < 0x800 || (c & 0xf800) == 0xd800) {
		/* overlong encoding or encoded surrogate */
		error++;
		continue;
	    }
	}
	else {
	    uint32_t cc;
	    /* four byte sequence, becomes a UTF-16 surrogate pair */

	    if (left < 4 || !IS_CONT(s[spos+1])
		|| !IS_CONT(s[spos+2]) || !IS_CONT(s[spos+3])) {
		spos++;
		error++;
		continue;
	    }
	    /* a four byte lead is 11110xxx: three payload bits */
	    cc = ((uint32_t)(s[spos] & 0x07) << 18)
		 | ((uint32_t)(s[spos+1] & 0x3f) << 12)
		 | ((uint32_t)(s[spos+2] & 0x3f) << 6)
		 | (uint32_t)(s[spos+3] & 0x3f);
	    spos += 4;
	    if (cc < 0x10000 || cc > 0x10ffff) {
		/* overlong encoding or beyond the Unicode range */
		error++;
		continue;
	    }
	    cc -= 0x10000;
	    /* upper ten bits go to the first surrogate ... */
	    if (dst && dpos < dst_len)
		dst[dpos] = (uint16_t)(0xd800 | (cc >> 10));
	    dpos++;
	    /* ... lower ten bits to the second, stored below */
	    c = (uint16_t)(0xdc00 | (cc & 0x3ff));
	}

	if (dst && dpos < dst_len)
	    dst[dpos] = c;
	dpos++;
    }

    if (errp)
	*errp = error;

    return dpos;

#undef IS_CONT
}


/*
 * Convert the UTF-16 string src (src_len code units) to UTF-8.
 *
 * The return value is the number of bytes the complete conversion
 * produces.  Bytes are stored in dst only while whole characters fit into
 * dst_len; as soon as one character does not fit, nothing further is
 * stored, but counting continues.  dst may be NULL to only count.
 * No terminating NUL is written.
 *
 * Unpaired surrogates are skipped and counted; the count is stored in
 * *errp when errp is not NULL.  flags is not used by this function.
 */
size_t
utf16_to_utf8(char *dst, size_t dst_len,
	      const uint16_t *src, size_t src_len,
	      int flags, int *errp)
{
    size_t spos, dpos;
    int error;

/* stop storing once fewer than l bytes remain; written to avoid underflow */
#define CHECK_LENGTH(l)							\
    do {								\
	if (dpos > dst_len || dst_len - dpos < (size_t)(l))		\
	    dst = NULL;							\
    } while (0)
/* store one byte if still writing, always advance the count */
#define ADD_BYTE(b)							\
    do {								\
	if (dst)							\
	    dst[dpos] = (char)(b);					\
	dpos++;								\
    } while (0)

    error = 0;
    dpos = 0;
    for (spos=0; spos<src_len; spos++) {
	if (src[spos] < 0x80) {
	    CHECK_LENGTH(1);
	    ADD_BYTE(src[spos]);
	}
	else if (src[spos] < 0x800) {
	    CHECK_LENGTH(2);
	    ADD_BYTE(0xc0 | (src[spos]>>6));
	    ADD_BYTE(0x80 | (src[spos] & 0x3f));
	}
	else if ((src[spos] & 0xfc00) == 0xd800) {
	    uint32_t c;
	    /* first surrogate */
	    if (spos + 1 >= src_len || (src[spos+1] & 0xfc00) != 0xdc00) {
		/* no second surrogate present */
		error++;
		continue;
	    }
	    /* combine both halves before stepping over the second one */
	    c = ((((uint32_t)src[spos] & 0x3ff) << 10)
		 | ((uint32_t)src[spos+1] & 0x3ff)) + 0x10000;
	    spos++;
	    CHECK_LENGTH(4);
	    ADD_BYTE(0xf0 | (c>>18));
	    ADD_BYTE(0x80 | ((c>>12) & 0x3f));
	    ADD_BYTE(0x80 | ((c>>6) & 0x3f));
	    ADD_BYTE(0x80 | (c & 0x3f));
	}
	else if ((src[spos] & 0xfc00) == 0xdc00) {
	    /* second surrogate without preceding first surrogate */
	    error++;
	}
	else {
	    CHECK_LENGTH(3);
	    ADD_BYTE(0xe0 | (src[spos]>>12));
	    ADD_BYTE(0x80 | ((src[spos]>>6) & 0x3f));
	    ADD_BYTE(0x80 | (src[spos] & 0x3f));
	}
    }

    if (errp)
	*errp = error;

    return dpos;

#undef ADD_BYTE
#undef CHECK_LENGTH
}