#ifndef INCLUDED_UTF8_H
#define INCLUDED_UTF8_H
/* vim:ts=4:sw=4:noet
 * (tabspace=4)
 * 
 * Copyright (C) 2004, 2005 Walter Doekes, <walter@djcvt.net>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/**
 * Conversion routines to and from UTF8 and UCS (UNICODE).
 *
 * The ANSI-C mbstowcs and wcstombs functions aren't sufficient to do
 * UTF8 and UCS conversion on machines that do not have the proper
 * locales set up.
 * The functions declared herein (this and utf8.c) are similar in
 * behaviour to mbstowcs and wcstombs, but work without a proper
 * locale setup.
 *
 * See also: mbstowcs(3), wcstombs(3), unicode(7), utf8(7).
 *
 * Last modified: 2005-06-15
 *
 * TODO: Test behaviour of >16bit characters on both windows and
 *       linux.
 */

#include <sys/types.h>
#include <wchar.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Convert a UTF8 multi-byte string to a UCS wide-character string.
 * The conversion stops at the first 0 (null) character encountered.
 *
 * - If dest is NULL, utf8towcs returns the size required for the full
 *   string (without the terminating 0 (null) character); n is ignored.
 * - If dest is not NULL, utf8towcs tries to write at most n characters
 *   to dest. The number of characters written (without the terminating
 *   0 (null)) is returned.
 * - If src is NULL, behaviour is undefined.
 * - If src contains an invalid UTF8 sequence, utf8towcs returns
 *   (size_t)-1. If there is room, a terminating 0 (null) character
 *   will be placed after the last successfully converted character in
 *   dest.
 *
 * Behaviour is a bit different on WIN32:
 * - The return value may be (size_t)-1 if the destination string
 *   is too short.
 * - Invalid characters encountered may not be flagged as an error.
 *
 * NOTE(review): whether dest is 0-terminated when exactly n characters
 * are written is not specified here -- confirm against utf8.c before
 * relying on it.
 */
size_t utf8towcs(wchar_t* dest, const unsigned char* src, size_t n);

/**
 * Allocate a wide-character string and perform utf8towcs on it.
 * Free the allocated memory with free().
 *
 * NOTE(review): presumably the allocated string is handed back through
 * *dest and the return value follows utf8towcs (characters converted,
 * or (size_t)-1 on error) -- neither is stated here; confirm against
 * utf8.c. It is likewise unspecified what *dest holds on failure.
 */
size_t utf8toawcs(wchar_t** dest, const unsigned char* src);

/**
 * Convert a UCS wide-character string to a UTF8 multi-byte string.
 * The conversion stops at the first 0 (null) character encountered.
 *
 * - If dest is NULL, wcstoutf8 returns the size required for the full
 *   string (without the terminating 0 (null) character); n is ignored.
 * - If dest is not NULL, wcstoutf8 tries to write at most n characters
 *   to dest. The number of characters written (without the terminating
 *   0 (null)) is returned.
 * - If src is NULL, behaviour is undefined.
 * - If src contains an invalid UCS sequence, wcstoutf8 returns
 *   (size_t)-1. If there is room, a terminating 0 (null) character
 *   will be placed after the last successfully converted character in
 *   dest.
 *   The Unicode standard specifies no characters above 0x10ffff, so a
 *   UTF8 encoding will at most contain 4 instead of 6 bytes per
 *   wide character.
 *
 * Behaviour is a bit different on WIN32:
 * - The return value may be (size_t)-1 if the destination string
 *   is too short.
 * - Invalid characters encountered may not be flagged as an error.
 *
 * NOTE(review): "characters" for dest presumably means bytes (unsigned
 * char units) of UTF8 output, not wide characters consumed -- confirm
 * against utf8.c.
 */
size_t wcstoutf8(unsigned char* dest, const wchar_t* src, size_t n);

/**
 * Allocate a UTF8 multi-byte string and perform wcstoutf8 on it.
 * Free the allocated memory with free().
 *
 * NOTE(review): presumably the allocated string is handed back through
 * *dest and the return value follows wcstoutf8 (size written, or
 * (size_t)-1 on error) -- neither is stated here; confirm against
 * utf8.c. It is likewise unspecified what *dest holds on failure.
 */
size_t wcstoautf8(unsigned char** dest, const wchar_t* src);

#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

#endif /* INCLUDED_UTF8_H */
