/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 2005-2017   The R Core Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  https://www.R-project.org/Licenses/
 */

/* Internal header, not installed */

/*  This file was contributed by Ei-ji Nakama.
 *  See also the comments in  ../main/rlocale.c.

 *  It does 2 things:
 * (a) supplies wrapper/substitute wc[s]width functions for use in 
 *    character.c, errors.c, printutils.c, devPS.c, RGui console.
 * (b) Defines a replacment for iswctype to be used on Windows, maxOS and AIX.
 * in gram.c 
 *
 * It is not an installed header.
 */

#ifndef R_LOCALE_H
#define R_LOCALE_H

#include <wchar.h>
#include <ctype.h>
#include <wctype.h>

/*
 * The Rwchar_t typedef represents a single Unicode code point.  On most systems it's the same
 * as wchar_t, but on Windows (and others?) where wchar_t is too small and UTF-16 is used,
 * it is an unsigned int instead.
 */
 
#ifdef Win32
typedef unsigned int Rwchar_t;
#else
typedef wchar_t Rwchar_t;
#endif 

/*
 * Windows CJK
 * In Unicode, there is not a rule about character width. 
 * A letter of breadth is used in a CJK (China, Japan, Korea,
 * Taiwan, Hong Kong, and Singapore) area, and there are a
 * letter and a standard (character width is not still prescribed)
 * of a cord in a country.
 * Letter width is a problem of a font, but it is a rule route
 * besides a alphanumeric character that use a breadth letter.
 * It is generally defined as a breadth letter for a font such
 * as Japanese.
 * - Win32

 *  Attempted explanation by BDR
 *  The display widths of characters are not prescribed in Unicode.
 *  Double-width characters are used in the CJK area: their width can
 *  be font-specific, with different fonts in use in different parts
 *  of the CJK area.  The tables supplied in many OSes and by Markus
 *  Kuhn are not do not take the exact locale into account.  The
 *  tables supplied in rlocale_data.h allow different widths for
 *  different parts of the CJK area, and also where needed different
 *  widths on Windows.  (The Windows differences are in zh_CN, and
 *  apply to European characters.)
 *
 * The differences are mainly (but not exclusively) in the
 * Unicode 'East Asian Ambiguous' class.
 *
 */
 
extern int Ri18n_wcwidth(Rwchar_t);
extern int Ri18n_wcswidth (const wchar_t *, size_t);

/* macOS CJK and WindowXP(Japanese)
 * iswctypes of macOS calls isctypes. no i18n.
 * For example, iswprint of Windows does not accept a macron of
 * Japanese "a-ru" of R as a letter. 
 * Therefore Japanese "Buraian.Ripuri-" of "Brian Ripley" is
 * shown of hex-string.:-)
 * We define alternatives to be used if
 * defined(Win32) || defined(__APPLE__) || defined(_AIX)
 */
extern wctype_t Ri18n_wctype(const char *);
extern int      Ri18n_iswctype(wint_t, wctype_t);

#ifndef IN_RLOCALE_C
/* We want to avoid these redefinitions in rlocale.c itself */
#undef iswupper
#undef iswlower
#undef iswalpha
#undef iswdigit
#undef iswxdigit
#undef iswspace
#undef iswprint
#undef iswgraph
#undef iswblank
#undef iswcntrl
#undef iswpunct
#undef iswalnum
#undef wctype
#undef iswctype

#define iswupper(__x)     Ri18n_iswctype(__x, Ri18n_wctype("upper"))
#define iswlower(__x)     Ri18n_iswctype(__x, Ri18n_wctype("lower"))
#define iswalpha(__x)     Ri18n_iswctype(__x, Ri18n_wctype("alpha"))
#define iswdigit(__x)     Ri18n_iswctype(__x, Ri18n_wctype("digit"))
#define iswxdigit(__x)    Ri18n_iswctype(__x, Ri18n_wctype("xdigit"))
#define iswspace(__x)     Ri18n_iswctype(__x, Ri18n_wctype("space"))
#define iswprint(__x)     Ri18n_iswctype(__x, Ri18n_wctype("print"))
#define iswgraph(__x)     Ri18n_iswctype(__x, Ri18n_wctype("graph"))
#define iswblank(__x)     Ri18n_iswctype(__x, Ri18n_wctype("blank"))
#define iswcntrl(__x)     Ri18n_iswctype(__x, Ri18n_wctype("cntrl"))
#define iswpunct(__x)     Ri18n_iswctype(__x, Ri18n_wctype("punct"))
#define iswalnum(__x)     Ri18n_iswctype(__x, Ri18n_wctype("alnum"))
#define wctype(__x)       Ri18n_wctype(__x)
#define iswctype(__x,__y) Ri18n_iswctype(__x,__y)
#endif

/* These definitions are from winnls.h in Mingw_w64.  We don't need the rest of that file. */

#define HIGH_SURROGATE_START 0xd800
#define HIGH_SURROGATE_END 0xdbff
#define LOW_SURROGATE_START 0xdc00
#define LOW_SURROGATE_END 0xdfff

/* The first two of these definitions use the argument twice which is bad, but we include them here in
 * the original form for consistency with Mingw_w64.  Users should be careful that evaluating
 * the argument doesn't result in side effects.
 */

#define IS_HIGH_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && ((wch) <= HIGH_SURROGATE_END))
#define IS_LOW_SURROGATE(wch) (((wch) >= LOW_SURROGATE_START) && ((wch) <= LOW_SURROGATE_END))
#define IS_SURROGATE_PAIR(hs, ls) (IS_HIGH_SURROGATE (hs) && IS_LOW_SURROGATE (ls))

# define utf8toucs32		Rf_utf8toucs32
Rwchar_t utf8toucs32(wchar_t high, const char *s);

#endif /* R_LOCALE_H */