Main Page   Class Hierarchy   Alphabetical List   Data Structures   File List   Data Fields   Globals  

uchar.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1997-2001, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File UCHAR.H
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   04/02/97    aliu        Creation.
00013 *   03/29/99    helena      Updated for C APIs.
00014 *   4/15/99     Madhu       Updated for C Implementation and Javadoc
00015 *   5/20/99     Madhu       Added the function u_getVersion()
00016 *   8/19/1999   srl         Upgraded scripts to Unicode 3.0
00017 *   8/27/1999   schererm    UCharDirection constants: U_...
00018 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
00019 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
00020 ******************************************************************************
00021 */
00022 
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025 
00026 #include "unicode/utypes.h"
00027 
00028 U_CDECL_BEGIN
00029 
00030 /*==========================================================================*/
00031 /* Unicode version number                                                   */
00032 /*==========================================================================*/
00033 #define U_UNICODE_VERSION "3.1.1"  /* a string literal, so it cannot be tested numerically in #if */
00034 
00056 #define UCHAR_MIN_VALUE 0  /* NOTE(review): presumably the lowest valid UChar32 value -- confirm */
00057 
00064 #define UCHAR_MAX_VALUE 0x10ffff  /* 1114111 in decimal */
00065 
00071 enum UCharCategory  /* category codes 0..29 with explicit values, then a trailing count */
00072 {
00074     U_UNASSIGNED              = 0,
00076     U_GENERAL_OTHER_TYPES     = 0,  /* same value as U_UNASSIGNED: two names for code 0 */
00078     U_UPPERCASE_LETTER        = 1,
00080     U_LOWERCASE_LETTER        = 2,
00082     U_TITLECASE_LETTER        = 3,
00084     U_MODIFIER_LETTER         = 4,
00086     U_OTHER_LETTER            = 5,
00088     U_NON_SPACING_MARK        = 6,
00090     U_ENCLOSING_MARK          = 7,
00092     U_COMBINING_SPACING_MARK  = 8,
00094     U_DECIMAL_DIGIT_NUMBER    = 9,
00096     U_LETTER_NUMBER           = 10,
00098     U_OTHER_NUMBER            = 11,
00100     U_SPACE_SEPARATOR         = 12,
00102     U_LINE_SEPARATOR          = 13,
00104     U_PARAGRAPH_SEPARATOR     = 14,
00106     U_CONTROL_CHAR            = 15,
00108     U_FORMAT_CHAR             = 16,
00110     U_PRIVATE_USE_CHAR        = 17,
00112     U_SURROGATE               = 18,
00114     U_DASH_PUNCTUATION        = 19,
00116     U_START_PUNCTUATION       = 20,
00118     U_END_PUNCTUATION         = 21,
00120     U_CONNECTOR_PUNCTUATION   = 22,
00122     U_OTHER_PUNCTUATION       = 23,
00124     U_MATH_SYMBOL             = 24,
00126     U_CURRENCY_SYMBOL         = 25,
00128     U_MODIFIER_SYMBOL         = 26,
00130     U_OTHER_SYMBOL            = 27,
00132     U_INITIAL_PUNCTUATION     = 28,
00134     U_FINAL_PUNCTUATION       = 29,
00136     U_CHAR_CATEGORY_COUNT           /* no initializer: takes the value 30, one past U_FINAL_PUNCTUATION */
00137 };
00138 
00139 typedef enum UCharCategory UCharCategory;  /* lets C code name the type without the enum keyword */
00140 
00145 enum UCharDirection   {  /* direction codes 0..18 with explicit values, then a trailing count */
00147     U_LEFT_TO_RIGHT               = 0, 
00149     U_RIGHT_TO_LEFT               = 1, 
00151     U_EUROPEAN_NUMBER             = 2,
00153     U_EUROPEAN_NUMBER_SEPARATOR   = 3,
00155     U_EUROPEAN_NUMBER_TERMINATOR  = 4,
00157     U_ARABIC_NUMBER               = 5,
00159     U_COMMON_NUMBER_SEPARATOR     = 6,
00161     U_BLOCK_SEPARATOR             = 7,
00163     U_SEGMENT_SEPARATOR           = 8,
00165     U_WHITE_SPACE_NEUTRAL         = 9, 
00167     U_OTHER_NEUTRAL               = 10, 
00169     U_LEFT_TO_RIGHT_EMBEDDING     = 11,
00171     U_LEFT_TO_RIGHT_OVERRIDE      = 12,
00173     U_RIGHT_TO_LEFT_ARABIC        = 13,
00175     U_RIGHT_TO_LEFT_EMBEDDING     = 14,
00177     U_RIGHT_TO_LEFT_OVERRIDE      = 15,
00179     U_POP_DIRECTIONAL_FORMAT      = 16,
00181     U_DIR_NON_SPACING_MARK        = 17,
00183     U_BOUNDARY_NEUTRAL            = 18,
00185     U_CHAR_DIRECTION_COUNT              /* no initializer: takes the value 19, one past U_BOUNDARY_NEUTRAL */
00186 };
00187 
00188 typedef enum UCharDirection UCharDirection;  /* lets C code name the type without the enum keyword */
00189 
00195 enum UBlockCode {  /* block codes start at 1; no enumerator in this enum has the value 0 */
00197     UBLOCK_BASIC_LATIN = 1,
00199     U_BASIC_LATIN = 1,  /* U_ alias: every UBLOCK_x from 1 through 87 is followed by a same-valued U_x */
00200 
00202     UBLOCK_LATIN_1_SUPPLEMENT=2,
00204     U_LATIN_1_SUPPLEMENT=2,
00205 
00207     UBLOCK_LATIN_EXTENDED_A =3,
00209     U_LATIN_EXTENDED_A=3,
00210 
00212     UBLOCK_LATIN_EXTENDED_B =4,
00214     U_LATIN_EXTENDED_B=4,
00215 
00217     UBLOCK_IPA_EXTENSIONS =5,
00219     U_IPA_EXTENSIONS=5,
00220     
00222     UBLOCK_SPACING_MODIFIER_LETTERS =6,
00224     U_SPACING_MODIFIER_LETTERS=6,
00225 
00227     UBLOCK_COMBINING_DIACRITICAL_MARKS =7,
00229     U_COMBINING_DIACRITICAL_MARKS=7,
00230     
00232     UBLOCK_GREEK =8,
00234     U_GREEK=8,
00235 
00237     UBLOCK_CYRILLIC =9,
00239     U_CYRILLIC=9,
00240 
00242     UBLOCK_ARMENIAN =10,
00244     U_ARMENIAN=10,
00245 
00247     UBLOCK_HEBREW =11,
00249     U_HEBREW=11,
00250 
00252     UBLOCK_ARABIC =12,
00254     U_ARABIC=12,
00255 
00257     UBLOCK_SYRIAC =13,
00259     U_SYRIAC=13,
00260 
00262     UBLOCK_THAANA =14,
00264     U_THAANA=14,
00265 
00267     UBLOCK_DEVANAGARI =15,
00269     U_DEVANAGARI=15,
00270 
00272     UBLOCK_BENGALI =16,
00274     U_BENGALI=16,
00275 
00277     UBLOCK_GURMUKHI =17,
00279     U_GURMUKHI=17,
00280 
00282     UBLOCK_GUJARATI =18,
00284     U_GUJARATI=18,
00285 
00287     UBLOCK_ORIYA =19,
00289     U_ORIYA=19,
00290 
00292     UBLOCK_TAMIL =20,
00294     U_TAMIL=20,
00295 
00297     UBLOCK_TELUGU =21,
00299     U_TELUGU=21,
00300 
00302     UBLOCK_KANNADA =22,
00304     U_KANNADA=22,
00305 
00307     UBLOCK_MALAYALAM =23,
00309     U_MALAYALAM=23,
00310 
00312     UBLOCK_SINHALA =24,
00314     U_SINHALA=24,
00315 
00317     UBLOCK_THAI =25,
00319     U_THAI=25,
00320 
00322     UBLOCK_LAO =26,
00324     U_LAO=26,
00325 
00327     UBLOCK_TIBETAN =27,
00329     U_TIBETAN=27,
00330 
00332     UBLOCK_MYANMAR =28,
00334     U_MYANMAR=28,
00335 
00337     UBLOCK_GEORGIAN =29,
00339     U_GEORGIAN=29,
00340 
00342     UBLOCK_HANGUL_JAMO =30,
00344     U_HANGUL_JAMO=30,
00345 
00347     UBLOCK_ETHIOPIC =31,
00349     U_ETHIOPIC=31,
00350 
00352     UBLOCK_CHEROKEE =32,
00354     U_CHEROKEE=32,
00355 
00357     UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33,
00359     U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS=33,
00360 
00362     UBLOCK_OGHAM =34,
00364     U_OGHAM=34,
00365 
00367     UBLOCK_RUNIC =35,
00369     U_RUNIC=35,
00370 
00372     UBLOCK_KHMER =36,
00374     U_KHMER=36,
00375 
00377     UBLOCK_MONGOLIAN =37,
00379     U_MONGOLIAN=37,
00380 
00382     UBLOCK_LATIN_EXTENDED_ADDITIONAL =38,
00384     U_LATIN_EXTENDED_ADDITIONAL=38,
00385 
00387     UBLOCK_GREEK_EXTENDED =39,
00389     U_GREEK_EXTENDED=39,
00390 
00392     UBLOCK_GENERAL_PUNCTUATION =40,
00394     U_GENERAL_PUNCTUATION=40,
00395 
00397     UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41,
00399     U_SUPERSCRIPTS_AND_SUBSCRIPTS=41,
00400     
00402     UBLOCK_CURRENCY_SYMBOLS =42,
00404     U_CURRENCY_SYMBOLS=42,
00405     
00407     UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43,
00409     U_COMBINING_MARKS_FOR_SYMBOLS=43,
00410     
00412     UBLOCK_LETTERLIKE_SYMBOLS =44,
00414     U_LETTERLIKE_SYMBOLS=44,
00415     
00417     UBLOCK_NUMBER_FORMS =45,
00419     U_NUMBER_FORMS=45,
00420 
00422     UBLOCK_ARROWS =46,
00424     U_ARROWS=46,
00425 
00427     UBLOCK_MATHEMATICAL_OPERATORS =47,
00429     U_MATHEMATICAL_OPERATORS=47,
00430 
00432     UBLOCK_MISCELLANEOUS_TECHNICAL =48,
00434     U_MISCELLANEOUS_TECHNICAL=48,
00435 
00437     UBLOCK_CONTROL_PICTURES =49,
00439     U_CONTROL_PICTURES=49,
00440 
00442     UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50,
00444     U_OPTICAL_CHARACTER_RECOGNITION=50,
00445 
00447     UBLOCK_ENCLOSED_ALPHANUMERICS =51,
00449     U_ENCLOSED_ALPHANUMERICS=51,
00450 
00452     UBLOCK_BOX_DRAWING =52,
00454     U_BOX_DRAWING=52,
00455 
00457     UBLOCK_BLOCK_ELEMENTS =53,
00459     U_BLOCK_ELEMENTS=53,
00460 
00462     UBLOCK_GEOMETRIC_SHAPES =54,
00464     U_GEOMETRIC_SHAPES=54,
00465 
00467     UBLOCK_MISCELLANEOUS_SYMBOLS =55,
00469     U_MISCELLANEOUS_SYMBOLS=55,
00470 
00472     UBLOCK_DINGBATS =56,
00474     U_DINGBATS=56,
00475 
00477     UBLOCK_BRAILLE_PATTERNS =57,
00479     U_BRAILLE_PATTERNS=57,
00480 
00482     UBLOCK_CJK_RADICALS_SUPPLEMENT =58,
00484     U_CJK_RADICALS_SUPPLEMENT=58,
00485 
00487     UBLOCK_KANGXI_RADICALS =59,
00489     U_KANGXI_RADICALS=59,
00490 
00492     UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60,
00494     U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS=60,
00495 
00497     UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61,
00499     U_CJK_SYMBOLS_AND_PUNCTUATION=61,
00500 
00502     UBLOCK_HIRAGANA =62,
00504     U_HIRAGANA=62,
00505 
00507     UBLOCK_KATAKANA =63,
00509     U_KATAKANA=63,
00510 
00512     UBLOCK_BOPOMOFO =64,
00514     U_BOPOMOFO=64,
00515 
00517     UBLOCK_HANGUL_COMPATIBILITY_JAMO =65,
00519     U_HANGUL_COMPATIBILITY_JAMO=65,
00520 
00522     UBLOCK_KANBUN =66,
00524     U_KANBUN=66,
00525 
00527     UBLOCK_BOPOMOFO_EXTENDED =67,
00529     U_BOPOMOFO_EXTENDED=67,
00530 
00532     UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68,
00534     U_ENCLOSED_CJK_LETTERS_AND_MONTHS=68,
00535 
00537     UBLOCK_CJK_COMPATIBILITY =69,
00539     U_CJK_COMPATIBILITY=69,
00540 
00542     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70,
00544     U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A=70,
00545 
00547     UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71,
00549     U_CJK_UNIFIED_IDEOGRAPHS=71,
00550 
00552     UBLOCK_YI_SYLLABLES =72,
00554     U_YI_SYLLABLES=72,
00555 
00557     UBLOCK_YI_RADICALS =73,
00559     U_YI_RADICALS=73,
00560 
00562     UBLOCK_HANGUL_SYLLABLES =74,
00564     U_HANGUL_SYLLABLES=74,
00565 
00567     UBLOCK_HIGH_SURROGATES =75,
00569     U_HIGH_SURROGATES=75,
00570 
00572     UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76,
00574     U_HIGH_PRIVATE_USE_SURROGATES=76,
00575 
00577     UBLOCK_LOW_SURROGATES =77,
00579     U_LOW_SURROGATES=77,
00580 
00582     UBLOCK_PRIVATE_USE = 78,
00584     UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE,  /* second UBLOCK_ spelling for 78, defined via the first */
00586     U_PRIVATE_USE_AREA=78,
00587 
00589     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79,
00591     U_CJK_COMPATIBILITY_IDEOGRAPHS=79,
00592 
00594     UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80,
00596     U_ALPHABETIC_PRESENTATION_FORMS=80,
00597 
00599     UBLOCK_ARABIC_PRESENTATION_FORMS_A =81,
00601     U_ARABIC_PRESENTATION_FORMS_A=81,
00602 
00604     UBLOCK_COMBINING_HALF_MARKS =82,
00606     U_COMBINING_HALF_MARKS=82,
00607 
00609     UBLOCK_CJK_COMPATIBILITY_FORMS =83,
00611     U_CJK_COMPATIBILITY_FORMS=83,
00612 
00614     UBLOCK_SMALL_FORM_VARIANTS =84,
00616     U_SMALL_FORM_VARIANTS=84,
00617 
00619     UBLOCK_ARABIC_PRESENTATION_FORMS_B =85,
00621     U_ARABIC_PRESENTATION_FORMS_B=85,
00622 
00624     UBLOCK_SPECIALS =86,
00626     U_SPECIALS=86,
00627 
00629     UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87,
00631     U_HALFWIDTH_AND_FULLWIDTH_FORMS=87,
00632     
00634     UBLOCK_OLD_ITALIC = 88  ,  /* from 88 through 96 only the UBLOCK_ spelling exists; there is no U_ alias */
00636     UBLOCK_GOTHIC = 89 ,
00638     UBLOCK_DESERET = 90 ,
00640     UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 ,
00642     UBLOCK_MUSICAL_SYMBOLS = 92 ,
00644     UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  ,
00646     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 ,
00648     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 ,
00650     UBLOCK_TAGS = 96 ,
00652     UBLOCK_COUNT=97,  /* one past UBLOCK_TAGS; because codes start at 1 this is not the number of blocks */
00654     U_SCRIPT_COUNT=UBLOCK_COUNT,  /* same value as UBLOCK_COUNT (97) */
00655 
00657     UBLOCK_INVALID_CODE=-1,  /* the only negative enumerator in this enum */
00658 
00660     U_CHAR_SCRIPT_COUNT =UBLOCK_COUNT,  /* same value as UBLOCK_COUNT (97) */
00662     U_NO_SCRIPT = UBLOCK_COUNT  /* NOTE(review): equals the count value 97, not UBLOCK_INVALID_CODE -- confirm callers expect that */
00663 };
00664 
00666 typedef enum UBlockCode UBlockCode;  /* lets C code name the type without the enum keyword */
00667 
00672 enum UCellWidth  /* four width codes 0..3, then a trailing count */
00673 {
00675     U_ZERO_WIDTH              = 0,
00677     U_HALF_WIDTH              = 1,
00679     U_FULL_WIDTH              = 2,
00681     U_NEUTRAL_WIDTH           = 3,
00683     U_CELL_WIDTH_COUNT              /* no initializer: takes the value 4 */
00684 };
00685 
00687 typedef enum UCellWidth UCellWidth;  /* lets C code name the type without the enum keyword */
00688 
00700 enum UCharNameChoice {  /* no explicit initializers, so the enumerators number 0, 1, 2, 3 in order */
00701     U_UNICODE_CHAR_NAME,       /* 0 */
00702     U_UNICODE_10_CHAR_NAME,    /* 1 */
00703     U_EXTENDED_CHAR_NAME,      /* 2 */
00704     U_CHAR_NAME_CHOICE_COUNT   /* 3: the number of choices declared above it */
00705 };
00706 
00708 typedef enum UCharNameChoice UCharNameChoice;  /* lets C code name the type without the enum keyword */
00709 
00722 U_CAPI UBool U_EXPORT2  /* every u_is* predicate in this run takes one UChar32 and returns UBool */
00723 u_islower(UChar32 c);
00724 
00736 U_CAPI UBool U_EXPORT2
00737 u_isupper(UChar32 c);
00738 
00750 U_CAPI UBool U_EXPORT2
00751 u_istitle(UChar32 c);
00752 
00760 U_CAPI UBool U_EXPORT2
00761 u_isdigit(UChar32 c);
00762 
00771 U_CAPI UBool U_EXPORT2
00772 u_isalnum(UChar32 c);
00773 
00789 U_CAPI UBool U_EXPORT2
00790 u_isdefined(UChar32 c);
00791 
00803 U_CAPI UBool U_EXPORT2
00804 u_isalpha(UChar32 c);
00805 
00813 U_CAPI UBool U_EXPORT2
00814 u_isspace(UChar32 c);
00815 
00844 U_CAPI UBool U_EXPORT2
00845 u_isWhitespace(UChar32 c);  /* a separate function from u_isspace; NOTE(review): how the two differ is not visible in this listing */
00846 
00862 U_CAPI UBool U_EXPORT2
00863 u_iscntrl(UChar32 c);
00864 
00865 
00876 U_CAPI UBool U_EXPORT2
00877 u_isprint(UChar32 c);
00878 
00890 U_CAPI UBool U_EXPORT2
00891 u_isbase(UChar32 c);
00892 
00902 U_CAPI UCharDirection U_EXPORT2  /* result is typed as the UCharDirection enum */
00903 u_charDirection(UChar32 c);
00904 
00915 U_CAPI UBool U_EXPORT2
00916 u_isMirrored(UChar32 c);
00917 
00934 U_CAPI UChar32 U_EXPORT2
00935 u_charMirror(UChar32 c);
00936 
00988 U_CAPI uint16_t U_EXPORT2  /* typed uint16_t, not UCellWidth; NOTE(review): presumably yields a UCellWidth constant -- confirm */
00989 u_charCellWidth(UChar32 c);
00990 
01001 U_CAPI int8_t U_EXPORT2  /* typed int8_t, not UCharCategory; NOTE(review): presumably yields a UCharCategory constant -- confirm */
01002 u_charType(UChar32 c);
01003 
01021 typedef UBool U_CALLCONV  /* function type for a callback; u_enumCharTypes() takes a pointer to it */
01022 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
01023 
01043 U_CAPI void U_EXPORT2
01044 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);  /* NOTE(review): context is presumably passed through to enumRange unchanged -- confirm */
01045 
01053 U_CAPI uint8_t U_EXPORT2
01054 u_getCombiningClass(UChar32 c);
01055 
01064 U_CAPI int32_t U_EXPORT2
01065 u_charDigitValue(UChar32 c);
01066 
01073 U_CAPI UBlockCode U_EXPORT2
01074 ublock_getCode(UChar32    ch);  /* also reachable under the name u_charScript, which this header defines as a macro for it */
01075 
01108 U_CAPI UTextOffset U_EXPORT2
01109 u_charName(UChar32 code, UCharNameChoice nameChoice,
01110            char *buffer, UTextOffset bufferLength,  /* buffer is a non-const char*, passed together with its length */
01111            UErrorCode *pErrorCode);
01112 
01132 U_CAPI UChar32 U_EXPORT2
01133 u_charFromName(UCharNameChoice nameChoice,
01134                const char *name,  /* name is const char*, so the callee cannot modify it through this pointer */
01135                UErrorCode *pErrorCode);
01136 
01153 typedef UBool UEnumCharNamesFn(void *context,  /* NOTE(review): no U_CALLCONV here, unlike the UCharEnumTypeRange callback typedef -- confirm this is intended */
01154                                UChar32 code,
01155                                UCharNameChoice nameChoice,
01156                                const char *name,
01157                                UTextOffset length);
01158 
01179 U_CAPI void U_EXPORT2
01180 u_enumCharNames(UChar32 start, UChar32 limit,
01181                 UEnumCharNamesFn *fn,  /* pointer to the UEnumCharNamesFn callback type declared just above */
01182                 void *context,
01183                 UCharNameChoice nameChoice,
01184                 UErrorCode *pErrorCode);
01185 
01202 U_CAPI UBool U_EXPORT2  /* five identifier-related predicates follow; each takes one UChar32 and returns UBool */
01203 u_isIDStart(UChar32 c);
01204 
01229 U_CAPI UBool U_EXPORT2
01230 u_isIDPart(UChar32 c);
01231 
01256 U_CAPI UBool U_EXPORT2
01257 u_isIDIgnorable(UChar32 c);
01258 
01279 U_CAPI UBool U_EXPORT2
01280 u_isJavaIDStart(UChar32 c);
01281 
01310 U_CAPI UBool U_EXPORT2
01311 u_isJavaIDPart(UChar32 c);
01312 
01335 U_CAPI UChar32 U_EXPORT2  /* u_tolower, u_toupper and u_totitle all take one UChar32 and return a UChar32 */
01336 u_tolower(UChar32 c);
01337 
01353 U_CAPI UChar32 U_EXPORT2
01354 u_toupper(UChar32 c);
01355 
01370 U_CAPI UChar32 U_EXPORT2
01371 u_totitle(UChar32 c);
01372 
01374 #define U_FOLD_CASE_DEFAULT 0  /* NOTE(review): presumably a value for the options argument of u_foldCase() -- confirm */
01375 
01376 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1  /* NOTE(review): presumably a value for the options argument of u_foldCase() -- confirm */
01377 
01391 U_CAPI UChar32 U_EXPORT2
01392 u_foldCase(UChar32 c, uint32_t options);
01393 
01429 U_CAPI int32_t U_EXPORT2
01430 u_digit(UChar32 ch, int8_t radix);  /* character and radix in, int32_t out */
01431 
01458 U_CAPI UChar32 U_EXPORT2
01459 u_forDigit(int32_t digit, int8_t radix);  /* reverse signature of u_digit: int32_t and radix in, UChar32 out */
01460 
01468 U_CAPI void U_EXPORT2
01469 u_getUnicodeVersion(UVersionInfo info);  /* returns void; NOTE(review): presumably writes the version into info -- confirm */
01470 
01471 
01475 #define u_charScript ublock_getCode  /* object-like macro: u_charScript(c) expands to ublock_getCode(c) */
01476 
01477 typedef UBlockCode UCharScript;  /* second name for the UBlockCode enum type */
01478 
01479 U_CDECL_END
01480 
01481 #endif /* UCHAR_H */
01482 /*eof*/

Generated on Mon Mar 4 23:18:37 2002 for ICU 2.0 by doxygen 1.2.14 written by Dimitri van Heesch, © 1997-2002