Main Page   Class Hierarchy   Alphabetical List   Data Structures   File List   Data Fields   Globals  

normlzr.h

00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-2001, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 #include "unicode/unistr.h"
00014 #include "unicode/chariter.h"
00015 #include "unicode/unorm.h"
00016 
00017 struct UCharIterator;  // Forward declaration: this header only stores a pointer to it (Normalizer::text).
00018 typedef struct UCharIterator UCharIterator;  // Allows the type to be named without the `struct` keyword.
00019 
00020 U_NAMESPACE_BEGIN
00111 class U_COMMON_API Normalizer
00112 {
        // Normalizer combines two kinds of functionality:
        //   * static one-shot helpers (normalize, compose, decompose, quickCheck)
        //     that work on a whole UnicodeString, and
        //   * an iterator-style API (current/first/last/next/previous, setIndex,
        //     getIndex, ...) over text supplied as a UnicodeString, a UChar
        //     buffer, or a CharacterIterator.
        // Normalization forms are expressed as UNormalizationMode. The EMode enum
        // and every overload taking it are grouped under "Deprecated APIs" below.
        //
        // NOTE(review): a copy constructor and a destructor are declared, and the
        // class holds a raw UCharIterator* (text), but no copy-assignment operator
        // is declared. If the destructor releases `text`, the implicitly generated
        // operator= would be unsafe -- confirm against the implementation.
00113 public:
        // Sentinel constant with value 0xffff.
        // NOTE(review): presumably the value the iteration functions return when
        // no character is available -- confirm against the implementation.
00119   enum {
00120       DONE=0xffff
00121   };
00122 
00123   // Constructors
00124 
        // Creates a Normalizer for the text in `str` using normalization `mode`.
00135   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00136     
        // Creates a Normalizer for the `length` UChars starting at `str`, using
        // normalization `mode`.
00148   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00149 
        // Creates a Normalizer for the text exposed by `iter`, using
        // normalization `mode`.
00160   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00161 
        // Copy constructor.
00166   Normalizer(const Normalizer& copy);
00167 
        // Destructor (non-virtual).
00172   ~Normalizer();
00173 
00174 
00175   //-------------------------------------------------------------------------
00176   // Static utility methods
00177   //-------------------------------------------------------------------------
00178 
        // One-shot normalization of `source` under `mode` and `options`.
        // The output is delivered through `result`; errors through `status`.
00196   static void normalize(const UnicodeString& source,
00197                         UNormalizationMode mode, int32_t options,
00198                         UnicodeString& result,
00199                         UErrorCode &status);
00200 
        // One-shot composition of `source` into `result`.
        // NOTE(review): `compat` presumably selects the compatibility variant
        // (NFKC rather than NFC) -- confirm against the implementation.
00222   static void compose(const UnicodeString& source,
00223                       UBool compat, int32_t options,
00224                       UnicodeString& result,
00225                       UErrorCode &status);
00226 
        // One-shot decomposition of `source` into `result`.
        // NOTE(review): `compat` presumably selects the compatibility variant
        // (NFKD rather than NFD) -- confirm against the implementation.
00249   static void decompose(const UnicodeString& source,
00250                         UBool compat, int32_t options,
00251                         UnicodeString& result,
00252                         UErrorCode &status);
00253 
        // Checks `source` against normalization `mode` and reports the outcome
        // as a UNormalizationCheckResult; errors are reported through `status`.
00272   static UNormalizationCheckResult
00273   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00274 
00275   //-------------------------------------------------------------------------
00276   // Iteration API
00277   //-------------------------------------------------------------------------
00278   
        // The five functions below each yield a UChar32 and are non-const, i.e.
        // they may move the iteration position.
        // NOTE(review): exact positioning semantics and the use of DONE as an
        // end marker are not visible in this header -- see the implementation.
00287   UChar32              current(void);
00288 
00297   UChar32              first(void);
00298 
00307   UChar32              last(void);
00308 
00317   UChar32              next(void);
00318 
00327   UChar32              previous(void);
00328 
        // Repositions to `index` and returns a UChar32.
00348   UChar32              setIndex(UTextOffset index);
00349 
        // Repositions to `index` without returning a character.
00359   void                 setIndexOnly(UTextOffset index);
00360 
        // Resets the iteration state.
        // NOTE(review): presumably returns to the start of the text -- confirm.
00366   void                reset(void);
00367 
        // Position queries; all three are const and return a UTextOffset.
00382   UTextOffset            getIndex(void) const;
00383 
00392   UTextOffset            startIndex(void) const;
00393 
00404   UTextOffset            endIndex(void) const;
00405 
        // Equality comparison with another Normalizer.
00414   UBool        operator==(const Normalizer& that) const;
00415 
        // Inequality; defined inline below as the negation of operator==.
00424   inline UBool        operator!=(const Normalizer& that) const;
00425 
        // Returns a pointer to a Normalizer copy of this object.
        // NOTE(review): presumably heap-allocated and owned by the caller --
        // confirm against the implementation.
00432   Normalizer*        clone(void) const;
00433 
        // Returns a 32-bit hash value for this object.
00440   int32_t                hashCode(void) const;
00441 
00442   //-------------------------------------------------------------------------
00443   // Property access methods
00444   //-------------------------------------------------------------------------
00445 
        // Sets the normalization mode used by this object.
00461   void setMode(UNormalizationMode newMode);
00462 
        // Returns a UNormalizationMode.
        // NOTE(review): presumably the current value of fUMode -- confirm.
00473   UNormalizationMode getUMode(void) const;
00474 
        // Turns the option identified by `option` on or off according to `value`.
        // NOTE(review): presumably `option` is a flag such as IGNORE_HANGUL and
        // the state is kept in fOptions -- confirm.
00491   void setOption(int32_t option, 
00492          UBool value);
00493 
        // Reports whether the option identified by `option` is set.
00504   UBool getOption(int32_t option) const;
00505 
        // The three setText overloads accept the same input kinds as the three
        // constructors (UnicodeString, CharacterIterator, UChar buffer + length)
        // and report errors through `status`.
00514   void setText(const UnicodeString& newText, 
00515            UErrorCode &status);
00516 
00525   void setText(const CharacterIterator& newText, 
00526            UErrorCode &status);
00527 
00537   void setText(const UChar* newText,
00538                     int32_t length,
00539             UErrorCode &status);
        // Delivers text through the non-const reference `result`.
        // NOTE(review): presumably a copy of the input text -- confirm.
00546   void            getText(UnicodeString&  result);
00547 
00548   //-------------------------------------------------------------------------
00549   // Deprecated APIs
00550   //-------------------------------------------------------------------------
00551 
        // Bit values from which the EMode enumerators below are composed.
00556   enum {
00557     COMPAT_BIT         = 1,
00558     DECOMP_BIT         = 2,
00559     COMPOSE_BIT        = 4,
00560     FCD_BIT            = 8
00561   };
00562 
        // Legacy mode enumeration. The inline converters at the end of this
        // header map each value to a UNormalizationMode as noted per enumerator.
00567   enum EMode {
          // Maps to UNORM_NONE.
00581     NO_OP         = 0,
00582     
          // Maps to UNORM_NFC.
00598     COMPOSE         = COMPOSE_BIT,
00599 
          // Maps to UNORM_NFKC.
00615     COMPOSE_COMPAT     = COMPOSE_BIT | COMPAT_BIT,
00616 
          // Maps to UNORM_NFD.
00632     DECOMP         = DECOMP_BIT,
00633 
          // Maps to UNORM_NFKD.
00649     DECOMP_COMPAT     = DECOMP_BIT | COMPAT_BIT,
00650 
          // Maps to UNORM_FCD.
00654     FCD = FCD_BIT
00655   };
00656 
        // Option flag with value 0x001.
        // NOTE(review): presumably intended for the `options`/`opt` arguments
        // of the functions in this class -- confirm.
00658   enum {
00677     IGNORE_HANGUL     = 0x001
00678   };
00679 
        // EMode-based counterparts of the three constructors above, each with
        // and without an extra int32_t option argument.
00690   Normalizer(const UnicodeString& str, 
00691          EMode mode);
00692     
00711   Normalizer(const UnicodeString& str, 
00712          EMode mode, 
00713          int32_t opt);
00714 
00726   Normalizer(const UChar* str,
00727          int32_t length,
00728          EMode mode);
00729 
00745   Normalizer(const UChar* str,
00746          int32_t length,
00747          EMode mode,
00748          int32_t option);
00749 
00760   Normalizer(const CharacterIterator& iter, 
00761          EMode mode);
00762 
00778   Normalizer(const CharacterIterator& iter, 
00779          EMode mode, 
00780          int32_t opt);
00781 
        // EMode overload of normalize(); the inline definition below converts
        // `mode` with getUNormalizationMode() and forwards to the
        // UNormalizationMode overload.
00802   inline static void
00803   normalize(const UnicodeString& source, 
00804             EMode mode, 
00805             int32_t options,
00806             UnicodeString& result, 
00807             UErrorCode &status);
00808 
        // EMode overload of quickCheck(); converts `mode` and forwards to the
        // UNormalizationMode overload (see the inline definition below).
00825   inline static UNormalizationCheckResult
00826   quickCheck(const UnicodeString& source,
00827              EMode                mode, 
00828              UErrorCode&          status);
00829 
        // Converts an EMode to the matching UNormalizationMode. If `status`
        // already indicates failure, or `mode` is not a known enumerator (in
        // which case `status` becomes U_ILLEGAL_ARGUMENT_ERROR), the result is
        // UNORM_DEFAULT.
00837   inline static UNormalizationMode getUNormalizationMode(EMode mode, 
00838                                                   UErrorCode& status);
00839 
        // Converts a UNormalizationMode to the matching EMode. If `status`
        // already indicates failure, or `mode` is not one of the handled values
        // (in which case `status` becomes U_ILLEGAL_ARGUMENT_ERROR), the result
        // is DECOMP_COMPAT.
00847   inline static EMode getNormalizerEMode(UNormalizationMode mode, 
00848                                          UErrorCode& status);
00849 
        // EMode overload of setMode(); stores the converted mode in fUMode and
        // does not report conversion errors.
00876   inline void setMode(EMode newMode);
00877 
        // Returns fUMode converted to an EMode; conversion errors are not
        // reported to the caller.
00884   inline EMode getMode(void) const;
00885 
00886 private:
00887   //-------------------------------------------------------------------------
00888   // Private functions
00889   //-------------------------------------------------------------------------
00890 
00891   // Private utility methods for iteration
00892   // For documentation, see the source code
00893   UBool nextNormalize();
00894   UBool previousNormalize();
00895 
00896   void    init(CharacterIterator *iter);
00897   void    clearBuffer(void);
00898 
00899   // Helper, without UErrorCode, for easier transitional code
00900   // remove after 2002-sep-30 with EMode etc.
        // Same EMode -> UNormalizationMode mapping as getUNormalizationMode();
        // unknown values yield UNORM_DEFAULT.
00901   inline static UNormalizationMode getUMode(EMode mode);
00902 
00903   //-------------------------------------------------------------------------
00904   // Private data
00905   //-------------------------------------------------------------------------
00906 
        // Current normalization mode (written by the inline setMode(EMode)) and
        // an int32_t of options.
00907   UNormalizationMode  fUMode;
00908   int32_t             fOptions;
00909 
00910   // The input text and our position in it
00911   UCharIterator       *text;
00912 
00913   // The normalization buffer is the result of normalization
00914   // of the source in [currentIndex..nextIndex[ .
00915   UTextOffset         currentIndex, nextIndex;
00916 
00917   // A buffer for holding intermediate results
00918   UnicodeString       buffer;
00919   UTextOffset         bufferPos;
00920 };
00921 
00922 //-------------------------------------------------------------------------
00923 // Inline implementations
00924 //-------------------------------------------------------------------------
00925 
00926 inline UBool
00927 Normalizer::operator!= (const Normalizer& other) const
00928 { return ! operator==(other); }
00929 
00930 inline void 
00931 Normalizer::normalize(const UnicodeString& source, 
00932                       EMode mode, int32_t options,
00933                       UnicodeString& result, 
00934                       UErrorCode &status) {
00935   normalize(source, getUNormalizationMode(mode, status), options, result, status);
00936 }
00937 
00938 inline UNormalizationCheckResult
00939 Normalizer::quickCheck(const UnicodeString& source,
00940                        EMode mode, 
00941                        UErrorCode &status) {
00942   return quickCheck(source, getUNormalizationMode(mode, status), status);
00943 }
00944 
00945 inline void
00946 Normalizer::setMode(EMode newMode) {
00947   UErrorCode status = U_ZERO_ERROR;
00948   fUMode = getUNormalizationMode(newMode, status);
00949 }
00950 
00951 inline Normalizer::EMode
00952 Normalizer::getMode() const {
00953   UErrorCode status = U_ZERO_ERROR;
00954   return getNormalizerEMode(fUMode, status);
00955 }
00956 
00957 inline UNormalizationMode Normalizer::getUNormalizationMode(
00958                                    Normalizer::EMode  mode, UErrorCode &status)
00959 {
00960   if (U_SUCCESS(status))
00961   { 
00962     switch (mode)
00963     {
00964     case Normalizer::NO_OP : 
00965       return UNORM_NONE;
00966     case Normalizer::COMPOSE :
00967       return UNORM_NFC;
00968     case Normalizer::COMPOSE_COMPAT :
00969       return UNORM_NFKC;
00970     case Normalizer::DECOMP :
00971       return UNORM_NFD;
00972     case Normalizer::DECOMP_COMPAT :
00973       return UNORM_NFKD;
00974     case Normalizer::FCD:
00975       return UNORM_FCD;
00976     default : 
00977       status = U_ILLEGAL_ARGUMENT_ERROR; 
00978     }
00979   }
00980   return UNORM_DEFAULT;
00981 }
00982 
00983 inline UNormalizationMode
00984 Normalizer::getUMode(Normalizer::EMode mode) {
00985   switch(mode) {
00986   case Normalizer::NO_OP : 
00987     return UNORM_NONE;
00988   case Normalizer::COMPOSE :
00989     return UNORM_NFC;
00990   case Normalizer::COMPOSE_COMPAT :
00991     return UNORM_NFKC;
00992   case Normalizer::DECOMP :
00993     return UNORM_NFD;
00994   case Normalizer::DECOMP_COMPAT :
00995     return UNORM_NFKD;
00996   case Normalizer::FCD:
00997     return UNORM_FCD;
00998   default : 
00999     return UNORM_DEFAULT;
01000   }
01001 }
01002 
01003 inline Normalizer::EMode Normalizer::getNormalizerEMode(
01004                                   UNormalizationMode mode, UErrorCode &status)
01005 {
01006   if (U_SUCCESS(status))
01007   {
01008     switch (mode)
01009     {
01010     case UNORM_NONE :
01011       return Normalizer::NO_OP;
01012     case UNORM_NFD :
01013       return Normalizer::DECOMP;
01014     case UNORM_NFKD :
01015       return Normalizer::DECOMP_COMPAT;
01016     case UNORM_NFC :
01017       return Normalizer::COMPOSE;
01018     case UNORM_NFKC :
01019       return Normalizer::COMPOSE_COMPAT;
01020     case UNORM_FCD:
01021       return Normalizer::FCD;
01022     default : 
01023       status = U_ILLEGAL_ARGUMENT_ERROR; 
01024     }
01025   }
01026   return Normalizer::DECOMP_COMPAT;
01027 }
01028 
01029 U_NAMESPACE_END
01030 #endif // NORMLZR_H

Generated on Mon Mar 4 23:18:34 2002 for ICU 2.0 by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002