Current File : //opt/RZphp80/includes/I18N/UnicodeNormalizer.php
<?php

/**
 * Unicode Normalizer
 *
 * "...Unicode's normalization is the concept of character composition and decomposition.
 * Character composition is the process of combining simpler characters into
 * fewer precomposed characters, such as the n character and the combining ~ character
 * into the single n+~ character. Decomposition is the opposite process,
 * breaking precomposed characters back into their component pieces...
 * ...Normalization is important when comparing text strings for searching and
 * sorting (collation)..." [Wikipedia]
 *
 * Performs the 4 normalizations:
 * NFD:  Canonical Decomposition
 * NFC:  Canonical Decomposition, followed by Canonical Composition
 * NFKD: Compatibility Decomposition
 * NFKC: Compatibility Decomposition, followed by Canonical Composition
 * Complies with the official Unicode.org regression test.
 * Uses UTF8 binary strings natively but can normalize a string in any UTF format.
 * Fully tested with phpUnit. Code coverage test close to 100%.
 *
 * PHP version 5
 *
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 * + Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * + Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation and/or
 * other materials provided with the distribution.
 * + The names of its contributors may not be used to endorse or
 * promote products derived from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @category  Internationalization
 * @package   I18N_UnicodeNormalizer
 * @author    Michel Corne <mcorne@yahoo.com>
 * @copyright 2007 Michel Corne
 * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License
 * @version   SVN: $Id: UnicodeNormalizer.php 39 2007-07-25 12:33:15Z mcorne $
 * @link      http://pear.php.net/package/I18N_UnicodeNormalizer
 */

require_once 'UnicodeNormalizer/String.php';

/**
 * Unicode Normalizer
 *
 * Performs the 4 normalizations: NFD, NFC, NFKD, NFKC.
 *
 * <pre>
 * Example 1: NFC-normalization of UTF-8 string 'foo'
 * $normalized = I18N_UnicodeNormalizer::toNFC('foo');
 * or
 * $normalizer = new I18N_UnicodeNormalizer();
 * $normalized = $normalizer->normalize('foo', 'NFC');
 *
 * Example 2: NFC-normalization of ISO-8859-1 string 'foo'
 * $normalized = I18N_UnicodeNormalizer::toNFC('foo', 'ISO-8859-1');
 * or
 * $normalizer = new I18N_UnicodeNormalizer();
 * $normalized = $normalizer->normalize('foo', 'NFC', 'ISO-8859-1');
 * </pre>
 *
 * @category  Internationalization
 * @package   I18N_UnicodeNormalizer
 * @author    Michel Corne <mcorne@yahoo.com>
 * @copyright 2007 Michel Corne
 * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/I18N_UnicodeNormalizer
 * @link      http://www.unicode.org/unicode/reports/tr15 : Unicode Normalization Forms
 * @link      http://en.wikipedia.org/wiki/Unicode_normalization : Unicode Normalization Definition
 */
class I18N_UnicodeNormalizer
{
    /**
     * The canonical combining classes
     *
     * Loaded on demand from the 'combining' compiled file and shared by all
     * instances. Looked up by UTF-8 character; a character that is absent is
     * treated as having a combining class of 0.
     *
     * @var    array
     * @access private
     * @static
     */
    private static $combining = array();

    /**
     * The compiled file names
     *
     * Indexed by file nickname. The constructor prepends the data directory
     * path to each file name, so that after construction the values are paths.
     *
     * @var    array
     * @access private
     */
    private $compiled = array(// nickname => file name
        'canonical_decomp' => 'CanonicalDecompositions.php',
        'canonical_decomp_x' => 'CanonicalDecompositionsX.php',
        'combining' => 'CanonicalCombining.php',
        'compat_decomp' => 'CompatibilityDecompositions.php',
        'compat_decomp_x' => 'CompatibilityDecompositionsX.php',
        'compositions' => 'Compositions.php',
        'corrections' => 'NormalizationCorrections.php',
        'exclusions' => 'CompositionExclusions.php',
        'hangul_compos' => 'HangulCompositions.php',
        'hangul_decomp' => 'HangulDecompositions.php',
        'quick_check_nfc' => 'QuickCheckNFC.php',
        'quick_check_nfd' => 'QuickCheckNFD.php',
        'quick_check_nfkc' => 'QuickCheckNFKC.php',
        'quick_check_nfkd' => 'QuickCheckNFKD.php',
        'test_base' => 'BaseTests.php',
        'test_hangul' => 'HangulTests.php',
        );

    /**
     * The character compositions
     *
     * Loaded on demand from the 'compositions' compiled file and shared by all
     * instances. Looked up by the concatenation of the UTF-8 characters to compose.
     *
     * @var    array
     * @access private
     * @static
     */
    private static $compositions = array();

    /**
     * The data/compiled files directory
     *
     * Empty until getDataDir() has determined it.
     *
     * @var    string
     * @access private
     * @static
     */
    private static $dataDir = '';

    /**
     * The character decomposition mappings
     *
     * One sub-array per normalization type, e.g. $decomp['NFD'],
     * each loaded on demand and shared by all instances.
     *
     * @var    array
     * @access private
     * @static
     */
    private static $decomp = array();

    /**
     * The decomposition file nickname for each normalization
     *
     * @var    array
     * @access private
     * @see    self::$compiled
     */
    private $decompTypes = array(// normalization type => compiled file nickname
        'NFD' => 'canonical_decomp_x',
        'NFKD' => 'compat_decomp_x',
        'NFC' => 'canonical_decomp_x', // used by getCharInfo() only
        'NFKC' => 'compat_decomp_x', // used by getCharInfo() only
        );

    /**
     * The character normalization quick checks
     *
     * Characters listed in a given quick check array do not pass the quick check,
     * and can possibly be normalized.
     * There are 4 quick check sub-arrays, one for each normalization type,
     * e.g. $quickCheck['NFC'] for the NFC normalization.
     *
     * @var    array
     * @access private
     * @static
     */
    private static $quickCheck = array();

    /**
     * The quick check file nickname for each normalization
     *
     * Also used by isValidType() as the list of supported normalization types.
     *
     * @var    array
     * @access private
     * @see    self::$compiled
     */
    private $quickCheckTypes = array(// normalization type => compiled file nickname
        'NFC' => 'quick_check_nfc',
        'NFD' => 'quick_check_nfd',
        'NFKC' => 'quick_check_nfkc',
        'NFKD' => 'quick_check_nfkd',
        );

    /**
     * The I18N_UnicodeNormalizer_String class instance
     *
     * @var    object
     * @access private
     */
    private $string;

    /**
     * The class constructor
     *
     * Turns the compiled file names into full paths located in the "utf8"
     * sub-directory of the data directory, and creates the string helper.
     *
     * @param  string $dir the data/compiled files base directory,
     *                     this is only to be used if it cannot be determined
     *                     automatically, or by the package maintainers for testing purposes
     * @return void
     * @access public
     */
    public function __construct($dir = '')
    {
        if (!$dir) {
            // no directory given, falls back to the detected data directory
            $dir = self::getDataDir();
        }
        $dir .= '/utf8/';

        // prepends the directory to each file name, the nicknames are kept as keys
        $paths = array();
        foreach ($this->compiled as $nickname => $fileName) {
            $paths[$nickname] = $dir . $fileName;
        }
        $this->compiled = $paths;

        $this->string = new I18N_UnicodeNormalizer_String();
    }

    /**
     * Gets some information for a set of characters
     *
     * Reports which of the characters are listed in the quick check data, their
     * combining classes, their decomposition mappings, and the compositions of
     * any sequence of 1, 2 or 3 consecutive characters.
     * Mainly used for debugging/testing purposes.
     *
     * @param  mixed  $chars the UTF-8 characters to get the information for, either
     *                       as a string or an array of characters
     * @param  string $type  the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @return array  the information, up to 4 sub-arrays ('quick_check', 'combining',
     *                'decompositions', 'compositions'), with the characters
     *                converted to the UCN format as keys; the decomposition and
     *                composition values are converted to the UCN format as well;
     *                null if the normalization type is invalid
     * @access public
     */
    public function getCharInfo($chars, $type)
    {
        if (!$this->isValidType($type)) {
            // an invalid normalization type
            return null;
        }

        // loads the data that is not loaded yet: the quick check, the combining
        // classes, the compositions, and the decomposition mappings
        if (!isset(self::$quickCheck[$type])) {
            self::$quickCheck[$type] = require($this->compiled[$this->quickCheckTypes[$type]]);
        }
        if (!self::$combining) {
            self::$combining = require($this->compiled['combining']);
        }
        if (!self::$compositions) {
            self::$compositions = require($this->compiled['compositions']);
        }
        if (!isset(self::$decomp[$type])) {
            self::$decomp[$type] = require($this->compiled[$this->decompTypes[$type]]);
        }

        if (!is_array($chars)) {
            // splits the character string
            $chars = $this->string->split($chars);
        }

        // collects every sequence of 1, 2, and 3 consecutive characters
        $sequences = array();
        foreach ($chars as $idx => $char) {
            $sequence = $char;
            $sequences[$sequence] = true;

            for ($ahead = 1; $ahead <= 2; $ahead++) {
                if (!isset($chars[$idx + $ahead])) {
                    // no more characters to append
                    break;
                }
                $sequence .= $chars[$idx + $ahead];
                if (!$sequence) {
                    break;
                }
                $sequences[$sequence] = true;
            }
        }

        $chars = array_flip($chars);
        $toUcn = array($this->string, 'string2unicode');

        // each entry: the data to search, the keys to look for,
        // and whether the values must also be converted to the UCN format
        $lookups = array(
            'quick_check' => array(self::$quickCheck[$type], $chars, false),
            'combining' => array(self::$combining, $chars, false),
            'decompositions' => array(self::$decomp[$type], $chars, true),
            'compositions' => array(self::$compositions, $sequences, true),
        );

        $found = array();
        foreach ($lookups as $name => $lookup) {
            list($data, $wanted, $convertValues) = $lookup;

            $matches = array_intersect_key($data, $wanted);
            if (!$matches) {
                // none of the characters are present in this data
                continue;
            }
            // converts the characters, and the values if requested, to the UCN format
            $keys = array_map($toUcn, array_keys($matches));
            $values = array_values($matches);
            if ($convertValues) {
                $values = array_map($toUcn, $values);
            }
            $found[$name] = array_combine($keys, $values);
        }

        return $found;
    }

    /**
     * Determines the data/compiled files directory
     *
     * The directory is determined once and then cached. A "data" directory
     * located next to this file is used if present (a raw install, e.g. from
     * a SVN checkout). Otherwise, if PEAR/Config.php can be included, the PEAR
     * data directory followed by the class name is used.
     *
     * @return string the data/compiled files base directory, empty if it
     *                could not be determined
     * @access public
     * @static
     */
    public static function getDataDir()
    {
        if (!empty(self::$dataDir)) {
            // the data directory is already known
            return self::$dataDir;
        }

        $localDir = dirname(__FILE__) . '/data';

        if (file_exists($localDir)) {
            // assuming a raw install, e.g. coming from a SVN checkout
            self::$dataDir = $localDir;
        } else if ((@include_once "PEAR/Config.php")) {
            // there is a Pear install on the system, gets its data directory
            // and adds the package name to it
            $pearDataDir = PEAR_Config::singleton()->get('data_dir');
            self::$dataDir = $pearDataDir . '/' . __CLASS__;
        }
        // else: the install is most likely corrupted, the process will
        // stop at the next require statement of a data/compiled file

        return self::$dataDir;
    }

    /**
     * Gets the name list of the compiled files
     *
     * @param  string $dir the data/compiled files base directory
     * @return array  the compiled file paths, indexed by file nickname,
     *                as built by the constructor
     * @access public
     * @static
     */
    public static function getFileNames($dir = '')
    {
        // the constructor resolves the file paths
        $instance = new self($dir);

        return $instance->compiled;
    }

    /**
     * Checks if a character is a starter
     *
     * A starter is a character that passes the quick check and with a
     * combining class equal to 0.
     *
     * @param  string  $char the character
     * @param  string  $type the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @return boolean true if a starter, false otherwise,
     *                 null if the normalization type is invalid
     * @access public
     */
    public function isStarter($char, $type)
    {
        if (!$this->isValidType($type)) {
            // an invalid normalization type
            return null;
        }

        // loads the quick check file, and the canonical combining classes
        if (!isset(self::$quickCheck[$type])) {
            self::$quickCheck[$type] = require($this->compiled[$this->quickCheckTypes[$type]]);
        }
        if (!self::$combining) {
            self::$combining = require($this->compiled['combining']);
        }

        if (isset(self::$quickCheck[$type][$char])) {
            // the character does not pass the quick check
            return false;
        }

        // a starter has no combining class or a combining class equal to 0
        return empty(self::$combining[$char]);
    }

    /**
     * Checks if the normalization type is valid: NFC, NFD, NFKC or NFKD
     *
     * A type is valid if it has an entry in the quick check file nicknames.
     *
     * @param  string  $type the normalization type, e.g. 'NFC'
     * @return boolean true if valid, false otherwise
     * @access public
     */
    public function isValidType($type)
    {
        $isKnown = isset($this->quickCheckTypes[$type]);

        return $isKnown;
    }

    /**
     * Normalizes a string
     *
     * The string is converted to UTF-8 if needed, normalized, and converted
     * back to its original encoding. A pure ASCII string is returned as is.
     * Characters that pass the quick check and are in order are copied as is;
     * the other characters are accumulated, together with the characters that
     * follow the last starter, into a substring that is decomposed and resorted,
     * and recomposed in the case of the NFC and NFKC normalizations.
     *
     * @param  string $string   the string to normalize
     * @param  string $type     the type of normalization: 'NFC', 'NFD', 'NFKC'
     *                          or 'NFKD', 'NFC' is the default
     * @param  string $encoding the string encoding, must be one of mb_list_encodings(),
     *                          e.g. 'UTF-16', 'UTF-8' is the default
     * @return mixed  the normalized string, or false if the normalization type
     *                is invalid or the encoding is unknown
     * @access public
     */
    public function normalize($string, $type = '', $encoding = '')
    {
        if (!$type) {
            // no type specified, defaults to canonical composition
            $type = 'NFC';
        } else if (!$this->isValidType($type)) {
            // not a valid type
            return false;
        }

        if (!$encoding) {
            // no encoding specified, defaults to UTF-8 encoding
            $encoding = 'UTF-8';
        } else if (in_array($encoding, mb_list_encodings())) {
            // the encoding is valid, encodes the string to UTF-8
            $string = mb_convert_encoding($string, 'UTF-8', $encoding);
        } else {
            // unknown encoding
            return false;
        }

        if (!preg_match('~[^\x0-\x7F]~', $string)) {
            // an ASCII string (which is already normalized by definition)
            $normalized = $string;
        } else {
            // not an ASCII string
            // loads the quick check file, loads the canonical combining classes
            if (!isset(self::$quickCheck[$type])) {
                self::$quickCheck[$type] = require($this->compiled[$this->quickCheckTypes[$type]]);
            }
            if (!self::$combining) {
                self::$combining = require($this->compiled['combining']);
            }

            // a composition normalization is a decomposition followed by a recomposition
            $isComposition = ($type == 'NFC' or $type == 'NFKC');
            $isDecomposition = ($type == 'NFD' or $type == 'NFKD');
            $decompType = ($type == 'NFC') ? 'NFD' : 'NFKD';

            $toNormalize = '';
            $prevCombClass = 0;
            $starterPos = null;
            $normalized = '';
            $combiningClass = 0;
            $length = strlen($string);

            for ($i = 0; $i < $length;) {
                // reads an ASCII character directly, or gets the next multi-byte character
                // note: getChar() could be called directly but this increases the performance by 10-20%
                // note: the square bracket offset syntax is required, the curly brace
                // syntax is no longer accepted as of PHP 8.0
                $char = $string[$i];
                if ($char < "\x80") {
                    ++$i;
                } else {
                    $char = $this->string->getChar($string, $i, $length);
                }

                if (!isset(self::$quickCheck[$type][$char])) {
                    // the character passes quick check
                    // gets the character combining class
                    $combiningClass = isset(self::$combining[$char]) ? self::$combining[$char] : 0;

                    if ($combiningClass == 0) {
                        // the character is a starter
                        if ($toNormalize != '') {
                            // resets the normalized string to the first starter
                            $this->resetToStarter($normalized, $toNormalize, $starterPos);

                            if ($isComposition) {
                                // decompose-normalizes the substring, recomposes the substring
                                $toNormalize = $this->normalize($toNormalize, $decompType);
                                $normalized .= $this->recompose($toNormalize);
                            } else {
                                // a decomposition normalization, resorts the substring to normalize
                                $normalized .= $this->resortDecomp($toNormalize);
                            }
                            // resets the substring to normalize
                            $starterPos = null;
                            $toNormalize = '';
                        }
                        // captures the starter
                        $normalized .= $char;
                    } else if ($toNormalize != '') {
                        // there are already characters to normalize
                        // adds the character to the substring to normalize
                        $toNormalize .= $char;
                    } else {
                        if ($starterPos === null and $normalized != '') {
                            // captures the starter/previous character position
                            $starterPos = (mb_strlen($normalized, 'UTF-8') - 1);
                        }

                        if ($prevCombClass <= $combiningClass) {
                            // the combining classes are in order, adds the normalized character
                            $normalized .= $char;
                        } else {
                            // the character is out of order, adds it to the substring to normalize
                            $toNormalize .= $char;
                        }
                    }
                    $prevCombClass = $combiningClass;
                } else {
                    // the character does not pass the quick check
                    if ($starterPos === null and $normalized != '') {
                        // captures the character position
                        $starterPos = (mb_strlen($normalized, 'UTF-8') - 1);
                    }

                    if ($isDecomposition) {
                        // a decomposition normalization
                        // loads the decomposition mappings if not loaded yet
                        if (!isset(self::$decomp[$type]) and isset($this->decompTypes[$type])) {
                            self::$decomp[$type] = require($this->compiled[$this->decompTypes[$type]]);
                        }
                        if (isset(self::$decomp[$type][$char])) {
                            // the character has a decomposition
                            $char = self::$decomp[$type][$char];
                        }
                    }
                    // adds the character to the substring to normalize
                    $toNormalize .= $char;
                }
            }

            if ($toNormalize != '') {
                // there is a trailing substring to normalize
                // resets the normalized string to the first starter
                $this->resetToStarter($normalized, $toNormalize, $starterPos);

                if ($isComposition) {
                    // decompose-normalizes the substring, recomposes the substring
                    $toNormalize = $this->normalize($toNormalize, $decompType);
                    $normalized .= $this->recompose($toNormalize);
                } else {
                    // decomposition normalization, resorts the substring to normalize
                    $normalized .= $this->resortDecomp($toNormalize);
                }
            }
        }

        if ($encoding != 'UTF-8') {
            // encodes the string back to its original encoding
            $normalized = mb_convert_encoding($normalized, $encoding, 'UTF-8');
        }

        return $normalized;
    }

    /**
     * Recomposes a string
     *
     * Recomposes character sequences into unique characters.
     * The string is scanned keeping track of the last starter and of the
     * non-starters that could not be composed with it. The canonical combining
     * classes are expected to be loaded already (normalize() loads them).
     *
     * @param  string  $string the string to recompose
     * @return string  the recomposed string
     * @access private
     */
    private function recompose($string)
    {
        // loads the characters compositions
        if (!self::$compositions) {
            self::$compositions = require($this->compiled['compositions']);
        }

        $noneStarters = '';
        $starter = '';
        $recomposed = '';
        $prevChar = '';
        $prevCombiningClass = 0;
        $isComposed = false;
        $length = strlen($string);

        for ($i = 0; $i < $length;) {
            // reads an ASCII character directly, or gets the next multi-byte character
            // note: getChar() could be called directly but this increases the performance by 10-20%
            // note: the square bracket offset syntax is required, the curly brace
            // syntax is no longer accepted as of PHP 8.0
            $char = $string[$i];
            if ($char < "\x80") {
                ++$i;
            } else {
                $char = $this->string->getChar($string, $i, $length);
            }

            if ($isComposed) {
                // the character is already recomposed into a hangul starter
                $isComposed = false;
                continue;
            }

            $combiningClass = isset(self::$combining[$char]) ? self::$combining[$char] : 0;

            if ($combiningClass) {
                // the character is not a starter
                if (($prevCombiningClass < $combiningClass or $prevChar === $char) and
                        $starter and isset(self::$compositions[$starter . $char])) {
                    // the character is not blocked from starter, or
                    // a character is not blocked by the same preceding character, and
                    // starter + character can be composed, composes starter + character
                    $prevChar = $starter = self::$compositions[$starter . $char];
                    $prevCombiningClass = 0;
                } else {
                    // the character cannot be composed with the starter, captures the character
                    $noneStarters .= $char;
                    $prevChar = $char;
                    $prevCombiningClass = $combiningClass;
                }
                continue;
            }

            if (!$noneStarters and $starter) {
                // the character is a starter directly following a starter
                // fourth argument "true": presumably reads the next character
                // without moving $i forward; verify against getChar()
                $nextChar = $this->string->getChar($string, $i, $length, true);

                if ($nextChar != '' and
                    isset(self::$compositions[$starter . $char . $nextChar])) {
                    // there is another character to come, and
                    // starter + current + next characters can be composed into a hangul character
                    // composes the starter + characters, the next character will be skipped
                    $prevChar = $starter = self::$compositions[$starter . $char . $nextChar];
                    $prevCombiningClass = 0;
                    $isComposed = true;
                    continue;
                }

                if (isset(self::$compositions[$starter . $char])) {
                    // composes starter + character
                    $prevChar = $starter = self::$compositions[$starter . $char];
                    $prevCombiningClass = 0;
                    continue;
                }
            }

            // the character is a starter that cannot be composed
            // adds the previous starter, adds and resets the none starter characters
            $recomposed .= $starter . $noneStarters;
            $noneStarters = '';
            // sets the new starter, whose combining class is 0
            $prevChar = $starter = $char;
            $prevCombiningClass = 0;
        }
        // adds the last recomposed substring
        $recomposed .= $starter . $noneStarters;

        return $recomposed;
    }

    /**
     * Resets the normalized string to the last starter
     *
     * Moves the end of the normalized string, from the last starter position on,
     * to the beginning of the substring to normalize. Does nothing if there is
     * no starter position.
     *
     * @param  string  &$normalized  the normalized (sub)string
     * @param  string  &$toNormalize the (sub)string to normalize
     * @param  integer $starterPos   the last starter position, in characters,
     *                               or null if there is none
     * @return void
     * @access private
     */
    private function resetToStarter(&$normalized, &$toNormalize, $starterPos)
    {
        if ($starterPos === null) {
            // no starter position, nothing to move
            return;
        }

        // extracts the substring from the last starter on
        // note: the 32-bit max integer value is used as the length instead of
        // PHP_INT_MAX because of PHP Bug #42101: mb_substr error if length = PHP_INT_MAX
        $tail = mb_substr($normalized, $starterPos, 2147483647 , 'UTF-8');
        // prepends it to the substring to normalize
        $toNormalize = $tail . $toNormalize;
        // strips the normalized string from the last starter on
        $normalized = mb_substr($normalized, 0, $starterPos, 'UTF-8');
    }

    /**
     * Resorts a decomposed string
     *
     * Within each run of non-starter characters located between two starters,
     * the characters are sorted on their combining classes in ascending order.
     * Characters with the same combining class keep their original relative
     * order, whatever the length of the run.
     * The canonical combining classes are expected to be loaded already
     * (normalize() loads them).
     *
     * @param  string  $string the decomposed string
     * @return string  the resorted string
     * @access private
     */
    private function resortDecomp($string)
    {
        $resorted = '';
        $noneStarters = array();
        $classes = array();
        $length = strlen($string);

        for ($i = 0; $i < $length;) {
            // reads an ASCII character directly, or gets the next multi-byte character
            // note: getChar() could be called directly but this increases the performance by 10-20%
            // note: the square bracket offset syntax is required, the curly brace
            // syntax is no longer accepted as of PHP 8.0
            $char = $string[$i];
            if ($char < "\x80") {
                ++$i;
            } else {
                $char = $this->string->getChar($string, $i, $length);
            }
            // gets the character combining class
            $combiningClass = isset(self::$combining[$char]) ? self::$combining[$char] : 0;

            if ($combiningClass) {
                // the character is not a starter
                // captures the character to resort and its combining class
                $noneStarters[] = $char;
                $classes[] = $combiningClass;
                continue;
            }

            // the character is a starter
            if ($noneStarters) {
                // there are characters between starters, adds them resorted
                $resorted .= $this->implodeSortedNoneStarters($noneStarters, $classes);
                $noneStarters = array();
                $classes = array();
            }
            // adds the starter
            $resorted .= $char;
        }

        if ($noneStarters) {
            // there are trailing none starter characters, adds them resorted
            $resorted .= $this->implodeSortedNoneStarters($noneStarters, $classes);
        }

        return $resorted;
    }

    /**
     * Sorts none starter characters on their combining classes and joins them
     *
     * The original position is used as the second sort key, so that characters
     * with the same combining class are never reordered.
     *
     * @param  array   $noneStarters the none starter characters, in string order
     * @param  array   $classes      the combining class of each character
     * @return string  the sorted characters, concatenated
     * @access private
     */
    private function implodeSortedNoneStarters(array $noneStarters, array $classes)
    {
        $positions = array_keys($noneStarters);
        array_multisort($classes, SORT_ASC, SORT_NUMERIC, $positions, SORT_ASC, SORT_NUMERIC, $noneStarters);

        return implode('', $noneStarters);
    }

    /**
     * NFC-normalizes a string
     *
     * Static shortcut for normalize() with the 'NFC' type.
     *
     * @param  string $string   the string to normalize
     * @param  string $encoding the string encoding, must be one of mb_list_encodings(),
     *                          e.g. 'UTF-16', 'UTF-8' is the default
     * @return mixed  the normalized string, or false if the encoding is unknown
     * @access public
     * @static
     */
    public static function toNFC($string, $encoding = null)
    {
        $instance = new self();

        return $instance->normalize($string, 'NFC', $encoding);
    }

    /**
     * NFD-normalizes a string
     *
     * Static shortcut for normalize() with the 'NFD' type.
     *
     * @param  string $string   the string to normalize
     * @param  string $encoding the string encoding, must be one of mb_list_encodings(),
     *                          e.g. 'UTF-16', 'UTF-8' is the default
     * @return mixed  the normalized string, or false if the encoding is unknown
     * @access public
     * @static
     */
    public static function toNFD($string, $encoding = null)
    {
        $instance = new self();

        return $instance->normalize($string, 'NFD', $encoding);
    }

    /**
     * NFKC-normalizes a string
     *
     * Static shortcut for normalize() with the 'NFKC' type.
     *
     * @param  string $string   the string to normalize
     * @param  string $encoding the string encoding, must be one of mb_list_encodings(),
     *                          e.g. 'UTF-16', 'UTF-8' is the default
     * @return mixed  the normalized string, or false if the encoding is unknown
     * @access public
     * @static
     */
    public static function toNFKC($string, $encoding = null)
    {
        $instance = new self();

        return $instance->normalize($string, 'NFKC', $encoding);
    }

    /**
     * NFKD-normalizes a string
     *
     * Static shortcut for normalize() with the 'NFKD' type.
     *
     * @param  string $string   the string to normalize
     * @param  string $encoding the string encoding, must be one of mb_list_encodings(),
     *                          e.g. 'UTF-16', 'UTF-8' is the default
     * @return mixed  the normalized string, or false if the encoding is unknown
     * @access public
     * @static
     */
    public static function toNFKD($string, $encoding = null)
    {
        $instance = new self();

        return $instance->normalize($string, 'NFKD', $encoding);
    }
}

?>