Current File : //opt/RZphp72/includes/test/I18N_UnicodeNormalizer/tests/UnicodeNormalizerTest.php |
<?php
/**
* Unicode Normalizer
*
* PHP version 5
*
* All rights reserved.
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
* + Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* + Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* + The names of its contributors may not be used to endorse or
* promote products derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category Internationalization
* @package I18N_UnicodeNormalizer
* @author Michel Corne <mcorne@yahoo.com>
* @copyright 2007 Michel Corne
* @license http://www.opensource.org/licenses/bsd-license.php The BSD License
* @version SVN: $Id: UnicodeNormalizerTest.php 38 2007-07-23 11:42:30Z mcorne $
* @link http://pear.php.net/package/I18N_UnicodeNormalizer
*/
// Declares tests_UnicodeNormalizerTest::main() as the PHPUnit main method,
// unless the PHPUnit_MAIN_METHOD constant has already been defined.
defined('PHPUnit_MAIN_METHOD') or define('PHPUnit_MAIN_METHOD', 'tests_UnicodeNormalizerTest::main');
require_once "PHPUnit/Framework/TestCase.php";
require_once "PHPUnit/Framework/TestSuite.php";
require_once 'I18N/UnicodeNormalizer.php';
/**
* Test class for I18N_UnicodeNormalizer.
* Generated by PHPUnit_Util_Skeleton on 2007-05-18 at 18:49:09.
*
* The following flags can be adjusted to customize the tests:
* self::$codeCoverageTest, self::$forceCompile, self::$generateUCN,
* self::$mediawikiNormalization and self::$runNormalizationTest.
* The result of the normalization regression test is in reports/regression.
*
* @category Internationalization
* @package I18N_UnicodeNormalizer
* @author Michel Corne <mcorne@yahoo.com>
* @copyright 2007 Michel Corne
* @license http://www.opensource.org/licenses/bsd-license.php The BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/I18N_UnicodeNormalizer
*/
class tests_UnicodeNormalizerTest extends PHPUnit_Framework_TestCase
{
    /**
     * The code coverage test flag
     *
     * Overrules: self::$generateUCN, self::$mediawikiNormalization, self::$forceCompile.
     * Controls the normalizer to run with a limited set of the Unicode data.
     * This flag must be set when phpunit is run with the --report option, so tests
     * are run with a limited set of the Unicode.org data. The normalizer will then
     * include small compiled files and phpunit/xdebug will not exceed memory requirements
     * to generate the code coverage report.
     *
     * @var boolean
     * @access public
     * @static
     */
    public static $codeCoverageTest = false;

    /**
     * Controls the compiler to force-compile files
     *
     * @var boolean
     * @access public
     * @static
     */
    public static $forceCompile = false;

    /**
     * Controls the compilation of Unicode data to the UCN format
     *
     * When selected, the compilation to the UCN format is done in addition to
     * the compilation to UTF-8. The compiled files in UCN format are used
     * for debugging purposes.
     *
     * @var boolean
     * @access public
     * @static
     */
    public static $generateUCN = false;

    /**
     * Controls the normalization test
     *
     * The normalization test uses either the I18N_UnicodeNormalizer class if false
     * or the mediawiki UtfNormal class if true
     *
     * @var boolean
     * @access public
     * @static
     */
    public static $mediawikiNormalization = false;

    /**
     * Runs the Unicode.org normalization regression test
     *
     * @var boolean
     * @access public
     * @static
     */
    public static $runNormalizationTest = true;

    /**
     * Allows the compiler to force-compile the files only during the first setup call
     *
     * @var boolean
     * @access private
     * @static
     */
    private static $firstSetup = true;

    /**
     * The name list of compiled files
     *
     * @var array
     * @access private
     */
    private $compiled;

    /**
     * The I18N_UnicodeNormalizer_File class instance
     *
     * Used to write the regression test report file.
     *
     * @var object
     * @access private
     */
    private $file;

    /**
     * The I18N_UnicodeNormalizer class instance
     *
     * @var object
     * @access private
     */
    private $normalizer;

    /**
     * The normalization class name
     *
     * Either 'I18N_UnicodeNormalizer' or 'UtfNormal', as selected in setUp().
     * The static toNFx() methods of this class are called by the regression test.
     *
     * @var string
     * @access private
     */
    private $normClass;

    /**
     * The test result file name
     *
     * @var string
     * @access private
     */
    private $resultFile;

    /**
     * The test rules
     *
     * Format: NFx(string-i) => string-j,
     * as defined in the header of the Unicode.org test file: NormalizationTest-*.txt
     *
     * @var array
     * @access private
     */
    private $rules = array(
        'NFC' => array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4),
        'NFD' => array(1 => 3, 2 => 3, 3 => 3, 4 => 5, 5 => 5),
        'NFKC' => array(1 => 4, 2 => 4, 3 => 4, 4 => 4, 5 => 4),
        'NFKD' => array(1 => 5, 2 => 5, 3 => 5, 4 => 5, 5 => 5),
    );

    /**
     * The I18N_UnicodeNormalizer_String class instance
     *
     * @var object
     * @access private
     */
    private $string;

    /**
     * Runs the test methods of this class.
     *
     * @access public
     * @static
     */
    public static function main()
    {
        require_once "PHPUnit/TextUI/TestRunner.php";

        // the suite is built from this very class; the former hard-coded name
        // "I18N_UnicodeNormalizerTest" did not match the class declared in this file
        $suite = new PHPUnit_Framework_TestSuite(__CLASS__);
        PHPUnit_TextUI_TestRunner::run($suite);
    }

    /**
     * Sets up the fixture: selects the normalization class and the result file,
     * compiles the Unicode data files and creates the helper instances.
     * This method is called before a test is executed.
     *
     * @access protected
     */
    protected function setUp()
    {
        $this->resultFile = dirname(__FILE__) . '/reports/regression/';

        if (self::$mediawikiNormalization && !self::$codeCoverageTest) {
            // test with the mediawiki class
            require_once 'benchmark/mediawiki/UtfNormal.php';
            $this->normClass = 'UtfNormal';
            $this->resultFile .= 'mediawiki-';
        } else {
            // test with this package class
            $this->normClass = 'I18N_UnicodeNormalizer';
            $this->resultFile .= 'i18n-unorm-';
        }

        if (self::$codeCoverageTest) {
            // test with a limited set of data
            $compiledRoot = dirname(__FILE__) . '/data';
            $this->resultFile .= 'limited.php';
        } else {
            // test with full data
            $compiledRoot = '';
            $this->resultFile .= 'all.php';
        }

        // files are force-compiled during the first setup only
        $forceCompile = self::$firstSetup && (self::$forceCompile || self::$codeCoverageTest);
        // this prevents re-running the compilations in the following setups
        self::$firstSetup = false;

        // compiles the Unicode data files to UTF-8
        $compile = new I18N_UnicodeNormalizer_Compiler($compiledRoot, 'utf8',
            $forceCompile, self::$codeCoverageTest);
        $compile->compileAll();

        if (self::$generateUCN || self::$codeCoverageTest) {
            // compiles the Unicode data files to the UCN format
            $compile = new I18N_UnicodeNormalizer_Compiler($compiledRoot, 'ucn',
                $forceCompile, self::$codeCoverageTest);
            $compile->compileAll();
        }

        // gets the compiled file names
        $this->compiled = I18N_UnicodeNormalizer::getFileNames($compiledRoot);

        $this->normalizer = new I18N_UnicodeNormalizer($compiledRoot);
        $this->string = new I18N_UnicodeNormalizer_String();
        $this->file = new I18N_UnicodeNormalizer_File();
    }

    /**
     * Tears down the fixture.
     * This method is called after a test is executed. Nothing needs to be released here.
     *
     * @access protected
     */
    protected function tearDown()
    {
    }

    /**
     * Base NFD regression test
     */
    public function testBaseNFD()
    {
        $this->assertNormalization('test_base', 'NFD');
    }

    /**
     * Base NFKD regression test
     */
    public function testBaseNFKD()
    {
        $this->assertNormalization('test_base', 'NFKD');
    }

    /**
     * Base NFC regression test
     */
    public function testBaseNFC()
    {
        $this->assertNormalization('test_base', 'NFC');
    }

    /**
     * Base NFKC regression test
     */
    public function testBaseNFKC()
    {
        $this->assertNormalization('test_base', 'NFKC');
    }

    /**
     * Hangul NFD regression test
     */
    public function testHangulNFD()
    {
        $this->assertNormalization('test_hangul', 'NFD');
    }

    /**
     * Hangul NFKD regression test
     */
    public function testHangulNFKD()
    {
        $this->assertNormalization('test_hangul', 'NFKD');
    }

    /**
     * Hangul NFC regression test
     */
    public function testHangulNFC()
    {
        $this->assertNormalization('test_hangul', 'NFC');
    }

    /**
     * Hangul NFKC regression test
     */
    public function testHangulNFKC()
    {
        $this->assertNormalization('test_hangul', 'NFKC');
    }

    /**
     * Tests getCharInfo()
     */
    public function testGetCharInfo()
    {
        // converts some composable Unicode code points to UTF-8
        $string = $this->string->unicode2string('\uAC01\u1100\u1161\u11A8\u0591');
        $expected = array(
            'quick_check' => array('\\u1161' => true, '\\u11A8' => true),
            'combining' => array('\\u0591' => 220),
            'decompositions' => array('\\uAC01' => '\\u1100\\u1161\\u11A8'),
            'compositions' => array('\\u1100\\u1161' => '\\uAC00', '\\u1100\\u1161\\u11A8' => '\\uAC01'),
        );

        $this->assertEquals($expected, $this->normalizer->getCharInfo($string, 'NFC'));
        $this->assertEquals(array(), $this->normalizer->getCharInfo('$', 'NFC'));
        $this->assertEquals(null, $this->normalizer->getCharInfo('$', 'invalid-type'));
    }

    /**
     * Tests isStarter()
     */
    public function testIsStarter()
    {
        // converts COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK to UTF-8, not a starter
        $string = $this->string->unicode2char('3099');

        $this->assertEquals(true, $this->normalizer->isStarter('$', 'NFD'));
        $this->assertEquals(true, $this->normalizer->isStarter('$', 'NFC'));
        $this->assertEquals(false, $this->normalizer->isStarter($string, 'NFKD'));
        $this->assertEquals(false, $this->normalizer->isStarter($string, 'NFKC'));
        $this->assertEquals(null, $this->normalizer->isStarter('$', 'invalid-type'));
    }

    /**
     * Encoding and type tests
     *
     * Code coverage testing cannot include the static I18N_UnicodeNormalizer::toNFx() methods
     * which use the whole Unicode.org data.
     *
     * @see self::$codeCoverageTest definition
     */
    public function testToNFx()
    {
        $string = "\xE0\xE9\xE7\xEF"; // "àéçï" encoded in ISO-8859-1

        if (self::$codeCoverageTest) {
            // uses the instance bound to the limited data set
            $this->assertEquals('aeci', $this->normalizer->normalize('aeci', 'NFD', 'ASCII'));
            $this->assertEquals($string, $this->normalizer->normalize($string, 'NFC', 'ISO-8859-1'));
            $this->assertEquals(false, $this->normalizer->normalize($string, 'NFKD', 'invalid-encoding'));
            $this->assertNotEquals($string, $this->normalizer->normalize($string, 'NFKC', 'ASCII'));
            $this->assertEquals('', $this->normalizer->normalize(''));
            $this->assertEquals(false, $this->normalizer->normalize('', 'invalid-type'));
        } else {
            // uses the static methods
            $this->assertEquals('aeci', I18N_UnicodeNormalizer::toNFD('aeci', 'ASCII'));
            $this->assertEquals($string, I18N_UnicodeNormalizer::toNFC($string, 'ISO-8859-1'));
            $this->assertEquals(false, I18N_UnicodeNormalizer::toNFKD($string, 'invalid-encoding'));
            $this->assertNotEquals($string, I18N_UnicodeNormalizer::toNFKC($string, 'ASCII'));
            $this->assertEquals('', I18N_UnicodeNormalizer::toNFC(''));
        }
    }

    /**
     * Runs a normalization regression test and fails the current test on error
     *
     * @param string $name the test name: 'test_base' or 'test_hangul'
     * @param string $type the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @return void
     * @access private
     */
    private function assertNormalization($name, $type)
    {
        $result = $this->normalizationTest($name, $type);

        if ($result !== true) {
            // the result is the error report built by normalizationTest()
            $this->fail($result);
        }
    }

    /**
     * Runs the normalization regression test
     *
     * Every selected column of every selected line of the compiled test file is
     * normalized, then checked against the rules of the normalization type.
     * The test stops at the first mismatch. On success, the test statistics are
     * appended to the results of the previous calls and written to the report file.
     * Nothing is run, and true is returned, if self::$runNormalizationTest is false.
     *
     * @param string $name the test name: 'test_base' or 'test_hangul'
     * @param string $type the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @param integer $targetLine the line number to test as defined in
     *                             the Unicode.org test file: NormalizationTest-*.txt,
     *                             all lines are tested by default
     * @param integer $targetColumn the column number to test: 1, 2, 3, 4, or 5,
     *                             as defined in the Unicode.org test file: NormalizationTest-*.txt,
     *                             all columns are tested by default
     * @return mixed true if no error, the error message if false
     * @access private
     */
    private function normalizationTest($name, $type, $targetLine = null, $targetColumn = null)
    {
        // shared by all calls: the loaded test files and the accumulated test results
        static $file = array();
        static $results = array();

        if (!self::$runNormalizationTest) {
            return true;
        }

        // loads the test file, once per test name
        if (!isset($file[$name])) {
            $file[$name] = require $this->compiled[$name];
        }

        // resets the timer, the test counter, the normalized code point cache
        $time = 0;
        $testCnt = 0;
        $normCache = array();

        foreach ($file[$name] as $line => $test) {
            // processes the test lines: all lines or the targeted line only
            if (!is_null($targetLine) && $targetLine != $line) {
                continue;
            }

            // starts from a clean slate so that columns normalized for a previous
            // line can never be checked against the rules of this line
            $normalized = array();

            foreach ($test as $column => $chars) {
                // processes the test line columns: all columns or the targeted column only
                if (!is_null($targetColumn) && $targetColumn != $column) {
                    continue;
                }

                // a unit test: increments the test counter
                $testCnt++;

                if (!isset($normCache[$chars])) {
                    // the UTF-8 string is not yet normalized: times the normalization
                    $start = microtime(true);

                    if (self::$codeCoverageTest) {
                        // the instance is bound to the limited data set
                        $normChars = $this->normalizer->normalize($chars, $type);
                    } else {
                        // calls the static method of the class selected in setUp(),
                        // so the mediawiki flag really runs the mediawiki class
                        $normChars = call_user_func(array($this->normClass, "to$type"), $chars);
                    }

                    // captures the elapsed time and caches the normalized UTF-8 string
                    $time += (microtime(true) - $start);
                    $normCache[$chars] = $normChars;
                }

                // captures the normalized UTF-8 string
                $normalized[$column] = $normCache[$chars];
            }

            foreach ($this->rules[$type] as $i => $j) {
                // processes the test rules; the strict comparison prevents numeric-looking
                // strings or a false result from being considered equal to the expected string
                if (!isset($normalized[$i]) || $normalized[$i] === $test[$j]) {
                    continue;
                }

                // the test failed: converts the tested, normalized and expected strings
                // to Unicode in UCN format; separate variables are used so a string
                // is not converted twice when the rule has identical columns (i == j)
                $tested = $this->string->string2unicode($test[$i]);
                $actual = $this->string->string2unicode($normalized[$i]);
                $expected = $this->string->string2unicode($test[$j]);

                // creates the error report: the test file, the test name,
                // and the tested, normalized and expected strings
                $error = array(
                    "file: {$this->compiled[$name]}",
                    "line #$line in unicodedata/NormalizationTest-*.txt",
                    "test: {$type}(c{$i}) == c{$j}",
                    "c{$i}\t\t= {$tested}",
                    "{$type}(c{$i}) \t= {$actual}",
                    "c{$j}\t\t= {$expected}",
                );

                return implode("\n", $error);
            }
        }

        // creates the test result: the test file, the normalization type,
        // the number of tests, of normalization method calls, and
        // the accumulated time to run the normalization tests
        $results[] = array(
            'file' => $this->compiled[$name],
            'type' => $type,
            'test_cnt' => $testCnt,
            'normalization_cnt' => count($normCache),
            'time (s)' => round($time, 1),
        );

        // creates the test report file
        // NOTE(review): dirname() returns no trailing slash, so the source label reads
        // "<dir>*.php" -- confirm whether "/*.php" was intended
        $this->file->put($this->resultFile, $results, __CLASS__ . '::' . __FUNCTION__,
            dirname($this->compiled[$name]) . '*.php', 'Unicode.org normalization test results');

        return true;
    }
}
// Runs the tests of this class when PHPUnit_MAIN_METHOD designates
// tests_UnicodeNormalizerTest::main(), i.e. when this file is the entry point.
PHPUnit_MAIN_METHOD == 'tests_UnicodeNormalizerTest::main' and tests_UnicodeNormalizerTest::main();
?>