Current File : //opt/RZphp72/includes/test/I18N_UnicodeNormalizer/tests/benchmark/UnormVsMediawiki.php |
<?php
/**
* Unicode Normalizer
*
* PHP version 5
*
* All rights reserved.
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
* + Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* + Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* + The names of its contributors may not be used to endorse or
* promote products derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category Internationalization
* @package I18N_UnicodeNormalizer
* @author Michel Corne <mcorne@yahoo.com>
* @copyright 2007 Michel Corne
* @license http://www.opensource.org/licenses/bsd-license.php The BSD License
* @version SVN: $Id: UnormVsMediawiki.php 38 2007-07-23 11:42:30Z mcorne $
* @link http://pear.php.net/package/I18N_UnicodeNormalizer
*/
// Claims the PHPUnit main method for this file unless another entry point
// (e.g. a suite that includes this file) has already defined it. The check at
// the end of this file only runs main() when this definition took effect.
defined('PHPUnit_MAIN_METHOD') or define('PHPUnit_MAIN_METHOD', 'tests_Benchmark_UnormVsMediawiki::main');
require_once "PHPUnit/Framework/TestCase.php";
require_once "PHPUnit/Framework/TestSuite.php";
require_once 'I18N/UnicodeNormalizer.php';
require_once 'I18N/UnicodeNormalizer/String.php';
require_once 'I18N/UnicodeNormalizer/File.php';
require_once dirname(__FILE__) . '/mediawiki/UtfNormal.php';
/**
* Test class to benchmark against the Mediawiki normalizer
*
* The following static properties can be adjusted to customize the test:
* self::$textFiles and self::$denormalizationRatio.
* The result of the benchmark test is written to reports/benchmark.
*
* The measured performance improvement vs the Mediawiki normalizer is:
* 2.5X on already normalized text, 9X on 0.1% denormalized text, and 3.5X on 1%
* denormalized text. The benchmark is run on 12 major languages.
*
* @category Internationalization
* @package I18N_UnicodeNormalizer
* @author Michel Corne <mcorne@yahoo.com>
* @copyright 2007 Michel Corne
* @license http://www.opensource.org/licenses/bsd-license.php The BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/I18N_UnicodeNormalizer
*/
class tests_Benchmark_UnormVsMediawiki extends PHPUnit_Framework_TestCase
{
    /**
     * The result file header comment
     *
     * The %s placeholder is replaced with self::$denormalizationRatio.
     */
    const headerComment = 'Normalizer Benchmark test vs MediaWiki. Denormalization ratio = %s';

    /**
     * The glob pattern matching the text files to normalize
     *
     * @var string
     * @access private
     */
    private $filePaths = '';

    /**
     * The text files to normalize
     *
     * For examples:
     * * : the benchmark test normalizes all files
     * english : the benchmark test normalizes the english.txt file only
     *
     * @var string
     * @access public
     * @static
     */
    public static $textFiles = '*';

    /**
     * The denormalization ratio of the data files
     *
     * For example:
     * 0 = no denormalization
     * 0.01 = 1 every 100 characters is denormalized
     *
     * @var float
     * @access public
     * @static
     */
    public static $denormalizationRatio = 0.001;

    /**
     * The name list of compiled files
     *
     * @var array
     * @access private
     */
    private $compiled;

    /**
     * The path of the file the benchmark results are written to
     *
     * @var string
     * @access private
     */
    private $resultFile = '';

    /**
     * The string helper used to read a string character by character
     *
     * @var I18N_UnicodeNormalizer_String
     * @access private
     */
    private $string;

    /**
     * The file helper used to write the results file
     *
     * @var I18N_UnicodeNormalizer_File
     * @access private
     */
    private $file;

    /**
     * Runs the test methods of this class.
     *
     * @return void
     * @access public
     * @static
     */
    public static function main()
    {
        require_once "PHPUnit/TextUI/TestRunner.php";

        $suite = new PHPUnit_Framework_TestSuite("tests_Benchmark_UnormVsMediawiki");
        PHPUnit_TextUI_TestRunner::run($suite);
    }

    /**
     * Sets up the fixture: the data files pattern, the results file name,
     * the compiled file names, and the string and file helpers.
     * This method is called before a test is executed.
     *
     * @return void
     * @access protected
     */
    protected function setUp()
    {
        // creates the data files pathnames (a glob pattern)
        $benchmarkDir = dirname(__FILE__);
        $this->filePaths = $benchmarkDir . '/data/' . self::$textFiles . '.txt';

        // creates the test results file name; the directory is concatenated
        // instead of being embedded in the format string so that a "%" in the
        // path is never interpreted as a sprintf() conversion
        $testsDir = dirname($benchmarkDir);
        $this->resultFile = $testsDir
            . sprintf('/reports/benchmark/UnormVsMediawiki-%s.php', self::$denormalizationRatio);

        // gets the compiled file names
        $this->compiled = I18N_UnicodeNormalizer::getFileNames("$testsDir/data");

        $this->string = new I18N_UnicodeNormalizer_String();
        $this->file = new I18N_UnicodeNormalizer_File();
    }

    /**
     * Tears down the fixture.
     * This method is called after a test is executed. Nothing to release here.
     *
     * @return void
     * @access protected
     */
    protected function tearDown()
    {
    }

    /**
     * Computes the time and performance averages
     *
     * @param array $conso the consolidated list of times and performances
     *                     for all data files, indexed by normalization type
     *                     then by measure name
     * @return array the average of each measure, rounded to 1 decimal,
     *               indexed like $conso
     * @access private
     */
    private function calcAverage($conso)
    {
        $averages = array();

        foreach ($conso as $type => $typeConso) {
            foreach ($typeConso as $key => $values) {
                if (!$values) {
                    // no measure was captured: there is no average to compute
                    continue;
                }
                $averages[$type][$key] = round(array_sum($values) / count($values), 1);
            }
        }

        return $averages;
    }

    /**
     * Denormalizes a string
     *
     * Characters of the string are replaced at random intervals with
     * characters taken in turn from $denormChars. The interval between two
     * replacements is drawn between 0 and (1 / ratio - 1) characters.
     *
     * @param string $string      the string to denormalize
     * @param array  $denormChars the set of denormalized characters
     * @return string the denormalized string, or the string unchanged if the
     *                denormalization ratio is 0 or $denormChars is empty
     * @access private
     */
    private function denormString($string, $denormChars)
    {
        if (!self::$denormalizationRatio || !$denormChars) {
            // no denormalization requested, or no replacement character available
            return $string;
        }

        // the explicit cast avoids passing a float to rand()
        $maxRandom = (int) (1 / abs(self::$denormalizationRatio) - 1);
        // the number of characters to copy before the next replacement
        $frequency = rand(0, $maxRandom);
        $denormString = '';
        $length = strlen($string);

        // NOTE(review): the loop has no increment; getChar() presumably
        // advances $i by reference -- confirm against I18N_UnicodeNormalizer_String
        for ($i = 0; $i < $length;) {
            $char = $this->string->getChar($string, $i, $length);

            if ($frequency--) {
                // the countdown is not over: keeps the original character
                $denormString .= $char;
                continue;
            }

            // replaces the character with the next denormalized character,
            // wrapping around to the first one at the end of the list
            $char = next($denormChars);
            if ($char === false) {
                $char = reset($denormChars);
            }
            $denormString .= $char;
            // randomizes the position of the next character to denormalize
            $frequency = rand(0, $maxRandom);
        }

        return $denormString;
    }

    /**
     * Gets denormalized characters from the test file
     *
     * A column value is collected when it differs from the column it is
     * paired with in the NFC rule. Duplicates are removed and the list is
     * shuffled.
     *
     * @return array the list of denormalized characters
     * @access private
     * @see tests_UnicodeNormalizerTest:rules
     */
    private function getDenormChars()
    {
        // sets the NFC rule: column to check => column it is compared to
        $NFCRule = array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4);
        $chars = array();

        // the compiled test file returns the list of test rows when required
        foreach (require $this->compiled['test_base'] as $columns) {
            foreach ($NFCRule as $i => $j) {
                if ($columns[$i] != $columns[$j]) {
                    $chars[] = $columns[$i];
                }
            }
        }

        $chars = array_unique($chars);
        shuffle($chars);

        return $chars;
    }

    /**
     * Times the normalization
     *
     * @param string $class  the name of the class/normalizer to use
     * @param string $type   the type of normalization: 'NFC', 'NFD', 'NFKC', or 'NFKD'
     * @param string $string the string to normalize
     * @return array the normalized string and the time in seconds to
     *               normalize the string
     * @access private
     */
    private function timeNormalize($class, $type, $string)
    {
        // starts the timer, normalizes the string by calling $class::to<type>()
        $start = microtime(true);
        $normalized = call_user_func(array($class, "to$type"), $string);
        // captures the elapsed time to normalize
        $time = microtime(true) - $start;

        return array($normalized, $time);
    }

    /**
     * Benchmarks the normalization against the Mediawiki normalization
     *
     * Each data file is stripped of its tags, denormalized, then normalized
     * in the 4 normalization forms by both normalizers. The detailed results
     * and the averages per normalization form are passed to the file helper
     * along with the results file name.
     *
     * @return void
     * @access public
     */
    public function testUnormVsMediawiki()
    {
        // the smallest divisor used to compute the performance ratio, so that
        // an elapsed time measured as 0 cannot cause a division by zero
        $minTime = 1.0e-6;

        // gets denormalized characters from the test file
        $denormChars = $this->getDenormChars();
        $results = array('summary' => array(), 'details' => array());
        $conso = array();

        // glob() returns false on error: treats it as no file to process
        $fileNames = glob($this->filePaths);
        if ($fileNames === false) {
            $fileNames = array();
        }

        foreach ($fileNames as $fileName) {
            $baseName = basename($fileName, '.txt');
            $string = file_get_contents($fileName);
            if ($string === false) {
                // unreadable file: skips it instead of benchmarking an empty string
                continue;
            }
            $string = strip_tags($string);
            $string = $this->denormString($string, $denormChars);

            foreach (array('NFC', 'NFD', 'NFKC', 'NFKD') as $type) {
                // normalizes with the package class, and the mediawiki class, calculates the performance
                list($normStr0, $time0) = $this->timeNormalize('I18N_UnicodeNormalizer', $type, $string);
                list($normStr1, $time1) = $this->timeNormalize('UtfNormal', $type, $string);
                $performance = $time1 / max($time0, $minTime);

                // captures results, checks that both normalizations are the same;
                // strict comparisons so that strings are compared byte for byte
                $results['details'][$baseName][$type] = array(
                    'performance (X)' => round($performance, 1),
                    'same' => $normStr0 === $normStr1,
                    'I18N_UnicodeNormalizer' => array(
                        'time (s)' => round($time0, 1),
                        'is_normalized' => $normStr0 === $string,
                    ),
                    'UtfNormal' => array(
                        'time (s)' => round($time1, 1),
                        'is_normalized' => $normStr1 === $string,
                    ),
                );

                // captures the performance and times to compute the averages
                $conso[$type]['performance (X)'][] = $performance;
                $conso[$type]['I18N_UnicodeNormalizer (s)'][] = $time0;
                $conso[$type]['UtfNormal (s)'][] = $time1;
            }
        }

        // calculates the performance averages for all languages
        $results['summary'] = $this->calcAverage($conso);

        // updates the results file
        $comment = sprintf(self::headerComment, self::$denormalizationRatio);
        $this->file->put($this->resultFile, $results, __CLASS__ . '::' . __FUNCTION__, $this->filePaths, $comment);
    }
}
// Call tests_Benchmark_UnormVsMediawiki::main() if this source file is executed directly,
// i.e. if PHPUnit_MAIN_METHOD holds the value this file defines for itself.
// The comparison is strict so that a constant defined elsewhere as a non-string
// value (e.g. true or 0) cannot match the method name by type juggling.
if (PHPUnit_MAIN_METHOD === "tests_Benchmark_UnormVsMediawiki::main") {
    tests_Benchmark_UnormVsMediawiki::main();
}
?>