| Current File : //opt/RZphp82/includes/XML/Query2XML/ISO9075Mapper.php |
<?php
/**
* This file contains the class XML_Query2XML_ISO9075Mapper.
*
* PHP version 5
*
* @category XML
* @package XML_Query2XML
* @author Lukas Feiler <lukas.feiler@lukasfeiler.com>
* @copyright 2006 Lukas Feiler
* @license http://www.gnu.org/copyleft/lesser.html LGPL Version 2.1
* @version CVS: $Id: ISO9075Mapper.php 302637 2010-08-22 14:34:31Z lukasfeiler $
* @link http://pear.php.net/package/XML_Query2XML
*/
/**
* PEAR_Exception is used as the parent for XML_Query2XML_ISO9075Mapper_Exception.
*/
require_once 'PEAR/Exception.php';
/**
* I18N_UnicodeString is used for converting UTF-8 to Unicode and vice versa.
*/
require_once 'I18N/UnicodeString.php';
/**
* Maps SQL identifiers to XML names according to Final Committee Draft for
* ISO/IEC 9075-14:2005, section "9.1 Mapping SQL <identifier>s to XML Names".
*
* ISO/IEC 9075-14:2005 is available online at
* http://www.sqlx.org/SQL-XML-documents/5FCD-14-XML-2004-07.pdf
*
* A lot of characters are legal in SQL identifiers but cannot be used within
* XML names. To begin with, SQL identifiers can contain any Unicode character
* while XML names are limited to a certain set of characters. E.g. the
* SQL identifier "<21yrs in age" obviously is not a valid XML name.
* '#', '{', and '}' are also not allowed. Fully escaped SQL identifiers
* also must not contain a colon (':') or start with "xml" (in any case
* combination). Illegal characters are mapped to a string of the form
* _xUUUU_ where UUUU is the hexadecimal Unicode value of the character,
* zero-padded on the left to at least four digits.
*
* The following is a table of example mappings:
* <pre>
* +----------------+------------------------+------------------------------------+
* | SQL-Identifier | Fully escaped XML name | Comment |
* +----------------+------------------------+------------------------------------+
* | dept:id | dept_x003A_id | ":" is illegal |
* | xml_name | _x0078_ml_name | must not start with [Xx][Mm][Ll] |
* | XML_name | _x0058_ML_name | must not start with [Xx][Mm][Ll] |
* | hire date | hire_x0020_date | space is illegal too |
* | Works@home | Works_x0040_home | "@" is illegal |
* | file_xls | file_x005F_xls | "_" gets mapped if followed by "x" |
* | FIRST_NAME | FIRST_NAME | no problem here |
* +----------------+------------------------+------------------------------------+
* </pre>
*
* @category XML
* @package XML_Query2XML
* @author Lukas Feiler <lukas.feiler@lukasfeiler.com>
* @copyright 2006 Lukas Feiler
* @license http://www.gnu.org/copyleft/lesser.html LGPL Version 2.1
* @version Release: 1.7.2
* @link http://pear.php.net/package/XML_Query2XML
*/
class XML_Query2XML_ISO9075Mapper
{
    /**
     * Inclusive code point ranges of the XML 1.1 NameStartChar production:
     * NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] |
     *                   [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
     *                   [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
     *                   [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
     *                   [#x10000-#xEFFFF]
     *
     * @var array
     * @link http://www.w3.org/TR/xml11/
     */
    private static $_nameStartCharRanges = array(
        array(0x3A, 0x3A),       // ":"
        array(0x41, 0x5A),       // [A-Z]
        array(0x5F, 0x5F),       // "_"
        array(0x61, 0x7A),       // [a-z]
        array(0xC0, 0xD6),
        array(0xD8, 0xF6),
        array(0xF8, 0x2FF),
        array(0x370, 0x37D),
        array(0x37F, 0x1FFF),
        array(0x200C, 0x200D),
        array(0x2070, 0x218F),
        array(0x2C00, 0x2FEF),
        array(0x3001, 0xD7FF),
        array(0xF900, 0xFDCF),
        array(0xFDF0, 0xFFFD),
        array(0x10000, 0xEFFFF),
    );

    /**
     * Inclusive code point ranges that the XML 1.1 NameChar production allows
     * in addition to NameStartChar:
     * NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] |
     *              [#x203F-#x2040]
     *
     * @var array
     * @link http://www.w3.org/TR/xml11/
     */
    private static $_nameCharExtraRanges = array(
        array(0x2D, 0x2E),       // "-" and "."
        array(0x30, 0x39),       // [0-9]
        array(0xB7, 0xB7),
        array(0x0300, 0x036F),
        array(0x203F, 0x2040),
    );

    /**
     * This method maps an SQL identifier to an XML name according to
     * FCD ISO/IEC 9075-14:2005, section 9.1. A full escape is always performed.
     *
     * @param string $sqlIdentifier The SQL identifier as a UTF-8 string.
     *
     * @return string The fully escaped XML name.
     * @throws XML_Query2XML_ISO9075Mapper_Exception If $sqlIdentifier was a
     *                                               malformed UTF-8 string.
     */
    public static function map($sqlIdentifier)
    {
        /*
         * S as defined in section 9.1, paragraph 1 with the difference that
         * if N is the number of characters in SQLI, the characters of SQLI,
         * in order from left to right, are S[0], S[1], ..., S[N-1].
         */
        $S = self::_utf8ToUnicode($sqlIdentifier);
        $N = count($S);

        /*
         * X as defined in section 9.1, paragraph 4 with the difference that
         * for each i between 0 (zero) and N-1, X[i] is the replacement
         * string for S[i].
         */
        $X = array();

        for ($i = 0; $i < $N; $i++) {
            // The UTF-8 form of S[i]; used for the literal comparisons below
            // and copied into the result when no escaping is required.
            $char = self::_unicodeToUtf8($S[$i]);

            if ($char === ':') {
                /*
                 * section 9.1, paragraph 4 lit b: S[i] is <colon>.
                 * Sub-cases i) (i = 0) and ii) (EV is fully escaped) both
                 * yield _x003A_. Sub-case iii) only applies to partial
                 * escaping, which this class never performs.
                 */
                $X[$i] = '_x003A_';
            } elseif ($char === '_'
                && $i < $N - 1
                && self::_unicodeToUtf8($S[$i + 1]) === 'x'
            ) {
                /*
                 * section 9.1, paragraph 4 lit c: if i < N-1, S[i] is
                 * <underscore>, and S[i+1] is the lowercase letter x,
                 * then let X[i] be _x005F_.
                 */
                $X[$i] = '_x005F_';
            } elseif (!self::_isValidNameChar($S[$i])
                || ($i === 0 && !self::_isValidNameStartChar($S[$i]))
            ) {
                /*
                 * section 9.1, paragraph 4 lit e: either T[i] is not a valid
                 * XML 1.1 NameChar, or i = 0 (zero) and T[0] is not a valid
                 * XML 1.1 NameStartChar.
                 */
                $X[$i] = self::_toHexEscape($S[$i]);
            } else {
                /*
                 * section 9.1, paragraph 4 lit f: Otherwise, let X[i] be T[i].
                 * NOTE 21 - That is, any character in SQLI that does not
                 * occasion a problem as a character in an XML 1.0 NCName or
                 * XML 1.1 NCName is simply copied into the result.
                 */
                $X[$i] = $char;
            }
        }

        /*
         * section 9.1, paragraph 4 lit d: if EV is fully escaped,
         * i = 0 (zero), N >= 3, S[0] is either the uppercase letter
         * X or the lowercase letter x, S[1] is either the uppercase
         * letter M or the lowercase letter m, and S[2] is either the
         * uppercase letter L or the lowercase letter l, then X[0] is escaped.
         * This runs after the loop and overrides the X[0] computed there.
         */
        if ($N >= 3) {
            $first  = self::_unicodeToUtf8($S[0]);
            $prefix = strtolower(
                $first
                . self::_unicodeToUtf8($S[1])
                . self::_unicodeToUtf8($S[2])
            );
            if (strncmp($prefix, 'xml', 3) === 0) {
                if ($first === 'x') {
                    // i) If S[0] is the lowercase letter x, let X[0] be _x0078_.
                    $X[0] = '_x0078_';
                } elseif ($first === 'X') {
                    // ii) If S[0] is the uppercase letter X, let X[0] be _x0058_.
                    $X[0] = '_x0058_';
                }
            }
        }

        /*
         * section 9.1, paragraph 5: let XMLN be the character string
         * concatenation of X[0], X[1], ..., and X[N-1] in order from left
         * to right.
         */
        return implode('', $X);
    }

    /**
     * Builds the _x...._ escape sequence for a single code point as
     * described in section 9.1, paragraph 4 lit e, sub-item ii).
     *
     * @param int $c A unicode character as an integer.
     *
     * @return string The escape sequence, e.g. _x0020_ for U+0020.
     */
    private static function _toHexEscape($c)
    {
        /*
         * NOTE(review): dechex() produces lowercase hexits (e.g. _x002f_),
         * while the literal escapes used by map() are uppercase. This is
         * left unchanged because the generated names are observable output;
         * confirm whether uppercase is wanted.
         */
        $hex = dechex($c);

        /*
         * ii) 1) If U1 to U4 are all 0 (zero), let X[i] be _xU5U6U7U8_,
         *        i.e. four hexits.
         * ii) 2) Otherwise, let X[i] be _xU3U4U5U6U7U8_, i.e. six hexits.
         * str_pad() never truncates, so a longer value is kept as is.
         */
        $width = strlen($hex) > 4 ? 6 : 4;

        return '_x' . str_pad($hex, $width, '0', STR_PAD_LEFT) . '_';
    }

    /**
     * Returns whether $c lies within one of the given inclusive ranges.
     *
     * @param int   $c      A unicode character as an integer.
     * @param array $ranges A list of array(lowest, highest) pairs.
     *
     * @return boolean Whether $c is covered by one of the ranges.
     */
    private static function _isInRanges($c, array $ranges)
    {
        foreach ($ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether $c is a valid XML 1.1 NameStartChar.
     *
     * @param int $c A unicode character as an integer.
     *
     * @return boolean Whether $c is a valid NameStartChar.
     * @link http://www.w3.org/TR/xml11/
     */
    private static function _isValidNameStartChar($c)
    {
        return self::_isInRanges($c, self::$_nameStartCharRanges);
    }

    /**
     * Returns whether $c is a valid XML 1.1 NameChar, i.e. either a
     * NameStartChar or one of the additional characters NameChar allows.
     *
     * @param int $c A unicode character as an integer.
     *
     * @return boolean Whether $c is a valid NameChar.
     * @link http://www.w3.org/TR/xml11/
     */
    private static function _isValidNameChar($c)
    {
        return self::_isValidNameStartChar($c)
            || self::_isInRanges($c, self::$_nameCharExtraRanges);
    }

    /**
     * Converts a single unicode character represented by an integer
     * to a UTF-8 character by delegating to I18N_UnicodeString.
     *
     * @param int $char The unicode character as an integer
     *
     * @return string The UTF-8 character.
     */
    private static function _unicodeToUtf8($char)
    {
        return I18N_UnicodeString::unicodeCharToUtf8($char);
    }

    /**
     * Converts a UTF-8 string into unicode integers by delegating to
     * I18N_UnicodeString, turning a returned PEAR_Error into an exception.
     *
     * @param string $string A string containing Unicode values encoded in UTF-8
     *
     * @return array The array of Unicode values.
     * @throws XML_Query2XML_ISO9075Mapper_Exception If a malformed UTF-8 string
     *                                               was passed as argument.
     */
    private static function _utf8ToUnicode($string)
    {
        $codePoints = I18N_UnicodeString::utf8ToUnicode($string);

        // instanceof also recognizes subclasses of PEAR_Error and evaluates
        // to false, without raising an error, if PEAR_Error is not loaded.
        if ($codePoints instanceof PEAR_Error) {
            /*
             * unit tests:
             *  testMapException1()
             *  testMapException2()
             *  testMapException3()
             */
            throw new XML_Query2XML_ISO9075Mapper_Exception(
                $codePoints->getMessage()
            );
        }
        return $codePoints;
    }
}
/**
* Only XML_Query2XML_ISO9075Mapper will throw this exception.
* It does not extend XML_Query2XML_Exception because the
* class XML_Query2XML_ISO9075Mapper should be usable without
* XML_Query2XML. XML_Query2XML itself will never throw this
* exception.
*
* @category XML
* @package XML_Query2XML
* @author Lukas Feiler <lukas.feiler@lukasfeiler.com>
* @license http://www.gnu.org/copyleft/lesser.html LGPL Version 2.1
* @link http://pear.php.net/package/XML_Query2XML
*/
class XML_Query2XML_ISO9075Mapper_Exception extends PEAR_Exception
{
    /**
     * Creates the exception; only the message is handed on to the
     * PEAR_Exception constructor.
     *
     * @param string $message The error message.
     */
    public function __construct($message)
    {
        // Naming the ancestor explicitly is equivalent to parent:: here.
        PEAR_Exception::__construct($message);
    }
}
?>