StringValue.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. <?php
  2. /**
  3. * Artificial Neural Network - Version 2.2
  4. *
  5. * For updates and changes visit the project page at http://ann.thwien.de/
  6. *
  7. *
  8. *
  9. * <b>LICENCE</b>
  10. *
  11. * The BSD 2-Clause License
  12. *
  13. * http://opensource.org/licenses/bsd-license.php
  14. *
  15. * Copyright (c) 2007 - 2012, Thomas Wien
  16. * All rights reserved.
  17. *
  18. * Redistribution and use in source and binary forms, with or without
  19. * modification, are permitted provided that the following conditions
  20. * are met:
  21. *
  22. * 1. Redistributions of source code must retain the above copyright
  23. * notice, this list of conditions and the following disclaimer.
  24. *
  25. * 2. Redistributions in binary form must reproduce the above copyright
  26. * notice, this list of conditions and the following disclaimer in the
  27. * documentation and/or other materials provided with the distribution.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  32. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  33. * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  34. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  35. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  36. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  37. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  38. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  39. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  40. * POSSIBILITY OF SUCH DAMAGE.
  41. *
  42. * @author Thomas Wien <info_at_thwien_dot_de>
  43. * @version ANN Version 2.2 by Thomas Wien
  44. * @copyright Copyright (c) 2007-2012 by Thomas Wien
  45. * @package ANN
  46. */
  47. namespace ANN;
  48. /**
  49. * @package ANN
  50. * @access public
  51. */
  52. final class StringValue extends Filesystem implements InterfaceLoadable
  53. {
  54. /**#@+
  55. * @ignore
  56. */
  57. /**
  58. * @var integer
  59. */
  60. protected $intMaxStringLength;
  61. /**
  62. * @var boolean
  63. */
  64. protected $boolOverride = FALSE;
  65. /**
  66. * @var array
  67. */
  68. protected $arrMapping = array();
  69. /**#@-*/
  70. /**
  71. * @param integer $intMaxStringLength
  72. * @param boolean $boolOverride (Default: FALSE)
  73. * If $boolOverride is FALSE, an exception will be thrown if getInputValue() will
  74. * be called with outranged values. If $boolOverride is TRUE, no exception will be
  75. * thrown in this case, but lower values are replaced by $floatMin and upper values
  76. * are replaced by $floatMax.
  77. * @uses createMapping()
  78. * @throws Exception
  79. */
  80. public function __construct($intMaxStringLength, $boolOverride = FALSE)
  81. {
  82. mb_internal_encoding('UTF-8');
  83. if(!is_integer($intMaxStringLength) || $intMaxStringLength <= 0)
  84. throw new Exception('Constraints: $intMaxStringLength should be a positive integer number');
  85. if(!is_bool($boolOverride))
  86. throw new Exception('Constraints: $boolOverride should be boolean');
  87. $this->intMaxStringLength = $intMaxStringLength;
  88. $this->boolOverride = $boolOverride;
  89. $this->createMapping();
  90. }
  91. /**
  92. * @param string $strValue
  93. * @return array
  94. * @uses calculateInputValues()
  95. * @uses removeSpecialCharacters()
  96. * @throws Exception
  97. */
  98. public function getInputValue($strValue)
  99. {
  100. if(!is_string($strValue))
  101. throw new Exception('$strValue should be string');
  102. if(!$this->boolOverride && mb_strlen($strValue) > $this->intMaxStringLength)
  103. throw new Exception('$strValue is longer than max string length');
  104. substr($strValue, 0, $this->intMaxStringLength);
  105. $strValue = mb_strtolower($strValue);
  106. $strValue = $this->removeSpecialCharacters($strValue);
  107. return $this->calculateInputValues($strValue);
  108. }
  109. /**
  110. * @param string $strValue
  111. * @return string
  112. */
  113. protected function removeSpecialCharacters($strValue)
  114. {
  115. $strValue = preg_replace('/ /u', '', $strValue);
  116. $strValue = preg_replace('/[§\$%&)(=}{?!]/u', '', $strValue);
  117. return $strValue;
  118. }
  119. /**
  120. * @param string $strValue
  121. * @return array
  122. * @uses getMapping()
  123. */
  124. protected function calculateInputValues($strValue)
  125. {
  126. $arrReturn = array();
  127. $intStringLength = mb_strlen($strValue);
  128. for($intIndex = 0; $intIndex < $intStringLength; $intIndex++)
  129. {
  130. $strCharacter = mb_substr($strValue, $intIndex, 1);
  131. $arrReturn[] = $this->getMapping($strCharacter);
  132. }
  133. for(; $intIndex < $this->intMaxStringLength; $intIndex++)
  134. {
  135. $arrReturn[] = 0;
  136. }
  137. return $arrReturn;
  138. }
  139. /**
  140. * @param string $strCharacter
  141. * @return float
  142. * @throws Exception
  143. */
  144. protected function getMapping($strCharacter)
  145. {
  146. if(!isset($this->arrMapping[$strCharacter]))
  147. throw new Exception('Not convertable character '. $strCharacter);
  148. return $this->arrMapping[$strCharacter];
  149. }
  150. /**
  151. * @uses ordUTF8()
  152. * @uses createSimilarityMapping()
  153. * @throws Exception
  154. */
  155. protected function createMapping()
  156. {
  157. $arrCharacters = array('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  158. ' ', ',', ';', '.', ':', '-', '_', '!', '?');
  159. foreach($arrCharacters as $strCharacter)
  160. {
  161. $this->arrMapping[$strCharacter] = $this->ordUTF8($strCharacter) / 1000;
  162. if($this->arrMapping[$strCharacter] > 1)
  163. throw new Exception('Mapping exception');
  164. }
  165. $this->createSimilarityMapping();
  166. }
  167. protected function createSimilarityMapping()
  168. {
  169. $this->arrMapping['á'] = $this->arrMapping['a'];
  170. $this->arrMapping['à'] = $this->arrMapping['a'];
  171. $this->arrMapping['â'] = $this->arrMapping['a'];
  172. $this->arrMapping['é'] = $this->arrMapping['e'];
  173. $this->arrMapping['è'] = $this->arrMapping['e'];
  174. $this->arrMapping['ê'] = $this->arrMapping['e'];
  175. $this->arrMapping['í'] = $this->arrMapping['i'];
  176. $this->arrMapping['ì'] = $this->arrMapping['i'];
  177. $this->arrMapping['î'] = $this->arrMapping['i'];
  178. $this->arrMapping['ó'] = $this->arrMapping['o'];
  179. $this->arrMapping['ò'] = $this->arrMapping['o'];
  180. $this->arrMapping['ô'] = $this->arrMapping['o'];
  181. $this->arrMapping['ú'] = $this->arrMapping['u'];
  182. $this->arrMapping['ù'] = $this->arrMapping['u'];
  183. $this->arrMapping['û'] = $this->arrMapping['u'];
  184. $this->arrMapping['ß'] = $this->arrMapping['s'];
  185. $this->arrMapping['ö'] = $this->arrMapping['o'];
  186. $this->arrMapping['ü'] = $this->arrMapping['u'];
  187. $this->arrMapping['ä'] = $this->arrMapping['a'];
  188. }
  189. /**
  190. * @param string $strCharacter
  191. * @return integer
  192. * @throws Exception
  193. * @author kerry at shetline dot com
  194. * @author Thomas Wien
  195. */
  196. protected function ordUTF8($strCharacter)
  197. {
  198. if(!is_string($strCharacter))
  199. throw new Exception('$strCharacter should be string');
  200. if(mb_strlen($strCharacter) == 0)
  201. throw new Exception('$strCharacter should be exact one character (1)');
  202. if(mb_strlen($strCharacter) > 1)
  203. throw new Exception('$strCharacter should be exact one character (2)');
  204. $strOrd = ord($strCharacter{0});
  205. if($strOrd <= 0x7F)
  206. {
  207. return $strOrd;
  208. }
  209. elseif($strOrd < 0xC2)
  210. {
  211. throw new Exception('Cannot convert string to number');
  212. }
  213. elseif($strOrd <= 0xDF)
  214. {
  215. return ($strOrd & 0x1F) << 6
  216. | (ord($strCharacter{1}) & 0x3F);
  217. }
  218. elseif($strOrd <= 0xEF)
  219. {
  220. return ($strOrd & 0x0F) << 12
  221. | (ord($strCharacter{1}) & 0x3F) << 6
  222. | (ord($strCharacter{2}) & 0x3F);
  223. }
  224. elseif($strOrd <= 0xF4)
  225. {
  226. return ($strOrd & 0x0F) << 18
  227. | (ord($strCharacter{1}) & 0x3F) << 12
  228. | (ord($strCharacter{2}) & 0x3F) << 6
  229. | (ord($strCharacter{3}) & 0x3F);
  230. }
  231. throw new Exception('Cannot convert string to number');
  232. }
  233. public function __wakeup()
  234. {
  235. mb_internal_encoding('UTF-8');
  236. }
  237. /**
  238. * @param string $strValue
  239. * @return array
  240. * @uses getInputValue()
  241. */
  242. public function __invoke($strValue)
  243. {
  244. return $this->getInputValue($strValue);
  245. }
  246. }