ARC2.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. <?php
  /**
   * ARC2 core class (static, not instantiated)
   *
   * @author Benjamin Nowack
   * @license <http://arc.semsol.org/license>
   * @homepage <http://arc.semsol.org/>
   * @package ARC2
   * @version 2011-01-07
   */
  /*
   * E_STRICT hack: re-register whatever timezone PHP currently reports as the
   * explicit default. The "@" silences the lookup itself; the whole statement
   * is skipped when the timezone functions are not available.
   */
  function_exists('date_default_timezone_get')
    && date_default_timezone_set(@date_default_timezone_get());
  /**
   * ARC2 core class: a purely static collection of path/URI helpers,
   * string utilities, triple-index helpers and component factories.
   *
   * Terminology used below:
   *  - "triples": a list of arrays with at least the keys s, p, o
   *    (optionally s_type, o_type, o_lang, o_datatype).
   *  - "index": array(subject => array(predicate => array(object, ...)))
   *    where an object is either a plain value ("flat" index) or an array
   *    with a 'value' key and optional 'type', 'lang', 'datatype' keys.
   */
  class ARC2 {

    /**
     * Returns the version identifier of this ARC2 release.
     *
     * @return string
     */
    public static function getVersion() {
      return '2011-01-07';
    }

    /* paths and URIs */

    /**
     * Returns the directory (with trailing slash) a component file is
     * expected in.
     *
     * The first keyword (case-insensitive, checked in the order listed
     * below) that occurs in $f selects a sub-directory of the directory
     * containing this file; otherwise that directory itself is returned.
     *
     * @param string $f component name, e.g. "TurtleParser"
     * @return string
     */
    public static function getIncPath($f = '') {
      $r = realpath(dirname(__FILE__)) . '/';
      /* order matters: "SPARQLScriptParser" has to hit "sparqlscript" before "parser" */
      $dirs = array(
        'plugin' => 'plugins',
        'trigger' => 'triggers',
        'store' => 'store',
        'serializer' => 'serializers',
        'extractor' => 'extractors',
        'sparqlscript' => 'sparqlscript',
        'parser' => 'parsers',
      );
      foreach ($dirs as $k => $dir) {
        if (stripos($f, $k) !== false) {
          return $r . $dir . '/';
        }
      }
      return $r;
    }

    /**
     * Builds "scheme://host[:port]" from $_SERVER.
     *
     * The scheme is the lower-cased name part of SERVER_PROTOCOL ("http"
     * when unset). Plain "http" on port 443 is reported as "https" without
     * a port suffix; any other port than 80 is appended. Callers have to
     * make sure SERVER_NAME is set.
     *
     * @return string
     */
    private static function getServerBaseURI() {
      $proto = 'http';
      if (isset($_SERVER['SERVER_PROTOCOL'])) {
        $proto = preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL']));
      }
      $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : 80;
      /* https */
      if (($proto == 'http') && $port == 443) {
        $proto = 'https';
        $port = 80;
      }
      return $proto . '://' . $_SERVER['SERVER_NAME'] . ($port != 80 ? ':' . $port : '');
    }

    /**
     * Returns the URI of the running script.
     *
     * Uses the web server variables when SERVER_NAME is available, a
     * file:// URI built from SCRIPT_FILENAME otherwise, and a fixed
     * placeholder when neither is set.
     *
     * @return string
     */
    public static function getScriptURI() {
      if (isset($_SERVER['SERVER_NAME'])) {
        $script = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '';
        return self::getServerBaseURI() . $script;
      }
      if (isset($_SERVER['SCRIPT_FILENAME'])) {
        return 'file://' . realpath($_SERVER['SCRIPT_FILENAME']);
      }
      return 'http://localhost/unknown_path';
    }

    /**
     * Returns the URI of the current request (including the query string
     * as delivered in REQUEST_URI), falling back to getScriptURI() when no
     * request information is available.
     *
     * Scheme and port are derived exactly as in getScriptURI(), so a
     * request on port 443 yields "https://host/..." in both methods.
     *
     * @return string
     */
    public static function getRequestURI() {
      if (isset($_SERVER['REQUEST_URI']) && isset($_SERVER['SERVER_NAME'])) {
        return self::getServerBaseURI() . $_SERVER['REQUEST_URI'];
      }
      return self::getScriptURI();
    }

    /**
     * Includes the file that defines a component.
     *
     * $f may carry a custom prefix ("MyPrefix_Name"); without one the
     * prefix is "ARC2". The file is looked up as
     * "<path><prefix>_<name>.php" and, for custom prefixes, additionally as
     * "<path><lowercased prefix>/<prefix>_<name>.php".
     *
     * @param string $f    component name, optionally prefixed
     * @param string $path directory to search (with trailing slash);
     *                     defaults to getIncPath() of the name
     * @return mixed truthy (include_once result or 1) on success, 0 when
     *               no candidate file could be included
     */
    public static function inc($f, $path = '') {
      $prefix = 'ARC2';
      if (preg_match('/^([^\_]+)\_(.*)$/', $f, $m)) {
        $prefix = $m[1];
        $f = $m[2];
      }
      $inc_path = $path ? $path : self::getIncPath($f);
      /* the name is URL-encoded before it becomes part of a file name */
      $path = $inc_path . $prefix . '_' . urlencode($f) . '.php';
      if (file_exists($path)) return include_once($path);
      /* safe-mode hack */
      if (@include_once($path)) return 1;
      /* try other path */
      if ($prefix != 'ARC2') {
        $path = $inc_path . strtolower($prefix) . '/' . $prefix . '_' . rawurlencode($f) . '.php';
        if (file_exists($path)) return include_once($path);
        /* safe-mode hack */
        if (@include_once($path)) return 1;
      }
      return 0;
    }

    /* small utilities */

    /**
     * Returns the current Unix timestamp with microseconds.
     *
     * @return float
     */
    public static function mtime() {
      return microtime(true);
    }

    /**
     * Matches $re at the start of $v (after optional whitespace).
     *
     * @param string $re      regular expression fragment (no delimiters)
     * @param string $v       subject string
     * @param string $options PCRE modifiers
     * @return array|false the preg_match groups, with the unmatched rest of
     *                     $v as the last group, or false when $re does not
     *                     match
     */
    public static function x($re, $v, $options = 'si') {
      return preg_match("/^\s*" . $re . "(.*)$/" . $options, $v, $m) ? $m : false;
    }

    /**
     * Loads the ARC2_getFormat() function file and delegates to it.
     *
     * @param mixed  $val   passed through to ARC2_getFormat()
     * @param string $mtype passed through to ARC2_getFormat()
     * @param string $ext   passed through to ARC2_getFormat()
     * @return mixed result of ARC2_getFormat()
     */
    public static function getFormat($val, $mtype = '', $ext = '') {
      self::inc('getFormat');
      return ARC2_getFormat($val, $mtype, $ext);
    }

    /**
     * Loads the ARC2_getPreferredFormat() function file and delegates to it.
     *
     * @param string $default passed through to ARC2_getPreferredFormat()
     * @return mixed result of ARC2_getPreferredFormat()
     */
    public static function getPreferredFormat($default = 'plain') {
      self::inc('getPreferredFormat');
      return ARC2_getPreferredFormat($default);
    }

    /* encoding */

    /**
     * Best-effort normalisation of a string to UTF-8.
     *
     * Strings that rawurlencode() leaves untouched are returned as-is.
     * Otherwise the value is decoded once when that does not produce new
     * "?" replacement characters, a handful of control bytes are mapped to
     * typographic characters, and every multi-byte candidate sequence is
     * passed to getUTF8Char().
     *
     * NOTE(review): utf8_decode()/utf8_encode() are deprecated as of
     * PHP 8.2; they are kept here because the exact byte-level behaviour of
     * this routine depends on them.
     *
     * @param string $v
     * @return string
     */
    public static function toUTF8($v) {
      if (rawurlencode($v) === $v) return $v;
      $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
      /* custom hacks, mainly caused by bugs in PHP's json_decode */
      $mappings = array(
        '%18' => '‘',
        '%19' => '’',
        '%1C' => '“',
        '%1D' => '”',
        '%1E' => '„',
        '%10' => '‐',
        '%12' => '−',
        '%13' => '–',
        '%14' => '—',
        '%26' => '&',
      );
      /* the keys are URL-encoded only for readability, the replacement works on the raw bytes */
      $froms = array_map('urldecode', array_keys($mappings));
      $tos = array_values($mappings);
      $v = str_replace($froms, $tos, $v);
      /* utf8 tweaks */
      return preg_replace_callback('/([\x00-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\xf8-\xfb][\x80-\xbf]{4}|[\xfc-\xfd][\x80-\xbf]{5}|[^\x00-\x7f])/', array('ARC2', 'getUTF8Char'), $v);
    }

    /**
     * preg_replace_callback handler of toUTF8().
     *
     * A single (trimmed) byte is encoded with utf8_encode(); an ASCII byte
     * followed by further bytes is kept and the remainder is sent through
     * toUTF8() again; everything else is returned unchanged.
     *
     * @param array $v match array, group 1 holds the byte sequence
     * @return string
     */
    public static function getUTF8Char($v) {
      $val = $v[1];
      if (strlen(trim($val)) === 1) return utf8_encode($val);
      if (preg_match('/^([\x00-\x7f])(.+)/', $val, $m)) return $m[1] . self::toUTF8($m[2]);
      return $val;
    }

    /* URIs */

    /**
     * Splits a URI into a namespace part and a local name.
     *
     * @param string $v
     * @return array array(namespace, local name); the local name is '' when
     *               no split position is found
     */
    public static function splitURI($v) {
      /* the following namespaces may lead to conflated URIs,
       * we have to set the split position manually
       */
      if (strpos($v, 'www.w3.org') !== false) {
        $specials = array(
          'http://www.w3.org/XML/1998/namespace',
          'http://www.w3.org/2005/Atom',
          'http://www.w3.org/1999/xhtml',
        );
        foreach ($specials as $ns) {
          if (strpos($v, $ns) === 0) {
            $local_part = substr($v, strlen($ns));
            /* a local part starting with "/" or "#" is handled by the generic rules */
            if (!preg_match('/^[\/\#]/', $local_part)) {
              return array($ns, $local_part);
            }
          }
        }
      }
      /* auto-splitting on / or # */
      if (preg_match('/^(.*[\/\#])([^\/\#]+)$/', $v, $m)) return array($m[1], $m[2]);
      /* auto-splitting on last special char, e.g. urn:foo:bar */
      if (preg_match('/^(.*[\:\/])([^\:\/]+)$/', $v, $m)) return array($m[1], $m[2]);
      return array($v, '');
    }

    /* triple indexes */

    /**
     * Tells whether two index objects (or object values) denote the same
     * term.
     *
     * Scalars are compared by their exact string form, so numeric-looking
     * but distinct literals such as "1" and "01" (or "1e3" and "1000") are
     * NOT treated as equal, which a loose "==" would do. Arrays are equal
     * when they have the same keys (in any order) with equal values. Any
     * other combination falls back to a loose comparison.
     *
     * @param mixed $a
     * @param mixed $b
     * @return bool
     */
    private static function isSameObject($a, $b) {
      if (is_array($a) && is_array($b)) {
        if (count($a) != count($b)) return false;
        foreach ($a as $k => $v) {
          if (!array_key_exists($k, $b) || !self::isSameObject($v, $b[$k])) return false;
        }
        return true;
      }
      $a_plain = is_scalar($a) || is_null($a);
      $b_plain = is_scalar($b) || is_null($b);
      if ($a_plain && $b_plain) {
        return (string)$a === (string)$b;
      }
      return $a == $b;
    }

    /**
     * Tells whether $o is already contained in the object list $os,
     * using isSameObject() for the comparison.
     *
     * @param mixed $o
     * @param array $os
     * @return bool
     */
    private static function hasObject($o, $os) {
      foreach ($os as $candidate) {
        if (self::isSameObject($o, $candidate)) return true;
      }
      return false;
    }

    /**
     * Converts a list of triples into an index, dropping duplicates.
     *
     * Terms whose "<term>_type" is "var" are treated as template variables:
     * they are replaced by $vals[<variable name>], and the whole triple is
     * skipped when a variable has no value. The type of a substituted term
     * is taken from $vals["<variable name> type"] if present and guessed
     * from the value otherwise.
     *
     * @param array $triples
     * @param mixed $flatten_objects truthy: objects are stored as plain
     *                               values; falsy: objects are stored as
     *                               arrays with 'value' and, where
     *                               non-empty, 'lang', 'type', 'datatype'
     * @param mixed $vals            variable bindings for template triples
     * @return array
     */
    public static function getSimpleIndex($triples, $flatten_objects = 1, $vals = '') {
      $r = array();
      foreach ($triples as $t) {
        $skip_t = 0;
        $terms = array();
        foreach (array('s', 'p', 'o') as $term) {
          $terms[$term] = $t[$term];
          /* template var */
          if (isset($t[$term . '_type']) && ($t[$term . '_type'] == 'var')) {
            $var = $terms[$term];
            $is_bound = isset($vals[$var]);
            $val = $is_bound ? $vals[$var] : '';
            if (!$is_bound) $skip_t = 1;
            /* explicit type first, then guesses based on the value */
            $type = isset($vals[$var . ' type']) ? $vals[$var . ' type'] : '';
            if (!$type && preg_match('/^\_\:/', $val)) $type = 'bnode';
            if ($term == 'o') {
              if (!$type && (preg_match('/\s/s', $val) || !preg_match('/\:/', $val))) $type = 'literal';
              if (!$type && !preg_match('/[\/]/', $val)) $type = 'literal';
            }
            if (!$type) $type = 'uri';
            $t[$term . '_type'] = $type;
            $terms[$term] = $val;
          }
        }
        if ($skip_t) {
          continue;
        }
        $s = $terms['s'];
        $p = $terms['p'];
        $o = $terms['o'];
        if (!isset($r[$s])) $r[$s] = array();
        if (!isset($r[$s][$p])) $r[$s][$p] = array();
        if (!$flatten_objects) {
          $o = array('value' => $o);
          /* both "o_lang" and "o lang" style keys are accepted */
          foreach (array('lang', 'type', 'datatype') as $suffix) {
            if (isset($t['o_' . $suffix]) && $t['o_' . $suffix]) {
              $o[$suffix] = $t['o_' . $suffix];
            }
            elseif (isset($t['o ' . $suffix]) && $t['o ' . $suffix]) {
              $o[$suffix] = $t['o ' . $suffix];
            }
          }
        }
        if (!self::hasObject($o, $r[$s][$p])) {
          $r[$s][$p][] = $o;
        }
      }
      return $r;
    }

    /**
     * Converts an index with structured objects (arrays with a 'value'
     * key) back into a list of triples.
     *
     * The subject type is "bnode" for subjects starting with "_:" and "uri"
     * otherwise; missing object type, datatype and lang default to ''.
     *
     * @param array $index
     * @return array
     */
    public static function getTriplesFromIndex($index) {
      $r = array();
      foreach ($index as $s => $ps) {
        foreach ($ps as $p => $os) {
          foreach ($os as $o) {
            $r[] = array(
              's' => $s,
              'p' => $p,
              'o' => $o['value'],
              's_type' => preg_match('/^\_\:/', $s) ? 'bnode' : 'uri',
              'o_type' => isset($o['type']) ? $o['type'] : '',
              'o_datatype' => isset($o['datatype']) ? $o['datatype'] : '',
              'o_lang' => isset($o['lang']) ? $o['lang'] : '',
            );
          }
        }
      }
      return $r;
    }

    /**
     * Merges any number of indexes (passed as separate arguments) into
     * one, skipping objects that are already present for a subject and
     * predicate.
     *
     * @return array
     */
    public static function getMergedIndex() {
      $r = array();
      foreach (func_get_args() as $index) {
        foreach ($index as $s => $ps) {
          if (!isset($r[$s])) $r[$s] = array();
          foreach ($ps as $p => $os) {
            if (!isset($r[$s][$p])) $r[$s][$p] = array();
            foreach ($os as $o) {
              if (!self::hasObject($o, $r[$s][$p])) {
                $r[$s][$p][] = $o;
              }
            }
          }
        }
      }
      return $r;
    }

    /**
     * Removes triples from a given index.
     *
     * The first argument is the index to clean; the triples of every
     * further argument are removed from it. Objects are matched by their
     * value only (flat values and 'value' entries are both accepted).
     * Predicates left without objects are removed; when no subject has any
     * predicate left, an empty array is returned.
     *
     * @return array
     */
    public static function getCleanedIndex() {
      $indexes = func_get_args();
      if (!$indexes) return array();
      $r = $indexes[0];
      foreach (array_slice($indexes, 1) as $index) {
        foreach ($index as $s => $ps) {
          if (!isset($r[$s])) continue;
          foreach ($ps as $p => $os) {
            if (!isset($r[$s][$p])) continue;
            $new_os = array();
            foreach ($r[$s][$p] as $r_o) {
              $r_o_val = is_array($r_o) ? $r_o['value'] : $r_o;
              $keep = 1;
              foreach ($os as $o) {
                $del_o_val = is_array($o) ? $o['value'] : $o;
                if (self::isSameObject($del_o_val, $r_o_val)) {
                  $keep = 0;
                  break;
                }
              }
              if ($keep) {
                $new_os[] = $r_o;
              }
            }
            if ($new_os) {
              $r[$s][$p] = $new_os;
            }
            else {
              unset($r[$s][$p]);
            }
          }
        }
      }
      /* check r */
      foreach ($r as $ps) {
        if ($ps) {
          return $r;
        }
      }
      return array();
    }

    /**
     * Classifies a value as 'string', 'triples', 'index' or 'array'.
     *
     * A string is 'string'. An array with an element at key 0 is 'triples'
     * when that element is an array with a 'p' key. An array without key 0
     * is 'index' when a subject/predicate entry whose first object is an
     * array with a 'value' key is found. Everything else is 'array'.
     *
     * Keys are tested with array_key_exists() rather than a loose
     * in_array() on the key list, which on PHP < 8 matched 0 against every
     * non-numeric string key.
     *
     * @param string|array $v
     * @return string
     */
    public static function getStructType($v) {
      /* string */
      if (is_string($v)) return 'string';
      /* flat array, numeric keys */
      if (array_key_exists(0, $v)) {
        /* simple array */
        if (!is_array($v[0])) return 'array';
        /* triples */
        if (array_key_exists('p', $v[0])) return 'triples';
      }
      /* associative array */
      else {
        /* index */
        foreach ($v as $s => $ps) {
          if (!is_array($ps)) break;
          foreach ($ps as $p => $os) {
            /* gives up on this subject only, the next subject is still inspected */
            if (!is_array($os) || !isset($os[0]) || !is_array($os[0])) break;
            if (array_key_exists('value', $os[0])) return 'index';
          }
        }
      }
      /* array */
      return 'array';
    }

    /* components */

    /**
     * Loads and instantiates a component class.
     *
     * The class name is "<prefix>_<name>", with "ARC2" as prefix unless
     * $name itself has the form "Prefix_Name". The constructor receives
     * the configuration and the caller.
     *
     * @param string $name   component name, optionally prefixed
     * @param mixed  $a      configuration passed to the constructor
     * @param mixed  $caller calling object; an empty stdClass when omitted
     * @return object
     */
    public static function getComponent($name, $a = '', $caller = '') {
      self::inc($name);
      $prefix = 'ARC2';
      if (preg_match('/^([^\_]+)\_(.+)$/', $name, $m)) {
        $prefix = $m[1];
        $name = $m[2];
      }
      $cls = $prefix . '_' . $name;
      if (!$caller) $caller = new stdClass();
      return new $cls($a, $caller);
    }

    /* resource */

    /** Returns a new "Resource" component. */
    public static function getResource($a = '') {
      return self::getComponent('Resource', $a);
    }

    /* reader */

    /** Returns a new "Reader" component. */
    public static function getReader($a = '') {
      return self::getComponent('Reader', $a);
    }

    /* parsers */

    /**
     * Returns a new "<prefix>Parser" component.
     *
     * @param string $prefix parser name without the "Parser" suffix
     * @param mixed  $a      configuration passed to the component
     * @return object
     */
    public static function getParser($prefix, $a = '') {
      return self::getComponent($prefix . 'Parser', $a);
    }

    /** Returns a new "RDFParser" component. */
    public static function getRDFParser($a = '') {
      return self::getParser('RDF', $a);
    }

    /** Returns a new "RDFXMLParser" component. */
    public static function getRDFXMLParser($a = '') {
      return self::getParser('RDFXML', $a);
    }

    /** Returns a new "TurtleParser" component. */
    public static function getTurtleParser($a = '') {
      return self::getParser('Turtle', $a);
    }

    /** Returns a new "RSSParser" component. */
    public static function getRSSParser($a = '') {
      return self::getParser('RSS', $a);
    }

    /** Returns a new "SemHTMLParser" component. */
    public static function getSemHTMLParser($a = '') {
      return self::getParser('SemHTML', $a);
    }

    /** Returns a new "SPARQLParser" component. */
    public static function getSPARQLParser($a = '') {
      return self::getComponent('SPARQLParser', $a);
    }

    /** Returns a new "SPARQLPlusParser" component. */
    public static function getSPARQLPlusParser($a = '') {
      return self::getParser('SPARQLPlus', $a);
    }

    /** Returns a new "SPARQLXMLResultParser" component. */
    public static function getSPARQLXMLResultParser($a = '') {
      return self::getParser('SPARQLXMLResult', $a);
    }

    /** Returns a new "JSONParser" component. */
    public static function getJSONParser($a = '') {
      return self::getParser('JSON', $a);
    }

    /** Returns a new "SGAJSONParser" component. */
    public static function getSGAJSONParser($a = '') {
      return self::getParser('SGAJSON', $a);
    }

    /** Returns a new "CBJSONParser" component. */
    public static function getCBJSONParser($a = '') {
      return self::getParser('CBJSON', $a);
    }

    /** Returns a new "SPARQLScriptParser" component. */
    public static function getSPARQLScriptParser($a = '') {
      return self::getParser('SPARQLScript', $a);
    }

    /* store */

    /** Returns a new "Store" component for the given caller. */
    public static function getStore($a = '', $caller = '') {
      return self::getComponent('Store', $a, $caller);
    }

    /** Returns a new "StoreEndpoint" component for the given caller. */
    public static function getStoreEndpoint($a = '', $caller = '') {
      return self::getComponent('StoreEndpoint', $a, $caller);
    }

    /** Returns a new "RemoteStore" component for the given caller. */
    public static function getRemoteStore($a = '', $caller = '') {
      return self::getComponent('RemoteStore', $a, $caller);
    }

    /** Returns a new "MemStore" component. */
    public static function getMemStore($a = '') {
      return self::getComponent('MemStore', $a);
    }

    /* serializers */

    /**
     * Returns a new "<prefix>Serializer" component.
     *
     * @param string $prefix serializer name without the "Serializer" suffix
     * @param mixed  $a      configuration passed to the component
     * @return object
     */
    public static function getSer($prefix, $a = '') {
      return self::getComponent($prefix . 'Serializer', $a);
    }

    /** Returns a new "TurtleSerializer" component. */
    public static function getTurtleSerializer($a = '') {
      return self::getSer('Turtle', $a);
    }

    /** Returns a new "RDFXMLSerializer" component. */
    public static function getRDFXMLSerializer($a = '') {
      return self::getSer('RDFXML', $a);
    }

    /** Returns a new "NTriplesSerializer" component. */
    public static function getNTriplesSerializer($a = '') {
      return self::getSer('NTriples', $a);
    }

    /** Returns a new "RDFJSONSerializer" component. */
    public static function getRDFJSONSerializer($a = '') {
      return self::getSer('RDFJSON', $a);
    }

    /** Returns a new "POSHRDFSerializer" component (deprecated). */
    public static function getPOSHRDFSerializer($a = '') {
      return self::getSer('POSHRDF', $a);
    }

    /** Returns a new "MicroRDFSerializer" component. */
    public static function getMicroRDFSerializer($a = '') {
      return self::getSer('MicroRDF', $a);
    }

    /** Returns a new "RSS10Serializer" component. */
    public static function getRSS10Serializer($a = '') {
      return self::getSer('RSS10', $a);
    }

    /* sparqlscript */

    /** Returns a new "SPARQLScriptProcessor" component. */
    public static function getSPARQLScriptProcessor($a = '') {
      return self::getComponent('SPARQLScriptProcessor', $a);
    }

  }