SparqlClient.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. */
  20. namespace MediaWiki\Sparql;
  21. use Http;
  22. use MediaWiki\Http\HttpRequestFactory;
  23. /**
  24. * Simple SPARQL client
  25. *
  26. * @author Stas Malyshev
  27. */
  class SparqlClient {
      /**
       * Limit on how long the query can be and still be sent by GET.
       * Longer queries are sent in a POST body instead.
       */
      const MAX_GET_SIZE = 2048;

      /**
       * Maximum number of bytes of an undecodable response that are quoted
       * in the exception message.
       */
      const MAX_ERROR_RESPONSE_SIZE = 1024;

      /**
       * User agent for HTTP requests.
       * @var string
       */
      private $userAgent;

      /**
       * Query timeout (seconds)
       * @var int
       */
      private $timeout = 30;

      /**
       * SPARQL endpoint URL
       * @var string
       */
      private $endpoint;

      /**
       * Client options, merged into the HTTP request options on every query.
       * @var array
       */
      private $options = [];

      /**
       * Factory used to create the HTTP request for each query.
       * @var HttpRequestFactory
       */
      private $requestFactory;

      /**
       * @param string $url SPARQL Endpoint
       * @param HttpRequestFactory $requestFactory
       */
      public function __construct( $url, HttpRequestFactory $requestFactory ) {
          $this->endpoint = $url;
          $this->requestFactory = $requestFactory;
          $this->userAgent = Http::userAgent() . " SparqlClient";
      }

      /**
       * Set query timeout (in seconds).
       *
       * Negative values are ignored and leave the current timeout unchanged.
       *
       * @param int $timeout
       * @return $this
       */
      public function setTimeout( $timeout ) {
          if ( $timeout >= 0 ) {
              $this->timeout = $timeout;
          }
          return $this;
      }

      /**
       * Set client options.
       *
       * The options replace any previously set ones. In query() they are
       * merged over the default [ 'method' => 'GET' ]; 'timeout' is then
       * overwritten with the client timeout, and 'method' is forced to POST
       * for queries longer than MAX_GET_SIZE.
       *
       * @param array $options
       * @return $this
       */
      public function setClientOptions( $options ) {
          $this->options = $options;
          return $this;
      }

      /**
       * Get current user agent.
       * @return string
       */
      public function getUserAgent() {
          return $this->userAgent;
      }

      /**
       * Set user agent string.
       *
       * Note that completely overriding the user agent is not recommended
       * for most applications.
       * @see appendUserAgent() for recommended way of specifying user agent.
       *
       * @param string $agent
       */
      public function setUserAgent( $agent ) {
          $this->userAgent = $agent;
      }

      /**
       * Append specific string to user agent.
       *
       * This is the recommended way of specifying the user agent
       * for specific applications of the SparqlClient inside MediaWiki
       * and extension code. The string is appended after a single space.
       *
       * @param string $agent
       */
      public function appendUserAgent( $agent ) {
          $this->userAgent .= ' ' . $agent;
      }

      /**
       * Query SPARQL endpoint
       *
       * @param string $sparql query
       * @param bool $rawData Whether to return only values or full data objects
       *
       * @return array List of results, one row per array element
       *  Each row will contain fields indexed by variable name.
       * @throws SparqlException When the endpoint is empty, the HTTP request
       *  fails, or the response body cannot be decoded as JSON.
       */
      public function query( $sparql, $rawData = false ) {
          if ( empty( $this->endpoint ) ) {
              throw new SparqlException( 'Endpoint URL can not be empty' );
          }

          $queryData = [ "query" => $sparql, "format" => "json" ];
          $options = array_merge( [ 'method' => 'GET' ], $this->options );
          // A user agent given through the client options takes precedence.
          if ( empty( $options['userAgent'] ) ) {
              $options['userAgent'] = $this->userAgent;
          }

          if ( $this->timeout >= 0 ) {
              // Blazegraph setting, see https://wiki.blazegraph.com/wiki/index.php/REST_API
              $queryData['maxQueryTimeMillis'] = $this->timeout * 1000;
              $options['timeout'] = $this->timeout;
          }

          if ( strlen( $sparql ) > self::MAX_GET_SIZE ) {
              // big requests go to POST; the query moves from the URL to the body
              $options['method'] = 'POST';
              $options['postData'] = 'query=' . urlencode( $sparql );
              unset( $queryData['query'] );
          }

          $url = wfAppendQuery( $this->endpoint, $queryData );
          $request = $this->requestFactory->create( $url, $options, __METHOD__ );
          $status = $request->execute();
          if ( !$status->isOK() ) {
              throw new SparqlException( 'HTTP error: ' . $status->getWikiText( false, false, 'en' ) );
          }

          $result = $request->getContent();
          \Wikimedia\suppressWarnings();
          $data = json_decode( $result, true );
          \Wikimedia\restoreWarnings();
          if ( $data === null || $data === false ) {
              // Quote the beginning of the response so the failure can be diagnosed.
              throw new SparqlException( "HTTP request failed, response:\n" .
                  $this->getResponseExcerpt( $result ) );
          }

          return $this->extractData( $data, $rawData );
      }

      /**
       * Get the leading part of a response body for use in error messages.
       *
       * @param string $response Raw response body
       *
       * @return string At most MAX_ERROR_RESPONSE_SIZE bytes from the start
       *  of the response.
       */
      private function getResponseExcerpt( $response ) {
          return substr( (string)$response, 0, self::MAX_ERROR_RESPONSE_SIZE );
      }

      /**
       * Extract data from SPARQL response format.
       * The response must be in format described in:
       * https://www.w3.org/TR/sparql11-results-json/
       *
       * Every returned row has one entry per variable listed in head.vars;
       * variables not bound in a row are set to null. Responses without any
       * result bindings yield an empty list.
       *
       * @param array $data SPARQL result
       * @param bool $rawData Whether to return only values or full data objects
       *
       * @return array List of results, one row per element.
       */
      private function extractData( $data, $rawData = false ) {
          $result = [];
          if ( !is_array( $data )
              || empty( $data['results']['bindings'] )
              || !is_array( $data['results']['bindings'] )
          ) {
              return $result;
          }

          // Tolerate a missing variable list instead of raising notices.
          $vars = [];
          if ( isset( $data['head']['vars'] ) && is_array( $data['head']['vars'] ) ) {
              $vars = $data['head']['vars'];
          }

          foreach ( $data['results']['bindings'] as $row ) {
              $resrow = [];
              foreach ( $vars as $var ) {
                  if ( !isset( $row[$var] ) ) {
                      $resrow[$var] = null;
                  } elseif ( $rawData ) {
                      $resrow[$var] = $row[$var];
                  } else {
                      $resrow[$var] = isset( $row[$var]['value'] ) ? $row[$var]['value'] : null;
                  }
              }
              $result[] = $resrow;
          }

          return $result;
      }
  }