MWHttpRequest.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. */
  20. use Psr\Log\LoggerInterface;
  21. use Psr\Log\LoggerAwareInterface;
  22. use Psr\Log\NullLogger;
  23. /**
  24. * This wrapper class will call out to curl (if available) or fallback
  25. * to regular PHP if necessary for handling internal HTTP requests.
  26. *
  27. * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
  28. * PHP's HTTP extension.
  29. */
  30. abstract class MWHttpRequest implements LoggerAwareInterface {
  /**
   * Capability flag; false in this base class.
   * NOTE(review): presumably overridden by backends that can POST files — confirm.
   */
  const SUPPORTS_FILE_POSTS = false;

  /**
   * Request timeout. Starts as the placeholder 'default'; the constructor
   * replaces it with the 'timeout' option or $wgHTTPTimeout.
   *
   * @var int|string
   */
  protected $timeout = 'default';

  // Response body; reset to "" by prepare() and appended to by read().
  protected $content;

  // Set to true by prepare() when the request method is HEAD.
  protected $headersOnly = null;

  // Request parameters, set through the 'postData' option or setData().
  protected $postData = null;

  // Proxy to use; proxySetup() resolves it to '' or $wgHTTPProxy when unset.
  protected $proxy = null;

  // When true, the constructor and proxySetup() force $proxy to ''.
  protected $noProxy = false;

  // The next three are only copied from the same-named constructor options;
  // nothing in this class reads them.
  protected $sslVerifyHost = true;
  protected $sslVerifyCert = true;
  protected $caInfo = null;

  // HTTP method; upper-cased by the constructor when given as an option.
  protected $method = "GET";

  /**
   * Request headers, keyed by header name exactly as passed to setHeader().
   *
   * @var array
   */
  protected $reqHeaders = [];

  // Request URL after expansion by wfExpandUrl().
  protected $url;

  // Result of wfParseUrl() on $url; falsy when the URL could not be parsed.
  protected $parsedUrl;

  /**
   * Read callback that receives response data; see setCallback().
   *
   * @var callable
   */
  protected $callback;

  // Copied from the same-named constructor options; not read in this class.
  protected $maxRedirects = 5;
  protected $followRedirects = false;

  // Set by the constructor from the 'connectTimeout' option or $wgHTTPConnectTimeout.
  protected $connectTimeout;

  /**
   * Cookie storage; created on demand by setCookie() and parseCookies().
   *
   * @var CookieJar
   */
  protected $cookieJar;

  // Raw response header lines consumed by parseHeader().
  protected $headerList = [];

  // HTTP version taken from the response status line by parseHeader().
  protected $respVersion = "0.9";

  // Status code and reason text, e.g. "200 Ok"; parseHeader() overwrites it.
  protected $respStatus = "200 Ok";

  /**
   * Response headers: lower-cased header name => list of values.
   *
   * @var string[][]
   */
  protected $respHeaders = [];

  /**
   * Outcome of the request; created by the constructor.
   *
   * @var StatusValue
   */
  protected $status;

  /**
   * Profiler passed to the constructor (may be null).
   *
   * @var Profiler
   */
  protected $profiler;

  /**
   * Name of the caller passed to the constructor, kept for profiling.
   *
   * @var string
   */
  protected $profileName;

  /**
   * Logger taken from the 'logger' option, or a NullLogger by default.
   *
   * @var LoggerInterface
   */
  protected $logger;
  /**
   * Build a request object for the given URL.
   *
   * The URL is expanded and parsed up front. If it cannot be parsed or fails
   * Http::isValidURI(), the object is still created but its status is fatal
   * ('http-invalid-url').
   *
   * @param string $url Url to use. If protocol-relative, will be expanded to an http:// URL
   * @param array $options (optional) extra params to pass (see HttpRequestFactory::create())
   * @codingStandardsIgnoreStart
   * @phan-param array{timeout?:int|string,connectTimeout?:int|string,postData?:array,proxy?:string,noProxy?:bool,sslVerifyHost?:bool,sslVerifyCert?:bool,caInfo?:string,maxRedirects?:int,followRedirects?:bool,userAgent?:string,logger?:LoggerInterface,username?:string,password?:string,originalRequest?:WebRequest|array{ip:string,userAgent:string},method?:string} $options
   * @codingStandardsIgnoreEnd
   * @param string $caller The method making this request, for profiling
   * @param Profiler|null $profiler An instance of the profiler for profiling, or null
   * @throws Exception
   */
  public function __construct(
    $url, array $options = [], $caller = __METHOD__, Profiler $profiler = null
  ) {
    global $wgHTTPTimeout, $wgHTTPConnectTimeout;

    $this->url = wfExpandUrl( $url, PROTO_HTTP );
    $this->parsedUrl = wfParseUrl( $this->url );

    $this->logger = $options['logger'] ?? new NullLogger();

    if ( !$this->parsedUrl || !Http::isValidURI( $this->url ) ) {
      $this->status = StatusValue::newFatal( 'http-invalid-url', $url );
    } else {
      $this->status = StatusValue::newGood( 100 ); // continue
    }

    // Strict comparison on purpose: with a loose "!=" PHP 7 evaluates
    // 0 == 'default' as true, so an explicit integer 0 would be silently
    // replaced by the global default. Only the literal string 'default'
    // (or an absent option) selects the global value.
    if ( isset( $options['timeout'] ) && $options['timeout'] !== 'default' ) {
      $this->timeout = $options['timeout'];
    } else {
      $this->timeout = $wgHTTPTimeout;
    }
    if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] !== 'default' ) {
      $this->connectTimeout = $options['connectTimeout'];
    } else {
      $this->connectTimeout = $wgHTTPConnectTimeout;
    }

    if ( isset( $options['userAgent'] ) ) {
      $this->setUserAgent( $options['userAgent'] );
    }
    // Basic auth is only sent when both credentials are supplied.
    if ( isset( $options['username'] ) && isset( $options['password'] ) ) {
      $this->setHeader(
        'Authorization',
        'Basic ' . base64_encode( $options['username'] . ':' . $options['password'] )
      );
    }
    if ( isset( $options['originalRequest'] ) ) {
      $this->setOriginalRequest( $options['originalRequest'] );
    }

    $this->setHeader( 'X-Request-Id', WebRequest::getRequestId() );

    // Options that map one-to-one onto a property of the same name.
    $members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
      "method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];

    foreach ( $members as $o ) {
      if ( isset( $options[$o] ) ) {
        // ensure that MWHttpRequest::method is always
        // uppercased. T38137
        if ( $o === 'method' ) {
          // @phan-suppress-next-line PhanTypeInvalidDimOffset
          $options[$o] = strtoupper( $options[$o] );
        }
        $this->$o = $options[$o];
      }
    }

    if ( $this->noProxy ) {
      $this->proxy = ''; // noProxy takes precedence
    }

    // Profile based on what's calling us
    $this->profiler = $profiler;
    $this->profileName = $caller;
  }
  /**
   * Replace the logger used by this request.
   *
   * @param LoggerInterface $logger
   */
  public function setLogger( LoggerInterface $logger ) {
    $this->logger = $logger;
  }
  /**
   * Report whether any HTTP transport is usable: either the cURL extension
   * is loaded or the allow_url_fopen ini setting is enabled.
   *
   * @return bool
   */
  public static function canMakeRequests() {
    if ( function_exists( 'curl_init' ) ) {
      return true;
    }

    return (bool)wfIniGetBool( 'allow_url_fopen' );
  }
  /**
   * Create a request object through the HttpRequestFactory service.
   *
   * @deprecated since 1.34, use HttpRequestFactory instead
   * @param string $url Url to use
   * @param array|null $options (optional) extra params to pass (see HttpRequestFactory::create());
   *  null is treated as an empty array
   * @param string $caller The method making this request, for profiling
   * @throws DomainException
   * @return MWHttpRequest
   * @see MWHttpRequest::__construct
   */
  public static function factory( $url, array $options = null, $caller = __METHOD__ ) {
    $requestFactory = \MediaWiki\MediaWikiServices::getInstance()->getHttpRequestFactory();

    return $requestFactory->create( $url, $options ?? [], $caller );
  }
  /**
   * Return the response body collected so far.
   *
   * @return string
   */
  public function getContent() {
    return $this->content;
  }
  /**
   * Store the request parameters, replacing any previously set data.
   *
   * @param array $args
   * @todo overload the args param
   */
  public function setData( array $args ) {
    $this->postData = $args;
  }
  /**
   * Decide which proxy, if any, this request goes through.
   *
   * An explicitly configured proxy is kept unless "noProxy" is set. Otherwise
   * the proxy becomes '' for machine-local URLs or when "noProxy" is set, and
   * $wgHTTPProxy in every other case.
   *
   * @return void
   */
  protected function proxySetup() {
    global $wgHTTPProxy;

    $hasExplicitProxy = $this->proxy && !$this->noProxy;
    if ( $hasExplicitProxy ) {
      // Nothing to resolve: the caller chose a proxy and did not disable proxies
      return;
    }

    $bypass = self::isLocalURL( $this->url ) || $this->noProxy;
    $this->proxy = $bypass ? '' : (string)$wgHTTPProxy;
  }
  /**
   * Tell whether the host of a URL, or any of its parent domains, is listed
   * in $wgLocalVirtualHosts. Always false in command line mode.
   *
   * @param string $url Full url to check
   * @return bool
   */
  private static function isLocalURL( $url ) {
    global $wgCommandLineMode, $wgLocalVirtualHosts;

    if ( $wgCommandLineMode ) {
      return false;
    }

    // Pull out the host part; anything that does not look like http(s)://host/ is not local
    $found = [];
    if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $found ) ) {
      return false;
    }

    // Walk from the top-level label towards the full host name, testing each
    // successively longer suffix ("org", "example.org", "www.example.org", ...)
    $labels = array_reverse( explode( '.', $found[1] ) );
    $candidate = '';
    foreach ( $labels as $position => $label ) {
      $candidate = $position == 0 ? $label : $label . '.' . $candidate;
      if ( in_array( $candidate, $wgLocalVirtualHosts ) ) {
        return true;
      }
    }

    return false;
  }
  /**
   * Set the User-Agent request header.
   *
   * @param string $UA
   */
  public function setUserAgent( $UA ) {
    $this->setHeader( 'User-Agent', $UA );
  }
  /**
   * Set a request header, overwriting any value stored under the same name.
   *
   * The name is used as given; no case normalisation is performed, so
   * differently-cased spellings are stored as separate entries.
   *
   * @param string $name
   * @param string $value
   */
  public function setHeader( $name, $value ) {
    $this->reqHeaders[$name] = $value;
  }
  /**
   * Build the request headers as a list of "Name: value" strings.
   *
   * When a cookie jar is present, the Cookie header is (re)generated from it
   * for the request's path and host before the list is built.
   *
   * @return array
   */
  protected function getHeaderList() {
    if ( $this->cookieJar ) {
      $cookieHeader = $this->cookieJar->serializeToHttpRequest(
        $this->parsedUrl['path'],
        $this->parsedUrl['host']
      );
      $this->reqHeaders['Cookie'] = $cookieHeader;
    }

    return array_map(
      static function ( $name, $value ) {
        return "$name: $value";
      },
      array_keys( $this->reqHeaders ),
      $this->reqHeaders
    );
  }
  /**
   * Install a read callback that receives the response data.
   *
   * Without a custom callback, data is appended to an internal buffer that
   * is available through $req->getContent().
   *
   * For responses too large to hold in memory, supply a callback of the
   * form function($resource, $buffer): the first argument is the low-level
   * resource being read (implementation specific), the second is the chunk
   * of data.
   *
   * The callback MUST return the number of bytes it handled; reporting fewer
   * bytes than were passed in aborts the HTTP fetch.
   *
   * @param callable|null $callback
   * @throws InvalidArgumentException
   */
  public function setCallback( $callback ) {
    $this->doSetCallback( $callback );

    return null;
  }
  /**
   * Shared implementation behind setCallback() and the internal default.
   * A null $callback selects this object's own read() method.
   *
   * @param callable|null $callback
   * @throws InvalidArgumentException If $callback is neither null nor callable;
   *  the request status is marked fatal before throwing
   */
  protected function doSetCallback( $callback ) {
    if ( $callback === null ) {
      $this->callback = [ $this, 'read' ];
      return;
    }

    if ( !is_callable( $callback ) ) {
      $this->status->fatal( 'http-internal-error' );
      throw new InvalidArgumentException( __METHOD__ . ': invalid callback' );
    }

    $this->callback = $callback;
  }
  /**
   * Default read callback: appends each chunk of the response body to the
   * internal content buffer and reports the whole chunk as handled.
   *
   * @param resource $fh Unused here
   * @param string $content Chunk of response data
   * @return int Length of $content in bytes
   * @internal
   */
  public function read( $fh, $content ) {
    $handled = strlen( $content );
    $this->content = $this->content . $content;

    return $handled;
  }
  /**
   * Perform the request. This base implementation only throws; concrete
   * subclasses have to provide the real behaviour.
   *
   * @return Status
   * @note currently returns Status for B/C
   * @throws LogicException Always, when not overridden
   */
  public function execute() {
    $message = 'children must override this';

    throw new LogicException( $message );
  }
  /**
   * Reset per-request state and fill in defaults before a request is sent:
   * empties the content buffer, flags HEAD requests as headers-only, resolves
   * the proxy, installs the default read callback if none is set and adds a
   * default User-Agent header if none is set.
   */
  protected function prepare() {
    $this->content = "";

    $isHead = strtoupper( $this->method ) === "HEAD";
    if ( $isHead ) {
      $this->headersOnly = true;
    }

    // set up any proxy as needed
    $this->proxySetup();

    if ( !$this->callback ) {
      $this->doSetCallback( null );
    }

    $hasUserAgent = isset( $this->reqHeaders['User-Agent'] );
    if ( !$hasUserAgent ) {
      $this->setUserAgent( Http::userAgent() );
    }
  }
  /**
   * Parses the headers, including the HTTP status code and any
   * Set-Cookie headers. This function expects the headers to be
   * found in an array in the member variable headerList.
   *
   * Each line is handled as one of:
   *  - a status line ("HTTP/x.y ..."), which updates respVersion/respStatus;
   *  - a folded continuation line (leading space or tab), which is appended
   *    to the most recent value of the previous header;
   *  - a "Name: value" line, stored under the lower-cased name.
   * Anything else is ignored.
   */
  protected function parseHeader() {
    $lastname = "";

    // Failure without (valid) headers gets a response status of zero
    if ( !$this->status->isOK() ) {
      $this->respStatus = '0 Error';
    }

    foreach ( $this->headerList as $header ) {
      if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
        $this->respVersion = $match[1];
        $this->respStatus = $match[2];
      } elseif ( preg_match( "#^[ \t]#", $header ) ) {
        // A continuation line with no header before it has nothing to
        // attach to. Skip it rather than calling count() on an undefined
        // entry, which warns on PHP 7.2+ and throws a TypeError on PHP 8.
        if ( empty( $this->respHeaders[$lastname] ) ) {
          continue;
        }
        $last = count( $this->respHeaders[$lastname] ) - 1;
        $this->respHeaders[$lastname][$last] .= "\r\n$header";
      } elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
        $lastname = strtolower( $match[1] );
        $this->respHeaders[$lastname][] = $match[2];
      }
    }

    $this->parseCookies();
  }
  /**
   * Sets HTTPRequest status member to a fatal value with the error
   * message if the returned integer value of the status code was
   * not successful (1-299) or a redirect (300-399).
   * See RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
   * for a list of status codes.
   *
   * Response headers are parsed first if that has not happened yet.
   */
  protected function setStatus() {
    if ( !$this->respHeaders ) {
      $this->parseHeader();
    }

    $numericStatus = (int)$this->respStatus;

    if ( $numericStatus > 0 && $numericStatus < 400 ) {
      $this->status->setResult( true, $numericStatus );
      return;
    }

    // The status text is "<code> <reason>", but the reason phrase may be
    // absent (a bare "404"). Default the message to '' instead of reading
    // an undefined offset from the exploded parts.
    $parts = explode( " ", $this->respStatus, 2 );
    $code = $parts[0];
    $message = $parts[1] ?? '';

    $this->status->setResult( false, $numericStatus );
    $this->status->fatal( "http-bad-status", $code, $message );
  }
  /**
   * Return the numeric HTTP status code of the response, e.g. 200 for
   * "200 Ok". Response headers are parsed on first use.
   * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
   * for a list of status codes.)
   *
   * @return int
   */
  public function getStatus() {
    if ( !$this->respHeaders ) {
      $this->parseHeader();
    }

    $code = (int)$this->respStatus;

    return $code;
  }
  /**
   * Tell whether the last status code was a redirect, which here means a
   * code from 300 to 303 inclusive.
   *
   * @return bool
   */
  public function isRedirect() {
    if ( !$this->respHeaders ) {
      $this->parseHeader();
    }

    $code = (int)$this->respStatus;

    return $code >= 300 && $code <= 303;
  }
  /**
   * Return all response headers of the executed request as an associative
   * array. Since some headers (e.g. Set-Cookie) can appear more than once,
   * every entry is itself an array of the values received.
   * Header names are always in lowercase.
   *
   * @return array
   */
  public function getResponseHeaders() {
    // Parse lazily: only when nothing has been parsed yet
    if ( !$this->respHeaders ) {
      $this->parseHeader();
    }

    return $this->respHeaders;
  }
  /**
   * Return the value of one response header. If the header occurred more
   * than once, the last value is returned.
   *
   * @param string $header case-insensitive
   * @return string|null Null when the header is not present
   */
  public function getResponseHeader( $header ) {
    if ( !$this->respHeaders ) {
      $this->parseHeader();
    }

    $key = strtolower( $header );
    if ( !isset( $this->respHeaders[$key] ) ) {
      return null;
    }

    $values = $this->respHeaders[$key];
    $lastIndex = count( $values ) - 1;

    return $values[$lastIndex];
  }
  /**
   * Make this request use an existing, pre-loaded CookieJar.
   *
   * To read response cookies from the jar, getCookieJar must be called first.
   *
   * @param CookieJar $jar
   */
  public function setCookieJar( CookieJar $jar ) {
    $this->cookieJar = $jar;
  }
  /**
   * Return the cookie jar in use. Response headers are parsed first if that
   * has not happened yet, which also loads any response cookies into the jar.
   *
   * @return CookieJar
   */
  public function getCookieJar() {
    $needsParsing = !$this->respHeaders;
    if ( $needsParsing ) {
      $this->parseHeader();
    }

    return $this->cookieJar;
  }
  /**
   * Add a cookie to the jar, creating the jar if needed. Used before a
   * request to set up individual cookies, and internally after a request
   * when Set-Cookie headers are parsed.
   *
   * When no 'domain' attribute is given and the request URL was parsed
   * successfully, the URL's host is used as the cookie domain.
   *
   * @see Cookie::set
   * @param string $name
   * @param string $value
   * @param array $attr
   */
  public function setCookie( $name, $value, array $attr = [] ) {
    if ( !$this->cookieJar ) {
      $this->cookieJar = new CookieJar();
    }

    $domainMissing = !isset( $attr['domain'] );
    if ( $this->parsedUrl && $domainMissing ) {
      $attr['domain'] = $this->parsedUrl['host'];
    }

    $this->cookieJar->setCookie( $name, $value, $attr );
  }
  /**
   * Feed every Set-Cookie response header into the cookie jar, creating the
   * jar if needed. The host of the final URL is passed along with each cookie.
   */
  protected function parseCookies() {
    if ( !$this->cookieJar ) {
      $this->cookieJar = new CookieJar();
    }

    if ( !isset( $this->respHeaders['set-cookie'] ) ) {
      return;
    }

    $finalUrl = parse_url( $this->getFinalUrl() );
    foreach ( $this->respHeaders['set-cookie'] as $cookieHeader ) {
      $this->cookieJar->parseCookieResponseHeader( $cookieHeader, $finalUrl['host'] );
    }
  }
  /**
   * Returns the final URL after all redirections.
   *
   * Relative values of the "Location" header are incorrect as
   * stated in RFC, however they do happen and modern browsers
   * support them. This function loops backwards through all
   * locations in order to build the proper absolute URI - Marooned
   * at wikia-inc.com
   *
   * Note that the multiple Location: headers are an artifact of
   * CURL -- they shouldn't actually get returned this way. Rewrite
   * this when T31232 is taken care of (high-level redirect
   * handling rewrite).
   *
   * A Location value only counts as absolute when it has both a scheme and
   * a host; protocol-relative values ("//host/path") are treated like
   * relative ones.
   *
   * NOTE(review): only scheme and host are carried over when resolving a
   * relative Location, so a port on the earlier absolute URL is dropped —
   * confirm whether that is intended.
   *
   * @return string
   */
  public function getFinalUrl() {
    $headers = $this->getResponseHeaders();

    // return full url (fix for incorrect but handled relative location)
    if ( isset( $headers['location'] ) ) {
      $locations = $headers['location'];
      $domain = '';
      $foundRelativeURI = false;
      $countLocations = count( $locations );

      // Search from the newest redirect backwards for an absolute URL
      for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
        $url = parse_url( $locations[$i] );

        // Require the scheme as well: a protocol-relative Location has a
        // host but no scheme, and reading $url['scheme'] would be undefined
        if ( isset( $url['scheme'] ) && isset( $url['host'] ) ) {
          $domain = $url['scheme'] . '://' . $url['host'];
          break; // found correct URI (with host)
        } else {
          $foundRelativeURI = true;
        }
      }

      if ( !$foundRelativeURI ) {
        // The last Location was already absolute
        return $locations[$countLocations - 1];
      }
      if ( $domain ) {
        return $domain . $locations[$countLocations - 1];
      }

      // No absolute Location at all: resolve against the request URL
      $url = parse_url( $this->url );
      if ( isset( $url['scheme'] ) && isset( $url['host'] ) ) {
        return $url['scheme'] . '://' . $url['host'] .
          $locations[$countLocations - 1];
      }
    }

    return $this->url;
  }
  /**
   * Tell whether the backend is able to follow redirects. This base
   * implementation always answers true; child classes override it.
   *
   * @return bool
   */
  public function canFollowRedirects() {
    return true;
  }
  /**
   * Record details of the request that triggered this one. Useful for
   * endpoints/API modules which act as a proxy for some service, where
   * throttling etc. has to happen in that service.
   * Calling this sets the X-Forwarded-For and X-Original-User-Agent headers.
   *
   * @param WebRequest|array $originalRequest When in array form, it's
   *   expected to have the keys 'ip' and 'userAgent'.
   * @throws InvalidArgumentException If $originalRequest is neither a WebRequest
   *   nor an array containing both keys
   * @note IP/user agent is personally identifiable information, and should
   *   only be set when the privacy policy of the request target is
   *   compatible with that of the MediaWiki installation.
   */
  public function setOriginalRequest( $originalRequest ) {
    if ( $originalRequest instanceof WebRequest ) {
      $ip = $originalRequest->getIP();
      $userAgent = $originalRequest->getHeader( 'User-Agent' );
    } else {
      $isUsable = is_array( $originalRequest )
        && array_key_exists( 'ip', $originalRequest )
        && array_key_exists( 'userAgent', $originalRequest );
      if ( !$isUsable ) {
        throw new InvalidArgumentException( __METHOD__ . ': $originalRequest must be a '
          . "WebRequest or an array with 'ip' and 'userAgent' keys" );
      }
      $ip = $originalRequest['ip'];
      $userAgent = $originalRequest['userAgent'];
    }

    $this->reqHeaders['X-Forwarded-For'] = $ip;
    $this->reqHeaders['X-Original-User-Agent'] = $userAgent;
  }
  /**
   * Check that the given URI is a valid one.
   *
   * Only http:// and https:// are accepted. The protocol list is hardcoded
   * because protocols not supported by all HTTP-transport methods should be
   * rejected deterministically.
   *
   * "file://" specifically must not be allowed, for security reasons
   * (see <https://www.mediawiki.org/wiki/Special:Code/MediaWiki/r67684>).
   *
   * Beyond the scheme, the check only requires that "//" is followed by a
   * character that is neither "/" nor whitespace, and that the rest of the
   * string contains no whitespace.
   *
   * @todo FIXME this is wildly inaccurate and fails to actually check most stuff
   *
   * @since 1.34
   * @param string $uri URI to check for validity
   * @return bool
   */
  public static function isValidURI( $uri ) {
    $pattern = '/^https?:\/\/[^\/\s]\S*$/D';
    $matched = preg_match( $pattern, $uri );

    return $matched === 1;
  }
  612. }