httpclient.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * Utility for doing HTTP-related things
  6. *
  7. * PHP version 5
  8. *
  9. * LICENCE: This program is free software: you can redistribute it and/or modify
  10. * it under the terms of the GNU Affero General Public License as published by
  11. * the Free Software Foundation, either version 3 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Affero General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Affero General Public License
  20. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. *
  22. * @category Action
  23. * @package StatusNet
  24. * @author Evan Prodromou <evan@status.net>
  25. * @copyright 2009 StatusNet, Inc.
  26. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  27. * @link http://status.net/
  28. */
  29. if (!defined('GNUSOCIAL')) { exit(1); }
  30. /**
  31. * Useful structure for HTTP responses
  32. *
  33. * We make HTTP calls in several places, and we have several different
  34. * ways of doing them. This class hides the specifics of what underlying
  35. * library (curl or PHP-HTTP or whatever) that's used.
  36. *
  37. * This extends the HTTP_Request2_Response class with methods to get info
  38. * about any followed redirects.
  39. *
  40. * Originally used the name 'HTTPResponse' to match earlier code, but
  41. * this conflicts with a class in the PECL HTTP extension.
  42. *
  43. * @category HTTP
  44. * @package StatusNet
  45. * @author Evan Prodromou <evan@status.net>
  46. * @author Brion Vibber <brion@status.net>
  47. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  48. * @link http://status.net/
  49. */
  50. class GNUsocial_HTTPResponse extends HTTP_Request2_Response
  51. {
  52. public $redirUrls;
  53. public $url;
  54. public $redirectCount;
  55. function __construct(HTTP_Request2_Response $response, $url, $redirects=0)
  56. {
  57. foreach (get_object_vars($response) as $key => $val) {
  58. $this->$key = $val;
  59. }
  60. $this->url = strval($url);
  61. $this->redirectCount = intval($redirects);
  62. }
  63. /**
  64. * Get the count of redirects that have been followed, if any.
  65. * @return int
  66. */
  67. function getRedirectCount()
  68. {
  69. return $this->redirectCount;
  70. }
  71. /**
  72. * Gets the target URL, before any redirects. Use getEffectiveUrl() for final target.
  73. * @return string URL
  74. */
  75. function getUrl()
  76. {
  77. return $this->url;
  78. }
  79. /**
  80. * Check if the response is OK, generally a 200 or other 2xx status code.
  81. * @return bool
  82. */
  83. function isOk()
  84. {
  85. $status = $this->getStatus();
  86. return ($status >= 200 && $status < 300);
  87. }
  88. }
  89. /**
  90. * Utility class for doing HTTP client stuff
  91. *
  92. * We make HTTP calls in several places, and we have several different
  93. * ways of doing them. This class hides the specifics of what underlying
  94. * library (curl or PHP-HTTP or whatever) that's used.
  95. *
  96. * This extends the PEAR HTTP_Request2 package:
  97. * - sends StatusNet-specific User-Agent header
  98. * - 'follow_redirects' config option, defaulting on
  99. * - 'max_redirs' config option, defaulting to 10
  100. * - extended response class adds getRedirectCount() and getUrl() methods
  101. * - get() and post() convenience functions return body content directly
  102. *
  103. * @category HTTP
  104. * @package StatusNet
  105. * @author Evan Prodromou <evan@status.net>
  106. * @author Brion Vibber <brion@status.net>
  107. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  108. * @link http://status.net/
  109. */
  110. class HTTPClient extends HTTP_Request2
  111. {
  112. function __construct($url=null, $method=self::METHOD_GET, $config=array())
  113. {
  114. if (is_int(common_config('http', 'timeout'))) {
  115. // Reasonably you shouldn't set http/timeout to 0 because of
  116. // malicious remote servers that can cause infinitely long
  117. // responses... But the default in HTTP_Request2 is 0 for
  118. // some reason and should probably be considered a valid value.
  119. $this->config['timeout'] = common_config('http', 'timeout');
  120. } else {
  121. common_log(LOG_ERR, 'config option http/timeout is not an integer value: '._ve(common_config('http', 'timeout')));
  122. }
  123. if (!empty(common_config('http', 'connect_timeout'))) {
  124. $this->config['connect_timeout'] = common_config('http', 'connect_timeout');
  125. }
  126. $this->config['max_redirs'] = 10;
  127. $this->config['follow_redirects'] = true;
  128. // We've had some issues with keepalive breaking with
  129. // HEAD requests, such as to youtube which seems to be
  130. // emitting chunked encoding info for an empty body
  131. // instead of not emitting anything. This may be a
  132. // bug on YouTube's end, but the upstream libray
  133. // ought to be investigated to see if we can handle
  134. // it gracefully in that case as well.
  135. $this->config['protocol_version'] = '1.0';
  136. // Default state of OpenSSL seems to have no trusted
  137. // SSL certificate authorities, which breaks hostname
  138. // verification and means we have a hard time communicating
  139. // with other sites' HTTPS interfaces.
  140. //
  141. // Turn off verification unless we've configured a CA bundle.
  142. if (common_config('http', 'ssl_cafile')) {
  143. $this->config['ssl_cafile'] = common_config('http', 'ssl_cafile');
  144. } else {
  145. $this->config['ssl_verify_peer'] = false;
  146. }
  147. // This means "verify the cert hostname against what we connect to", it does not
  148. // imply CA trust or anything like that. Just the hostname.
  149. $this->config['ssl_verify_host'] = common_config('http', 'ssl_verify_host');
  150. if (common_config('http', 'curl') && extension_loaded('curl')) {
  151. $this->config['adapter'] = 'HTTP_Request2_Adapter_Curl';
  152. }
  153. foreach (array('host', 'port', 'user', 'password', 'auth_scheme') as $cf) {
  154. $k = 'proxy_'.$cf;
  155. $v = common_config('http', $k);
  156. if (!empty($v)) {
  157. $this->config[$k] = $v;
  158. }
  159. }
  160. parent::__construct($url, $method, $config);
  161. $this->setHeader('User-Agent', self::userAgent());
  162. }
  163. /**
  164. * Convenience/back-compat instantiator
  165. * @return HTTPClient
  166. */
  167. public static function start()
  168. {
  169. return new HTTPClient();
  170. }
  171. /**
  172. * Quick static function to GET a URL
  173. */
  174. public static function quickGet($url, $accept=null, array $params=array(), array $headers=array())
  175. {
  176. if (!empty($params)) {
  177. $params = http_build_query($params, null, '&');
  178. if (strpos($url, '?') === false) {
  179. $url .= '?' . $params;
  180. } else {
  181. $url .= '&' . $params;
  182. }
  183. }
  184. $client = new HTTPClient();
  185. if (!is_null($accept)) {
  186. $client->setHeader('Accept', $accept);
  187. }
  188. $response = $client->get($url, $headers);
  189. if (!$response->isOk()) {
  190. // TRANS: Exception. %s is the URL we tried to GET.
  191. throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
  192. }
  193. return $response->getBody();
  194. }
  195. public static function quickGetJson($url, $params=array())
  196. {
  197. $data = json_decode(self::quickGet($url, null, $params));
  198. if (is_null($data)) {
  199. common_debug('Could not decode JSON data from URL: '.$url);
  200. throw new ServerException('Could not decode JSON data from URL');
  201. }
  202. return $data;
  203. }
  204. /**
  205. * If you want an Accept header, put it in $headers
  206. */
  207. public static function quickHead($url, array $params=array(), array $headers=array())
  208. {
  209. if (!empty($params)) {
  210. $params = http_build_query($params, null, '&');
  211. if (strpos($url, '?') === false) {
  212. $url .= '?' . $params;
  213. } else {
  214. $url .= '&' . $params;
  215. }
  216. }
  217. $client = new HTTPClient();
  218. $response = $client->head($url, $headers);
  219. if (!$response->isOk()) {
  220. // TRANS: Exception. %s is the URL we tried to GET.
  221. throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
  222. }
  223. return $response->getHeader();
  224. }
  225. /**
  226. * Convenience function to run a GET request.
  227. *
  228. * @return GNUsocial_HTTPResponse
  229. * @throws HTTP_Request2_Exception
  230. */
  231. public function get($url, $headers=array())
  232. {
  233. return $this->doRequest($url, self::METHOD_GET, $headers);
  234. }
  235. /**
  236. * Convenience function to run a HEAD request.
  237. *
  238. * NOTE: Will probably turn into a GET request if you let it follow redirects!
  239. * That option is only there to be flexible and may be removed in the future!
  240. *
  241. * @return GNUsocial_HTTPResponse
  242. * @throws HTTP_Request2_Exception
  243. */
  244. public function head($url, $headers=array(), $follow_redirects=false)
  245. {
  246. // Save the configured value for follow_redirects
  247. $old_follow = $this->config['follow_redirects'];
  248. try {
  249. // Temporarily (possibly) override the follow_redirects setting
  250. $this->config['follow_redirects'] = $follow_redirects;
  251. return $this->doRequest($url, self::METHOD_HEAD, $headers);
  252. } catch (Exception $e) {
  253. // Let the exception go on its merry way.
  254. throw $e;
  255. } finally {
  256. // reset to the old value
  257. $this->config['follow_redirects'] = $old_follow;
  258. }
  259. //we've either returned or thrown exception here
  260. }
  261. /**
  262. * Convenience function to POST form data.
  263. *
  264. * @param string $url
  265. * @param array $headers optional associative array of HTTP headers
  266. * @param array $data optional associative array or blob of form data to submit
  267. * @return GNUsocial_HTTPResponse
  268. * @throws HTTP_Request2_Exception
  269. */
  270. public function post($url, $headers=array(), $data=array())
  271. {
  272. if ($data) {
  273. $this->addPostParameter($data);
  274. }
  275. return $this->doRequest($url, self::METHOD_POST, $headers);
  276. }
  277. /**
  278. * @param string $url The URL including possible querystring
  279. * @param string $method The HTTP method to use
  280. * @param array $headers List of already formatted strings
  281. * (not an associative array, to allow
  282. * multiple same-named headers)
  283. *
  284. * @return GNUsocial_HTTPResponse
  285. * @throws HTTP_Request2_Exception
  286. */
  287. protected function doRequest($url, $method, array $headers=array())
  288. {
  289. $this->setUrl($url);
  290. // Workaround for HTTP_Request2 not setting up SNI in socket contexts;
  291. // This fixes cert validation for SSL virtual hosts using SNI.
  292. // Requires PHP 5.3.2 or later and OpenSSL with SNI support.
  293. if ($this->url->getScheme() == 'https' && defined('OPENSSL_TLSEXT_SERVER_NAME')) {
  294. $this->config['ssl_SNI_enabled'] = true;
  295. $this->config['ssl_SNI_server_name'] = $this->url->getHost();
  296. }
  297. $this->setMethod($method);
  298. foreach ($headers as $header) {
  299. $this->setHeader($header);
  300. }
  301. $response = $this->send();
  302. if (is_null($response)) {
  303. // TRANS: Failed to retrieve a remote web resource, %s is the target URL.
  304. throw new NoHttpResponseException($url);
  305. }
  306. return $response;
  307. }
  308. protected function log($level, $detail) {
  309. $method = $this->getMethod();
  310. $url = $this->getUrl();
  311. common_log($level, __CLASS__ . ": HTTP $method $url - $detail");
  312. }
  313. /**
  314. * Pulls up GNU Social's customized user-agent string, so services
  315. * we hit can track down the responsible software.
  316. *
  317. * @return string
  318. */
  319. static public function userAgent()
  320. {
  321. return GNUSOCIAL_ENGINE . '/' . GNUSOCIAL_VERSION
  322. . ' (' . GNUSOCIAL_CODENAME . ')';
  323. }
  324. /**
  325. * Actually performs the HTTP request and returns a
  326. * GNUsocial_HTTPResponse object with response body and header info.
  327. *
  328. * Wraps around parent send() to add logging and redirection processing.
  329. *
  330. * @return GNUsocial_HTTPResponse
  331. * @throw HTTP_Request2_Exception
  332. */
  333. public function send()
  334. {
  335. $maxRedirs = intval($this->config['max_redirs']);
  336. if (empty($this->config['max_redirs'])) {
  337. $maxRedirs = 0;
  338. }
  339. $redirs = 0;
  340. $redirUrls = array();
  341. do {
  342. try {
  343. $response = parent::send();
  344. } catch (Exception $e) {
  345. $this->log(LOG_ERR, $e->getMessage());
  346. throw $e;
  347. }
  348. $code = $response->getStatus();
  349. $effectiveUrl = $response->getEffectiveUrl();
  350. $redirUrls[] = $effectiveUrl;
  351. $response->redirUrls = $redirUrls;
  352. if ($code >= 200 && $code < 300) {
  353. $reason = $response->getReasonPhrase();
  354. $this->log(LOG_INFO, "$code $reason");
  355. } elseif ($code >= 300 && $code < 400) {
  356. $url = $this->getUrl();
  357. $target = $response->getHeader('Location');
  358. if (++$redirs >= $maxRedirs) {
  359. common_log(LOG_ERR, __CLASS__ . ": Too many redirects: skipping $code redirect from $url to $target");
  360. break;
  361. }
  362. try {
  363. $this->setUrl($target);
  364. $this->setHeader('Referer', $url);
  365. common_log(LOG_INFO, __CLASS__ . ": Following $code redirect from $url to $target");
  366. continue;
  367. } catch (HTTP_Request2_Exception $e) {
  368. common_log(LOG_ERR, __CLASS__ . ": Invalid $code redirect from $url to $target");
  369. }
  370. } else {
  371. $reason = $response->getReasonPhrase();
  372. $this->log(LOG_ERR, "$code $reason");
  373. }
  374. break;
  375. } while ($maxRedirs);
  376. return new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs);
  377. }
  378. public static function get_filename(string $url, array $headers = null) : ?string {
  379. if ($headers === null) {
  380. $head = (new HTTPClient())->head($url);
  381. $headers = $head->getHeader();
  382. $headers = array_change_key_case($headers, CASE_LOWER);
  383. }
  384. if (array_key_exists('content-disposition', $headers) &&
  385. preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'], $matches) === 1) {
  386. return $matches[1];
  387. } else {
  388. common_log(LOG_INFO, "Couldn't determine filename for url: {$url}");
  389. return null;
  390. }
  391. }
  392. }