StoreRemoteMediaPlugin.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. <?php
  2. // This file is part of GNU social - https://www.gnu.org/software/social
  3. //
  4. // GNU social is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // GNU social is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * The StoreRemoteMedia plugin downloads remotely attached files to local server.
  18. *
  19. * @package GNUsocial
  20. * @author Mikael Nordfeldth
  21. * @author Stephen Paul Weber
  22. * @author Mikael Nordfeldth
  23. * @author Miguel Dantas
  24. * @author Diogo Peralta Cordeiro
  25. * @copyright 2015-2016, 2019-2021 Free Software Foundation, Inc http://www.fsf.org
  26. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  27. */
  28. class StoreRemoteMediaPlugin extends Plugin
  29. {
  /** Version string reported through onPluginVersion(). */
  public const PLUGIN_VERSION = '3.0.0';

  // Everything below can be overridden from config.php with
  // addPlugin('StoreRemoteMedia', ['param' => 'value', ...]);

  /**
   * Trusted hosts, as "hostname regexp => service provider" pairs.
   *
   * WARNING: the keys are _regexps_ (the surrounding slashes are added when
   * they are matched). Always escape your dots and anchor the end of the string.
   */
  public $domain_whitelist = [
      '^i\d*\.ytimg\.com$'    => 'YouTube',
      '^i\d*\.vimeocdn\.com$' => 'Vimeo',
  ];

  /** Extra trusted sources, same format as $domain_whitelist; merged into it by initialize(). */
  public $append_whitelist = [];

  /** Security/abuse precaution: when true, checkWhitelist() rejects hosts that match no whitelist entry. */
  public $check_whitelist = false;

  /** Whether to maintain a copy of the original media or only a thumbnail of it. */
  public $store_original = false;

  /** Thumbnail width; when left null, initialize() uses common_config('thumbnail', 'width'). */
  public $thumbnail_width;

  /** Thumbnail height; when left null, initialize() uses common_config('thumbnail', 'height'). */
  public $thumbnail_height;

  /** Crop setting; when left null, initialize() uses common_config('thumbnail', 'crop'). */
  public $crop;

  /** Largest accepted remote file size; when left null, initialize() uses common_config('attachments', 'file_quota'). */
  public $max_size;
  /**
   * Initialize the StoreRemoteMedia plugin and set up the environment it needs.
   *
   * Merges $append_whitelist into $domain_whitelist and replaces every setting
   * that was left as null (thumbnail size, crop, maximum size) with the
   * corresponding site-wide configuration value.
   *
   * No exception is caught here: anything thrown by the parent initializer or
   * by the configuration lookups propagates to the caller.
   *
   * @return bool true once the settings have been resolved
   */
  public function initialize()
  {
      parent::initialize();

      $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist);

      // Load global configuration if specific not provided
      $this->thumbnail_width  = $this->thumbnail_width  ?? common_config('thumbnail', 'width');
      $this->thumbnail_height = $this->thumbnail_height ?? common_config('thumbnail', 'height');
      $this->max_size         = $this->max_size         ?? common_config('attachments', 'file_quota');
      $this->crop             = $this->crop             ?? common_config('thumbnail', 'crop');

      // The method used to fall off the end (returning null) although its
      // documentation promised true on success.
      return true;
  }
  /**
   * This event executes when GNU social is creating a file thumbnail entry in
   * the database. We glom onto this to fetch remote attachments.
   *
   * Return value, as implemented:
   *  - true  when nothing was done (private site, file is not stored remotely,
   *          a thumbnail record already exists) or when the resulting path
   *          cannot be resolved / does not exist on disk;
   *  - false when the source was rejected (not an image, larger than
   *          $max_size, unreadable local file, unsupported media) or when the
   *          stored file exists at $imgPath.
   * NOTE(review): presumably true lets other handlers of the event run and
   * false stops them - confirm against the event dispatcher.
   *
   * @param File        $file     the file of the created thumbnail
   * @param null|string &$imgPath out: the path to the created thumbnail (output parameter)
   * @param null|string $media    media type (unused)
   * @return bool
   * @throws AlreadyFulfilledException
   * @throws FileNotFoundException
   * @throws FileNotStoredLocallyException
   * @throws HTTP_Request2_Exception
   * @throws ServerException
   */
  public function onCreateFileImageThumbnailSource(File $file, ?string &$imgPath = null, ?string $media=null): bool
  {
      // If we are on a private node, we won't do any remote calls (just as a precaution until
      // we can configure this from config.php for the private nodes)
      if (common_config('site', 'private')) {
          return true;
      }

      // If there is a local filename, it is either a local file already or has already been downloaded.
      if (!$file->isStoredRemotely()) {
          common_debug(sprintf('File id==%d isn\'t a non-fetched remote file (%s), so nothing StoreRemoteMedia '.
                               'should handle.', $file->getID(), _ve($file->filename)));
          return true;
      }

      try {
          File_thumbnail::byFile($file);
          // If we don't get the exception `No result found on File_thumbnail lookup.` then Embed has already handled it most likely.
          return true;
      } catch (NoResultException $e) {
          // We can move on
      }

      $url = $file->getUrl(false);

      // Only needed for the File record when the original is kept; defined up
      // front so that neither branch below can leave it unset.
      $filehash = null;

      if (substr($url, 0, 7) == 'file://') {
          $localPath = substr($url, 7);

          // getimagesize() returns false for anything it cannot read as an
          // image; indexing that result would silently store null dimensions.
          $info = getimagesize($localPath);
          if ($info === false) {
              common_debug("StoreRemoteMedia was not able to find an image for URL `$url`: " .
                           'local file is not a readable image');
              return false;
          }

          if ($this->store_original) {
              // Same algorithm the remote branch uses on the downloaded data
              $filehash = hash_file(File::FILEHASH_ALG, $localPath);
              if ($filehash === false) {
                  common_debug("StoreRemoteMedia could not hash the local file for URL `$url`, aborted local storage.");
                  return false;
              }
          }

          $filename = basename($localPath);
          $width = $info[0];
          $height = $info[1];
      } else {
          $this->checkWhitelist($url);

          $head = (new HTTPClient())->head($url);
          $headers = array_change_key_case($head->getHeader(), CASE_LOWER);

          try {
              if (!$this->isRemoteImage($url, $headers)) {
                  return false;
              }

              $file_size = $this->getRemoteFileSize($url, $headers);
              if (($file_size != false) && ($file_size > $this->max_size)) {
                  common_debug("Went to store remote thumbnail of size " . $file_size .
                               " but the upload limit is " . $this->max_size . " so we aborted.");
                  return false;
              }
          } catch (Exception $err) {
              common_debug("Could not determine size of remote image, aborted local storage.");
              throw $err;
          }

          // First we download the file to memory and test whether it's actually an image file
          // FIXME: To support remote video/whatever files, this needs reworking.
          common_debug(sprintf(
              'Downloading remote image for file id==%u with URL: %s',
              $file->getID(),
              $url
          ));

          try {
              $imgData = HTTPClient::quickGet($url);
              if (!isset($imgData)) {
                  throw new UnsupportedMediaException('HTTPClient returned an empty result');
              }
              [$filename, $filehash, $width, $height] = $this->validateAndWriteImage(
                  $imgData,
                  $url,
                  $headers,
                  $file->getID()
              );
          } catch (UnsupportedMediaException $e) {
              // Couldn't find anything that looks like an image, nothing to do
              common_debug("StoreRemoteMedia was not able to find an image for URL `$url`: " . $e->getMessage());
              return false;
          }
      }

      $ft = null;
      if ($this->store_original) {
          try {
              // Update our database for the file record
              $orig = clone($file);
              $file->filename = $filename;
              $file->filehash = $filehash;
              $file->width = $width;
              $file->height = $height;
              // Throws exception on failure.
              $file->updateWithKeys($orig);
          } catch (Exception $err) {
              common_log(LOG_ERR, "Went to update a file entry on the database in " .
                         "StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: " . $err);
              throw $err;
          }
      } else {
          try {
              // Insert a thumbnail record for this file
              $data = new stdClass();
              $data->thumbnail_url = $url;
              $data->thumbnail_width = $width;
              $data->thumbnail_height = $height;
              File_thumbnail::saveNew($data, $file->getID());

              // Re-read the new record so the local file name can be attached to it
              $ft = File_thumbnail::byFile($file);
              $orig = clone($ft);
              $ft->filename = $filename;
              $ft->updateWithKeys($orig);
          } catch (Exception $err) {
              common_log(LOG_ERR, "Went to write a thumbnail entry to the database in " .
                         "StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: " . $err);
              throw $err;
          }
      }

      // Out
      try {
          $imgPath = $file->getFileOrThumbnailPath($ft);
          return !file_exists($imgPath);
      } catch (Exception $e) {
          return true;
      }
  }
  /**
   * Report the size a remote server advertises for a URL, taken from the
   * content-length response header. This isn't 100% foolproof but is
   * reliable enough for our purposes.
   *
   * When no headers are supplied, the URL is validated and a HEAD request is
   * issued to obtain them. Any exception raised along the way is logged and
   * turned into a false return value.
   *
   * @param string     $url     the remote URL
   * @param array|null $headers response headers with lower-cased names from an
   *                            earlier request, or null to fetch them here
   * @return string|bool the content-length value if present, false otherwise.
   */
  private function getRemoteFileSize($url, $headers = null)
  {
      try {
          if (is_null($headers)) {
              if (!common_valid_http_url($url)) {
                  common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::getRemoteFileSize()");
                  return false;
              }

              $response = (new HTTPClient())->head($url);
              $headers = array_change_key_case($response->getHeader(), CASE_LOWER);
          }

          if (isset($headers['content-length'])) {
              return $headers['content-length'];
          }
          return false;
      } catch (Exception $failure) {
          common_log(LOG_ERR, __CLASS__.': getRemoteFileSize on URL : '._ve($url).
                     ' threw exception: '.$failure->getMessage());
          return false;
      }
  }
  /**
   * Decide from the content-type response header whether a remote URL
   * serves an image.
   *
   * When no (or empty) headers are supplied, the URL is validated and a HEAD
   * request is issued to obtain them; an invalid URL is logged and reported
   * as "not an image".
   *
   * @param string     $url     the remote URL
   * @param array|null $headers response headers with lower-cased names from an
   *                            earlier request, or null/empty to fetch them here
   * @return bool true if the content type's media part is 'image', false otherwise.
   */
  private function isRemoteImage($url, $headers = null): bool
  {
      if (empty($headers)) {
          if (!common_valid_http_url($url)) {
              common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::isRemoteImage()");
              return false;
          }

          $response = (new HTTPClient())->head($url);
          $headers = array_change_key_case($response->getHeader(), CASE_LOWER);
      }

      // No usable content type means we cannot call it an image
      if (empty($headers['content-type'])) {
          return false;
      }

      return common_get_mime_media($headers['content-type']) === 'image';
  }
  /**
   * Check that $imgData really is an image, then write it to disk.
   *
   * The data goes to the File path when $store_original is set and to the
   * File_thumbnail path otherwise. A stored thumbnail is resized in place
   * only when $crop is enabled and the image exceeds $thumbnail_width by
   * $thumbnail_height. If the target path already exists nothing is written.
   *
   * @param mixed       $imgData - The image data to validate. Taken by reference to avoid copying
   * @param string|null $url     - The url where the image came from, to fetch metadata
   * @param array|null  $headers - The headers possible previous request to $url
   * @param int|null    $file_id - The id of the file this image belongs to, used for logging
   * @return array [filename, filehash, width, height]
   * @throws UnsupportedMediaException when the data is not recognised as an image
   * @throws ServerException when the file cannot be written
   */
  protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null) : array
  {
      // array indexes documented on php.net:
      // https://php.net/manual/en/function.getimagesize.php
      $info = @getimagesizefromstring($imgData);
      if ($info === false) {
          throw new UnsupportedMediaException(_m('Remote file format was not identified as an image.'), $url);
      }
      if (!$info[0] || !$info[1]) {
          throw new UnsupportedMediaException(_m('Image file had impossible geometry (0 width or height)'));
      }

      // Reported dimensions never exceed the configured thumbnail box
      $width = min($info[0], $this->thumbnail_width);
      $height = min($info[1], $this->thumbnail_height);

      $filehash = hash(File::FILEHASH_ALG, $imgData);

      try {
          $original_name = empty($url) ? null : HTTPClient::get_filename($url, $headers);
          $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash);
      } catch (Exception $err) {
          common_log(LOG_ERR, "Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail " .
                     "but encountered error: $err");
          throw $err;
      }

      try {
          $fullpath = $this->store_original ? File::path($filename) : File_thumbnail::path($filename);

          // An existing file means the work was done before; handled just below
          if (file_exists($fullpath)) {
              throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' .
                  ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath);
          }

          // Write the file to disk (only inside the installation directory). Throw Exception on failure
          if (strpos($fullpath, INSTALLDIR) !== 0 || file_put_contents($fullpath, $imgData) === false) {
              throw new ServerException(_m('Could not write downloaded file to disk.'));
          }

          // Second opinion on the written file; remove it again if it is not an image
          if (common_get_mime_media(MediaFile::getUploadedMimeType($fullpath)) !== 'image') {
              @unlink($fullpath);
              throw new UnsupportedMediaException(
                  _m('Remote file format was not identified as an image.'),
                  $url
              );
          }

          // If the image is not of the desired size, resize it
          if (!$this->store_original
              && $this->crop
              && ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height)
          ) {
              try {
                  // Temporary object, not stored in DB
                  $img = new ImageFile($fullpath, -1);
                  $fit = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->crop);
                  list($width, $height, $x, $y, $w, $h) = $fit;

                  // The boundary box for our resizing
                  $box = [
                      'width'  => $width,
                      'height' => $height,
                      'x'      => $x,
                      'y'      => $y,
                      'w'      => $w,
                      'h'      => $h,
                  ];
                  $img->resizeTo($fullpath, $box);
              } catch (\Intervention\Image\Exception\NotReadableException $e) {
                  common_log(LOG_ERR, "StoreRemoteMediaPlugin::storeRemoteThumbnail was unable to decode image with Intervention: $e");
                  // No need to interrupt processing
              }
          }
      } catch (AlreadyFulfilledException $e) {
          // Carry on
      } catch (Exception $err) {
          common_log(LOG_ERR, "Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail " .
                     "but encountered error: $err");
          throw $err;
      } finally {
          unset($imgData);
      }

      return [$filename, $filehash, $width, $height];
  }
  /**
   * Check the host of a URL against the trusted-source whitelist.
   *
   * Each key of $domain_whitelist is wrapped in slashes and matched as a
   * regular expression against the host part of $url.
   *
   * @param string $url the URL about to be fetched
   * @return bool|string false on no check made ($check_whitelist is off),
   *                     provider name on success
   * @throws ServerException if check is made but fails
   */
  protected function checkWhitelist($url)
  {
      if (!$this->check_whitelist) {
          return false; // indicates "no check made"
      }

      $host = parse_url($url, PHP_URL_HOST);

      // parse_url() gives null (no host component) or false (malformed URL)
      // instead of a string. Such a URL can never be trusted, so do not hand
      // the value to preg_match(), where it would be coerced to '' and could
      // match a permissive pattern.
      if (is_string($host)) {
          foreach ($this->domain_whitelist as $regex => $provider) {
              if (preg_match("/$regex/", $host)) {
                  return $provider; // we trust this source, return provider name
              }
          }
      }

      throw new ServerException(sprintf(_m('Domain not in remote thumbnail source whitelist: %s'), (string) $host));
  }
  /**
   * Event raised when GNU social polls the plugin for information about it.
   * Appends one entry describing this plugin (name, version, author,
   * homepage, description) to the $versions array.
   *
   * @param array &$versions list of plugin descriptions, extended in place
   * @return bool true hook value
   */
  public function onPluginVersion(array &$versions): bool
  {
      $entry = [
          'name'        => 'StoreRemoteMedia',
          'version'     => self::PLUGIN_VERSION,
          'author'      => 'Mikael Nordfeldth, Diogo Peralta Cordeiro',
          'homepage'    => GNUSOCIAL_ENGINE_URL,
          'description' =>
              // TRANS: Plugin description.
              _m('Plugin for downloading remotely attached files to local server.'),
      ];

      $versions[] = $entry;

      return true;
  }
  351. }