StoreRemoteMedia.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. namespace Plugin\StoreRemoteMedia;
  20. use App\Core\DB\DB;
  21. use App\Core\Event;
  22. use App\Core\GSFile;
  23. use App\Core\HTTPClient;
  24. use function App\Core\I18n\_m;
  25. use App\Core\Log;
  26. use App\Core\Modules\Plugin;
  27. use App\Entity\Note;
  28. use App\Util\Common;
  29. use App\Util\Exception\DuplicateFoundException;
  30. use App\Util\Exception\ServerException;
  31. use App\Util\Exception\TemporaryFileException;
  32. use App\Util\TemporaryFile;
  33. use Component\Attachment\Entity\AttachmentThumbnail;
  34. use Component\Attachment\Entity\AttachmentToLink;
  35. use Component\Attachment\Entity\AttachmentToNote;
  36. use Component\Link\Entity\Link;
  37. use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
  38. use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
  39. use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
  40. use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
  41. /**
  42. * The StoreRemoteMedia plugin downloads remotely attached files to local server.
  43. *
  44. * @package GNUsocial
  45. *
  46. * @author Mikael Nordfeldth
  47. * @author Stephen Paul Weber
  48. * @author Mikael Nordfeldth
  49. * @author Miguel Dantas
  50. * @author Diogo Peralta Cordeiro
  51. * @copyright 2015-2016, 2019-2021 Free Software Foundation, Inc http://www.fsf.org
  52. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  53. */
  class StoreRemoteMedia extends Plugin
  {
      /**
       * Version of this plugin, as reported by onPluginVersion().
       */
      public function version(): string
      {
          return '3.0.0';
      }

      /**
       * Settings which can be set in social.local.yaml
       * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
       *
       * Both lists may be written either as a plain list of regexps
       * (`['^example\.org$']`) or as a `regexp => provider name` map;
       * allowedLink() accepts both forms.
       */
      public bool $check_whitelist   = false;
      public bool $check_blacklist   = false;
      public array $domain_whitelist = [
          // hostname
          '.*', // Default to allowing any host
      ];
      public array $domain_blacklist = [];

      /**
       * Whether to maintain a copy of the original media or only a thumbnail of it
       */
      private function getStoreOriginal(): bool
      {
          return Common::config('plugin_store_remote_media', 'store_original');
      }

      /**
       * Largest remote file (in the unit used by the configuration values) this
       * plugin will store: the smaller of the plugin's own `max_file_size` and
       * the global attachments `file_quota`.
       */
      private function getMaxFileSize(): int
      {
          return min(Common::config('plugin_store_remote_media', 'max_file_size'), Common::config('attachments', 'file_quota'));
      }

      /**
       * Value of the `smart_crop` setting, passed as the `crop` argument when a
       * thumbnail is generated in place of the original.
       */
      private function getSmartCrop(): bool
      {
          return Common::config('plugin_store_remote_media', 'smart_crop');
      }

      /**
       * Download the media a newly found link points to and attach it to the note.
       *
       * Links with a `text` major mimetype are left to other handlers, as are
       * links rejected by the domain lists, links whose size is unknown or over
       * the limit, and links that fail to download.
       *
       * @param Link $link the link found in the note
       * @param Note $note the note the link was found in
       *
       * @throws DuplicateFoundException
       * @throws ServerException
       * @throws TemporaryFileException
       *
       * @return bool Event::stop when the link was handled here, Event::next otherwise
       */
      public function onNewLinkFromNote(Link $link, Note $note): bool
      {
          // Embed is the plugin to handle these
          if ($link->getMimetypeMajor() === 'text') {
              return Event::next;
          }

          $url = $link->getUrl();

          // Is this URL trusted?
          if (!$this->allowedLink($url)) {
              Log::info("Blocked URL ({$url}) in StoreRemoteMedia->onNewLinkFromNote.");
              return Event::next;
          }

          // Have we handled it already?
          $attachment_to_link = DB::find(
              'attachment_to_link',
              ['link_id' => $link->getId()],
          );

          // If it was handled already
          // XXX: Maybe it would be interesting to have retroactive application of $this->getOriginal here
          if (!\is_null($attachment_to_link)) {
              // Relate the note with the existing attachment
              DB::persist(AttachmentToNote::create([
                  'attachment_id' => $attachment_to_link->getAttachmentId(),
                  'note_id'       => $note->getId(),
              ]));
              DB::flush();
              return Event::stop;
          }

          // Validate if the URL really does point to a remote image
          try {
              $headers = HTTPClient::head($url)->getHeaders();
          } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
              Log::debug('StoreRemoteMedia->onNewLinkFromNote@HTTPHead->getHeaders: ' . $e->getMessage(), [$e]);
              return Event::next;
          }

          // Does it respect the file quota?
          // The header value is a string, so validate it and compare it as an integer.
          $file_size = $headers['content-length'][0] ?? null;
          $max_size  = $this->getMaxFileSize();
          if (!is_numeric($file_size) || (int) $file_size > $max_size) {
              Log::debug("Went to download remote media of size {$file_size} but the plugin's filesize limit is {$max_size} so we aborted in StoreRemoteMedia->onNewLinkFromNote.");
              return Event::next;
          }

          // Retrieve media. The download can fail in the same ways as the HEAD
          // request, so treat a failure as "not handled" instead of letting the
          // exception escape the event handler.
          try {
              $get_response = HTTPClient::get($url);
              $media        = $get_response->getContent();
              $mimetype     = $get_response->getHeaders()['content-type'][0] ?? null;
          } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
              Log::debug('StoreRemoteMedia->onNewLinkFromNote@HTTPGet: ' . $e->getMessage(), [$e]);
              return Event::next;
          }
          unset($get_response);

          // TODO: Add functionality to specify allowed content types to retrieve here
          // Ensure we still want to handle it
          if ($mimetype != $link->getMimetype()) {
              $link->setMimetype($mimetype);
              DB::persist($link);
              DB::flush();
              if ($link->getMimetypeMajor() === 'text') {
                  return Event::next;
              }
          }

          // The size announced by the HEAD request is only advisory, enforce the
          // limit on what was actually downloaded as well.
          $actual_size = \strlen($media);
          if ($actual_size > $max_size) {
              Log::debug("Downloaded remote media of size {$actual_size} but the plugin's filesize limit is {$max_size} so we aborted in StoreRemoteMedia->onNewLinkFromNote.");
              return Event::next;
          }

          // We can ignore empty files safely, the user can guess them (:
          // (compared against '' explicitly so that a body of "0" is not mistaken for empty)
          if ($media !== '') {
              // Create an attachment for this
              $temp_file = new TemporaryFile();
              $temp_file->write($media);
              $attachment = GSFile::storeFileAsAttachment($temp_file);

              // Relate the link with the attachment
              // TODO: Create a function that gets the title from content disposition or URL when such header isn't available
              DB::persist(AttachmentToLink::create([
                  'link_id'       => $link->getId(),
                  'attachment_id' => $attachment->getId(),
              ]));

              // Relate the note with the attachment
              DB::persist(AttachmentToNote::create([
                  'attachment_id' => $attachment->getId(),
                  'note_id'       => $note->getId(),
              ]));
              DB::flush();

              // Should we create a thumb and delete the original file?
              if (!$this->getStoreOriginal()) {
                  // Generate the thumbnail first, it needs the original file
                  AttachmentThumbnail::getOrCreate(
                      attachment: $attachment,
                      size: 'medium',
                      crop: $this->getSmartCrop(),
                  );
                  $attachment->deleteStorage();
              }
          }

          return Event::stop;
      }

      /**
       * Check the host of a URL against the configured whitelist and blacklist.
       *
       * A list is only consulted when its `check_*` flag is enabled. When at
       * least one list is enabled and no host can be extracted from the URL,
       * the URL is rejected, since there is nothing to match against.
       *
       * @return bool true if allowed by the lists, false otherwise
       */
      private function allowedLink(string $url): bool
      {
          if (!$this->check_whitelist && !$this->check_blacklist) {
              return true;
          }

          // parse_url() yields null/false when there is no usable host
          $host = parse_url($url, \PHP_URL_HOST);
          if (!\is_string($host) || $host === '') {
              return false;
          }

          // don't trust by default: the host has to match a whitelist entry
          if ($this->check_whitelist && !$this->hostMatchesAny($host, $this->domain_whitelist)) {
              return false;
          }

          // assume it passed by default: the host must not match a blacklist entry
          if ($this->check_blacklist && $this->hostMatchesAny($host, $this->domain_blacklist)) {
              return false;
          }

          return true;
      }

      /**
       * Tell whether a host matches at least one regexp of a domain list.
       *
       * @param string $host     hostname to test
       * @param array  $patterns either a list of regexps or a `regexp => provider` map
       */
      private function hostMatchesAny(string $host, array $patterns): bool
      {
          foreach ($patterns as $key => $value) {
              // In a `regexp => provider` map the key is the pattern; in a plain
              // list the key is only a numeric index and the value is the pattern
              $regex = \is_string($key) ? $key : (string) $value;
              if (preg_match("/{$regex}/", $host) === 1) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Event raised when GNU social polls the plugin for information about it.
       * Adds this plugin's version information to $versions array
       *
       * @param array $versions inherited from parent
       *
       * @return bool true hook value
       */
      public function onPluginVersion(array &$versions): bool
      {
          $versions[] = [
              'name'     => 'StoreRemoteMedia',
              'version'  => $this->version(),
              'author'   => 'Mikael Nordfeldth, Diogo Peralta Cordeiro',
              'homepage' => GNUSOCIAL_PROJECT_URL,
              // TRANS: Plugin description.
              'description' => _m('Plugin for downloading remotely attached files to local server.'),
          ];
          return Event::next;
      }
  }