Tag.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. namespace Component\Tag;
  20. use App\Core\Cache;
  21. use App\Core\DB;
  22. use App\Core\Event;
  23. use function App\Core\I18n\_m;
  24. use App\Core\Modules\Component;
  25. use App\Core\Router;
  26. use App\Entity\Actor;
  27. use App\Entity\Note;
  28. use App\Util\Common;
  29. use App\Util\Exception\ClientException;
  30. use App\Util\Formatting;
  31. use App\Util\Functional as GSF;
  32. use App\Util\HTML;
  33. use Component\Circle\Entity\ActorTag;
  34. use Component\Language\Entity\Language;
  35. use Component\Tag\Entity\NoteTag;
  36. use Doctrine\Common\Collections\ExpressionBuilder;
  37. use Doctrine\ORM\Query\Expr;
  38. use Doctrine\ORM\QueryBuilder;
  39. use EventResult;
  40. use Functional as F;
  41. use Symfony\Component\Form\Extension\Core\Type\CheckboxType;
  42. use Symfony\Component\HttpFoundation\Request;
  /**
   * Component responsible for extracting tags from posted notes, as well as normalizing them
   *
   * It registers the tag feed routes, stores a NoteTag for each hashtag found in a
   * note's content, renders hashtags as links in plain text content and contributes
   * tag expressions and joins to collection (search) queries.
   *
   * @author    Hugo Sales <hugo@hsal.es>
   * @author    Diogo Peralta Cordeiro <@diogo.site>
   * @copyright 2021 Free Software Foundation, Inc http://www.fsf.org
   * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
   */
  class Tag extends Component
  {
      /** Maximum number of characters kept for a tag (see ensureLength()) */
      public const MAX_TAG_LENGTH = 64;
      /** Matches a hashtag at the start of the text or after whitespace; group 1 is the leading separator, group 2 the '#tag' */
      public const TAG_REGEX = '/(^|\\s)(#[\\pL\\pN_\\-]{1,64})/u'; // Brion Vibber 2011-02-23 v2:classes/Notice.php:367 function saveTags
      /** Route requirement used for the tag placeholders of the tag feed routes */
      public const TAG_SLUG_REGEX = '[A-Za-z0-9]{1,64}';

      /**
       * Register the single-tag and multi-tag note feed routes
       */
      public function onAddRoute(Router $r): EventResult
      {
          $r->connect('single_note_tag', '/note-tag/{tag<' . self::TAG_SLUG_REGEX . '>}', [Controller\Tag::class, 'single_note_tag']);
          $r->connect('multi_note_tags', '/note-tags/{tags<(' . self::TAG_SLUG_REGEX . ',)+' . self::TAG_SLUG_REGEX . '>}', [Controller\Tag::class, 'multi_note_tags']);
          return Event::next;
      }

      /**
       * Persist a NoteTag linking $tag to the given note, if $tag passes validate()
       *
       * The canonical form is computed with the locale of $lang_id (when given) and
       * every cache entry listed by cacheKeys() for that canonical form is deleted.
       *
       * @param string   $tag     tag text, without the leading '#'
       * @param int      $note_id id of the note the tag belongs to
       * @param null|int $lang_id id of the Language used to canonicalise the tag, if any
       * @param array{tag_use_canonical?: bool} $extra_args
       *
       * @return null|NoteTag the persisted entity, or null when $tag is not a valid tag
       */
      public static function maybeCreateTag(string $tag, int $note_id, ?int $lang_id, array $extra_args = []): ?NoteTag
      {
          if (!self::validate($tag)) {
              return null; // Ignore invalid tag candidates
          }

          $locale        = \is_null($lang_id) ? null : Language::getById($lang_id)->getLocale();
          $canonical_tag = self::canonicalTag($tag, $locale);

          DB::persist($note_tag = NoteTag::create([
              'tag'           => $tag,
              'canonical'     => $canonical_tag,
              'note_id'       => $note_id,
              'use_canonical' => $extra_args['tag_use_canonical'] ?? false,
              'language_id'   => $lang_id,
          ]));

          // Drop the cached feeds keyed by this canonical tag
          foreach (self::cacheKeys($canonical_tag) as $key) {
              Cache::delete($key);
          }

          return $note_tag;
      }

      /**
       * Fetch (through the cache) the NoteTags of the notes authored by $actor_id
       *
       * @param int         $actor_id  id of the actor whose notes are inspected
       * @param null|string $note_type restrict to notes of this type; null uses the 'any' cache key and applies no type filter
       *
       * @return NoteTag[]
       */
      public static function getNoteTags(int $actor_id, ?string $note_type): array
      {
          $query = <<<'EOF'
              select nt from \App\Entity\Note n
              join \Component\Tag\Entity\NoteTag nt with n.id = nt.note_id
              where n.actor_id = :id
              EOF;
          $params = ['id' => $actor_id];

          if (!\is_null($note_type)) {
              $query .= ' and n.type = :type';
              $params['type'] = $note_type;
          }

          return Cache::getList(
              Actor::cacheKeys($actor_id, $note_type ?? 'any')['note-tags'],
              fn () => DB::dql($query, $params),
          );
      }

      /**
       * Process note by extracting any tags present
       *
       * Does nothing when $extra_args['TagProcessed'] is truthy. Otherwise every
       * distinct hashtag matched by TAG_REGEX is handed to maybeCreateTag(), along
       * with $extra_args so that the 'tag_use_canonical' preference is honoured.
       *
       * @param array{TagProcessed?: bool, tag_use_canonical?: bool} $extra_args
       */
      public function onProcessNoteContent(Note $note, string $content, string $content_type, array $extra_args): EventResult
      {
          if ($extra_args['TagProcessed'] ?? false) {
              return Event::next;
          }

          // XXX: We remove <span> because when content is in html the tag comes as #<span>hashtag</span>
          $content = str_replace('<span>', '', $content);

          $matches = [];
          preg_match_all(self::TAG_REGEX, $content, $matches, \PREG_SET_ORDER);

          // Group 2 of each match is the '#tag' itself, without the leading separator
          foreach (array_unique(array_column($matches, 2)) as $hashtag) {
              self::maybeCreateTag(
                  tag: self::extract($hashtag),
                  note_id: $note->getId(),
                  lang_id: $note->getLanguageId(),
                  extra_args: $extra_args,
              );
          }

          return Event::next;
      }

      /**
       * Replace every hashtag in $text with the markup produced by tagLink()
       *
       * @param string      $text   plain text content, modified in place
       * @param null|string $locale locale forwarded to the generated tag URLs, if any
       */
      public function onRenderPlainTextNoteContent(string &$text, ?string $locale = null): EventResult
      {
          // preg_replace_callback() returns null on failure (e.g. malformed UTF-8 with
          // the /u modifier); keep the text untouched in that case instead of nulling it
          $text = preg_replace_callback(
              self::TAG_REGEX,
              fn ($m) => $m[1] . self::tagLink($m[2], $locale),
              $text,
          ) ?? $text;

          return Event::next;
      }

      /**
       * Build the cache keys of the note and actor tag feeds for the given tag(s)
       *
       * @return array{note_single: string, note_multi: string, actor_single: string, actor_multi: string}
       */
      public static function cacheKeys(string $tag_single_or_multi): array
      {
          return [
              'note_single'  => "note-tag-feed-{$tag_single_or_multi}",
              'note_multi'   => "note-tags-feed-{$tag_single_or_multi}",
              'actor_single' => "actor-tag-feed-{$tag_single_or_multi}",
              'actor_multi'  => "actor-tags-feed-{$tag_single_or_multi}",
          ];
      }

      /**
       * Render a hashtag as '#' followed by a link to its single tag feed, wrapped in a span.tag
       *
       * @param string      $tag    the tag, with or without the leading '#'
       * @param null|string $locale added to the route parameters when not null
       */
      private static function tagLink(string $tag, ?string $locale): string
      {
          $tag          = self::extract($tag);
          $route_params = ['tag' => $tag];
          if (!\is_null($locale)) {
              $route_params['locale'] = $locale;
          }
          $url = Router::url('single_note_tag', $route_params);

          return HTML::html(['span' => ['attrs' => ['class' => 'tag'],
              '#' . HTML::html(['a' => [
                  'attrs' => [
                      'href' => $url,
                      'rel'  => 'tag', // https://microformats.org/wiki/rel-tag
                  ],
                  $tag,
              ]], options: ['indent' => false]),
          ]], options: ['indent' => false, 'raw' => true]);
      }

      /**
       * Strip the '#' prefix from a hashtag and truncate it to MAX_TAG_LENGTH characters
       */
      public static function extract(string $tag): string
      {
          return self::ensureLength(Formatting::removePrefix($tag, '#'));
      }

      /**
       * Check whether $tag (given without the leading '#') matches TAG_REGEX
       *
       * NOTE: TAG_REGEX is not anchored at the end, so a candidate whose leading
       * characters form a valid tag is accepted even if other characters follow.
       */
      public static function validate(string $tag): bool
      {
          return preg_match(self::TAG_REGEX, '#' . $tag) === 1;
      }

      /**
       * Normalise a tag with extract() and make sure the result passes validate()
       *
       * @throws ClientException when the extracted tag is not valid
       */
      public static function sanitize(string $tag): string
      {
          $tag = self::extract($tag);
          if (!self::validate($tag)) {
              throw new ClientException(_m('Invalid tag given: {tag}', ['{tag}' => $tag]));
          }
          return $tag;
      }

      /**
       * Truncate $tag to at most MAX_TAG_LENGTH characters (multibyte safe)
       */
      public static function ensureLength(string $tag): string
      {
          return mb_substr($tag, 0, self::MAX_TAG_LENGTH);
      }

      /**
       * Convert a tag to its canonical representation, by splitting it
       * into words, stemming it in the given language (if enabled) and
       * sluggifying it (turning it into an ASCII representation)
       *
       * A word is used unstemmed when no language is given or when no 'StemWord'
       * handler stops the event. The result is truncated with ensureLength().
       */
      public static function canonicalTag(string $tag, ?string $language = null): string
      {
          $result = '';
          foreach (Formatting::splitWords(str_replace('#', '', $tag)) as $word) {
              $temp_res = null;
              if (\is_null($language) || Event::handle('StemWord', [$language, $word, &$temp_res]) !== Event::stop) {
                  $temp_res = $word;
              }
              $result .= Formatting::slugify($temp_res);
          }
          return self::ensureLength($result);
      }

      /**
       * Populate $note_expr and/or $actor_expr with an expression to match a tag, if the term looks like a tag
       *
       * Only terms shaped like '<type>:#<tag>' are handled:
       *  - 'note:' and 'tag:' match notes only;
       *  - 'people:' and 'actor:' match actors only;
       *  - '(people|actor)(-|_)(circle|list):' match circles tagged with the term whose
       *    tagger is null or, when an $actor is given, that actor;
       *  - any other type sets both expressions and lets other handlers run.
       *
       * @param mixed $note_expr
       * @param mixed $actor_expr
       *
       * @throws ClientException when the tag in the term is invalid (see sanitize())
       */
      public function onCollectionQueryCreateExpression(ExpressionBuilder $eb, string $term, ?string $locale, ?Actor $actor, &$note_expr, &$actor_expr): EventResult
      {
          if (!str_contains($term, ':')) {
              return Event::next;
          }

          if (\is_null($locale)) {
              $locale = Common::currentLanguage();
          }

          // Only the part between the first and second ':' is considered as the tag
          [, $search_term] = explode(':', $term);
          if (!str_starts_with($search_term, '#')) {
              return Event::next;
          }

          $search_term           = self::sanitize($search_term);
          $canonical_search_term = self::canonicalTag($search_term, $locale);
          $temp_note_expr        = $eb->eq('note_tag.canonical', $canonical_search_term);
          $temp_actor_expr       = $eb->eq('actor_tag.canonical', $canonical_search_term);

          if (Formatting::startsWith($term, ['note:', 'tag:'])) {
              $note_expr = $temp_note_expr;
          } elseif (Formatting::startsWith($term, ['people:', 'actor:'])) {
              $actor_expr = $temp_actor_expr;
          } elseif (Formatting::startsWith($term, GSF::cartesianProduct([['people', 'actor'], ['circle', 'list'], [':']], separator: ['-', '_']))) {
              $null_tagger_expr = $eb->isNull('actor_circle.tagger');
              // Without an actor only circles with no tagger can match; with one, also the actor's own
              $tagger_expr = \is_null($actor)
                  ? $null_tagger_expr
                  : $eb->orX($null_tagger_expr, $eb->eq('actor_circle.tagger', $actor->getId()));
              // array_unique() preserves keys, reindex so the values form a plain list
              $tags     = array_values(array_unique([$search_term, $canonical_search_term]));
              $tag_expr = \count($tags) === 1
                  ? $eb->eq('actor_circle.tag', $tags[0])
                  : $eb->in('actor_circle.tag', $tags);
              $search_expr = $eb->andX(
                  $tagger_expr,
                  $tag_expr,
              );
              $note_expr  = $search_expr;
              $actor_expr = $search_expr;
          } else {
              // Unknown type: offer both expressions but let other handlers have a say
              $note_expr  = $temp_note_expr;
              $actor_expr = $temp_actor_expr;
              return Event::next;
          }

          return Event::stop;
      }

      /**
       * Left join the note and actor tag tables to the collection queries, unless already aliased
       */
      public function onCollectionQueryAddJoins(QueryBuilder &$note_qb, QueryBuilder &$actor_qb): EventResult
      {
          if (!\in_array('note_tag', $note_qb->getAllAliases(), true)) {
              $note_qb->leftJoin(NoteTag::class, 'note_tag', Expr\Join::WITH, 'note_tag.note_id = note.id');
          }
          if (!\in_array('actor_tag', $actor_qb->getAllAliases(), true)) {
              $actor_qb->leftJoin(ActorTag::class, 'actor_tag', Expr\Join::WITH, 'actor_tag.tagger = actor.id');
          }
          return Event::next;
      }

      /**
       * Add the 'tag_use_canonical' checkbox (optional, checked by default) to the posting form
       *
       * @param array{string, class-string, array<string, mixed>} $form_params
       */
      public function onPostingAddFormEntries(Request $request, Actor $actor, array &$form_params): EventResult
      {
          $form_params[] = [
              'tag_use_canonical',
              CheckboxType::class,
              [
                  'required' => false,
                  'data'     => true,
                  'label'    => _m('Make note tags canonical'),
                  'help'     => _m('Canonical tags will be treated as a version of an existing tag with the same root/stem (e.g. \'#great_tag\' will be considered as a version of \'#great\', if it already exists)'),
              ],
          ];
          return Event::next;
      }

      /**
       * Copy the 'tag_use_canonical' form value into the extra arguments used when processing the note content
       *
       * @param array{tag_use_canonical?: bool} $data
       * @param array{tag_use_canonical?: bool} $extra_args
       *
       * @throws ClientException when the preference is missing from $data
       */
      public function onAddExtraArgsToNoteContent(Request $request, Actor $actor, array $data, array &$extra_args): EventResult
      {
          if (!isset($data['tag_use_canonical'])) {
              throw new ClientException(_m('Missing Use Canonical preference for Tags.'));
          }
          $extra_args['tag_use_canonical'] = $data['tag_use_canonical'];
          return Event::next;
      }
  }