# frozen_string_literal: true

# sanitize_config.rb

  class Sanitize
    # Application-specific additions to Sanitize::Config: protocol lists,
    # node transformers and the two sanitizer configurations built from them.
    module Config
      # URL schemes accepted by the `protocols` entries of the configs below.
      HTTP_PROTOCOLS = %w(
        http
        https
      ).freeze

      # URL schemes that UNSUPPORTED_HREF_TRANSFORMER accepts on <a href>.
      LINK_PROTOCOLS = %w(
        http
        https
        dat
        dweb
        ipfs
        ipns
        ssb
        gopher
        xmpp
        magnet
        gemini
      ).freeze

      # Filters the `class` attribute of a node down to an allow-list.
      #
      # env - transformer environment Hash; only env[:node] is read.
      #
      # Nodes without a `class` attribute are left untouched. Otherwise the
      # attribute is rewritten to the surviving class names joined by single
      # spaces (an empty string when none survive).
      CLASS_WHITELIST_TRANSFORMER = lambda do |env|
        node = env[:node]
        class_list = node['class']&.split(/[\t\n\f\r ]/)

        return unless class_list

        # \A / \z anchor the whole token. Tokens cannot contain "\n" because
        # the split above consumes it, so this matches exactly what the
        # line anchors ^ / $ would, without depending on that split.
        class_list.keep_if do |e|
          next true if /\A(h|p|u|dt|e)-/.match?(e) # microformats classes
          next true if /\A(mention|hashtag)\z/.match?(e) # semantic classes

          /\A(ellipsis|invisible)\z/.match?(e) # link formatting classes
        end

        node['class'] = class_list.join(' ')
      end

      # Replaces <a> elements whose href scheme is not in LINK_PROTOCOLS with
      # a plain text node carrying the element's text.
      #
      # env - transformer environment Hash; env[:node_name] and env[:node]
      #       are read.
      #
      # An href that does not match Sanitize::REGEX_PROTOCOL (including a
      # missing href) is classified as :relative, which is not part of
      # LINK_PROTOCOLS, so such anchors are replaced by their text as well.
      UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
        return unless env[:node_name] == 'a'

        current_node = env[:node]

        scheme = if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
                   Regexp.last_match(1).downcase
                 else
                   :relative
                 end

        return if LINK_PROTOCOLS.include?(scheme)

        current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document))
      end

      # Renames <h6> elements to <p>; every other element is left alone.
      #
      # env - transformer environment Hash; env[:node_name] and env[:node]
      #       are read.
      UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
        return unless env[:node_name] == 'h6'

        env[:node].name = 'p'
      end

      # Configuration for post content: a small set of inline/block
      # elements, a fixed rel/target on links, and the three transformers
      # defined above.
      # NOTE(review): freeze_config is not defined in this file; presumably
      # it is provided by the sanitize gem's Sanitize::Config - confirm.
      MASTODON_STRICT ||= freeze_config(
        elements: %w(p br span a abbr del pre blockquote code b strong i em h1 h2 h3 h4 h5 ul ol li img),

        attributes: {
          'a' => %w(href rel class title),
          'span' => %w(class),
          'abbr' => %w(title),
          'blockquote' => %w(cite),
          'img' => %w(src alt),
        },

        add_attributes: {
          'a' => {
            'rel' => 'nofollow noopener noreferrer',
            'target' => '_blank',
          },
        },

        protocols: {
          # NOTE(review): href is limited to HTTP_PROTOCOLS here although
          # UNSUPPORTED_HREF_TRANSFORMER lets the wider LINK_PROTOCOLS
          # through; presumably the href of e.g. a magnet: link is then
          # stripped by Sanitize - confirm whether LINK_PROTOCOLS was meant.
          'a' => { 'href' => HTTP_PROTOCOLS },
          'blockquote' => { 'cite' => HTTP_PROTOCOLS },
          # Restrict image sources like every other URL-bearing attribute in
          # this file; an attribute with no entry here gets no scheme check.
          'img' => { 'src' => HTTP_PROTOCOLS },
        },

        transformers: [
          CLASS_WHITELIST_TRANSFORMER,
          UNSUPPORTED_ELEMENTS_TRANSFORMER,
          UNSUPPORTED_HREF_TRANSFORMER,
        ]
      )

      # Configuration for embedded media markup: only media/embed elements
      # are allowed, their URL attributes are limited to HTTP_PROTOCOLS, and
      # every <iframe> receives a fixed `sandbox` attribute.
      MASTODON_OEMBED ||= freeze_config(
        elements: %w(audio embed iframe source video),

        attributes: {
          'audio' => %w(controls),
          'embed' => %w(height src type width),
          'iframe' => %w(allowfullscreen frameborder height scrolling src width),
          'source' => %w(src type),
          'video' => %w(controls height loop width),
        },

        protocols: {
          'embed' => { 'src' => HTTP_PROTOCOLS },
          'iframe' => { 'src' => HTTP_PROTOCOLS },
          'source' => { 'src' => HTTP_PROTOCOLS },
        },

        add_attributes: {
          'iframe' => { 'sandbox' => 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms' },
        }
      )
    end
  end