ban-contributors.pl 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. # Copyright (C) 2013-2021 Alex Schroeder <alex@gnu.org>
  2. # This program is free software: you can redistribute it and/or modify it under
  3. # the terms of the GNU General Public License as published by the Free Software
  4. # Foundation, either version 3 of the License, or (at your option) any later
  5. # version.
  6. #
  7. # This program is distributed in the hope that it will be useful, but WITHOUT
  8. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  9. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License along with
  12. # this program. If not, see <http://www.gnu.org/licenses/>.
  13. =head1 Ban Contributors Extension
  14. This module adds "Ban contributors" to the administration page. If you
  15. click on it, it will list all the recent contributors to the page
  16. you've been looking at. Each contributor (IP or hostname) will be
  17. compared to the list of regular expressions on the C<BannedHosts> page
  18. (see C<$BannedHosts>). If the contributor is already banned, this is
  19. mentioned. If the contributor is not banned, you'll see a button
  20. allowing you to ban him or her immediately. If you click the button,
  21. the IP will be added to the C<BannedHosts> page for you.
  22. =cut
  23. use strict;
  24. use v5.10;
  25. our ($q, $Now, %Page, $OpenPageName, %Action, $UrlPattern, $BannedContent, $BannedHosts, @MyAdminCode);
  26. AddModuleDescription('ban-contributors.pl', 'Ban Contributors Extension');
  27. push(@MyAdminCode, \&BanMenu);
  28. sub BanMenu {
  29. my ($id, $menuref, $restref) = @_;
  30. if ($id and UserIsAdmin()) {
  31. push(@$menuref, ScriptLink('action=ban;id=' . UrlEncode($id),
  32. T('Ban contributors')));
  33. }
  34. }
  35. $Action{ban} = \&DoBanHosts;
  36. sub IsItBanned {
  37. my ($it, $regexps) = @_;
  38. my $re = undef;
  39. foreach my $regexp (@$regexps) {
  40. eval { $re = qr/$regexp/i; };
  41. if (defined($re) && $it =~ $re) {
  42. return $it;
  43. }
  44. }
  45. }
  46. sub DoBanHosts {
  47. my $id = shift;
  48. my $content = GetParam('content', '');
  49. my $range = GetParam('range', '');
  50. my $regexp = GetParam('regexp', '');
  51. if ($content) {
  52. SetParam('text', GetPageContent($BannedContent)
  53. . $content . " # " . CalcDay($Now) . " "
  54. . NormalToFree($id) . "\n");
  55. SetParam('summary', NormalToFree($id));
  56. DoPost($BannedContent);
  57. } elsif ($regexp) {
  58. SetParam('text', GetPageContent($BannedHosts)
  59. . $regexp . " # " . CalcDay($Now)
  60. . " $range "
  61. . NormalToFree($id) . "\n");
  62. SetParam('summary', NormalToFree($id));
  63. DoPost($BannedHosts);
  64. } else {
  65. ValidIdOrDie($id);
  66. print GetHeader('', Ts('Ban Contributors to %s', NormalToFree($id)));
  67. SetParam('rcidonly', $id);
  68. SetParam('all', 1);
  69. SetParam('showedit', 1);
  70. my %contrib = ();
  71. for my $line (GetRcLines()) {
  72. $contrib{$line->[4]}->{$line->[5]} = 1 if $line->[4];
  73. }
  74. my @regexps = ();
  75. foreach (split(/\n/, GetPageContent($BannedHosts))) {
  76. if (/^\s*([^#]\S+)/) { # all lines except empty lines and comments, trim whitespace
  77. push(@regexps, $1);
  78. }
  79. }
  80. print '<div class="content ban">';
  81. foreach (sort(keys %contrib)) {
  82. my $name = $_;
  83. delete $contrib{$_}{''};
  84. $name .= " (" . join(", ", sort(keys(%{$contrib{$_}}))) . ")";
  85. if (IsItBanned($_, \@regexps)) {
  86. print $q->p(Ts("%s is banned", $name));
  87. } else {
  88. my @pairs = BanContributors::get_range($_);
  89. while (@pairs) {
  90. my $start = shift(@pairs);
  91. my $end = shift(@pairs);
  92. $range = "[$start - $end]";
  93. $name .= " " . $range;
  94. print GetFormStart(undef, 'get', 'ban'),
  95. GetHiddenValue('action', 'ban'),
  96. GetHiddenValue('id', $id),
  97. GetHiddenValue('range', $range),
  98. GetHiddenValue('regexp', BanContributors::get_regexp_ip($start, $end)),
  99. GetHiddenValue('recent_edit', 'on'),
  100. $q->p($name, $q->submit(T('Ban!'))), $q->end_form();
  101. }
  102. }
  103. }
  104. }
  105. PrintFooter();
  106. }
  107. =head2 Rollback
  108. If you are an admin and rolled back a single page, this extension will
  109. list the URLs your rollback removed (assuming that those URLs are part
  110. of the spam) and it will allow you to provide a regular expression
  111. that will be added to BannedHosts.
  112. =cut
  113. *OldBanContributorsWriteRcLog = \&WriteRcLog;
  114. *WriteRcLog = \&NewBanContributorsWriteRcLog;
  115. sub NewBanContributorsWriteRcLog {
  116. my ($tag, $id, $to) = @_;
  117. if ($tag eq '[[rollback]]' and $id and $to > 0
  118. and $OpenPageName eq $id and UserIsAdmin()) {
  119. # we currently have the clean page loaded, so we need to reload
  120. # the spammed revision (there is a possible race condition here)
  121. my $old = GetTextRevision($Page{revision} - 1, 1)->{text};
  122. my %urls = map {$_ => 1 } $old =~ /$UrlPattern/g;
  123. # we open the file again to force a load of the despammed page
  124. foreach my $url ($Page{text} =~ /$UrlPattern/g) {
  125. delete($urls{$url});
  126. }
  127. # we also remove any candidates that are already banned
  128. my @regexps = ();
  129. foreach (split(/\n/, GetPageContent($BannedContent))) {
  130. if (/^\s*([^#]\S+)/) { # all lines except empty lines and comments, trim whitespace
  131. push(@regexps, $1);
  132. }
  133. }
  134. foreach my $url (keys %urls) {
  135. delete($urls{$url}) if IsItBanned($url, \@regexps);
  136. }
  137. if (keys %urls) {
  138. print $q->p(Ts("These URLs were rolled back. Perhaps you want to add a regular expression to %s?",
  139. GetPageLink($BannedContent)));
  140. print $q->pre(join("\n", sort keys %urls));
  141. print GetFormStart(undef, 'get', 'ban'),
  142. GetHiddenValue('action', 'ban'),
  143. GetHiddenValue('id', $id),
  144. GetHiddenValue('recent_edit', 'on'),
  145. $q->p($q->label({-for=>'content'}, T('Regular expression:')), " ",
  146. $q->textfield(-name=>'content', -size=>30), " ",
  147. $q->submit(T('Ban!'))),
  148. $q->end_form();
  149. };
  150. print $q->p(T("Consider banning the IP number as well:"), ' ',
  151. ScriptLink('action=ban;id=' . UrlEncode($id), T('Ban contributors')));
  152. };
  153. return OldBanContributorsWriteRcLog(@_);
  154. }
  155. package BanContributors;
  156. use Net::Whois::Parser qw/parse_whois/;
  157. use Net::IP;
  158. sub get_range {
  159. my $ip = shift;
  160. my $response = parse_whois(domain => $ip);
  161. my $re = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}';
  162. # Just try all the keys and see whether there is a range match.
  163. for (keys %$response) {
  164. my @result;
  165. $_ = $response->{$_};
  166. for (ref eq 'ARRAY' ? @$_ : $_) {
  167. $ip = Net::IP->new($_);
  168. push(@result, $ip->ip, $ip->last_ip) if $ip;
  169. }
  170. return @result if @result;
  171. }
  172. # Fallback
  173. return $ip, $ip;
  174. }
  175. sub get_groups {
  176. my ($from, $to) = @_;
  177. my @groups;
  178. if ($from == $to) {
  179. return [$from, $to];
  180. }
  181. # ones up to the nearest ten
  182. if ($from < $to and ($from % 10 or $from < 10)) {
  183. # from 5-7: as is
  184. # from 5-17: 5 + 9 - 5 = 9 thus 5-9, set $from to 10
  185. my $to2 = int($to/10) > int($from/10) ? $from + 9 - $from % 10 : $to;
  186. push(@groups, [$from, $to2]);
  187. $from = $to2 + 1;
  188. }
  189. # tens up to the nearest hundred
  190. if ($from < $to and $from % 100) {
  191. # 10-17: as is
  192. # 10-82: 10 to 79, set $from to 80 (8*10-1)
  193. # 10-182: 10 to 99, set $from to 100 (10+99=10=99)
  194. # 110-182: 110 to 179, set $from to 180 (170)
  195. # 110-222: 110 to 199, set $from to 200 (110+99-10 = 199)
  196. my $to2 = int($to/100) > int($from/100) ? $from + 99 - $from % 100
  197. : int($to/10) > int($from/10) ? int($to / 10) * 10 - 1
  198. : $to;
  199. push(@groups, [$from, $to2]);
  200. $from = $to2 + 1;
  201. }
  202. # up to the next hundred
  203. if (int($to/100) > int($from/100)) {
  204. # from 100 to 223: set $from to 200 (2*100-1)
  205. my $to2 = int($to/100) * 100 - 1;
  206. push(@groups, [$from, $to2]);
  207. $from = $to2 + 1;
  208. }
  209. # up to the next ten
  210. if (int($to/10) > int($from/10)) {
  211. # 10 to 17: skip
  212. # 100 to 143: set $from to 140 (14*10-1)
  213. my $to2 = int($to / 10) * 10 - 1;
  214. push(@groups, [$from, $to2]);
  215. $from = $to2 + 1;
  216. }
  217. # up to the next one
  218. if ($from <= $to) {
  219. push(@groups, [$from, $to]);
  220. }
  221. # warn join("; ", map { "@$_" } @groups);
  222. return \@groups;
  223. }
  224. sub get_regexp_range {
  225. my @chars;
  226. for my $group (@{get_groups(@_)}) {
  227. my ($from, $to) = @$group;
  228. my $char;
  229. for (my $i = length($from); $i >= 1; $i--) {
  230. if (substr($from, - $i, 1) eq substr($to, - $i, 1)) {
  231. $char .= substr($from, - $i, 1);
  232. } else {
  233. $char .= '[' . substr($from, - $i, 1) . '-' . substr($to, - $i, 1). ']';
  234. }
  235. }
  236. push(@chars, $char);
  237. }
  238. return join('|', @chars);
  239. }
  240. sub get_regexp_ip {
  241. my ($from, $to) = @_;
  242. my @start = split(/\./, $from);
  243. my @end = split(/\./, $to);
  244. my $regexp = "^";
  245. for my $i (0 .. 3) {
  246. if ($start[$i] eq $end[$i]) {
  247. # if the byte is the same, use it as is
  248. $regexp .= $start[$i];
  249. $regexp .= '\.' if $i < 3;
  250. } elsif ($start[$i] == 0 and $end[$i] == 255) {
  251. # the starting byte is 0 and the end byte is 255, then anything goes:
  252. # we're done, e.g. 185.244.214.0 - 185.244.214.255 results in 185\.244\.214\.
  253. last;
  254. } elsif ($i == 3 and $start[$i] != $end[$i]) {
  255. # example 45.87.2.128 - 45.87.2.255: the last bytes differ
  256. $regexp .= '(' . get_regexp_range($start[$i], $end[$i]) . ')';
  257. last;
  258. } elsif ($start[$i + 1] == 0 and $end[$i + 1] == 255) {
  259. # if we're here, we already know that the start byte and the end byte are
  260. # not the same; if the next bytes are from 0 to 255, we know that
  261. # everything else doesn't matter, e.g. 42.118.48.0 - 42.118.63.255
  262. $regexp .= '(' . get_regexp_range($start[$i], $end[$i]) . ')';
  263. $regexp .= '\.' if $i < 3;
  264. last;
  265. } elsif ($end[$i] - $start[$i] == 1 and $start[$i + 1] > 0 and $end[$i + 1] < 255) {
  266. # if we're here, we already know that the start byte and the end byte are
  267. # not the same; if the starting byte of the next (!) byte is bigger than
  268. # zero, then we need groups: in the case 77.56.180.0 - 77.57.70.255 for
  269. # example,
  270. $regexp .= '(' . $start[$i] . '\.(' . get_regexp_range($start[$i + 1], 255) . ')|'
  271. . $end[$i] . '\.(' . get_regexp_range(0, $end[$i + 1]) . ')';
  272. $regexp .= '\.' if $i < 3;
  273. last;
  274. } else {
  275. warn "Unhandled regexp: $from - $to ($i)";
  276. $regexp .= 'XXX';
  277. $regexp .= '\.' if $i < 3;
  278. last;
  279. }
  280. }
  281. return $regexp;
  282. }
  283. # this is required in case we concatenate other modules to this one
  284. package OddMuse;