referrer-tracking.pl 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. # Copyright (C) 2004, 2005, 2006, 2010 Alex Schroeder <alex@gnu.org>
  2. #
  3. # This program is free software: you can redistribute it and/or modify it under
  4. # the terms of the GNU General Public License as published by the Free Software
  5. # Foundation, either version 3 of the License, or (at your option) any later
  6. # version.
  7. #
  8. # This program is distributed in the hope that it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License along with
  13. # this program. If not, see <http://www.gnu.org/licenses/>.
  14. use strict;
  15. use v5.10;
  16. AddModuleDescription('referrer-tracking.pl', 'Automatic Link Back');
  17. use LWP::UserAgent;
  18. our ($q, $Now, $OpenPageName, %Action, @KnownLocks, %AdminPages,
  19. $ScriptName, $DataDir, $EmbedWiki, $FS, @MyInitVariables,
  20. @MyAdminCode, $FullUrlPattern, @MyFooters);
  21. push(@KnownLocks, "refer_*");
  22. $Action{refer} = \&DoPrintAllReferers;
  23. our ($RefererDir, $RefererTimeLimit, $RefererLimit, $RefererFilter,
  24. $RefererTitleLimit, %Referers);
  25. $RefererTimeLimit = 86400; # How long referrals shall be remembered in seconds
  26. $RefererLimit = 15; # How many different referer shall be remembered
  27. $RefererFilter = 'ReferrerFilter'; # Name of the filter page
  28. $RefererTitleLimit = 70; # This is used to shorten long titles
  29. push(@MyInitVariables, \&RefererInit);
  30. sub RefererInit {
  31. $RefererFilter = FreeToNormal($RefererFilter); # spaces to underscores
  32. $AdminPages{$RefererFilter} = 1;
  33. $RefererDir = "$DataDir/referer"; # Stores referer data
  34. }
  35. push(@MyAdminCode, \&RefererMenu);
  36. sub RefererMenu {
  37. my ($id, $menuref, $restref) = @_;
  38. push(@$menuref, ScriptLink('action=refer', T('All Referrers'), 'refer'));
  39. }
  40. *RefererOldExpireKeepFiles = \&ExpireKeepFiles;
  41. *ExpireKeepFiles = \&RefererNewExpireKeepFiles;
  42. sub RefererNewExpireKeepFiles {
  43. RefererOldExpireKeepFiles(@_); # call with opened page
  44. ReadReferers($OpenPageName); # clean up reading (expiring) and writing
  45. WriteReferers($OpenPageName);
  46. }
  47. *RefererOldDeletePage = \&DeletePage;
  48. *DeletePage = \&RefererNewDeletePage;
  49. sub RefererNewDeletePage {
  50. my $status = RefererOldDeletePage(@_);
  51. return $status if $status; # this would be the error message
  52. my $id = shift;
  53. my $fname = GetRefererFile($id);
  54. Unlink($fname) if (IsFile($fname));
  55. return ''; # no error
  56. }
  57. ## == Actual Code ==
  58. sub GetRefererFile {
  59. my $id = shift;
  60. return "$RefererDir/$id.rf";
  61. }
  62. sub ReadReferers {
  63. my $file = GetRefererFile(shift);
  64. %Referers = ();
  65. if (IsFile($file)) {
  66. my ($status, $data) = ReadFile($file);
  67. %Referers = split(/$FS/, $data, -1) if $status;
  68. }
  69. ExpireReferers();
  70. }
  71. sub ExpireReferers { # no need to save the pruned list if nothing else changes
  72. if ($RefererTimeLimit) {
  73. foreach (keys %Referers) {
  74. if ($Now - $Referers{$_} > $RefererTimeLimit) {
  75. delete $Referers{$_};
  76. }
  77. }
  78. }
  79. if ($RefererLimit) {
  80. my @list = sort {$Referers{$a} cmp $Referers{$b}} keys %Referers;
  81. @list = @list[$RefererLimit .. @list-1];
  82. foreach (@list) {
  83. delete $Referers{$_};
  84. }
  85. }
  86. }
  87. # maybe test for valid utf-8 later?
  88. # http://www.w3.org/International/questions/qa-forms-utf-8
  89. # $field =~
  90. # m/^(
  91. # [\x09\x0A\x0D\x20-\x7E] # ASCII
  92. # | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
  93. # | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
  94. # | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
  95. # | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
  96. # | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
  97. # | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
  98. # | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
  99. # )*$/x;
  100. sub UrlToTitle {
  101. my $title = QuoteHtml(shift);
  102. $title = $1 if $title =~ /$FullUrlPattern/; # extract valid URL
  103. $title =~ s/\%([0-9a-f][0-9a-f])/chr(hex($1))/egi; # decode if possible
  104. $title =~ s!^https?://!!;
  105. $title =~ s!\.html?$!!;
  106. $title =~ s!/$!!;
  107. # shorten it if necessary
  108. if (length($title) > $RefererTitleLimit) {
  109. $title = substr($title, 0, $RefererTitleLimit - 10)
  110. . "..." . substr($title, -7);
  111. }
  112. return $title;
  113. }
  114. sub GetReferers {
  115. my $result = join(' ', map {
  116. my ($ts, $title) = split(/ /, $Referers{$_}, 2);
  117. $title = UrlToTitle($_) unless $title;
  118. $q->a({-href=>$_}, $title);
  119. } keys %Referers);
  120. return $q->div({-class=>'refer'}, $q->p(T('Referrers') . ': ' . $result))
  121. if $result;
  122. }
  123. sub PageContentToTitle {
  124. my ($content) = @_;
  125. my $title = $content =~ m!<h1.*?>(.*?)</h1>! ? $1 : '';
  126. $title = $1 if not $title and $content =~ m!<title>(.*?)</title>!;
  127. # get rid of extra tags
  128. $title =~ s!<.*?>!!g;
  129. # trimming
  130. $title =~ s!\s+! !g;
  131. $title =~ s!^ !!;
  132. $title =~ s! $!!;
  133. $title = substr($title, 0, $RefererTitleLimit) . "..."
  134. if length($title) > $RefererTitleLimit;
  135. return $title;
  136. }
  137. sub UpdateReferers {
  138. my $self = $ScriptName;
  139. my $referer = $q->referer();
  140. return unless $referer and $referer !~ /$self/;
  141. foreach (split(/\n/,GetPageContent($RefererFilter))) {
  142. if (/^ ([^ ]+)[ \t]*$/) { # only read lines with one word after one space
  143. my $regexp = $1;
  144. return if $referer =~ /$regexp/i;
  145. }
  146. }
  147. my $ua = LWP::UserAgent->new;
  148. my $response = $ua->get($referer);
  149. return unless $response->is_success and $response->decoded_content =~ /$self/;
  150. my $title = PageContentToTitle($response->decoded_content);
  151. # starting with a timestamp makes sure that numerical comparisons still work!
  152. $Referers{$referer} = "$Now $title";
  153. return 1;
  154. }
  155. sub WriteReferers {
  156. my $id = shift;
  157. return unless RequestLockDir('refer_' . $id); # not fatal
  158. my $data = join($FS, %Referers);
  159. my $file = GetRefererFile($id);
  160. if ($data) {
  161. CreateDir($RefererDir);
  162. WriteStringToFile($file, $data);
  163. } else {
  164. Unlink($file); # just try it, doesn't matter if it fails
  165. }
  166. ReleaseLockDir('refer_' . $id);
  167. }
  168. if ($MyFooters[-1] == \&DefaultFooter) {
  169. splice(@MyFooters, -1, 0, \&RefererTrack);
  170. } else {
  171. push(@MyFooters, \&RefererTrack);
  172. }
  173. sub RefererTrack {
  174. my $id = shift;
  175. return unless $id;
  176. ReadReferers($id);
  177. WriteReferers($id) if UpdateReferers($id);
  178. return GetReferers();
  179. }
  180. sub DoPrintAllReferers {
  181. print GetHeader('', T('All Referrers'), ''), $q->start_div({-class=>'content refer'});
  182. PrintAllReferers(AllPagesList());
  183. print $q->end_div();
  184. PrintFooter();
  185. }
  186. sub PrintAllReferers {
  187. for my $id (@_) {
  188. ReadReferers($id);
  189. print $q->div({-class=>'page'},
  190. $q->p(GetPageLink($id)),
  191. GetReferers()) if %Referers;
  192. }
  193. }