BlogspamNetModule.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * Module to check submitted notices with blogspam.net
  6. *
  7. * PHP version 5
  8. *
  9. * LICENCE: This program is free software: you can redistribute it and/or modify
  10. * it under the terms of the GNU Affero General Public License as published by
  11. * the Free Software Foundation, either version 3 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Affero General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Affero General Public License
  20. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. *
  22. * @category Module
  23. * @package StatusNet
  24. * @author Evan Prodromou <evan@status.net>
  25. * @author Brion Vibber <brion@status.net>
  26. * @copyright 2009 StatusNet, Inc.
  27. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  28. * @link http://status.net/
  29. */
  30. if (!defined('STATUSNET')) {
  31. exit(1);
  32. }
  33. define('PLUGIN_VERSION', '0.1.0');
  34. /**
  35. * Module to check submitted notices with blogspam.net
  36. *
  37. * When new notices are saved, we check their text with blogspam.net (or
  38. * a compatible service).
  39. *
  40. * Blogspam.net is supposed to catch blog comment spam, and I found that
  41. * some of its tests (min/max size, bayesian match) gave a lot of false positives.
  42. * So, I've turned those tests off by default. This may not get as many
  43. * hits, but it's better than nothing.
  44. *
  45. * @category Module
  46. * @package StatusNet
  47. * @author Evan Prodromou <evan@status.net>
  48. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  49. * @link http://status.net/
  50. *
  51. * @see Event
  52. */
  53. class BlogspamNetModule extends Module
  54. {
  55. var $baseUrl = 'http://test.blogspam.net:8888/';
  56. function __construct($url=null)
  57. {
  58. parent::__construct();
  59. if ($url) {
  60. $this->baseUrl = $url;
  61. }
  62. }
  63. function onStartNoticeSave($notice)
  64. {
  65. $args = $this->testArgs($notice);
  66. common_debug("Blogspamnet args = " . print_r($args, TRUE));
  67. $requestBody = xmlrpc_encode_request('testComment', array($args));
  68. $request = new HTTPClient($this->baseUrl, HTTPClient::METHOD_POST);
  69. $request->setHeader('Content-Type', 'text/xml');
  70. $request->setBody($requestBody);
  71. $httpResponse = $request->send();
  72. $response = xmlrpc_decode($httpResponse->getBody());
  73. if (xmlrpc_is_fault($response)) {
  74. throw new ServerException("$response[faultString] ($response[faultCode])", 500);
  75. } else {
  76. common_debug("Blogspamnet results = " . $response);
  77. if (preg_match('/^ERROR(:(.*))?$/', $response, $match)) {
  78. // TRANS: Server exception thrown when blogspam.net returns error status.
  79. // TRANS: %1$s is the base URL, %2$s is the error (unknown contents; no period).
  80. throw new ServerException(sprintf(_m('Error from %1$s: %2$s'), $this->baseUrl, $match[2]), 500);
  81. } else if (preg_match('/^SPAM(:(.*))?$/', $response, $match)) {
  82. // TRANS: Server exception thrown when blogspam.net returns spam status.
  83. // TRANS: Does not end with period because of unknown contents for %s (spam match).
  84. throw new ClientException(sprintf(_m('Spam checker results: %s'), $match[2]), 400);
  85. } else if (preg_match('/^OK$/', $response)) {
  86. // don't do anything
  87. } else {
  88. // TRANS: Server exception thrown when blogspam.net returns an unexpected status.
  89. // TRANS: %1$s is the base URL, %2$s is the response (unknown contents; no period).
  90. throw new ServerException(sprintf(_m('Unexpected response from %1$s: %2$s'), $this->baseUrl, $response), 500);
  91. }
  92. }
  93. return true;
  94. }
  95. function testArgs($notice)
  96. {
  97. $args = array();
  98. $args['comment'] = $notice->content;
  99. $args['ip'] = $this->getClientIP();
  100. if (isset($_SERVER) && array_key_exists('HTTP_USER_AGENT', $_SERVER)) {
  101. $args['agent'] = $_SERVER['HTTP_USER_AGENT'];
  102. }
  103. $profile = $notice->getProfile();
  104. if ($profile && $profile->homepage) {
  105. $args['link'] = $profile->homepage;
  106. }
  107. if ($profile && $profile->fullname) {
  108. $args['name'] = $profile->fullname;
  109. } else {
  110. $args['name'] = $profile->nickname;
  111. }
  112. $args['site'] = common_root_url();
  113. $args['version'] = $this->userAgent();
  114. $args['options'] = "max-size=" . common_config('site','textlimit') . ",min-size=0,min-words=0,exclude=bayasian";
  115. return $args;
  116. }
  117. function getClientIP()
  118. {
  119. if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) {
  120. // Note: order matters here; use proxy-forwarded stuff first
  121. foreach (array('HTTP_X_FORWARDED_FOR', 'CLIENT-IP', 'REMOTE_ADDR') as $k) {
  122. if (isset($_SERVER[$k])) {
  123. return $_SERVER[$k];
  124. }
  125. }
  126. }
  127. return '127.0.0.1';
  128. }
  129. function version()
  130. {
  131. return PLUGIN_VERSION;
  132. }
  133. function onModuleVersion(array &$versions)
  134. {
  135. $versions[] = array('name' => 'BlogspamNet',
  136. 'version' => self::PLUGIN_VERSION,
  137. 'author' => 'Evan Prodromou, Brion Vibber',
  138. 'homepage' => 'https://git.gnu.io/gnu/gnu-social/tree/master/plugins/BlogspamNet',
  139. 'rawdescription' =>
  140. // TRANS: Module description.
  141. _m('Module to check submitted notices with blogspam.net.'));
  142. return true;
  143. }
  144. }