importtwitteratom.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #!/usr/bin/env php
  2. <?php
  3. /*
  4. * StatusNet - the distributed open-source microblogging tool
  5. * Copyright (C) 2010 StatusNet, Inc.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
  21. $shortoptions = 'i:n:f:';
  22. $longoptions = array('id=', 'nickname=', 'file=');
  23. $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
  24. importtwitteratom.php [options]
  25. import an Atom feed from Twitter as notices by a user
  26. -i --id ID of user to update
  27. -n --nickname nickname of the user to update
  28. -f --file file to import (Atom-only for now)
  29. END_OF_IMPORTTWITTERATOM_HELP;
  30. require_once INSTALLDIR.'/scripts/commandline.inc';
  31. require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';
  32. function getAtomFeedDocument()
  33. {
  34. $filename = get_option_value('f', 'file');
  35. if (empty($filename)) {
  36. show_help();
  37. exit(1);
  38. }
  39. if (!file_exists($filename)) {
  40. throw new Exception("No such file '$filename'.");
  41. }
  42. if (!is_file($filename)) {
  43. throw new Exception("Not a regular file: '$filename'.");
  44. }
  45. if (!is_readable($filename)) {
  46. throw new Exception("File '$filename' not readable.");
  47. }
  48. $xml = file_get_contents($filename);
  49. $dom = DOMDocument::loadXML($xml);
  50. if ($dom->documentElement->namespaceURI != Activity::ATOM ||
  51. $dom->documentElement->localName != 'feed') {
  52. throw new Exception("'$filename' is not an Atom feed.");
  53. }
  54. return $dom;
  55. }
  56. function importActivityStream($user, $doc)
  57. {
  58. $feed = $doc->documentElement;
  59. $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');
  60. for ($i = $entries->length - 1; $i >= 0; $i--) {
  61. $entry = $entries->item($i);
  62. $activity = new Activity($entry, $feed);
  63. $object = $activity->objects[0];
  64. if (!have_option('q', 'quiet')) {
  65. print $activity->content . "\n";
  66. }
  67. $html = getTweetHtml($object->link);
  68. $config = array('safe' => 1,
  69. 'deny_attribute' => 'class,rel,id,style,on*');
  70. $html = htmLawed($html, $config);
  71. $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
  72. $notice = Notice::saveNew($user->id,
  73. $content,
  74. 'importtwitter',
  75. array('uri' => $object->id,
  76. 'url' => $object->link,
  77. 'rendered' => $html,
  78. 'created' => common_sql_date($activity->time),
  79. 'replies' => array(),
  80. 'groups' => array()));
  81. }
  82. }
  83. function getTweetHtml($url)
  84. {
  85. try {
  86. $client = new HTTPClient();
  87. $response = $client->get($url);
  88. } catch (HTTP_Request2_Exception $e) {
  89. print "ERROR: HTTP response " . $e->getMessage() . "\n";
  90. return false;
  91. }
  92. if (!$response->isOk()) {
  93. print "ERROR: HTTP response " . $response->getCode() . "\n";
  94. return false;
  95. }
  96. $body = $response->getBody();
  97. return tweetHtmlFromBody($body);
  98. }
  99. function tweetHtmlFromBody($body)
  100. {
  101. $doc = DOMDocument::loadHTML($body);
  102. $xpath = new DOMXPath($doc);
  103. $spans = $xpath->query('//span[@class="entry-content"]');
  104. if ($spans->length == 0) {
  105. print "ERROR: No content in tweet page.\n";
  106. return '';
  107. }
  108. $span = $spans->item(0);
  109. $children = $span->childNodes;
  110. $text = '';
  111. for ($i = 0; $i < $children->length; $i++) {
  112. $child = $children->item($i);
  113. if ($child instanceof DOMElement &&
  114. $child->tagName == 'a' &&
  115. !preg_match('#^https?://#', $child->getAttribute('href'))) {
  116. $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
  117. }
  118. $text .= $doc->saveXML($child);
  119. }
  120. return $text;
  121. }
  122. try {
  123. $doc = getAtomFeedDocument();
  124. $user = getUser();
  125. importActivityStream($user, $doc);
  126. } catch (Exception $e) {
  127. print $e->getMessage()."\n";
  128. exit(1);
  129. }