twitterimport.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * PHP version 5
  6. *
  7. * LICENCE: This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. * @category Module
  21. * @package StatusNet
  22. * @author Zach Copley <zach@status.net>
  23. * @author Julien C <chaumond@gmail.com>
  24. * @author Brion Vibber <brion@status.net>
  25. * @copyright 2009-2010 StatusNet, Inc.
  26. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  27. * @link http://status.net/
  28. */
  29. if (!defined('STATUSNET')) {
  30. exit(1);
  31. }
  32. require_once dirname(__DIR__) . '/twitter.php';
  33. /**
  34. * Encapsulation of the Twitter status -> notice incoming bridge import.
  35. * Is used by both the polling twitterstatusfetcher.php daemon, and the
  36. * in-progress streaming import.
  37. *
  38. * @category Module
  39. * @package StatusNet
  40. * @author Zach Copley <zach@status.net>
  41. * @author Julien C <chaumond@gmail.com>
  42. * @author Brion Vibber <brion@status.net>
  43. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  44. * @link http://status.net/
  45. * @link http://twitter.com/
  46. */
  47. class TwitterImport
  48. {
  49. public $avatarsizename = 'reasonably_small'; // a Twitter size name for 128x128 px
  50. public $avatarsize = 128; // they're square...
  51. public function importStatus($status)
  52. {
  53. // Hacktastic: filter out stuff coming from this StatusNet
  54. $source = mb_strtolower(common_config('integration', 'source'));
  55. if (preg_match("/$source/", mb_strtolower($status->source))) {
  56. common_debug(__METHOD__ . ' - Skipping import of status ' .
  57. twitter_id($status) . " with source {$source}");
  58. return null;
  59. }
  60. // Don't save it if the user is protected
  61. // FIXME: save it but treat it as private
  62. if ($status->user->protected) {
  63. return null;
  64. }
  65. $notice = $this->saveStatus($status);
  66. return $notice;
  67. }
  68. function name()
  69. {
  70. return get_class($this);
  71. }
  72. function saveStatus($status)
  73. {
  74. $profile = $this->ensureProfile($status->user);
  75. if (empty($profile)) {
  76. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice. No associated Profile.');
  77. return null;
  78. }
  79. $statusId = twitter_id($status);
  80. $statusUri = $this->makeStatusURI($status->user->screen_name, $statusId);
  81. // check to see if we've already imported the status
  82. $n2s = Notice_to_status::getKV('status_id', $statusId);
  83. if (!empty($n2s)) {
  84. common_log(
  85. LOG_INFO,
  86. __METHOD__ . " - Ignoring duplicate import: {$statusId}"
  87. );
  88. return Notice::getKV('id', $n2s->notice_id);
  89. }
  90. $dupe = Notice::getKV('uri', $statusUri);
  91. if($dupe instanceof Notice) {
  92. // Add it to our record
  93. Notice_to_status::saveNew($dupe->id, $statusId);
  94. common_log(
  95. LOG_INFO,
  96. __METHOD__ . " - Ignoring duplicate import: {$statusId}"
  97. );
  98. return $dupe;
  99. }
  100. // If it's a retweet, save it as a repeat!
  101. if (!empty($status->retweeted_status)) {
  102. common_log(LOG_INFO, "Status {$statusId} is a retweet of " . twitter_id($status->retweeted_status) . ".");
  103. $original = $this->saveStatus($status->retweeted_status);
  104. if (empty($original)) {
  105. return null;
  106. } else {
  107. $author = $original->getProfile();
  108. // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'.
  109. // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice.
  110. $content = sprintf(_m('RT @%1$s %2$s'),
  111. $author->nickname,
  112. $original->content);
  113. if (Notice::contentTooLong($content)) {
  114. $contentlimit = Notice::maxContent();
  115. $content = mb_substr($content, 0, $contentlimit - 4) . ' ...';
  116. }
  117. $repeat = Notice::saveNew($profile->id,
  118. $content,
  119. 'twitter',
  120. array('repeat_of' => $original->id,
  121. 'uri' => $statusUri,
  122. 'is_local' => Notice::GATEWAY,
  123. 'object_type' => ActivityObject::NOTE,
  124. 'verb' => ActivityVerb::POST
  125. ));
  126. common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}");
  127. Notice_to_status::saveNew($repeat->id, $statusId);
  128. return $repeat;
  129. }
  130. }
  131. $notice = new Notice();
  132. $notice->profile_id = $profile->id;
  133. $notice->uri = $statusUri;
  134. $notice->url = $statusUri;
  135. $notice->verb = ActivityVerb::POST;
  136. $notice->object_type = ActivityObject::NOTE;
  137. $notice->created = strftime(
  138. '%Y-%m-%d %H:%M:%S',
  139. strtotime($status->created_at)
  140. );
  141. $notice->source = 'twitter';
  142. $notice->reply_to = null;
  143. $replyTo = twitter_id($status, 'in_reply_to_status_id');
  144. if (!empty($replyTo)) {
  145. common_log(LOG_INFO, "Status {$statusId} is a reply to status {$replyTo}");
  146. $n2s = Notice_to_status::getKV('status_id', $replyTo);
  147. if (empty($n2s)) {
  148. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  149. } else {
  150. $reply = Notice::getKV('id', $n2s->notice_id);
  151. if (empty($reply)) {
  152. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  153. } else {
  154. common_log(LOG_INFO, "Found local notice {$reply->id} for status {$replyTo}");
  155. $notice->reply_to = $reply->id;
  156. $notice->conversation = $reply->conversation;
  157. }
  158. }
  159. }
  160. $notice->is_local = Notice::GATEWAY;
  161. $notice->content = html_entity_decode($this->linkify($status, FALSE), ENT_QUOTES, 'UTF-8');
  162. $notice->rendered = $this->linkify($status, TRUE);
  163. if (Event::handle('StartNoticeSave', array(&$notice))) {
  164. if (empty($notice->conversation)) {
  165. $conv = Conversation::create();
  166. common_log(LOG_INFO, "No known conversation for status {$statusId} so a new one ({$conv->getID()}) was created.");
  167. $notice->conversation = $conv->getID();
  168. }
  169. $id = $notice->insert();
  170. if ($id === false) {
  171. common_log_db_error($notice, 'INSERT', __FILE__);
  172. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice.');
  173. }
  174. Event::handle('EndNoticeSave', array($notice));
  175. }
  176. Notice_to_status::saveNew($notice->id, $statusId);
  177. $this->saveStatusMentions($notice, $status);
  178. $this->saveStatusAttachments($notice, $status);
  179. $notice->blowOnInsert();
  180. return $notice;
  181. }
  182. /**
  183. * Make an URI for a status.
  184. *
  185. * @param object $status status object
  186. *
  187. * @return string URI
  188. */
  189. function makeStatusURI($username, $id)
  190. {
  191. return 'https://twitter.com/'
  192. . $username
  193. . '/status/'
  194. . $id;
  195. }
  196. /**
  197. * Look up a Profile by profileurl field. Profile::getKV() was
  198. * not working consistently.
  199. *
  200. * @param string $nickname local nickname of the Twitter user
  201. * @param string $profileurl the profile url
  202. *
  203. * @return mixed value the first Profile with that url, or null
  204. */
  205. protected function getProfileByUrl($nickname, $profileurl)
  206. {
  207. $profile = new Profile();
  208. $profile->nickname = $nickname;
  209. $profile->profileurl = $profileurl;
  210. $profile->limit(1);
  211. if (!$profile->find(true)) {
  212. $profile->profileurl = str_replace('https://', 'http://', $profileurl);
  213. if (!$profile->find(true)) {
  214. throw new NoResultException($profile);
  215. }
  216. }
  217. return $profile;
  218. }
  219. protected function ensureProfile($twuser)
  220. {
  221. // check to see if there's already a profile for this user
  222. $profileurl = 'https://twitter.com/' . $twuser->screen_name;
  223. try {
  224. $profile = $this->getProfileByUrl($twuser->screen_name, $profileurl);
  225. $this->updateAvatar($twuser, $profile);
  226. return $profile;
  227. } catch (NoResultException $e) {
  228. common_debug(__METHOD__ . ' - Adding profile and remote profile ' .
  229. "for Twitter user: $profileurl.");
  230. }
  231. $profile = new Profile();
  232. $profile->query("BEGIN");
  233. $profile->nickname = $twuser->screen_name;
  234. $profile->fullname = $twuser->name;
  235. $profile->homepage = $twuser->url;
  236. $profile->bio = $twuser->description;
  237. $profile->location = $twuser->location;
  238. $profile->profileurl = $profileurl;
  239. $profile->created = common_sql_now();
  240. try {
  241. $id = $profile->insert(); // insert _should_ throw exception on failure
  242. if (empty($id)) {
  243. throw new Exception('Failed insert');
  244. }
  245. } catch(Exception $e) {
  246. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert profile: " . $e->getMessage());
  247. common_log_db_error($profile, 'INSERT', __FILE__);
  248. $profile->query("ROLLBACK");
  249. return false;
  250. }
  251. $profile->query("COMMIT");
  252. $this->updateAvatar($twuser, $profile);
  253. return $profile;
  254. }
  255. /*
  256. * Checks whether we have to update the profile's avatar
  257. *
  258. * @return true when updated, false on failure, null when no action taken
  259. */
  260. protected function updateAvatar($twuser, Profile $profile)
  261. {
  262. $path_parts = pathinfo($twuser->profile_image_url);
  263. $ext = isset($path_parts['extension'])
  264. ? '.'.$path_parts['extension']
  265. : ''; // some lack extension
  266. $img_root = basename($path_parts['basename'], '_normal'.$ext); // cut off extension
  267. $filename = "Twitter_{$twuser->id}_{$img_root}_{$this->avatarsizename}{$ext}";
  268. try {
  269. $avatar = Avatar::getUploaded($profile);
  270. if ($avatar->filename === $filename) {
  271. return null;
  272. }
  273. common_debug(__METHOD__ . " - Updating profile avatar (profile_id={$profile->id}) " .
  274. "from {$avatar->filename} to {$filename}");
  275. // else we continue with creating a new avatar
  276. } catch (NoAvatarException $e) {
  277. // Avatar was not found. We can catch NoAvatarException or FileNotFoundException
  278. // but generally we just want to continue creating a new avatar.
  279. common_debug(__METHOD__ . " - No avatar found for (profile_id={$profile->id})");
  280. }
  281. $url = "{$path_parts['dirname']}/{$img_root}_{$this->avatarsizename}{$ext}";
  282. $mediatype = $this->getMediatype(mb_substr($ext, 1));
  283. try {
  284. $this->newAvatar($profile, $url, $filename, $mediatype);
  285. } catch (Exception $e) {
  286. if (file_exists(Avatar::path($filename))) {
  287. unlink(Avatar::path($filename));
  288. }
  289. return false;
  290. }
  291. return true;
  292. }
  293. protected function getMediatype($ext)
  294. {
  295. $mediatype = null;
  296. switch (strtolower($ext)) {
  297. case 'jpeg':
  298. case 'jpg':
  299. $mediatype = 'image/jpeg';
  300. break;
  301. case 'gif':
  302. $mediatype = 'image/gif';
  303. break;
  304. default:
  305. $mediatype = 'image/png';
  306. }
  307. return $mediatype;
  308. }
  309. protected function newAvatar(Profile $profile, $url, $filename, $mediatype)
  310. {
  311. // Clear out old avatars, won't do anything if there are none
  312. Avatar::deleteFromProfile($profile);
  313. // throws exception if unable to fetch
  314. $this->fetchRemoteUrl($url, Avatar::path($filename));
  315. $avatar = new Avatar();
  316. $avatar->profile_id = $profile->id;
  317. $avatar->original = 1; // this is an original/"uploaded" avatar
  318. $avatar->mediatype = $mediatype;
  319. $avatar->filename = $filename;
  320. $avatar->width = $this->avatarsize;
  321. $avatar->height = $this->avatarsize;
  322. $avatar->created = common_sql_now();
  323. $id = $avatar->insert();
  324. if (empty($id)) {
  325. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert avatar - " . $e->getMessage());
  326. common_log_db_error($avatar, 'INSERT', __FILE__);
  327. throw new ServerException('Could not insert avatar');
  328. }
  329. common_debug(__METHOD__ . " - Saved new avatar for {$profile->id}.");
  330. return $avatar;
  331. }
  332. /**
  333. * Fetch a remote avatar image and save to local storage.
  334. *
  335. * @param string $url avatar source URL
  336. * @param string $filename bare local filename for download
  337. * @return bool true on success, false on failure
  338. */
  339. protected function fetchRemoteUrl($url, $filename)
  340. {
  341. common_debug(__METHOD__ . " - Fetching Twitter avatar: {$url} to {$filename}");
  342. $request = HTTPClient::start();
  343. $request->setConfig('connect_timeout', 3); // I had problems with throttling
  344. $request->setConfig('timeout', 6); // and locking the process sucks.
  345. $response = $request->get($url);
  346. if ($response->isOk()) {
  347. if (!file_put_contents($filename, $response->getBody())) {
  348. throw new ServerException('Failed saving fetched file');
  349. }
  350. } else {
  351. throw new Exception('Unexpected HTTP status code');
  352. }
  353. return true;
  354. }
  355. const URL = 1;
  356. const HASHTAG = 2;
  357. const MENTION = 3;
  358. function linkify($status, $html = FALSE)
  359. {
  360. $text = $status->text;
  361. if (empty($status->entities)) {
  362. $statusId = twitter_id($status);
  363. common_log(LOG_WARNING, "No entities data for {$statusId}; trying to fake up links ourselves.");
  364. $text = common_replace_urls_callback($text, 'common_linkify');
  365. $text = preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/',
  366. function ($m) { return $m[1].'#'.TwitterStatusFetcher::tagLink($m[2]); }, $text);
  367. $text = preg_replace_callback('/(^|\s+)@([a-z0-9A-Z_]{1,64})/',
  368. function ($m) { return $m[1].'@'.TwitterStatusFetcher::atLink($m[2]); }, $text);
  369. return $text;
  370. }
  371. // Move all the entities into order so we can
  372. // replace them and escape surrounding plaintext
  373. // in order
  374. $toReplace = array();
  375. if (!empty($status->entities->urls)) {
  376. foreach ($status->entities->urls as $url) {
  377. $toReplace[$url->indices[0]] = array(self::URL, $url);
  378. }
  379. }
  380. if (!empty($status->entities->hashtags)) {
  381. foreach ($status->entities->hashtags as $hashtag) {
  382. $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag);
  383. }
  384. }
  385. if (!empty($status->entities->user_mentions)) {
  386. foreach ($status->entities->user_mentions as $mention) {
  387. $toReplace[$mention->indices[0]] = array(self::MENTION, $mention);
  388. }
  389. }
  390. // sort in forward order by key
  391. ksort($toReplace);
  392. $result = '';
  393. $cursor = 0;
  394. foreach ($toReplace as $part) {
  395. list($type, $object) = $part;
  396. $start = $object->indices[0];
  397. $end = $object->indices[1];
  398. if ($cursor < $start) {
  399. // Copy in the preceding plaintext
  400. $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor));
  401. $cursor = $start;
  402. }
  403. $orig = $this->twitEscape(mb_substr($text, $start, $end - $start));
  404. switch($type) {
  405. case self::URL:
  406. $linkText = $this->makeUrlLink($object, $orig, $html);
  407. break;
  408. case self::HASHTAG:
  409. if ($html) {
  410. $linkText = $this->makeHashtagLink($object, $orig);
  411. }else{
  412. $linkText = $orig;
  413. }
  414. break;
  415. case self::MENTION:
  416. if ($html) {
  417. $linkText = $this->makeMentionLink($object, $orig);
  418. }else{
  419. $linkText = $orig;
  420. }
  421. break;
  422. default:
  423. $linkText = $orig;
  424. continue;
  425. }
  426. $result .= $linkText;
  427. $cursor = $end;
  428. }
  429. $last = $this->twitEscape(mb_substr($text, $cursor));
  430. $result .= $last;
  431. return $result;
  432. }
  433. function twitEscape($str)
  434. {
  435. // Twitter seems to preemptive turn < and > into &lt; and &gt;
  436. // but doesn't for &, so while you may have some magic protection
  437. // against XSS by not bothing to escape manually, you still get
  438. // invalid XHTML. Thanks!
  439. //
  440. // Looks like their web interface pretty much sends anything
  441. // through intact, so.... to do equivalent, decode all entities
  442. // and then re-encode the special ones.
  443. return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8'));
  444. }
  445. function makeUrlLink($object, $orig, $html)
  446. {
  447. if ($html) {
  448. return '<a href="'.htmlspecialchars($object->expanded_url).'" class="extlink">'.htmlspecialchars($object->display_url).'</a>';
  449. }else{
  450. return htmlspecialchars($object->expanded_url);
  451. }
  452. }
  453. function makeHashtagLink($object, $orig)
  454. {
  455. return "#" . self::tagLink($object->text, substr($orig, 1));
  456. }
  457. function makeMentionLink($object, $orig)
  458. {
  459. return "@".self::atLink($object->screen_name, $object->name, substr($orig, 1));
  460. }
  461. static function tagLink($tag, $orig)
  462. {
  463. return "<a href='https://twitter.com/search?q=%23{$tag}' class='hashtag'>{$orig}</a>";
  464. }
  465. static function atLink($screenName, $fullName, $orig)
  466. {
  467. if (!empty($fullName)) {
  468. return "<a href='https://twitter.com/{$screenName}' title='{$fullName}'>{$orig}</a>";
  469. } else {
  470. return "<a href='https://twitter.com/{$screenName}'>{$orig}</a>";
  471. }
  472. }
  473. function saveStatusMentions($notice, $status)
  474. {
  475. $mentions = array();
  476. if (empty($status->entities) || empty($status->entities->user_mentions)) {
  477. return;
  478. }
  479. foreach ($status->entities->user_mentions as $mention) {
  480. try {
  481. $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE);
  482. $user = $flink->getUser();
  483. $reply = new Reply();
  484. $reply->notice_id = $notice->id;
  485. $reply->profile_id = $user->id;
  486. $reply->modified = $notice->created;
  487. common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}");
  488. $id = $reply->insert();
  489. } catch (NoSuchUserException $e) {
  490. common_log(LOG_WARNING, 'No local user found for Foreign_link with id: '.$mention->id);
  491. } catch (NoResultException $e) {
  492. common_log(LOG_WARNING, 'No foreign link or profile found for Foreign_link with id: '.$mention->id);
  493. }
  494. }
  495. }
  496. /**
  497. * Record URL links from the notice. Needed to get thumbnail records
  498. * for referenced photo and video posts, etc.
  499. *
  500. * @param Notice $notice
  501. * @param object $status
  502. */
  503. function saveStatusAttachments(Notice $notice, $status)
  504. {
  505. if (common_config('attachments', 'process_links')) {
  506. if (!empty($status->entities) && !empty($status->entities->urls)) {
  507. foreach ($status->entities->urls as $url) {
  508. try {
  509. File::processNew($url->url, $notice);
  510. } catch (ServerException $e) {
  511. // Could not process attached URL
  512. }
  513. }
  514. }
  515. }
  516. }
  517. }