linkhtml.php 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. <?php
  2. // This file is part of GNU social - https://www.gnu.org/software/social
  3. //
  4. // GNU social is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // GNU social is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Implementation of discovery using HTML <link> element
  18. *
  19. * Discovers XRD file for a user by fetching the URL and reading any
  20. * <link> elements in the HTML response.
  21. *
  22. * @category Discovery
  23. * @package GNUsocial
  24. * @author James Walker <james@status.net>
  25. * @copyright 2010 StatusNet, Inc.
  26. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  27. */
  28. class LRDDMethod_LinkHTML extends LRDDMethod
  29. {
  30. /**
  31. * For HTTP IDs, fetch the URL and look for <link> elements
  32. * in the HTML response.
  33. *
  34. * @todo fail out of WebFinger URIs faster
  35. */
  36. public function discover($uri)
  37. {
  38. $response = self::fetchUrl($uri);
  39. return self::parse($response->getBody());
  40. }
  41. /**
  42. * Parse HTML and return <link> elements
  43. *
  44. * Given an HTML string, scans the string for <link> elements
  45. *
  46. * @param string $html HTML to scan
  47. *
  48. * @return array array of associative arrays in JRD-ish array format
  49. */
  50. public function parse($html)
  51. {
  52. $links = [];
  53. preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', $html, $head_matches);
  54. if (count($head_matches) != 3) {
  55. return [];
  56. }
  57. [,, $head_html] = $head_matches;
  58. preg_match_all('/<link\s[^>]*>/i', $head_html, $link_matches);
  59. foreach ($link_matches[0] as $link_html) {
  60. $link_url = null;
  61. $link_rel = null;
  62. $link_type = null;
  63. preg_match('/\srel=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $rel_matches);
  64. if (count($rel_matches) > 3) {
  65. $link_rel = $rel_matches[3];
  66. } elseif (count($rel_matches) > 1) {
  67. $link_rel = $rel_matches[1];
  68. }
  69. preg_match('/\shref=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $href_matches);
  70. if (count($href_matches) > 3) {
  71. $link_uri = $href_matches[3];
  72. } elseif (count($href_matches) > 1) {
  73. $link_uri = $href_matches[1];
  74. }
  75. preg_match('/\stype=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $type_matches);
  76. if (count($type_matches) > 3) {
  77. $link_type = $type_matches[3];
  78. } elseif (count($type_matches) > 1) {
  79. $link_type = $type_matches[1];
  80. }
  81. $links[] = new XML_XRD_Element_Link($link_rel, $link_uri, $link_type);
  82. }
  83. return $links;
  84. }
  85. }