semantic_scholar.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Semantic Scholar (Science)
"""

from json import dumps, loads
from datetime import datetime

from flask_babel import gettext

about = {
    "website": 'https://www.semanticscholar.org/',
    "wikidata_id": 'Q22908627',
    "official_api_documentation": 'https://api.semanticscholar.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper'


def request(query, params):
    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['content-type'] = 'application/json'
    params['data'] = dumps(
        {
            "queryString": query,
            "page": params['pageno'],
            "pageSize": 10,
            "sort": "relevance",
            "useFallbackRankerService": False,
            "useFallbackSearchCluster": False,
            "getQuerySuggestions": False,
            "authors": [],
            "coAuthors": [],
            "venues": [],
            "performTitleMatch": True,
        }
    )
    return params
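
# Illustrative note (not part of the upstream engine, query string is hypothetical):
# calling request('bert', {'headers': {}, 'pageno': 1}) sets params['url'] to
# search_url, params['method'] to 'POST', and params['data'] to the JSON string
#   {"queryString": "bert", "page": 1, "pageSize": 10, "sort": "relevance",
#    "useFallbackRankerService": false, ...}
# i.e. the Python booleans above are serialized as lowercase JSON true/false
# on the wire.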

def response(resp):
    res = loads(resp.text)
    results = []

    for result in res['results']:
        url = result.get('primaryPaperLink', {}).get('url')
        if not url and result.get('links'):
            url = result.get('links')[0]
        if not url:
            alternatePaperLinks = result.get('alternatePaperLinks')
            if alternatePaperLinks:
                url = alternatePaperLinks[0].get('url')
        if not url:
            # fall back to the paper page on semanticscholar.org
            url = paper_url + '/%s' % result['id']

        # publishedDate
        if 'pubDate' in result:
            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
        else:
            publishedDate = None

        # authors
        authors = [author[0]['name'] for author in result.get('authors', [])]

        # pick the first alternate link that does not come from the crawler
        # and is not a bare DOI link; use it as the PDF / full-text URL
        pdf_url = None
        for doc in result.get('alternatePaperLinks', []):
            if doc['linkType'] not in ('crawler', 'doi'):
                pdf_url = doc['url']
                break

        # comments: summarize the citation statistics, if present
        comments = None
        if 'citationStats' in result:
            comments = gettext(
                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
            ).format(
                numCitations=result['citationStats']['numCitations'],
                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
            )

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': result['title']['text'],
                'content': result['paperAbstract']['text'],
                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
                'doi': result.get('doiInfo', {}).get('doi'),
                'tags': result.get('fieldsOfStudy'),
                'authors': authors,
                'pdf_url': pdf_url,
                'publishedDate': publishedDate,
                'comments': comments,
            }
        )

    return results
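
# ---------------------------------------------------------------------------
# Minimal standalone sketch (not part of the engine itself): SearXNG normally
# drives request()/response() through its engine framework, but the two
# functions can be exercised directly.  This assumes the `requests` package is
# installed and that the unofficial endpoint above accepts the request as-is;
# the site may require additional headers or block non-browser clients.
if __name__ == '__main__':
    import requests

    # the subset of SearXNG's params dict that request() actually reads
    params = {'headers': {}, 'pageno': 1}
    params = request('graph neural networks', params)  # hypothetical query

    resp = requests.post(
        params['url'],
        data=params['data'],
        headers=params['headers'],
        timeout=10,
    )

    for item in response(resp):
        print(item['publishedDate'], item['title'], item['url'])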