# py_小红薯.py (6.4 KB)
  1. # coding=utf-8
  2. # !/usr/bin/python
  3. # by嗷呜
  4. import json
  5. import random
  6. import sys
  7. import time
  8. from base64 import b64decode
  9. from Crypto.Cipher import AES
  10. from Crypto.Hash import MD5
  11. from Crypto.Util.Padding import unpad
  12. sys.path.append('..')
  13. from base.spider import Spider
  class Spider(Spider):
      """Spider plugin that lists, resolves and proxies videos from a remote API.

      The class deliberately keeps the name ``Spider`` (shadowing the imported
      base class).  ``self.fetch``, ``self.post`` and ``self.getProxyUrl`` are
      not defined in this file; they are presumably inherited from
      ``base.spider.Spider`` -- verify against the base class.
      """

      # Base URL every API path in this class is appended to.
      host = 'https://jhfkdnov21vfd.fhoumpjjih.work'

      # User-Agent sent with every API request (token request and signed calls).
      _USER_AGENT = 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36;SuiRui/xhs/ver=1.2.6'

      def getName(self):
          """Return the display name of this spider."""
          return "小红书"

      def init(self, extend=""):
          """Create a random device id and obtain a guest token for it.

          Sets ``self.did``, ``self.token`` and ``self.phost`` (the image
          domain returned together with the token).  ``extend`` is unused.
          """
          self.did = self.random_str(32)
          self.token, self.phost = self.gettoken()

      def isVideoFormat(self, url):
          """Not implemented; returns ``None``."""
          pass

      def manualVideoCheck(self):
          """Not implemented; returns ``None``."""
          pass

      def destroy(self):
          """Nothing to release; returns ``None``."""
          pass

      def random_str(self, length=16):
          """Return ``length`` random lowercase hexadecimal characters."""
          hex_chars = '0123456789abcdef'
          return ''.join(random.choice(hex_chars) for _ in range(length))

      def md5(self, text: str) -> str:
          """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
          digest = MD5.new()
          digest.update(text.encode('utf-8'))
          return digest.hexdigest()

      def _api_get(self, path):
          """GET ``host + path`` with signed headers and decrypt its ``encData``.

          Returns the JSON value produced by :meth:`aes`.
          """
          response = self.fetch(f'{self.host}{path}', headers=self.headers())
          return self.aes(response.json()['encData'])

      def homeContent(self, filter):
          """Return the category list as ``{'class': [{'type_name', 'type_id'}]}``.

          ``filter`` is unused.
          """
          payload = self._api_get('/api/video/queryClassifyList?mark=4')
          classes = [
              {'type_name': item['classifyTitle'], 'type_id': item['classifyId']}
              for item in payload['data']
          ]
          return {'class': classes}

      def homeVideoContent(self):
          """Not implemented; returns ``None``."""
          pass

      def categoryContent(self, tid, pg, filter, extend):
          """Return one page (20 items requested) of videos for category ``tid``.

          ``filter`` and ``extend`` are unused.  Cover images are routed
          through the local proxy URL so that :meth:`action` can decode them.
          The paging totals are fixed placeholder values, not figures taken
          from the API response.
          """
          path = f'/api/short/video/getShortVideos?classifyId={tid}&videoMark=4&page={pg}&pageSize=20'
          payload = self._api_get(path)
          videos = [
              {
                  "vod_id": item['videoId'],
                  'vod_name': item.get('title'),
                  'vod_pic': self.getProxyUrl() + '&url=' + item['coverImg'],
                  'vod_remarks': self.dtim(item.get('playTime')),
              }
              for item in payload['data']
          ]
          return {
              "list": videos,
              "page": pg,
              "pagecount": 9999,
              "limit": 90,
              "total": 999999,
          }

      def detailContent(self, ids):
          """Return the detail record for the video whose id is ``ids[0]``.

          The play URL has the form ``<title>$auth_key=<key>&path=<url>``; the
          part after ``$`` is the query string :meth:`playerContent` appends
          to the decode endpoint.
          """
          v = self._api_get(f'/api/video/getVideoById?videoId={ids[0]}')
          play_url = f'{v["title"]}$auth_key={v["authKey"]}&path={v["videoUrl"]}'
          vod = {
              'vod_name': v["title"],
              # ``or []`` also covers a key that is present but null, which
              # would otherwise make ``join`` raise TypeError.
              'type_name': ''.join(v.get('tagTitles') or []),
              'vod_play_from': v.get('nickName') or "小红书官方",
              'vod_play_url': play_url,
          }
          return {"list": [vod]}

      def searchContent(self, key, quick, pg='1'):
          """Not implemented; returns ``None``."""
          pass

      def playerContent(self, flag, id, vipFlags):
          """Return the direct-play descriptor for the play id ``id``.

          ``id`` is appended as the query string of the m3u8 decode endpoint.
          The player headers are the signed headers with ``aut`` moved to
          ``Authorization`` and ``deviceid`` removed.  ``flag`` and
          ``vipFlags`` are unused.
          """
          player_headers = self.headers()
          player_headers['Authorization'] = player_headers.pop('aut')
          del player_headers['deviceid']
          return {
              "parse": 0,
              "url": f"{self.host}/api/m3u8/decode/authPath?{id}",
              "header": player_headers,
          }

      def localProxy(self, param):
          """Serve a local-proxy request by delegating to :meth:`action`."""
          return self.action(param)

      def aes(self, word):
          """Decrypt a base64 AES-CBC payload and parse the plaintext as JSON.

          The same base64-decoded constant is used as both key and IV.
          """
          key = b64decode("SmhiR2NpT2lKSVV6STFOaQ==")
          cipher = AES.new(key, AES.MODE_CBC, key)
          plaintext = unpad(cipher.decrypt(b64decode(word)), AES.block_size)
          return json.loads(plaintext.decode('utf-8'))

      def dtim(self, seconds):
          """Format a duration in seconds as ``MM:SS`` or ``HH:MM:SS``.

          Returns ``''`` when ``seconds`` cannot be converted to an integer
          (for example ``None`` or a non-numeric string).
          """
          try:
              total = int(seconds)
          except (TypeError, ValueError, OverflowError):
              return ''
          hours, remainder = divmod(total, 3600)
          minutes, secs = divmod(remainder, 60)
          mm_ss = f"{str(minutes).zfill(2)}:{str(secs).zfill(2)}"
          if hours > 0:
              return f"{str(hours).zfill(2)}:{mm_ss}"
          return mm_ss

      def getsign(self):
          """Return the MD5 hex digest of characters 3..7 of the ms timestamp."""
          timestamp = str(int(time.time() * 1000))
          return self.md5(timestamp[3:8])

      def _signed_headers(self):
          """Return the headers shared by the token request and signed calls."""
          return {
              'User-Agent': self._USER_AGENT,
              'deviceid': self.did,
              't': str(int(time.time() * 1000)),
              's': self.getsign(),
          }

      def gettoken(self):
          """Request a guest token for ``self.did``.

          Returns:
              ``(token, imgDomain)`` taken from the ``data`` object of the
              JSON response.
          """
          url = f'{self.host}/api/user/traveler'
          body = {'deviceId': self.did, 'tt': 'U', 'code': '', 'chCode': 'dafe13'}
          reply = self.post(url, json=body, headers=self._signed_headers()).json()
          data = reply['data']
          return data['token'], data['imgDomain']

      def headers(self):
          """Return freshly signed request headers including the ``aut`` token."""
          signed = self._signed_headers()
          signed['aut'] = self.token
          return signed

      def action(self, param):
          """Fetch ``phost + param['url']`` and return it as a proxy response.

          Returns:
              ``[200, content_type, body]`` where ``body`` has been passed
              through :meth:`img`.
          """
          headers = {
              'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 11; M2012K10C Build/RP1A.200720.011)'}
          response = self.fetch(f'{self.phost}{param["url"]}', headers=headers)
          # A missing Content-Type header used to crash on ``None.split``;
          # fall back to a generic binary type instead.
          raw_type = response.headers.get('Content-Type') or 'application/octet-stream'
          content_type = raw_type.split(';')[0]
          body = self.img(response.content, 100, '2020-zq3-888')
          return [200, content_type, body]

      def img(self, data: bytes, length: int, key: str):
          """Undo the XOR obfuscation applied to the start of an image.

          Data that already looks like a PNG, GIF or JPEG is returned
          unchanged.  Otherwise the first ``length`` bytes (or all of them if
          the payload is shorter) are XORed with the repeating UTF-8 bytes of
          ``key``.
          """
          GIF = b'\x47\x49\x46'
          JPG = b'\xFF\xD8\xFF'
          PNG = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'

          # The PNG test compares bytes 1..7 only, ignoring the first byte,
          # exactly as the original check did.
          looks_png = len(data) > 7 and data[1:8] == PNG[1:8]
          looks_gif = len(data) > 2 and data[:3] == GIF
          looks_jpg = len(data) > 7 and data[:3] == JPG
          if looks_png or looks_gif or looks_jpg:
              return data

          key_bytes = key.encode('utf-8')
          result = bytearray(data)
          # Bound by the payload size: short payloads used to raise IndexError.
          for i in range(min(length, len(result))):
              result[i] ^= key_bytes[i % len(key_bytes)]
          return bytes(result)