parse.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : parse.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/9/24
  6. from flask import Blueprint, jsonify,redirect,make_response
  7. from utils.web import getParmas,get_interval
  8. import os
  9. from utils.cfg import cfg
  10. from utils.log import logger
  11. from utils.encode import OcrApi,base64ToImage
  12. from controllers.service import storage_service
  13. from utils.pyctx import py_ctx,getPreJs,runJScode,JsObjectWrapper,PyJsString,parseText,jsoup,time
  14. from utils.env import get_env
  15. import base64
  16. parse = Blueprint("parse", __name__)
  17. class R(object):
  18. @classmethod
  19. def ok(self, msg='操作成功', url=None, extra=None):
  20. if extra is None:
  21. extra = {}
  22. header = {
  23. "user-agent": "Mozilla/5.0"
  24. }
  25. if 'bilivideo.c' in url:
  26. header.update({
  27. 'referer':'https://www.bilibili.com/'
  28. })
  29. result = {"code": 200, "msg": msg, "url":url}
  30. result.update(header)
  31. result.update(extra)
  32. return jsonify(result)
  33. @classmethod
  34. def error(self,msg="系统异常",code=404,extra=None):
  35. if extra is None:
  36. extra = {}
  37. result = {"code": code, "msg": msg}
  38. result.update(extra)
  39. return jsonify(result)
  40. @classmethod
  41. def success(self,msg='操作成功', url=None,extra=None):
  42. return self.ok(msg,url,extra)
  43. @classmethod
  44. def failed(self,msg="系统异常", code=404,extra=None):
  45. return self.error(msg,code,extra)
  46. def 重定向(url:str):
  47. if isinstance(url, PyJsString):
  48. url = parseText(str(url))
  49. if str(url).startswith('http'):
  50. return f'redirect://{url}'
  51. else:
  52. return str(url)
  53. def toast(url:str):
  54. if isinstance(url, PyJsString):
  55. url = parseText(str(url))
  56. return f'toast://{url}'
  57. def image(text:str):
  58. if isinstance(text, PyJsString):
  59. text = parseText(str(text))
  60. return f'image://{text}'
  61. @parse.route('/api/<path:filename>')
  62. def parse_home(filename):
  63. url = getParmas('url')
  64. # http://localhost:5705/parse/api/%E6%97%A0%E5%90%8D.js?url=https://www.iqiyi.com/v_ik3832z0go.html
  65. # http://localhost:5705/parse/api/哔哩.js?url=https://www.bilibili.com/bangumi/play/ep704873
  66. if not url or not url.startswith('http'):
  67. return R.failed(f'url必填!{url},且必须是http开头')
  68. base_path = 'jiexi'
  69. os.makedirs(base_path, exist_ok=True)
  70. file_path = os.path.join(base_path, filename)
  71. if not os.path.exists(file_path):
  72. return R.failed(f'{file_path}文件不存在')
  73. logger.info(f'开始尝试通过{filename}解析:{url}')
  74. jsp = jsoup(url)
  75. env = get_env()
  76. py_ctx.update({
  77. 'vipUrl': url,
  78. 'realUrl': '',
  79. 'input': url,
  80. 'fetch_params': {'headers': {'Referer':url}, 'timeout': 10, 'encoding': 'utf-8'},
  81. 'jsp':jsp,
  82. '重定向':重定向,
  83. 'toast':toast,
  84. 'env':env,
  85. 'image':image,
  86. 'print':print,
  87. 'log':logger.info,
  88. 'getParmas':getParmas,
  89. 'params':getParmas()
  90. })
  91. ctx = py_ctx
  92. with open(file_path,encoding='utf-8') as f:
  93. code = f.read()
  94. jscode = getPreJs() + code.strip().replace('js:', '', 1)
  95. # print(jscode)
  96. t1 = time()
  97. try:
  98. loader, _ = runJScode(jscode, ctx=ctx)
  99. # realUrl = loader.eval('realUrl')
  100. # realUrl = loader.eval(f'lazy("{url}")')
  101. realUrl = loader.eval('lazy()')
  102. if not realUrl:
  103. return R.failed(f'解析失败:{realUrl}')
  104. if isinstance(realUrl, PyJsString):
  105. realUrl = parseText(str(realUrl))
  106. if not realUrl or realUrl == url:
  107. return R.failed(f'解析失败',extra={'from':realUrl})
  108. # print(realUrl)
  109. if str(realUrl).startswith('redirect://'):
  110. return redirect(realUrl.split('redirect://')[1])
  111. elif str(realUrl).startswith('toast://'):
  112. return R.failed(str(realUrl).split('toast://')[1],extra={'from':url})
  113. elif str(realUrl).startswith('image://'):
  114. img_data = base64ToImage(str(realUrl).split('image://')[1])
  115. response = make_response(img_data)
  116. response.headers['Content-Type'] = 'image/jpeg'
  117. return response
  118. return R.success(f'{filename}解析成功',realUrl,{'time':f'{get_interval(t1)}毫秒','from':url})
  119. except Exception as e:
  120. msg = f'{filename}解析出错:{e}'
  121. logger.info(msg)
  122. return R.failed(msg,extra={'time':f'{get_interval(t1)}毫秒','from':url})
  123. @parse.route('/ocr',methods=['POST'])
  124. def base64_ocr():
  125. lsg = storage_service()
  126. ocr_api = lsg.getItem('OCR_API',cfg.OCR_API)
  127. # print(ocr_api)
  128. # print('params:',getParmas())
  129. img = getParmas('img')
  130. # print(img)
  131. if not img:
  132. return R.failed('识别失败:缺少img参数')
  133. try:
  134. img_bytes = base64.b64decode(img)
  135. except:
  136. return R.failed('识别失败:img参数不是正确的base64格式')
  137. # print(img_bytes)
  138. img_path = 'txt/pluto'
  139. os.makedirs(img_path,exist_ok=True)
  140. with open(f'{img_path}/yzm.png','wb+') as f:
  141. f.write(img_bytes)
  142. ocr = OcrApi(ocr_api)
  143. code = ocr.classification(img_bytes)
  144. # resp = R.success('识别成功',code)
  145. # print(resp.json)
  146. resp = code
  147. return resp