#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : download_progress.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2023/10/30
# Download progress bar utility
import os
import time
# import logging
from contextlib import closing
from urllib.parse import unquote, urlsplit

import requests

from utils.log import logger
  14. chunkSize = 1024 * 1024
  15. loop = 5
  16. headers = {
  17. "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
  18. }
  19. def speed_handle(process, file_length):
  20. if process != file_length:
  21. num = process / file_length
  22. progress = ': \033[1;33m{:.2f}\033[0m%|{}{}| '.format(float(num * 100), '■' * round(num * 20),
  23. '□' * round((1 - num) * 20))
  24. else:
  25. progress = ' \033[1;33m{}\033[0m% |{}|'.format(100, '■' * 50)
  26. # print(progress, flush=True, end='')
  27. logger.info(progress)
  28. def get_file_name(url, headers):
  29. filename = ''
  30. if 'Content-Disposition' in headers and headers['Content-Disposition']:
  31. disposition_split = headers['Content-Disposition'].split(';')
  32. if len(disposition_split) > 1:
  33. if disposition_split[1].strip().lower().startswith('filename='):
  34. file_name = disposition_split[1].split('=')
  35. if len(file_name) > 1:
  36. filename = unquote(file_name[1])
  37. if not filename and os.path.basename(url):
  38. filename = os.path.basename(url).split("?")[0]
  39. if not filename:
  40. return time.time()
  41. return filename
  42. def file_download(fileUrl, filePath):
  43. if os.path.exists(filePath):
  44. os.remove(filePath)
  45. # response = requests.get(fileUrl, headers=headers, stream=True, verify=False)
  46. response = requests.get(fileUrl, headers=headers, stream=True)
  47. is_chunked = response.headers.get('Transfer-Encoding') or ''
  48. if is_chunked == 'chunked':
  49. logger.info('chunked文件不支持获取总文件大小,tqdm模块才能分段下载')
  50. return False
  51. fileSize = int(response.headers.get('content-length') or 0) # 文件大小
  52. logger.info(f'fileSize:{fileSize}')
  53. if fileSize < 1:
  54. return False
  55. tmpSize = 0
  56. n = 0
  57. isDownloaded = False
  58. while n < loop:
  59. if os.path.exists(filePath): # 判断文件是否存在
  60. tmpSize = os.path.getsize(filePath)
  61. _headers = {"Range": "bytes={}-{}".format(tmpSize, fileSize),
  62. "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"}
  63. # headers.update({"Range": "bytes={}-{}".format(tmpSize, fileSize)})
  64. contentSize = 0
  65. remainSize = (fileSize - tmpSize) / chunkSize
  66. filename = os.path.basename(filePath)
  67. st = time.perf_counter()
  68. if remainSize > 0:
  69. with closing(requests.get(fileUrl, headers=_headers, stream=True)) as _response, open(
  70. filePath,
  71. "ab") as file:
  72. for content in _response.iter_content(chunk_size=chunkSize):
  73. file.write(content)
  74. timeTook = time.perf_counter() - st
  75. contentSize += len(content) / chunkSize
  76. # print('\r{}/{}: {}'.format(cnt + 1, len(fileUrls), filename), flush=True, end='')
  77. # logger.info('\r{}/{}: {}'.format(cnt + 1, len(fileUrls), filename))
  78. logger.info(f'文件{filename}下载中...')
  79. speed_handle(contentSize + tmpSize / chunkSize, fileSize / chunkSize)
  80. downloadSpeed = contentSize / timeTook # 平均下载速度
  81. remainingTime = int(timeTook / (contentSize / remainSize) - timeTook) # 估计剩余下载时间
  82. # print(
  83. # '[' + 'average speed: \033[1;31m{:.2f}MiB/s\033[0m, remaining time: \033[1;32m{}s\033[0m, file size: \033[1;34m{:.2f}MiB\033[0m'.format(
  84. # downloadSpeed,
  85. # remainingTime,
  86. # fileSize / chunkSize) + ']', flush=True, end=' '
  87. # )
  88. logger.info(
  89. '[' + 'average speed: \033[1;31m{:.2f}MiB/s\033[0m, remaining time: \033[1;32m{}s\033[0m, file size: \033[1;34m{:.2f}MiB\033[0m'.format(
  90. downloadSpeed,
  91. remainingTime,
  92. fileSize / chunkSize) + ']'
  93. )
  94. else:
  95. isDownloaded = True
  96. break
  97. n += 1
  98. return isDownloaded
  99. def file_downloads(files, save_path='download'):
  100. """
  101. files = [{'url':'https://ghproxy.liuzhicong.com/https://github.com/hjdhnx/dr_py/archive/refs/heads/main.zip','name':'dr_py.zip'}]
  102. :param save_path:
  103. :param files:
  104. :return:
  105. """
  106. # save_path = 'tmp'
  107. os.makedirs(save_path, exist_ok=True)
  108. # logging.basicConfig(level=logging.INFO, filename='download/downloading.log', filemode='a', format="%(message)s")
  109. localtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
  110. logger.info(localtime + ': Start downloading task: {}'.format(files))
  111. failedUrl = []
  112. for cnt, file in enumerate(files):
  113. fileUrl = file.get('url')
  114. if not fileUrl:
  115. print('file error:no url')
  116. continue
  117. fileName = file.get('name')
  118. filename = fileName or get_file_name(fileUrl, headers) # 获取文件名称
  119. logger.info(f'开始下载{filename}: {fileUrl}')
  120. try:
  121. t0 = time.perf_counter()
  122. isDload = file_download(fileUrl, os.path.join(save_path, filename))
  123. t1 = time.perf_counter()
  124. localtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
  125. if isDload:
  126. logger.info(
  127. localtime + ': {} download successfully! Time consuming: {:.3f}s'.format(filename, t1 - t0))
  128. else:
  129. logger.info(localtime + ': {} download failed! Url: {}'.format(filename, fileUrl))
  130. failedUrl.append(fileUrl)
  131. except Exception as e:
  132. logger.info(f'链接{fileUrl}下载失败,由于出现了错误:{e}')
  133. failedUrl.append(fileUrl)
  134. if len(failedUrl):
  135. with open(os.path.join(save_path, 'failedUrl.txt'), 'w') as p:
  136. for url in failedUrl:
  137. p.write(url + '\n')
  138. fn = len(failedUrl)
  139. sn = len(files) - fn
  140. # print("\n{} file{} download successfully, {} file{} download failed!".format(sn, 's' * (sn > 1), fn, 's' * (fn > 1)))
  141. logger.info(
  142. "\n{} file{} download successfully, {} file{} download failed!".format(sn, 's' * (sn > 1), fn, 's' * (fn > 1)))
  143. if fn > 0:
  144. return False
  145. else:
  146. return True
  147. if __name__ == '__main__':
  148. # urlTxt = 'download/urls.txt'
  149. # with open(urlTxt, "r") as f:
  150. # fileUrls = [line.strip() for line in f.readlines()]
  151. response = requests.get('https://ghproxy.liuzhicong.com/https://github.com/hjdhnx/dr_py/archive/refs/heads/main.zip', headers=headers, stream=True)
  152. print(response.headers)
  153. fileSize = int(response.headers.get('content-length')) # 文件大小
  154. logger.info(f'fileSize:{fileSize}')
  155. # files = [{'url': 'https://ghproxy.liuzhicong.com/https://github.com/hjdhnx/dr_py/archive/refs/heads/main.zip',
  156. # 'name': 'dr_py.zip'}]
  157. # file_downloads(files, 'tmp')