filename_to_fingerprint.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. #!/usr/bin/env python3
  2. import re
  3. import sys
  4. import os
  5. import subprocess
  6. from collections import defaultdict
  7. from lxml import etree
  8. t = {
  9. "Chrome": 1,
  10. "IE": 2,
  11. "Safari": 3,
  12. "Firefox": 4,
  13. "Android": 5,
  14. "Opera": 6,
  15. "Blackberry": 7,
  16. "UCBrowser": 8,
  17. "Silk": 9,
  18. "Nokia": 10,
  19. "NetFront": 11,
  20. "QQ": 12,
  21. "Maxthon": 13,
  22. "SogouExplorer": 14,
  23. "Spotify": 15,
  24. "Bot": 16,
  25. "AppleBot": 17,
  26. "BaiduBot": 18,
  27. "BingBot": 19,
  28. "DuckDuckGoBot": 20,
  29. "FacebookBot": 21,
  30. "GoogleBot": 22,
  31. "LinkedInBot": 23,
  32. "MsnBot": 24,
  33. "PingdomBot": 25,
  34. "TwitterBot": 26,
  35. "YandexBot": 27,
  36. "YahooBot": 28,
  37. }
  38. browser_to_int = defaultdict(int)
  39. for k,v in t.items():
  40. browser_to_int[k] = v
  41. t = {
  42. "WindowsPhone": 1,
  43. "Windows": 2,
  44. "MacOSX": 3,
  45. "iOS": 4,
  46. "Android": 5,
  47. "Blackberry": 6,
  48. "ChromeOS": 7,
  49. "Kindle": 8,
  50. "WebOS": 9,
  51. "Linux": 10,
  52. "Playstation": 11,
  53. "Xbox": 12,
  54. "Nintendo": 13,
  55. "Bot": 14,
  56. }
  57. os_to_int = defaultdict(int)
  58. for k,v in t.items():
  59. os_to_int[k] = v
  60. t = {
  61. "Windows": 1,
  62. "Mac": 2,
  63. "Linux": 3,
  64. "iPad": 4,
  65. "iPhone": 5,
  66. "iPod": 6,
  67. "Blackberry": 7,
  68. "WindowsPhone": 8,
  69. "Playstation": 9,
  70. "Xbox": 10,
  71. "Nintendo": 11,
  72. "Bot": 12,
  73. }
  74. platform_to_int = defaultdict(int)
  75. for k,v in t.items():
  76. platform_to_int[k] = v
  77. t = {
  78. "Computer": 1,
  79. "Tablet": 2,
  80. "Phone": 3,
  81. "Console": 4,
  82. "Wearable": 5,
  83. "TV": 6,
  84. }
  85. device_to_int = defaultdict(int)
  86. for k,v in t.items():
  87. device_to_int[k] = v
  88. t = {
  89. "Antivirus": 1,
  90. "FakeBrowser": 2,
  91. "Malware": 3,
  92. "Parental": 4,
  93. "Proxy": 5,
  94. }
  95. mitm_type_to_int = defaultdict(int)
  96. for k,v in t.items():
  97. mitm_type_to_int[k] = v
  98. t = {
  99. "A": 1,
  100. "B": 2,
  101. "C": 3,
  102. "F": 4,
  103. }
  104. mitm_grade_to_int = defaultdict(int)
  105. for k,v in t.items():
  106. mitm_grade_to_int[k] = v
  107. class MitmFingerprint:
  108. def __init__(self):
  109. self.ua_fp = UserAgentFingerprint()
  110. self.mitm_name = ""
  111. self.mitm_grade = ""
  112. self.mitm_type = ""
  113. def __str__(self):
  114. return "{mitm_name}:{mitm_type}:{mitm_grade}".format(
  115. mitm_name=self.mitm_name,
  116. mitm_type=self.mitm_type,
  117. mitm_grade=self.mitm_grade)
  118. def set_fields(self, mitm_name, mitm_type, mitm_grade):
  119. self.mitm_name = mitm_name
  120. self.mitm_type = mitm_type_to_int[mitm_type]
  121. self.mitm_grade = mitm_type_to_int[mitm_grade]
  122. class UserAgentFingerprint:
  123. def __init__(self):
  124. self.browser = ""
  125. self.browser_version = ""
  126. self.platform = ""
  127. self.os = ""
  128. self.os_version = ""
  129. self.device = ""
  130. self.quirks = []
  131. def __str__(self):
  132. return "{browser}:{browser_version}:{platform}:{os}:{os_version}:{device}:{quirks}".format(
  133. browser=self.browser,
  134. browser_version=self.browser_version,
  135. platform=self.platform,
  136. os=self.os,
  137. os_version=self.os_version,
  138. device=self.device,
  139. quirks=",".join(self.quirks))
  140. def set_fields(self, device, os, os_version, browser, browser_version, platform):
  141. # handle some parsing exceptions
  142. if browser == "ipad":
  143. device="Tablet"
  144. os = "iOS"
  145. platform = "iPad"
  146. browser = "Safari"
  147. if browser == "iphone":
  148. device="Phone"
  149. os="iOS"
  150. platform="iPhone"
  151. browser="Safari"
  152. # use os version for browser version if not known
  153. if browser_version == "":
  154. browser_version = os_version
  155. # normalize browser
  156. browser = browser.replace("chrome", "Chrome")
  157. browser = browser.replace("firefox", "Firefox")
  158. browser = browser.replace("safari", "Safari")
  159. browser = browser.replace("android", "Android")
  160. browser = browser.replace("opera", "Opera")
  161. browser = browser.replace("silk", "Silk")
  162. browser = browser.replace("ie", "IE")
  163. browser = browser.replace("edge", "IE")
  164. # normalize browser version
  165. if not (re.match("^([0-9]+)\.([0-9]+)\.([0-9]+)$", browser_version)
  166. or re.match("^([0-9]+)\.([0-9]+)$", browser_version)
  167. or re.match("^([0-9]+)$", browser_version)):
  168. browser_version = "-1.-1.-1"
  169. # normalize device
  170. if browser == "Android":
  171. device="Phone" # some of these could be tablets, but w/e
  172. device = device.replace("computer", "Computer")
  173. # normalize platform
  174. platform = platform.replace("android", "Linux")
  175. platform = platform.replace("ipod", "iPod")
  176. platform = platform.replace("ipad", "iPad")
  177. platform = platform.replace("iphone", "iPhone")
  178. platform = platform.replace("OS_X", "Mac")
  179. platform = platform.replace("mac", "Mac")
  180. platform = platform.replace("windows", "Windows")
  181. # normalize os
  182. os = os.replace("OS_X", "MacOSX")
  183. os = os.replace("mac", "MacOSX")
  184. os = os.replace("ios", "iOS")
  185. os = os.replace("android", "Android")
  186. os = os.replace("windows", "Windows")
  187. # normalize os version
  188. if os == "Windows":
  189. os_version = os_version.replace("XP", "5.1.0")
  190. os_version = os_version.replace("7", "6.1.0")
  191. os_version = os_version.replace("8.1", "6.3.0")
  192. os_version = os_version.replace("8", "6.2.0")
  193. os_version = os_version.replace("10", "10.0.0")
  194. elif os == "MacOSX":
  195. os_version = os_version.replace("El_Capitan", "10.11.0")
  196. os_version = os_version.replace("Yosemite", "10.10.0")
  197. os_version = os_version.replace("Mavericks", "10.9.0")
  198. os_version = os_version.replace("Mountain_Lion", "10.8.0")
  199. os_version = os_version.replace("Lion", "10.7.0")
  200. os_version = os_version.replace("Snow_Leopard", "10.6.0")
  201. if not (re.match("^([0-9]+)\.([0-9]+)\.([0-9]+)$", browser_version)
  202. or re.match("^([0-9]+)\.([0-9]+)$", browser_version)
  203. or re.match("^([0-9]+)$", browser_version)):
  204. os_version = "-1.-1.-1"
  205. self.browser = browser_to_int[browser]
  206. self.browser_version = browser_version
  207. self.os = os_to_int[os]
  208. self.os_version = os_version
  209. self.platform = platform_to_int[platform]
  210. self.device = device_to_int[device]
  211. class RequestFingerprint:
  212. def __init__(self):
  213. self.clear()
  214. def clear(self):
  215. self.record_tls_version = ""
  216. self.tls_version = ""
  217. self.ciphersuites = []
  218. self.compression_methods = []
  219. self.signature_algorithms = []
  220. self.extensions = []
  221. self.elliptic_curves = []
  222. self.ec_point_formats = []
  223. self.headers = []
  224. self.quirks = []
  225. self.parsed = False
  226. def __str__(self):
  227. if not self.parsed:
  228. return ""
  229. if len(self.compression_methods) > 1:
  230. self.quirks.append("compr")
  231. return "{version}:{ciphersuites}:{extensions}:{elliptic_curves}:{ec_point_formats}:{headers}:{quirks}".format(
  232. version="{:x}".format(int(self.tls_version,16)),
  233. ciphersuites=",".join("{:x}".format(int(x,16)) for x in self.ciphersuites),
  234. extensions=",".join("{:x}".format(int(x,16)) for x in self.extensions),
  235. elliptic_curves=",".join("{:x}".format(int(x,16)) for x in self.elliptic_curves),
  236. ec_point_formats=",".join("{:x}".format(int(x,16)) for x in self.ec_point_formats),
  237. headers=",".join(self.headers),
  238. quirks=",".join(self.quirks))
  239. def parse(self, filename):
  240. self.clear()
  241. pdml = subprocess.run(["tshark", "-r", filename, "-T", "pdml"], capture_output=True, encoding='utf-8').stdout
  242. # tshark may omit closing tag on incomplete pcaps
  243. if '</pdml>' not in pdml:
  244. pdml += '</pdml>'
  245. root = etree.fromstring(pdml)
  246. for pkt in root:
  247. for proto in pkt:
  248. if proto.get("name") != "ssl":
  249. continue
  250. # TODO: skip SSLv2 records
  251. for field0 in proto:
  252. if field0.get("name") != "ssl.record":
  253. continue
  254. # only want the final client hello, so clear fields
  255. self.clear()
  256. # parse record version
  257. for field1 in field0:
  258. if field1.get("name") == "ssl.record.version":
  259. self.record_tls_version = field1.get("value")
  260. # parse record
  261. for field1 in field0:
  262. # check record type
  263. if field1.get("name") != "ssl.handshake":
  264. continue
  265. # check handshake type
  266. is_client_hello = False
  267. for field2 in field1:
  268. if field2.get("name") == "ssl.handshake.type" and field2.get("value") == "01":
  269. is_client_hello = True
  270. if not is_client_hello:
  271. continue
  272. # parse version
  273. for field2 in field1:
  274. if field2.get("name") == "ssl.handshake.version":
  275. self.tls_version = field2.get("value")
  276. # parse ciphersuites
  277. for field2 in field1:
  278. if field2.get("name") != "ssl.handshake.ciphersuites":
  279. continue
  280. for field3 in field2:
  281. if field3.get("name") != "ssl.handshake.ciphersuite":
  282. continue # unexpected
  283. self.ciphersuites.append(field3.get("value"))
  284. # parse compression methods
  285. for field2 in field1:
  286. if field2.get("name") != "ssl.handshake.comp_methods":
  287. continue
  288. for field3 in field2:
  289. if field3.get("name") != "ssl.handshake.comp_method":
  290. continue # unexpected
  291. self.compression_methods.append(field3.get("value"))
  292. # parse extensions
  293. for field2 in field1:
  294. if field2.get("name") == "": # extensions, etc.
  295. # find the extension type
  296. is_elliptic_curves = False
  297. is_ec_point_formats = False
  298. is_signature_algorithms = False
  299. for field3 in field2:
  300. if field3.get("name") == "ssl.handshake.extension.type":
  301. if field3.get("value") == "000a":
  302. is_elliptic_curves = True
  303. if field3.get("value") == "000b":
  304. is_ec_point_formats = True
  305. if field3.get("value") == "000d":
  306. is_signature_algorithms = True
  307. self.extensions.append(field3.get("value"))
  308. if is_elliptic_curves:
  309. for field3 in field2:
  310. if field3.get("name") != "ssl.handshake.extensions_elliptic_curves":
  311. continue
  312. for field4 in field3:
  313. if field4.get("name") != "ssl.handshake.extensions_elliptic_curve":
  314. continue # unexpected
  315. self.elliptic_curves.append(field4.get("value"))
  316. if is_ec_point_formats:
  317. for field3 in field2:
  318. if field3.get("name") != "ssl.handshake.extensions_elliptic_curves": # accounting for bug in tshark?
  319. continue
  320. for field4 in field3:
  321. if field4.get("name") != "ssl.handshake.extensions_ec_point_format":
  322. continue # unexpected
  323. self.ec_point_formats.append(field4.get("value"))
  324. if is_signature_algorithms:
  325. for field3 in field2:
  326. if field3.get("name") != "ssl.handshake.extensions_signature_algorithms":
  327. continue
  328. for field4 in field3:
  329. if field4.get("name") != "ssl.handshake.extensions_signature_algorithm":
  330. continue # unexpected
  331. self.signature_algorithms.append(field4.get("value"))
  332. self.parsed = True
  333. return
  334. if __name__ == "__main__":
  335. import argparse
  336. parser = argparse.ArgumentParser()
  337. parser.add_argument("filename", type=str, help="pcap containing TLS client hello")
  338. parser.add_argument("--mitm", action="store_true", help="parse pcap as MITM fingerprint file")
  339. args = parser.parse_args()
  340. ua_fp = UserAgentFingerprint()
  341. req_fp = RequestFingerprint()
  342. mitm_fp = MitmFingerprint()
  343. # parse user agent/mitm info from from file name
  344. description = args.filename.split('/')[-2]
  345. if not args.mitm:
  346. # filenames should conform to this format
  347. m = re.match('^([^-]+)-([^-]+)-([^-]+)-([^-]+)-([^-]+)$', description)
  348. if not m:
  349. sys.exit(1)
  350. device = m.group(1)
  351. os = m.group(2)
  352. os_version = m.group(3)
  353. browser = m.group(4)
  354. browser_version = m.group(5)
  355. platform = os
  356. ua_fp.set_fields(device, os, os_version, browser, browser_version, platform)
  357. else: # parse mitm file
  358. # mitm description should conform to this format (middle field can contain '-')
  359. m = re.match('^([^-]+)-([^-]+)-(.+)-([^-]+)-([^-]+)$', description)
  360. if not m:
  361. sys.exit(1)
  362. os = m.group(1)
  363. os_version = m.group(2)
  364. mitm_name = m.group(3)
  365. browser = m.group(4)
  366. browser_version = m.group(5)
  367. mitm_type = ""
  368. mitm_grade = ""
  369. device="Computer"
  370. platform = os
  371. # handle some exceptions
  372. if browser == "android":
  373. platform = "Linux"
  374. os = "Android"
  375. if mitm_name == "none":
  376. mitm_name = ""
  377. else:
  378. mitm_type = "Antivirus"
  379. mitm_fp.set_fields(mitm_name, mitm_type, mitm_grade)
  380. ua_fp.set_fields(device, os, os_version, browser, browser_version, platform)
  381. # parse request fingerprint from pcap
  382. req_fp.parse(args.filename)
  383. if not req_fp.parsed:
  384. sys.exit(1)
  385. full_fp = "{ua}|{req}|{mitm}".format(ua=ua_fp,req=req_fp,mitm=mitm_fp)
  386. print(full_fp)