filename_to_metadata.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #!/usr/bin/env python3
  2. import re
  3. import sys
  4. import os.path as ospath
  5. import subprocess
  6. from collections import defaultdict
  7. from lxml import etree
  8. import json
  9. class MitmFingerprint:
  10. def __init__(self, mitm_name, mitm_type):
  11. if mitm_name == "none":
  12. mitm_name = ""
  13. mitm_type = ""
  14. self.raw_ua = ""
  15. self.name = mitm_name
  16. self.type = mitm_type
  17. class UserAgentFingerprint:
  18. def __init__(self, device, os, os_version, browser, browser_version, platform):
  19. # handle some parsing exceptions
  20. if browser == "ipad":
  21. device="Tablet"
  22. os = "iOS"
  23. platform = "iPad"
  24. browser = "Safari"
  25. if browser == "iphone":
  26. device="Phone"
  27. os="iOS"
  28. platform="iPhone"
  29. browser="Safari"
  30. # use os version for browser version if not known
  31. if browser_version == "":
  32. browser_version = os_version
  33. # normalize browser
  34. browser = browser.replace("chrome", "Chrome")
  35. browser = browser.replace("firefox", "Firefox")
  36. browser = browser.replace("safari", "Safari")
  37. browser = browser.replace("android", "Android")
  38. browser = browser.replace("opera", "Opera")
  39. browser = browser.replace("silk", "Silk")
  40. browser = browser.replace("ie", "IE")
  41. browser = browser.replace("edge", "IE")
  42. # normalize browser version
  43. if not (re.match("^([0-9]+)\.([0-9]+)\.([0-9]+)$", browser_version)
  44. or re.match("^([0-9]+)\.([0-9]+)$", browser_version)
  45. or re.match("^([0-9]+)$", browser_version)):
  46. browser_version = ""
  47. # normalize device
  48. if browser == "Android":
  49. device = "Phone" # some of these could be tablets, but w/e
  50. platform = "Linux"
  51. os = "Android"
  52. device = device.replace("computer", "Computer")
  53. # normalize platform
  54. platform = platform.replace("android", "Linux")
  55. platform = platform.replace("ipod", "iPod")
  56. platform = platform.replace("ipad", "iPad")
  57. platform = platform.replace("iphone", "iPhone")
  58. platform = platform.replace("OS_X", "Mac")
  59. platform = platform.replace("mac", "Mac")
  60. platform = platform.replace("windows", "Windows")
  61. # normalize os
  62. os = os.replace("OS_X", "MacOSX")
  63. os = os.replace("mac", "MacOSX")
  64. os = os.replace("ios", "iOS")
  65. os = os.replace("android", "Android")
  66. os = os.replace("windows", "Windows")
  67. # normalize os version
  68. if os == "Windows":
  69. os_version = os_version.replace("XP", "5.1.0")
  70. os_version = os_version.replace("7", "6.1.0")
  71. os_version = os_version.replace("8.1", "6.3.0")
  72. os_version = os_version.replace("8", "6.2.0")
  73. os_version = os_version.replace("10", "10.0.0")
  74. elif os == "MacOSX":
  75. os_version = os_version.replace("El_Capitan", "10.11.0")
  76. os_version = os_version.replace("Yosemite", "10.10.0")
  77. os_version = os_version.replace("Mavericks", "10.9.0")
  78. os_version = os_version.replace("Mountain_Lion", "10.8.0")
  79. os_version = os_version.replace("Lion", "10.7.0")
  80. os_version = os_version.replace("Snow_Leopard", "10.6.0")
  81. if not (re.match("^([0-9]+)\.([0-9]+)\.([0-9]+)$", browser_version)
  82. or re.match("^([0-9]+)\.([0-9]+)$", browser_version)
  83. or re.match("^([0-9]+)$", browser_version)):
  84. os_version = ""
  85. self.browser = browser
  86. self.browser_version = browser_version
  87. self.os = os
  88. self.os_version = os_version
  89. self.platform = platform
  90. self.device = device
  91. if __name__ == "__main__":
  92. import argparse
  93. parser = argparse.ArgumentParser()
  94. parser.add_argument("filename", type=str, help="pcap containing TLS client hello")
  95. args = parser.parse_args()
  96. record = {}
  97. # metadata fields
  98. desc = ""
  99. os = ""
  100. os_version = ""
  101. browser = ""
  102. browser_version = ""
  103. device = ""
  104. platform = ""
  105. mitm_name = ""
  106. mitm_version = ""
  107. mitm_type = ""
  108. # only process files named handshake.pcap
  109. dirname, basename = ospath.split(args.filename)
  110. if basename != "handshake.pcap":
  111. sys.exit(1)
  112. # the lowest level directory contains a description
  113. _, desc = ospath.split(dirname)
  114. metadata = {}
  115. metadata["desc"] = desc
  116. metadata["comment"] = "generated by {}".format(sys.argv[0])
  117. metadata["handshake_pcap"] = args.filename
  118. # check if accompanying header pcap is present
  119. header_pcap = ospath.join(dirname, "header.pcap")
  120. if ospath.exists(header_pcap):
  121. metadata["header_pcap"] = header_pcap
  122. if "browsers" in dirname:
  123. # browser filenames should conform to this format
  124. m = re.match('^([^-]+)-([^-]+)-([^-]+)-([^-]+)-([^-]+)$', desc)
  125. if not m:
  126. sys.exit(1)
  127. device = m.group(1)
  128. os = m.group(2)
  129. os_version = m.group(3)
  130. browser = m.group(4)
  131. browser_version = m.group(5)
  132. platform = os
  133. ua_fp = UserAgentFingerprint(device, os, os_version, browser, browser_version, platform)
  134. metadata["ua_fingerprint"] = ua_fp.__dict__
  135. elif "antivirus-run2" in dirname:
  136. # mitm description should conform to this format (middle field can contain '-')
  137. m = re.match('^([^-]+)-([^-]+)-(.+)-([^-]+)-([^-]+)$', desc)
  138. if not m:
  139. sys.exit(1)
  140. os = m.group(1)
  141. os_version = m.group(2)
  142. browser = m.group(4)
  143. browser_version = m.group(5)
  144. device = "Computer"
  145. mitm_name = m.group(3)
  146. mitm_type = "Antivirus"
  147. mitm_fp = MitmFingerprint(mitm_name, mitm_type)
  148. ua_fp = UserAgentFingerprint(device, os, os_version, browser, browser_version, platform)
  149. metadata["ua_fingerprint"] = ua_fp.__dict__
  150. metadata["mitm_fingerprint"] = mitm_fp.__dict__
  151. elif "middleboxes" in dirname:
  152. mitm_type = "Proxy"
  153. mitm_name = desc
  154. mitm_fp = MitmFingerprint(mitm_name, mitm_type)
  155. ua_fp = UserAgentFingerprint(device, os, os_version, browser, browser_version, platform)
  156. metadata["ua_fingerprint"] = ua_fp.__dict__
  157. metadata["mitm_fingerprint"] = mitm_fp.__dict__
  158. pass
  159. print(json.dumps(metadata))