bundle_fix_up.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. import argparse
  2. import hashlib
  3. import os
  4. from pathlib import Path
  5. import platform
  6. import shutil
  7. import struct
  8. import subprocess
  9. from typing import List, Optional, Tuple
  10. parser = argparse.ArgumentParser(description="Fixup for MacOS application bundle")
  11. parser.add_argument("input_directory", help="Input directory (Application path)")
  12. parser.add_argument("executable_sub_path", help="Main executable sub path")
  13. # Use Apple LLVM on Darwin, otherwise standard LLVM.
  14. if platform.system() == "Darwin":
  15. OTOOL = "otool"
  16. INSTALL_NAME_TOOL = "install_name_tool"
  17. else:
  18. OTOOL = shutil.which("llvm-otool")
  19. if OTOOL is None:
  20. for llvm_ver in [15, 14, 13]:
  21. otool_path = shutil.which(f"llvm-otool-{llvm_ver}")
  22. if otool_path is not None:
  23. OTOOL = otool_path
  24. INSTALL_NAME_TOOL = shutil.which(f"llvm-install-name-tool-{llvm_ver}")
  25. break
  26. else:
  27. INSTALL_NAME_TOOL = shutil.which("llvm-install-name-tool")
# Parse the command line once at import time; the top-level code further down
# reads args.input_directory and args.executable_sub_path.
args = parser.parse_args()
  29. def get_dylib_id(dylib_path: Path) -> str:
  30. res = subprocess.check_output([OTOOL, "-D", str(dylib_path.absolute())]).decode(
  31. "utf-8"
  32. )
  33. return res.split("\n")[1]
  34. def get_dylib_dependencies(dylib_path: Path) -> List[str]:
  35. output = (
  36. subprocess.check_output([OTOOL, "-L", str(dylib_path.absolute())])
  37. .decode("utf-8")
  38. .split("\n")[1:]
  39. )
  40. res = []
  41. for line in output:
  42. line = line.strip()
  43. index = line.find(" (compatibility version ")
  44. if index == -1:
  45. continue
  46. line = line[:index]
  47. res.append(line)
  48. return res
  49. def replace_dylib_id(dylib_path: Path, new_id: str):
  50. subprocess.check_call(
  51. [INSTALL_NAME_TOOL, "-id", new_id, str(dylib_path.absolute())]
  52. )
  53. def change_dylib_link(dylib_path: Path, old: str, new: str):
  54. subprocess.check_call(
  55. [INSTALL_NAME_TOOL, "-change", old, new, str(dylib_path.absolute())]
  56. )
  57. def add_dylib_rpath(dylib_path: Path, rpath: str):
  58. subprocess.check_call(
  59. [INSTALL_NAME_TOOL, "-add_rpath", rpath, str(dylib_path.absolute())]
  60. )
  61. def fixup_dylib(
  62. dylib_path: Path,
  63. replacement_path: str,
  64. search_path: List[str],
  65. content_directory: Path,
  66. ):
  67. dylib_id = get_dylib_id(dylib_path)
  68. new_dylib_id = replacement_path + "/" + os.path.basename(dylib_id)
  69. replace_dylib_id(dylib_path, new_dylib_id)
  70. dylib_dependencies = get_dylib_dependencies(dylib_path)
  71. dylib_new_mapping = {}
  72. for dylib_dependency in dylib_dependencies:
  73. if (
  74. not dylib_dependency.startswith("@executable_path")
  75. and not dylib_dependency.startswith("/usr/lib")
  76. and not dylib_dependency.startswith("/System/Library")
  77. ):
  78. dylib_dependency_name = os.path.basename(dylib_dependency)
  79. library_found = False
  80. for library_base_path in search_path:
  81. lib_path = Path(os.path.join(library_base_path, dylib_dependency_name))
  82. if lib_path.exists():
  83. target_replacement_path = get_path_related_to_target_exec(
  84. content_directory, lib_path
  85. )
  86. dylib_new_mapping[dylib_dependency] = (
  87. target_replacement_path
  88. + "/"
  89. + os.path.basename(dylib_dependency)
  90. )
  91. library_found = True
  92. if not library_found:
  93. raise Exception(
  94. f"{dylib_id}: Cannot find dependency {dylib_dependency_name} for fixup"
  95. )
  96. for key in dylib_new_mapping:
  97. change_dylib_link(dylib_path, key, dylib_new_mapping[key])
# File-type value that BundleFileEntry.write compares against to decide
# whether an entry must start on an aligned file offset.
FILE_TYPE_ASSEMBLY = 1
# Alignment, in bytes, that BundleFileEntry.write pads such entries to.
ALIGN_REQUIREMENTS = 4096
  100. def parse_embedded_string(data: bytes) -> Tuple[bytes, str]:
  101. first_byte = data[0]
  102. if (first_byte & 0x80) == 0:
  103. size = first_byte
  104. data = data[1:]
  105. else:
  106. second_byte = data[1]
  107. assert (second_byte & 0x80) == 0
  108. size = (second_byte << 7) | (first_byte & 0x7F)
  109. data = data[2:]
  110. res = data[:size].decode("utf-8")
  111. data = data[size:]
  112. return (data, res)
  113. def write_embedded_string(file, string: str):
  114. raw_str = string.encode("utf-8")
  115. raw_str_len = len(raw_str)
  116. assert raw_str_len < 0x7FFF
  117. if raw_str_len > 0x7F:
  118. file.write(struct.pack("b", raw_str_len & 0x7F | 0x80))
  119. file.write(struct.pack("b", raw_str_len >> 7))
  120. else:
  121. file.write(struct.pack("b", raw_str_len))
  122. file.write(raw_str)
  123. class BundleFileEntry(object):
  124. offset: int
  125. size: int
  126. compressed_size: int
  127. file_type: int
  128. relative_path: str
  129. data: bytes
  130. def __init__(
  131. self,
  132. offset: int,
  133. size: int,
  134. compressed_size: int,
  135. file_type: int,
  136. relative_path: str,
  137. data: bytes,
  138. ) -> None:
  139. self.offset = offset
  140. self.size = size
  141. self.compressed_size = compressed_size
  142. self.file_type = file_type
  143. self.relative_path = relative_path
  144. self.data = data
  145. def write(self, file):
  146. self.offset = file.tell()
  147. if (
  148. self.file_type == FILE_TYPE_ASSEMBLY
  149. and (self.offset % ALIGN_REQUIREMENTS) != 0
  150. ):
  151. padding_size = ALIGN_REQUIREMENTS - (self.offset % ALIGN_REQUIREMENTS)
  152. file.write(b"\0" * padding_size)
  153. self.offset += padding_size
  154. file.write(self.data)
  155. def write_header(self, file):
  156. file.write(
  157. struct.pack(
  158. "QQQb", self.offset, self.size, self.compressed_size, self.file_type
  159. )
  160. )
  161. write_embedded_string(file, self.relative_path)
  162. class BundleManifest(object):
  163. major: int
  164. minor: int
  165. bundle_id: str
  166. deps_json: BundleFileEntry
  167. runtimeconfig_json: BundleFileEntry
  168. flags: int
  169. files: List[BundleFileEntry]
  170. def __init__(
  171. self,
  172. major: int,
  173. minor: int,
  174. bundle_id: str,
  175. deps_json: BundleFileEntry,
  176. runtimeconfig_json: BundleFileEntry,
  177. flags: int,
  178. files: List[BundleFileEntry],
  179. ) -> None:
  180. self.major = major
  181. self.minor = minor
  182. self.bundle_id = bundle_id
  183. self.deps_json = deps_json
  184. self.runtimeconfig_json = runtimeconfig_json
  185. self.flags = flags
  186. self.files = files
  187. def write(self, file) -> int:
  188. for bundle_file in self.files:
  189. bundle_file.write(file)
  190. bundle_header_offset = file.tell()
  191. file.write(struct.pack("iiI", self.major, self.minor, len(self.files)))
  192. write_embedded_string(file, self.bundle_id)
  193. if self.deps_json is not None:
  194. deps_json_location_offset = self.deps_json.offset
  195. deps_json_location_size = self.deps_json.size
  196. else:
  197. deps_json_location_offset = 0
  198. deps_json_location_size = 0
  199. if self.runtimeconfig_json is not None:
  200. runtimeconfig_json_location_offset = self.runtimeconfig_json.offset
  201. runtimeconfig_json_location_size = self.runtimeconfig_json.size
  202. else:
  203. runtimeconfig_json_location_offset = 0
  204. runtimeconfig_json_location_size = 0
  205. file.write(
  206. struct.pack("qq", deps_json_location_offset, deps_json_location_size)
  207. )
  208. file.write(
  209. struct.pack(
  210. "qq",
  211. runtimeconfig_json_location_offset,
  212. runtimeconfig_json_location_size,
  213. )
  214. )
  215. file.write(struct.pack("q", self.flags))
  216. for bundle_file in self.files:
  217. bundle_file.write_header(file)
  218. return bundle_header_offset
  219. def read_file_entry(
  220. raw_data: bytes, header_bytes: bytes
  221. ) -> Tuple[bytes, BundleFileEntry]:
  222. (
  223. offset,
  224. size,
  225. compressed_size,
  226. file_type,
  227. ) = struct.unpack("QQQb", header_bytes[:0x19])
  228. (header_bytes, relative_path) = parse_embedded_string(header_bytes[0x19:])
  229. target_size = compressed_size
  230. if target_size == 0:
  231. target_size = size
  232. return (
  233. header_bytes,
  234. BundleFileEntry(
  235. offset,
  236. size,
  237. compressed_size,
  238. file_type,
  239. relative_path,
  240. raw_data[offset : offset + target_size],
  241. ),
  242. )
  243. def get_dotnet_bundle_data(data: bytes) -> Optional[Tuple[int, int, BundleManifest]]:
  244. offset = data.find(hashlib.sha256(b".net core bundle\n").digest())
  245. if offset == -1:
  246. return None
  247. raw_header_offset = data[offset - 8 : offset]
  248. (header_offset,) = struct.unpack("q", raw_header_offset)
  249. header_bytes = data[header_offset:]
  250. (
  251. major,
  252. minor,
  253. files_count,
  254. ) = struct.unpack("iiI", header_bytes[:0xC])
  255. header_bytes = header_bytes[0xC:]
  256. (header_bytes, bundle_id) = parse_embedded_string(header_bytes)
  257. # v2 header
  258. (
  259. deps_json_location_offset,
  260. deps_json_location_size,
  261. ) = struct.unpack("qq", header_bytes[:0x10])
  262. (
  263. runtimeconfig_json_location_offset,
  264. runtimeconfig_json_location_size,
  265. ) = struct.unpack("qq", header_bytes[0x10:0x20])
  266. (flags,) = struct.unpack("q", header_bytes[0x20:0x28])
  267. header_bytes = header_bytes[0x28:]
  268. files = []
  269. deps_json = None
  270. runtimeconfig_json = None
  271. for _ in range(files_count):
  272. (header_bytes, file_entry) = read_file_entry(data, header_bytes)
  273. files.append(file_entry)
  274. if file_entry.offset == deps_json_location_offset:
  275. deps_json = file_entry
  276. elif file_entry.offset == runtimeconfig_json_location_offset:
  277. runtimeconfig_json = file_entry
  278. file_entry = files[0]
  279. return (
  280. file_entry.offset,
  281. header_offset,
  282. BundleManifest(
  283. major, minor, bundle_id, deps_json, runtimeconfig_json, flags, files
  284. ),
  285. )
  286. LC_SYMTAB = 0x2
  287. LC_SEGMENT_64 = 0x19
  288. LC_CODE_SIGNATURE = 0x1D
  289. def fixup_linkedit(file, data: bytes, new_size: int):
  290. offset = 0
  291. (
  292. macho_magic,
  293. macho_cputype,
  294. macho_cpusubtype,
  295. macho_filetype,
  296. macho_ncmds,
  297. macho_sizeofcmds,
  298. macho_flags,
  299. macho_reserved,
  300. ) = struct.unpack("IiiIIIII", data[offset : offset + 0x20])
  301. offset += 0x20
  302. linkedit_offset = None
  303. symtab_offset = None
  304. codesign_offset = None
  305. for _ in range(macho_ncmds):
  306. (cmd, cmdsize) = struct.unpack("II", data[offset : offset + 8])
  307. if cmd == LC_SEGMENT_64:
  308. (
  309. cmd,
  310. cmdsize,
  311. segname_raw,
  312. vmaddr,
  313. vmsize,
  314. fileoff,
  315. filesize,
  316. maxprot,
  317. initprot,
  318. nsects,
  319. flags,
  320. ) = struct.unpack("II16sQQQQiiII", data[offset : offset + 72])
  321. segname = segname_raw.decode("utf-8").split("\0")[0]
  322. if segname == "__LINKEDIT":
  323. linkedit_offset = offset
  324. elif cmd == LC_SYMTAB:
  325. symtab_offset = offset
  326. elif cmd == LC_CODE_SIGNATURE:
  327. codesign_offset = offset
  328. offset += cmdsize
  329. pass
  330. assert linkedit_offset is not None and symtab_offset is not None
  331. # If there is a codesign section, clean it up.
  332. if codesign_offset is not None:
  333. (
  334. codesign_cmd,
  335. codesign_cmdsize,
  336. codesign_dataoff,
  337. codesign_datasize,
  338. ) = struct.unpack("IIII", data[codesign_offset : codesign_offset + 16])
  339. file.seek(codesign_offset)
  340. file.write(b"\0" * codesign_cmdsize)
  341. macho_ncmds -= 1
  342. macho_sizeofcmds -= codesign_cmdsize
  343. file.seek(0)
  344. file.write(
  345. struct.pack(
  346. "IiiIIIII",
  347. macho_magic,
  348. macho_cputype,
  349. macho_cpusubtype,
  350. macho_filetype,
  351. macho_ncmds,
  352. macho_sizeofcmds,
  353. macho_flags,
  354. macho_reserved,
  355. )
  356. )
  357. file.seek(codesign_dataoff)
  358. file.write(b"\0" * codesign_datasize)
  359. (
  360. symtab_cmd,
  361. symtab_cmdsize,
  362. symtab_symoff,
  363. symtab_nsyms,
  364. symtab_stroff,
  365. symtab_strsize,
  366. ) = struct.unpack("IIIIII", data[symtab_offset : symtab_offset + 24])
  367. symtab_strsize = new_size - symtab_stroff
  368. new_symtab = struct.pack(
  369. "IIIIII",
  370. symtab_cmd,
  371. symtab_cmdsize,
  372. symtab_symoff,
  373. symtab_nsyms,
  374. symtab_stroff,
  375. symtab_strsize,
  376. )
  377. file.seek(symtab_offset)
  378. file.write(new_symtab)
  379. (
  380. linkedit_cmd,
  381. linkedit_cmdsize,
  382. linkedit_segname_raw,
  383. linkedit_vmaddr,
  384. linkedit_vmsize,
  385. linkedit_fileoff,
  386. linkedit_filesize,
  387. linkedit_maxprot,
  388. linkedit_initprot,
  389. linkedit_nsects,
  390. linkedit_flags,
  391. ) = struct.unpack("II16sQQQQiiII", data[linkedit_offset : linkedit_offset + 72])
  392. linkedit_filesize = new_size - linkedit_fileoff
  393. linkedit_vmsize = linkedit_filesize
  394. new_linkedit = struct.pack(
  395. "II16sQQQQiiII",
  396. linkedit_cmd,
  397. linkedit_cmdsize,
  398. linkedit_segname_raw,
  399. linkedit_vmaddr,
  400. linkedit_vmsize,
  401. linkedit_fileoff,
  402. linkedit_filesize,
  403. linkedit_maxprot,
  404. linkedit_initprot,
  405. linkedit_nsects,
  406. linkedit_flags,
  407. )
  408. file.seek(linkedit_offset)
  409. file.write(new_linkedit)
  410. def write_bundle_data(
  411. output,
  412. old_bundle_base_offset: int,
  413. new_bundle_base_offset: int,
  414. bundle: BundleManifest,
  415. ) -> int:
  416. # Write bundle data
  417. bundle_header_offset = bundle.write(output)
  418. total_size = output.tell()
  419. # Patch the header position
  420. offset = file_data.find(hashlib.sha256(b".net core bundle\n").digest())
  421. output.seek(offset - 8)
  422. output.write(struct.pack("q", bundle_header_offset))
  423. return total_size - new_bundle_base_offset
  424. input_directory: Path = Path(args.input_directory)
  425. content_directory: Path = Path(os.path.join(args.input_directory, "Contents"))
  426. executable_path: Path = Path(os.path.join(content_directory, args.executable_sub_path))
  427. def get_path_related_to_other_path(a: Path, b: Path) -> str:
  428. temp = b
  429. parts = []
  430. while temp != a:
  431. temp = temp.parent
  432. parts.append(temp.name)
  433. parts.remove(parts[-1])
  434. parts.reverse()
  435. return "/".join(parts)
  436. def get_path_related_to_target_exec(input_directory: Path, path: Path):
  437. return "@executable_path/../" + get_path_related_to_other_path(
  438. input_directory, path
  439. )
  440. search_path = [
  441. Path(os.path.join(content_directory, "Frameworks")),
  442. Path(os.path.join(content_directory, "Resources/lib")),
  443. ]
  444. for path in content_directory.rglob("**/*.dylib"):
  445. current_search_path = [path.parent]
  446. current_search_path.extend(search_path)
  447. fixup_dylib(
  448. path,
  449. get_path_related_to_target_exec(content_directory, path),
  450. current_search_path,
  451. content_directory,
  452. )
  453. for path in content_directory.rglob("**/*.so"):
  454. current_search_path = [path.parent]
  455. current_search_path.extend(search_path)
  456. fixup_dylib(
  457. path,
  458. get_path_related_to_target_exec(content_directory, path),
  459. current_search_path,
  460. content_directory,
  461. )
  462. with open(executable_path, "rb") as input:
  463. file_data = input.read()
  464. (bundle_base_offset, bundle_header_offset, bundle) = get_dotnet_bundle_data(file_data)
  465. add_dylib_rpath(executable_path, "@executable_path/../Frameworks/")
  466. # Recent "vanilla" version of LLVM (LLVM 13 and upper) seems to really dislike how .NET package its assemblies.
  467. # As a result, after execution of install_name_tool it will have "fixed" the symtab resulting in a missing .NET bundle...
  468. # To mitigate that, we check if the bundle offset inside the binary is valid after install_name_tool and readd .NET bundle if not.
  469. output_file_size = os.stat(executable_path).st_size
  470. if output_file_size < bundle_header_offset:
  471. print("LLVM broke the .NET bundle, readding bundle data...")
  472. with open(executable_path, "r+b") as output:
  473. file_data = output.read()
  474. bundle_data_size = write_bundle_data(
  475. output, bundle_base_offset, output_file_size, bundle
  476. )
  477. # Now patch the __LINKEDIT section
  478. new_size = output_file_size + bundle_data_size
  479. fixup_linkedit(output, file_data, new_size)