123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610 |
- import argparse
- import hashlib
- import os
- from pathlib import Path
- import platform
- import shutil
- import struct
- import subprocess
- from typing import List, Optional, Tuple
# Command line: the .app directory and the main executable's path relative to
# its "Contents" directory.
parser = argparse.ArgumentParser(description="Fixup for MacOS application bundle")
parser.add_argument("input_directory", help="Input directory (Application path)")
parser.add_argument("executable_sub_path", help="Main executable sub path")

# Use Apple LLVM on Darwin, otherwise standard LLVM.
if platform.system() == "Darwin":
    OTOOL = "otool"
    INSTALL_NAME_TOOL = "install_name_tool"
else:
    OTOOL = shutil.which("llvm-otool")
    # Always bind the name so a failed lookup is reported below instead of
    # surfacing later as a NameError.
    INSTALL_NAME_TOOL = None

    if OTOOL is None:
        # No unversioned binary on PATH: try the versioned names, newest first,
        # and take the install-name-tool from the same LLVM version.
        for llvm_ver in [15, 14, 13]:
            otool_path = shutil.which(f"llvm-otool-{llvm_ver}")

            if otool_path is not None:
                OTOOL = otool_path
                INSTALL_NAME_TOOL = shutil.which(
                    f"llvm-install-name-tool-{llvm_ver}"
                )
                break
    else:
        INSTALL_NAME_TOOL = shutil.which("llvm-install-name-tool")

# Parse before validating the tools so that "--help" keeps working on hosts
# where LLVM is not installed.
args = parser.parse_args()

if OTOOL is None or INSTALL_NAME_TOOL is None:
    parser.exit(
        1,
        "error: cannot find llvm-otool / llvm-install-name-tool "
        "(tried unversioned names and versions 15, 14, 13)\n",
    )
def get_dylib_id(dylib_path: Path) -> str:
    """Return the install name (ID) of a library as printed by ``otool -D``.

    The tool's output is decoded as UTF-8 and its second line is returned
    verbatim; the first line is skipped.
    """
    command = [OTOOL, "-D", str(dylib_path.absolute())]
    output_lines = subprocess.check_output(command).decode("utf-8").split("\n")

    return output_lines[1]
def get_dylib_dependencies(dylib_path: Path) -> List[str]:
    """Return the library paths listed by ``otool -L`` for ``dylib_path``.

    The first output line is skipped. Every remaining line that contains
    " (compatibility version " contributes the text preceding that marker
    (with surrounding whitespace stripped); other lines are ignored.
    """
    version_marker = " (compatibility version "

    raw_output = subprocess.check_output([OTOOL, "-L", str(dylib_path.absolute())])
    stripped_lines = (
        entry.strip() for entry in raw_output.decode("utf-8").split("\n")[1:]
    )

    return [
        entry[: entry.find(version_marker)]
        for entry in stripped_lines
        if version_marker in entry
    ]
def replace_dylib_id(dylib_path: Path, new_id: str):
    """Set the install name of ``dylib_path`` to ``new_id``.

    Runs the install-name tool with ``-id``; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    target = str(dylib_path.absolute())
    command = [INSTALL_NAME_TOOL, "-id", new_id, target]

    subprocess.check_call(command)
def change_dylib_link(dylib_path: Path, old: str, new: str):
    """Rewrite a dependency reference of ``dylib_path`` from ``old`` to ``new``.

    Runs the install-name tool with ``-change``; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    target = str(dylib_path.absolute())
    command = [INSTALL_NAME_TOOL, "-change", old, new, target]

    subprocess.check_call(command)
def add_dylib_rpath(dylib_path: Path, rpath: str):
    """Add ``rpath`` to the runtime search paths of ``dylib_path``.

    Runs the install-name tool with ``-add_rpath``; a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    target = str(dylib_path.absolute())
    command = [INSTALL_NAME_TOOL, "-add_rpath", rpath, target]

    subprocess.check_call(command)
def fixup_dylib(
    dylib_path: Path,
    replacement_path: str,
    search_path: List[str],
    content_directory: Path,
):
    """Rewrite the ID and dependency links of one library to bundle paths.

    The library's ID becomes ``replacement_path`` plus the base name of its
    current ID. Each dependency that does not start with "@executable_path",
    "/usr/lib" or "/System/Library" is looked up by file name in every
    directory of ``search_path`` and re-linked to the "@executable_path"
    relative location of the match.

    Raises:
        Exception: a dependency could not be found in any search directory.
    """
    dylib_id = get_dylib_id(dylib_path)
    replace_dylib_id(dylib_path, replacement_path + "/" + os.path.basename(dylib_id))

    # Dependencies with these prefixes are left exactly as they are.
    untouched_prefixes = ("@executable_path", "/usr/lib", "/System/Library")
    relinked = {}

    for dependency in get_dylib_dependencies(dylib_path):
        if dependency.startswith(untouched_prefixes):
            continue

        dependency_name = os.path.basename(dependency)
        found = False

        # Every directory is probed; when several contain the file, the last
        # one in search_path ends up in the mapping.
        for base_directory in search_path:
            candidate = Path(os.path.join(base_directory, dependency_name))
            if not candidate.exists():
                continue

            bundle_relative_directory = get_path_related_to_target_exec(
                content_directory, candidate
            )
            relinked[dependency] = bundle_relative_directory + "/" + dependency_name
            found = True

        if not found:
            raise Exception(
                f"{dylib_id}: Cannot find dependency {dependency_name} for fixup"
            )

    for old_link, new_link in relinked.items():
        change_dylib_link(dylib_path, old_link, new_link)
# File type value that BundleFileEntry.write treats specially: entries of this
# type are padded so that their data starts on an aligned offset.
# NOTE(review): presumably the .NET bundle "assembly" file type -- confirm.
FILE_TYPE_ASSEMBLY = 1

# Alignment, in bytes, applied to the data offset of assembly entries (4 KiB).
ALIGN_REQUIREMENTS = 0x1000
def parse_embedded_string(data: bytes) -> Tuple[bytes, str]:
    """Decode one length-prefixed UTF-8 string from the start of ``data``.

    The length prefix is one byte when its high bit is clear; otherwise it is
    two bytes, with the low 7 bits in the first byte and the next 7 bits in
    the second byte (whose own high bit must be clear).

    Returns:
        A tuple ``(remaining_bytes, decoded_string)``.
    """
    low_byte = data[0]

    if low_byte & 0x80:
        high_byte = data[1]
        assert (high_byte & 0x80) == 0

        length = (low_byte & 0x7F) | (high_byte << 7)
        payload_start = 2
    else:
        length = low_byte
        payload_start = 1

    payload_end = payload_start + length
    text = data[payload_start:payload_end].decode("utf-8")

    return (data[payload_end:], text)
def write_embedded_string(file, string: str):
    """Write ``string`` to ``file`` as a length-prefixed UTF-8 string.

    Lengths up to 0x7F use a single prefix byte. Longer strings use two
    bytes: the low 7 bits with the high bit set, followed by the remaining
    bits. This is the layout ``parse_embedded_string`` decodes.

    Args:
        file: Binary file-like object with a ``write`` method.
        string: Text to encode.

    Raises:
        ValueError: the encoded string is longer than 0x3FFF bytes, the
            largest length the two-byte prefix can express.
    """
    raw_str = string.encode("utf-8")
    raw_str_len = len(raw_str)

    # Two prefix bytes carry 7 bits each; anything larger cannot be read back.
    if raw_str_len > 0x3FFF:
        raise ValueError(
            f"embedded string is too long ({raw_str_len} bytes, maximum is 16383)"
        )

    # The prefix bytes are unsigned ("B"): the first byte of the long form
    # always has its high bit set and does not fit a signed byte.
    if raw_str_len > 0x7F:
        file.write(struct.pack("BB", (raw_str_len & 0x7F) | 0x80, raw_str_len >> 7))
    else:
        file.write(struct.pack("B", raw_str_len))

    file.write(raw_str)
class BundleFileEntry:
    """One file stored in the bundle: its header fields plus its raw bytes."""

    # Position of the entry's data in the output; refreshed by write().
    offset: int
    # Size field from the entry header.
    size: int
    # Compressed-size field from the entry header.
    compressed_size: int
    # File type field from the entry header (see FILE_TYPE_ASSEMBLY).
    file_type: int
    # Path string stored in the entry header.
    relative_path: str
    # Bytes written by write().
    data: bytes

    def __init__(
        self,
        offset: int,
        size: int,
        compressed_size: int,
        file_type: int,
        relative_path: str,
        data: bytes,
    ) -> None:
        """Store the given header fields and payload unchanged."""
        self.data = data
        self.relative_path = relative_path
        self.file_type = file_type
        self.compressed_size = compressed_size
        self.size = size
        self.offset = offset

    def write(self, file):
        """Write the payload at the current position and record its offset.

        Assembly entries are preceded by zero bytes up to the next multiple
        of ALIGN_REQUIREMENTS; ``self.offset`` points at the payload itself.
        """
        self.offset = file.tell()

        if self.file_type == FILE_TYPE_ASSEMBLY:
            misalignment = self.offset % ALIGN_REQUIREMENTS
            if misalignment != 0:
                padding_size = ALIGN_REQUIREMENTS - misalignment
                file.write(b"\0" * padding_size)
                self.offset += padding_size

        file.write(self.data)

    def write_header(self, file):
        """Write the header: offset, size, compressed size, type, then path."""
        fixed_fields = struct.pack(
            "QQQb", self.offset, self.size, self.compressed_size, self.file_type
        )
        file.write(fixed_fields)
        write_embedded_string(file, self.relative_path)
class BundleManifest:
    """Bundle header fields together with the list of bundled files."""

    # Version numbers stored at the start of the bundle header.
    major: int
    minor: int
    # Identifier string stored in the bundle header.
    bundle_id: str
    # Entries whose location is recorded in the header; either may be None.
    deps_json: BundleFileEntry
    runtimeconfig_json: BundleFileEntry
    # Flags value stored in the bundle header.
    flags: int
    # Every bundled file, in the order it is written.
    files: List[BundleFileEntry]

    def __init__(
        self,
        major: int,
        minor: int,
        bundle_id: str,
        deps_json: BundleFileEntry,
        runtimeconfig_json: BundleFileEntry,
        flags: int,
        files: List[BundleFileEntry],
    ) -> None:
        """Store the given header fields and file list unchanged."""
        self.files = files
        self.flags = flags
        self.runtimeconfig_json = runtimeconfig_json
        self.deps_json = deps_json
        self.bundle_id = bundle_id
        self.minor = minor
        self.major = major

    def write(self, file) -> int:
        """Write all file payloads followed by the bundle header.

        Returns:
            The position in ``file`` at which the bundle header starts.
        """
        # Payloads go first; writing them updates each entry's offset, which
        # the header below then records.
        for entry in self.files:
            entry.write(file)

        bundle_header_offset = file.tell()

        file.write(struct.pack("iiI", self.major, self.minor, len(self.files)))
        write_embedded_string(file, self.bundle_id)

        # Locations of deps.json and runtimeconfig.json; (0, 0) when absent.
        for special_entry in (self.deps_json, self.runtimeconfig_json):
            if special_entry is None:
                location = (0, 0)
            else:
                location = (special_entry.offset, special_entry.size)

            file.write(struct.pack("qq", *location))

        file.write(struct.pack("q", self.flags))

        for entry in self.files:
            entry.write_header(file)

        return bundle_header_offset
def read_file_entry(
    raw_data: bytes, header_bytes: bytes
) -> Tuple[bytes, BundleFileEntry]:
    """Parse one file entry header and slice its payload out of ``raw_data``.

    Args:
        raw_data: Buffer the entry's offset refers to.
        header_bytes: Buffer positioned at the start of the entry header.

    Returns:
        A tuple ``(remaining_header_bytes, entry)``.
    """
    # Three 64-bit fields plus one type byte precede the path string.
    fixed_part_size = 0x19

    (offset, size, compressed_size, file_type) = struct.unpack(
        "QQQb", header_bytes[:fixed_part_size]
    )
    (remaining_bytes, relative_path) = parse_embedded_string(
        header_bytes[fixed_part_size:]
    )

    # A compressed size of zero means the payload occupies `size` bytes.
    stored_size = compressed_size if compressed_size != 0 else size

    entry = BundleFileEntry(
        offset,
        size,
        compressed_size,
        file_type,
        relative_path,
        raw_data[offset : offset + stored_size],
    )

    return (remaining_bytes, entry)
def get_dotnet_bundle_data(data: bytes) -> Optional[Tuple[int, int, BundleManifest]]:
    """Locate and parse the .NET bundle embedded in an executable image.

    The bundle is found through the SHA-256 digest of ".net core bundle\\n";
    the 8 bytes preceding that digest hold the bundle header offset.

    Returns:
        ``None`` when the digest is not present in ``data``; otherwise a
        tuple ``(first_file_offset, header_offset, manifest)``.
    """
    marker_offset = data.find(hashlib.sha256(b".net core bundle\n").digest())
    if marker_offset == -1:
        return None

    (header_offset,) = struct.unpack("q", data[marker_offset - 8 : marker_offset])
    cursor = data[header_offset:]

    (major, minor, files_count) = struct.unpack("iiI", cursor[:0xC])
    (cursor, bundle_id) = parse_embedded_string(cursor[0xC:])

    # v2 header: deps.json location, runtimeconfig.json location, then flags.
    (
        deps_json_offset,
        _deps_json_size,
        runtimeconfig_json_offset,
        _runtimeconfig_json_size,
        flags,
    ) = struct.unpack("qqqqq", cursor[:0x28])
    cursor = cursor[0x28:]

    files = []
    deps_json = None
    runtimeconfig_json = None

    for _ in range(files_count):
        (cursor, entry) = read_file_entry(data, cursor)
        files.append(entry)

        # The two special entries are recognised by their data offset.
        if entry.offset == deps_json_offset:
            deps_json = entry
        elif entry.offset == runtimeconfig_json_offset:
            runtimeconfig_json = entry

    manifest = BundleManifest(
        major, minor, bundle_id, deps_json, runtimeconfig_json, flags, files
    )

    return (files[0].offset, header_offset, manifest)
# Mach-O load command identifiers handled by fixup_linkedit.
LC_SYMTAB = 0x2
LC_SEGMENT_64 = 0x19
LC_CODE_SIGNATURE = 0x1D


def fixup_linkedit(file, data: bytes, new_size: int):
    """Grow __LINKEDIT and the symbol string table up to ``new_size``.

    The header and load commands are parsed from ``data``; the patched
    structures are written to ``file`` at the same offsets. When an
    LC_CODE_SIGNATURE command exists, the command and the data it points to
    are overwritten with zeros and the header's command count and size are
    reduced accordingly.

    Args:
        file: Seekable binary file object to patch.
        data: Contents to parse (a 0x20-byte header followed by load commands).
        new_size: Offset at which __LINKEDIT and the string table must end.
    """
    header_format = "IiiIIIII"
    segment_format = "II16sQQQQiiII"
    symtab_format = "IIIIII"

    # Header fields: magic, cputype, cpusubtype, filetype, ncmds, sizeofcmds,
    # flags, reserved.
    header = list(struct.unpack(header_format, data[0:0x20]))

    linkedit_offset = None
    symtab_offset = None
    codesign_offset = None

    # Walk the load commands, remembering where the interesting ones start.
    cursor = 0x20
    for _ in range(header[4]):
        (cmd, cmdsize) = struct.unpack("II", data[cursor : cursor + 8])

        if cmd == LC_SEGMENT_64:
            segment = struct.unpack(segment_format, data[cursor : cursor + 72])
            segname = segment[2].decode("utf-8").split("\0")[0]

            if segname == "__LINKEDIT":
                linkedit_offset = cursor
        elif cmd == LC_SYMTAB:
            symtab_offset = cursor
        elif cmd == LC_CODE_SIGNATURE:
            codesign_offset = cursor

        cursor += cmdsize

    assert linkedit_offset is not None and symtab_offset is not None

    # If there is a codesign section, clean it up.
    # NOTE(review): zeroing the command in place presumably relies on it being
    # the last load command -- confirm.
    if codesign_offset is not None:
        (_, codesign_cmdsize, codesign_dataoff, codesign_datasize) = struct.unpack(
            "IIII", data[codesign_offset : codesign_offset + 16]
        )

        file.seek(codesign_offset)
        file.write(b"\0" * codesign_cmdsize)

        header[4] -= 1  # ncmds
        header[5] -= codesign_cmdsize  # sizeofcmds

        file.seek(0)
        file.write(struct.pack(header_format, *header))

        file.seek(codesign_dataoff)
        file.write(b"\0" * codesign_datasize)

    # Symbol table fields: cmd, cmdsize, symoff, nsyms, stroff, strsize.
    symtab = list(
        struct.unpack(symtab_format, data[symtab_offset : symtab_offset + 24])
    )
    symtab[5] = new_size - symtab[4]
    packed_symtab = struct.pack(symtab_format, *symtab)

    file.seek(symtab_offset)
    file.write(packed_symtab)

    # Segment fields: cmd, cmdsize, segname, vmaddr, vmsize, fileoff, filesize,
    # maxprot, initprot, nsects, flags.
    linkedit = list(
        struct.unpack(segment_format, data[linkedit_offset : linkedit_offset + 72])
    )
    grown_size = new_size - linkedit[5]
    linkedit[6] = grown_size  # filesize
    linkedit[4] = grown_size  # vmsize mirrors filesize
    packed_linkedit = struct.pack(segment_format, *linkedit)

    file.seek(linkedit_offset)
    file.write(packed_linkedit)
def write_bundle_data(
    output,
    old_bundle_base_offset: int,
    new_bundle_base_offset: int,
    bundle: "BundleManifest",
) -> int:
    """Write the bundle at the current position and re-point the executable at it.

    After the bundle is written, the 8 bytes preceding the SHA-256 digest of
    ".net core bundle\\n" in ``output`` are overwritten with the new bundle
    header offset.

    Args:
        output: Readable, writable and seekable binary file, positioned where
            the bundle data must start.
        old_bundle_base_offset: Unused; kept for call compatibility.
        new_bundle_base_offset: Offset subtracted from the end position to
            compute the returned size.
        bundle: Manifest whose ``write`` method emits the bundle.

    Returns:
        The end position after writing the bundle minus
        ``new_bundle_base_offset``.

    Raises:
        ValueError: the bundle marker is missing from ``output``, or has no
            room for the 8-byte offset in front of it.
    """
    # Write bundle data
    bundle_header_offset = bundle.write(output)
    total_size = output.tell()

    # Find the marker in the file being patched instead of relying on a
    # module-level copy of its contents.
    output.seek(0)
    marker_offset = output.read().find(hashlib.sha256(b".net core bundle\n").digest())
    if marker_offset < 8:
        raise ValueError("cannot find the .NET bundle marker in the output file")

    # Patch the header position
    output.seek(marker_offset - 8)
    output.write(struct.pack("q", bundle_header_offset))

    return total_size - new_bundle_base_offset
# Application bundle root, its "Contents" directory and the main executable
# inside it, all derived from the command-line arguments.
input_directory: Path = Path(args.input_directory)
content_directory: Path = input_directory / "Contents"
executable_path: Path = content_directory / args.executable_sub_path
def get_path_related_to_other_path(a: Path, b: Path) -> str:
    """Return the directory of ``b`` relative to ``a``, joined with "/".

    The result is empty when ``b`` lies directly inside ``a``. The comparison
    is purely lexical; the file system is not consulted.

    Args:
        a: Base directory.
        b: Path located somewhere below ``a``.

    Raises:
        ValueError: ``b`` is not located below ``a``.
    """
    # relative_to() drops exactly the leading components that make up `a`, so
    # a nested directory sharing the base directory's name stays in place, and
    # a path outside `a` is reported instead of being walked up forever.
    return "/".join(b.parent.relative_to(a).parts)
def get_path_related_to_target_exec(input_directory: Path, path: Path):
    """Return the "@executable_path/../" location of the directory of ``path``.

    The part after the prefix is the directory of ``path`` relative to
    ``input_directory``.
    """
    relative_directory = get_path_related_to_other_path(input_directory, path)

    return f"@executable_path/../{relative_directory}"
# Bundle directories searched for dependencies, after the library's own one.
search_path = [
    content_directory / "Frameworks",
    content_directory / "Resources/lib",
]

# Fix up every .dylib first, then every .so, found below the content directory.
for library_pattern in ("**/*.dylib", "**/*.so"):
    for path in content_directory.rglob(library_pattern):
        # The directory holding the library is probed before the shared ones.
        current_search_path = [path.parent, *search_path]

        fixup_dylib(
            path,
            get_path_related_to_target_exec(content_directory, path),
            current_search_path,
            content_directory,
        )
# Parse the .NET bundle before the executable gets modified.
with open(executable_path, "rb") as executable_file:
    file_data = executable_file.read()

bundle_info = get_dotnet_bundle_data(file_data)
if bundle_info is None:
    # Fail with an explicit message rather than an unpacking TypeError.
    raise Exception(f"{executable_path}: Cannot find .NET bundle data in executable")

(bundle_base_offset, bundle_header_offset, bundle) = bundle_info

add_dylib_rpath(executable_path, "@executable_path/../Frameworks/")

# Recent "vanilla" versions of LLVM (LLVM 13 and newer) seem to really dislike
# how .NET packages its assemblies. As a result, install_name_tool "fixes" the
# symtab and the .NET bundle goes missing from the output file.
# To mitigate that, check whether the bundle header offset is still inside the
# binary after install_name_tool ran, and re-add the .NET bundle if it is not.
output_file_size = os.stat(executable_path).st_size

if output_file_size < bundle_header_offset:
    print("LLVM broke the .NET bundle, readding bundle data...")

    with open(executable_path, "r+b") as output:
        # Reading leaves the position at the end of the file, which is where
        # the bundle is appended.
        file_data = output.read()
        bundle_data_size = write_bundle_data(
            output, bundle_base_offset, output_file_size, bundle
        )

        # Now patch the __LINKEDIT section
        new_size = output_file_size + bundle_data_size
        fixup_linkedit(output, file_data, new_size)
|