Changes in directory llvm/include/llvm/CodeGen:
MachOWriter.h added (r1.1) --- Log message: Initial checkin of the Mach-O emitter. There's plenty of fixmes, but it does emit linkable .o files in very simple cases. --- Diffs of the changes: (+600 -0) MachOWriter.h | 600 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 600 insertions(+) Index: llvm/include/llvm/CodeGen/MachOWriter.h diff -c /dev/null llvm/include/llvm/CodeGen/MachOWriter.h:1.1 *** /dev/null Wed Aug 23 16:09:02 2006 --- llvm/include/llvm/CodeGen/MachOWriter.h Wed Aug 23 16:08:52 2006 *************** *** 0 **** --- 1,600 ---- + //=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// + // + // The LLVM Compiler Infrastructure + // + // This file was developed by Chris Lattner and is distributed under the + // University of Illinois Open Source License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This file defines the MachOWriter class. + // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_CODEGEN_MACHOWRITER_H + #define LLVM_CODEGEN_MACHOWRITER_H + + #include "llvm/CodeGen/MachineRelocation.h" + #include "llvm/CodeGen/MachineFunctionPass.h" + #include <list> + + namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class MachOCodeEmitter; + + /// MachOWriter - This class implements the common target-independent code for + /// writing Mach-O files. Targets should derive a class from this to + /// parameterize the output format. + /// + class MachOWriter : public MachineFunctionPass { + friend class MachOCodeEmitter; + public: + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + ~MachOWriter(); + + typedef std::vector<unsigned char> DataBuffer; + + protected: + MachOWriter(std::ostream &O, TargetMachine &TM); + + /// Output stream to send the resultant object file to. + /// + std::ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. + MachOCodeEmitter *MCE; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + + /// doInitialization - Emit the file header and all of the global variables + /// for the module to the Mach-O file. + bool doInitialization(Module &M); + + bool runOnMachineFunction(MachineFunction &MF); + + /// doFinalization - Now that the module has been completely processed, emit + /// the Mach-O file to 'O'. + bool doFinalization(Module &M); + + /// MachOHeader - This struct contains the header information about a + /// specific architecture type/subtype pair that is emitted to the file. + struct MachOHeader { + uint32_t magic; // mach magic number identifier + uint32_t cputype; // cpu specifier + uint32_t cpusubtype; // machine specifier + uint32_t filetype; // type of file + uint32_t ncmds; // number of load commands + uint32_t sizeofcmds; // the size of all the load commands + uint32_t flags; // flags + uint32_t reserved; // 64-bit only + + /// HeaderData - The actual data for the header which we are building + /// up for emission to the file. + DataBuffer HeaderData; + + // Constants for the cputype field + // see <mach/machine.h> + enum { CPU_TYPE_I386 = 7, + CPU_TYPE_X86_64 = 7 | 0x1000000, + CPU_TYPE_ARM = 12, + CPU_TYPE_SPARC = 14, + CPU_TYPE_POWERPC = 18, + CPU_TYPE_POWERPC64 = 18 | 0x1000000 + }; + + // Constants for the cpusubtype field + // see <mach/machine.h> + enum { CPU_SUBTYPE_I386_ALL = 3, + CPU_SUBTYPE_X86_64_ALL = 3, + CPU_SUBTYPE_ARM_ALL = 0, + CPU_SUBTYPE_SPARC_ALL = 0, + CPU_SUBTYPE_POWERPC_ALL = 0 + }; + + // Constants for the filetype field + // see <mach-o/loader.h> for additional info on the various types + enum { MH_OBJECT = 1, // relocatable object file + MH_EXECUTE = 2, // demand paged executable file + MH_FVMLIB = 3, // fixed VM shared library file + MH_CORE = 4, // core file + MH_PRELOAD = 5, // preloaded executable file + MH_DYLIB = 6, // dynamically bound shared library + MH_DYLINKER = 7, // dynamic link editor + MH_BUNDLE = 8, // dynamically bound bundle file + MH_DYLIB_STUB = 9, // shared library stub for static linking only + MH_DSYM = 10 // companion file wiht only debug sections + }; + + // Constants for the flags field + enum { MH_NOUNDEFS = 1 << 0, + // the object file has no undefined references + MH_INCRLINK = 1 << 1, + // the object file is the output of an incremental link against + // a base file and cannot be link edited again + MH_DYLDLINK = 1 << 2, + // the object file is input for the dynamic linker and cannot be + // statically link edited again. + MH_BINDATLOAD = 1 << 3, + // the object file's undefined references are bound by the + // dynamic linker when loaded. + MH_PREBOUND = 1 << 4, + // the file has its dynamic undefined references prebound + MH_SPLIT_SEGS = 1 << 5, + // the file has its read-only and read-write segments split + // see <mach/shared_memory_server.h> + MH_LAZY_INIT = 1 << 6, + // the shared library init routine is to be run lazily via + // catching memory faults to its writable segments (obsolete) + MH_TWOLEVEL = 1 << 7, + // the image is using two-level namespace bindings + MH_FORCE_FLAT = 1 << 8, + // the executable is forcing all images to use flat namespace + // bindings. + MH_NOMULTIDEFS = 1 << 8, + // this umbrella guarantees no multiple definitions of symbols + // in its sub-images so the two-level namespace hints can + // always be used. + MH_NOFIXPREBINDING = 1 << 10, + // do not have dyld notify the prebidning agent about this + // executable. + MH_PREBINDABLE = 1 << 11, + // the binary is not prebound but can have its prebinding + // redone. only used when MH_PREBOUND is not set. + MH_ALLMODSBOUND = 1 << 12, + // indicates that this binary binds to all two-level namespace + // modules of its dependent libraries. Only used when + // MH_PREBINDABLE and MH_TWOLEVEL are both set. + MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13, + // safe to divide up the sections into sub-sections via symbols + // for dead code stripping. + MH_CANONICAL = 1 << 14, + // the binary has been canonicalized via the unprebind operation + MH_WEAK_DEFINES = 1 << 15, + // the final linked image contains external weak symbols + MH_BINDS_TO_WEAK = 1 << 16, + // the final linked image uses weak symbols + MH_ALLOW_STACK_EXECUTION = 1 << 17 + // When this bit is set, all stacks in the task will be given + // stack execution privilege. Only used in MH_EXECUTE filetype + }; + + MachOHeader() : magic(0), cputype(0), cpusubtype(0), filetype(0), + ncmds(0), sizeofcmds(0), flags(0), reserved(0) { } + + /// cmdSize - This routine returns the size of the MachOSection as written + /// to disk, depending on whether the destination is a 64 bit Mach-O file. + unsigned cmdSize(bool is64Bit) const { + if (is64Bit) + return 8 * sizeof(uint32_t); + else + return 7 * sizeof(uint32_t); + } + + /// setMagic - This routine sets the appropriate value for the 'magic' + /// field based on pointer size and endianness. + void setMagic(bool isLittleEndian, bool is64Bit) { + if (isLittleEndian) + if (is64Bit) magic = 0xcffaedfe; + else magic = 0xcefaedfe; + else + if (is64Bit) magic = 0xfeedfacf; + else magic = 0xfeedface; + } + }; + + /// Header - An instance of MachOHeader that we will update while we build + /// the file, and then emit during finalization. + MachOHeader Header; + + private: + + /// MachOSegment - This struct contains the necessary information to + /// emit the load commands for each section in the file. + struct MachOSegment { + uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64 + uint32_t cmdsize; // Total size of this struct and section commands + std::string segname; // segment name + uint64_t vmaddr; // address of this segment + uint64_t vmsize; // size of this segment, may be larger than filesize + uint64_t fileoff; // offset in file + uint64_t filesize; // amount to read from file + uint32_t maxprot; // maximum VM protection + uint32_t initprot; // initial VM protection + uint32_t nsects; // number of sections in this segment + uint32_t flags; // flags + + // Constants for the vm protection fields + // see <mach-o/vm_prot.h> + enum { VM_PROT_NONE = 0x00, + VM_PROT_READ = 0x01, // read permission + VM_PROT_WRITE = 0x02, // write permission + VM_PROT_EXECUTE = 0x04, // execute permission, + VM_PROT_ALL = 0x07 + }; + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_SEGMENT = 0x01, // segment of this file to be mapped + LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped + }; + + /// cmdSize - This routine returns the size of the MachOSection as written + /// to disk, depending on whether the destination is a 64 bit Mach-O file. + unsigned cmdSize(bool is64Bit) const { + if (is64Bit) + return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16; + else + return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits + } + + MachOSegment(const std::string &seg, bool is64Bit) + : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg), + vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL), + initprot(VM_PROT_ALL), nsects(0), flags(0) { } + }; + + /// MachOSection - This struct contains information about each section in a + /// particular segment that is emitted to the file. This is eventually + /// turned into the SectionCommand in the load command for a particlar + /// segment. + struct MachOSection { + std::string sectname; // name of this section, + std::string segname; // segment this section goes in + uint64_t addr; // memory address of this section + uint64_t size; // size in bytes of this section + uint32_t offset; // file offset of this section + uint32_t align; // section alignment (power of 2) + uint32_t reloff; // file offset of relocation entries + uint32_t nreloc; // number of relocation entries + uint32_t flags; // flags (section type and attributes) + uint32_t reserved1; // reserved (for offset or index) + uint32_t reserved2; // reserved (for count or sizeof) + uint32_t reserved3; // reserved (64 bit only) + + /// A unique number for this section, which will be used to match symbols + /// to the correct section. + uint32_t Index; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + DataBuffer SectionData; + + // Constants for the section types (low 8 bits of flags field) + // see <mach-o/loader.h> + enum { S_REGULAR = 0, + // regular section + S_ZEROFILL = 1, + // zero fill on demand section + S_CSTRING_LITERALS = 2, + // section with only literal C strings + S_4BYTE_LITERALS = 3, + // section with only 4 byte literals + S_8BYTE_LITERALS = 4, + // section with only 8 byte literals + S_LITERAL_POINTERS = 5, + // section with only pointers to literals + S_NON_LAZY_SYMBOL_POINTERS = 6, + // section with only non-lazy symbol pointers + S_LAZY_SYMBOL_POINTERS = 7, + // section with only lazy symbol pointers + S_SYMBOL_STUBS = 8, + // section with only symbol stubs + // byte size of stub in the reserved2 field + S_MOD_INIT_FUNC_POINTERS = 9, + // section with only function pointers for initialization + S_MOD_TERM_FUNC_POINTERS = 10, + // section with only function pointers for termination + S_COALESCED = 11, + // section contains symbols that are coalesced + S_GB_ZEROFILL = 12, + // zero fill on demand section (that can be larger than 4GB) + S_INTERPOSING = 13, + // section with only pairs of function pointers for interposing + S_16BYTE_LITERALS = 14 + // section with only 16 byte literals + }; + + // Constants for the section flags (high 24 bits of flags field) + // see <mach-o/loader.h> + enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31, + // section contains only true machine instructions + S_ATTR_NO_TOC = 1 << 30, + // section contains coalesced symbols that are not to be in a + // ranlib table of contents + S_ATTR_STRIP_STATIC_SYMS = 1 << 29, + // ok to strip static symbols in this section in files with the + // MY_DYLDLINK flag + S_ATTR_NO_DEAD_STRIP = 1 << 28, + // no dead stripping + S_ATTR_LIVE_SUPPORT = 1 << 27, + // blocks are live if they reference live blocks + S_ATTR_SELF_MODIFYING_CODE = 1 << 26, + // used with i386 code stubs written on by dyld + S_ATTR_DEBUG = 1 << 25, + // a debug section + S_ATTR_SOME_INSTRUCTIONS = 1 << 10, + // section contains some machine instructions + S_ATTR_EXT_RELOC = 1 << 9, + // section has external relocation entries + S_ATTR_LOC_RELOC = 1 << 8 + // section has local relocation entries + }; + + /// cmdSize - This routine returns the size of the MachOSection as written + /// to disk, depending on whether the destination is a 64 bit Mach-O file. + unsigned cmdSize(bool is64Bit) const { + if (is64Bit) + return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32; + else + return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits + } + + MachOSection(const std::string &seg, const std::string §) + : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(0), + reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), + reserved3(0) { } + }; + + /// SectionList - This is the list of sections that we have emitted to the + /// file. Once the file has been completely built, the segment load command + /// SectionCommands are constructed from this info. + std::list<MachOSection> SectionList; + + /// SectionLookup - This is a mapping from section name to SectionList entry + std::map<std::string, MachOSection*> SectionLookup; + + /// getSection - Return the section with the specified name, creating a new + /// section if one does not already exist. + MachOSection &getSection(const std::string &seg, const std::string §, + unsigned Flags = 0) { + MachOSection *&SN = SectionLookup[seg+sect]; + if (SN) return *SN; + + SectionList.push_back(MachOSection(seg, sect)); + SN = &SectionList.back(); + SN->Index = SectionList.size(); + SN->flags = MachOSection::S_REGULAR | Flags; + return *SN; + } + MachOSection &getTextSection() { + return getSection("__TEXT", "__text", + MachOSection::S_ATTR_PURE_INSTRUCTIONS | + MachOSection::S_ATTR_SOME_INSTRUCTIONS); + } + + /// MachOSymTab - This struct contains information about the offsets and + /// size of symbol table information. + /// segment. + struct MachOSymTab { + uint32_t cmd; // LC_SYMTAB + uint32_t cmdsize; // sizeof( MachOSymTab ) + uint32_t symoff; // symbol table offset + uint32_t nsyms; // number of symbol table entries + uint32_t stroff; // string table offset + uint32_t strsize; // string table size in bytes + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info + }; + + MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0), + nsyms(0), stroff(0), strsize(0) { } + }; + + /// MachOSymTab - This struct contains information about the offsets and + /// size of symbol table information. + /// segment. + struct MachODySymTab { + uint32_t cmd; // LC_DYSYMTAB + uint32_t cmdsize; // sizeof( MachODySymTab ) + uint32_t ilocalsym; // index to local symbols + uint32_t nlocalsym; // number of local symbols + uint32_t iextdefsym; // index to externally defined symbols + uint32_t nextdefsym; // number of externally defined symbols + uint32_t iundefsym; // index to undefined symbols + uint32_t nundefsym; // number of undefined symbols + uint32_t tocoff; // file offset to table of contents + uint32_t ntoc; // number of entries in table of contents + uint32_t modtaboff; // file offset to module table + uint32_t nmodtab; // number of module table entries + uint32_t extrefsymoff; // offset to referenced symbol table + uint32_t nextrefsyms; // number of referenced symbol table entries + uint32_t indirectsymoff; // file offset to the indirect symbol table + uint32_t nindirectsyms; // number of indirect symbol table entries + uint32_t extreloff; // offset to external relocation entries + uint32_t nextrel; // number of external relocation entries + uint32_t locreloff; // offset to local relocation entries + uint32_t nlocrel; // number of local relocation entries + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info + }; + + MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), + ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), + iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), + nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), + nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { } + }; + + /// SymTab - The "stab" style symbol table information + MachOSymTab SymTab; + /// DySymTab - symbol table info for the dynamic link editor + MachODySymTab DySymTab; + + /// MachOSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct MachOSym { + const GlobalValue *GV; // The global value this corresponds to. + uint32_t n_strx; // index into the string table + uint8_t n_type; // type flag + uint8_t n_sect; // section number or NO_SECT + int16_t n_desc; // see <mach-o/stab.h> + uint64_t n_value; // value for this symbol (or stab offset) + + // Constants for the n_sect field + // see <mach-o/nlist.h> + enum { NO_SECT = 0 }; // symbol is not in any section + + // Constants for the n_type field + // see <mach-o/nlist.h> + enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT + N_ABS = 0x2, // absolute, n_sect == NO_SECT + N_SECT = 0xe, // defined in section number n_sect + N_PBUD = 0xc, // prebound undefined (defined in a dylib) + N_INDR = 0xa // indirect + }; + // The following bits are OR'd into the types above. For example, a type + // of 0x0f would be an external N_SECT symbol (0x0e | 0x01). + enum { N_EXT = 0x01, // external symbol bit + N_PEXT = 0x10 // private external symbol bit + }; + + // Constants for the n_desc field + // see <mach-o/loader.h> + enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 + }; + enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped + N_WEAK_REF = 0x0040, // symbol is weak referenced + N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition + }; + + /// entrySize - This routine returns the size of a symbol table entry as + /// written to disk. + static unsigned entrySize() { return 12; } + + MachOSym(const GlobalValue *gv, uint8_t sect) : GV(gv), n_strx(0), + n_type(N_UNDF), n_sect(sect), n_desc(0), n_value(0) {} + }; + + /// SymbolTable - This is the list of symbols we have emitted to the file. + /// This actually gets rearranged before emission to the file (to put the + /// local symbols first in the list). + std::vector<MachOSym> SymbolTable; + + /// DynamicSymbolTable - This is just a vector of indices into + /// SymbolTable to aid in emitting the DYSYMTAB load command. + std::vector<unsigned> DynamicSymbolTable; + + /// StringTable - The table of strings referenced by SymbolTable entries + std::vector<std::string> StringTable; + + // align - Emit padding into the file until the current output position is + // aligned to the specified power of two boundary. + static void align(DataBuffer &Output, unsigned Boundary) { + assert(Boundary && (Boundary & (Boundary-1)) == 0 && + "Must align to 2^k boundary"); + size_t Size = Output.size(); + if (Size & (Boundary-1)) { + // Add padding to get alignment to the correct place. + size_t Pad = Boundary-(Size & (Boundary-1)); + Output.resize(Size+Pad); + } + } + + void outbyte(DataBuffer &Output, unsigned char X) { + Output.push_back(X); + } + void outhalf(DataBuffer &Output, unsigned short X) { + if (isLittleEndian) { + Output.push_back(X&255); + Output.push_back(X >> 8); + } else { + Output.push_back(X >> 8); + Output.push_back(X&255); + } + } + void outword(DataBuffer &Output, unsigned X) { + if (isLittleEndian) { + Output.push_back((X >> 0) & 255); + Output.push_back((X >> 8) & 255); + Output.push_back((X >> 16) & 255); + Output.push_back((X >> 24) & 255); + } else { + Output.push_back((X >> 24) & 255); + Output.push_back((X >> 16) & 255); + Output.push_back((X >> 8) & 255); + Output.push_back((X >> 0) & 255); + } + } + void outxword(DataBuffer &Output, uint64_t X) { + if (isLittleEndian) { + Output.push_back(unsigned(X >> 0) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 56) & 255); + } else { + Output.push_back(unsigned(X >> 56) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 0) & 255); + } + } + void outaddr32(DataBuffer &Output, unsigned X) { + outword(Output, X); + } + void outaddr64(DataBuffer &Output, uint64_t X) { + outxword(Output, X); + } + void outaddr(DataBuffer &Output, uint64_t X) { + if (!is64Bit) + outword(Output, (unsigned)X); + else + outxword(Output, X); + } + void outstring(DataBuffer &Output, std::string &S, unsigned Length) { + char *buffer = (char *)calloc(1, Length); + unsigned i; + // FIXME: it is unclear if mach-o requires null terminated strings, or + // if a string of 16 bytes with no null terminator is ok. If so, + // we should switch to strncpy. + strlcpy(buffer, S.c_str(), Length); + + for (i = 0; i < Length; ++i) + outbyte(Output, buffer[i]); + + free(buffer); + } + private: + void EmitGlobal(GlobalVariable *GV); + void EmitHeaderAndLoadCommands(); + void EmitSections(); + void EmitRelocations(); + void EmitSymbolTable(); + void EmitStringTable(); + }; + } + + #endif _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits