Changes in directory llvm/lib/CodeGen:
MachOWriter.cpp added (r1.1) --- Log message: Initial checkin of the Mach-O emitter. There's plenty of fixmes, but it does emit linkable .o files in very simple cases. --- Diffs of the changes: (+428 -0) MachOWriter.cpp | 428 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 428 insertions(+) Index: llvm/lib/CodeGen/MachOWriter.cpp diff -c /dev/null llvm/lib/CodeGen/MachOWriter.cpp:1.1 *** /dev/null Wed Aug 23 16:09:02 2006 --- llvm/lib/CodeGen/MachOWriter.cpp Wed Aug 23 16:08:52 2006 *************** *** 0 **** --- 1,428 ---- + //===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// + // + // The LLVM Compiler Infrastructure + // + // This file was developed by Nate Begeman and is distributed under the + // University of Illinois Open Source License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This file implements the target-independent Mach-O writer. This file writes + // out the Mach-O file in the following order: + // + // #1 FatHeader (universal-only) + // #2 FatArch (universal-only, 1 per universal arch) + // Per arch: + // #3 Header + // #4 Load Commands + // #5 Sections + // #6 Relocations + // #7 Symbols + // #8 Strings + // + //===----------------------------------------------------------------------===// + + #include "llvm/Module.h" + #include "llvm/CodeGen/MachineCodeEmitter.h" + #include "llvm/CodeGen/MachineConstantPool.h" + #include "llvm/CodeGen/MachineRelocation.h" + #include "llvm/CodeGen/MachOWriter.h" + #include "llvm/Target/TargetData.h" + #include "llvm/Target/TargetJITInfo.h" + #include "llvm/Target/TargetMachine.h" + #include "llvm/Support/Mangler.h" + #include <iostream> + using namespace llvm; + + //===----------------------------------------------------------------------===// + // MachOCodeEmitter Implementation + //===----------------------------------------------------------------------===// + + namespace llvm { + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code + /// for functions to the Mach-O file. + class MachOCodeEmitter : public MachineCodeEmitter { + MachOWriter &MOW; + + /// MOS - The current section we're writing to + MachOWriter::MachOSection *MOS; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. + std::vector<MachineRelocation> Relocations; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. + std::vector<intptr_t> MBBLocations; + + public: + MachOCodeEmitter(MachOWriter &mow) : MOW(mow) {} + + void startFunction(MachineFunction &F); + bool finishFunction(MachineFunction &F); + + void addRelocation(const MachineRelocation &MR) { + Relocations.push_back(MR); + } + + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + } + + virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const { + assert(0 && "CP not implementated yet!"); + return 0; + } + virtual intptr_t getJumpTableEntryAddress(unsigned Index) const { + assert(0 && "JT not implementated yet!"); + return 0; + } + + virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; + } + + /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! + void startFunctionStub(unsigned StubSize) { + assert(0 && "JIT specific function called!"); + abort(); + } + void *finishFunctionStub(const Function *F) { + assert(0 && "JIT specific function called!"); + abort(); + return 0; + } + }; + } + + /// startFunction - This callback is invoked when a new machine function is + /// about to be emitted. + void MachOCodeEmitter::startFunction(MachineFunction &F) { + // Align the output buffer to the appropriate alignment, power of 2. + // FIXME: GENERICIZE!! + unsigned Align = 4; + + // Get the Mach-O Section that this function belongs in. + MOS = &MOW.getTextSection(); + + // FIXME: better memory management + MOS->SectionData.reserve(4096); + BufferBegin = &(MOS->SectionData[0]); + BufferEnd = BufferBegin + MOS->SectionData.capacity(); + CurBufferPtr = BufferBegin + MOS->size; + + // Upgrade the section alignment if required. + if (MOS->align < Align) MOS->align = Align; + + // Make sure we only relocate to this function's MBBs. + MBBLocations.clear(); + } + + /// finishFunction - This callback is invoked after the function is completely + /// finished. + bool MachOCodeEmitter::finishFunction(MachineFunction &F) { + MOS->size += CurBufferPtr - BufferBegin; + + // Get a symbol for the function to add to the symbol table + MachOWriter::MachOSym FnSym(F.getFunction(), MOS->Index); + + // Figure out the binding (linkage) of the symbol. + switch (F.getFunction()->getLinkage()) { + default: + // appending linkage is illegal for functions. + assert(0 && "Unknown linkage type!"); + case GlobalValue::ExternalLinkage: + FnSym.n_type = MachOWriter::MachOSym::N_SECT | MachOWriter::MachOSym::N_EXT; + break; + case GlobalValue::InternalLinkage: + FnSym.n_type = MachOWriter::MachOSym::N_SECT; + break; + } + + // Resolve the function's relocations either to concrete pointers in the case + // of branches from one block to another, or to target relocation entries. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + if (MR.isBasicBlock()) { + void *MBBAddr = (void *)getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setResultPointer(MBBAddr); + MOW.TM.getJITInfo()->relocate(BufferBegin, &MR, 1, 0); + // FIXME: we basically want the JITInfo relocate() function to rewrite + // this guy right now, so we just write the correct displacement + // to the file. + } else { + // isString | isGV | isCPI | isJTI + // FIXME: do something smart here. We won't be able to relocate these + // until the sections are all layed out, but we still need to + // record them. Maybe emit TargetRelocations and then resolve + // those at file writing time? + std::cerr << "whee!\n"; + } + } + Relocations.clear(); + + // Finally, add it to the symtab. + MOW.SymbolTable.push_back(FnSym); + return false; + } + + //===----------------------------------------------------------------------===// + // MachOWriter Implementation + //===----------------------------------------------------------------------===// + + MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm) : O(o), TM(tm) { + // FIXME: set cpu type and cpu subtype somehow from TM + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. + MCE = new MachOCodeEmitter(*this); + } + + MachOWriter::~MachOWriter() { + delete MCE; + } + + void MachOWriter::EmitGlobal(GlobalVariable *GV) { + // FIXME: do something smart here. + } + + + bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object. + return false; + } + + bool MachOWriter::doInitialization(Module &M) { + // Set the magic value, now that we know the pointer size and endianness + Header.setMagic(isLittleEndian, is64Bit); + + // Set the file type + // FIXME: this only works for object files, we do not support the creation + // of dynamic libraries or executables at this time. + Header.filetype = MachOHeader::MH_OBJECT; + + Mang = new Mangler(M); + return false; + } + + /// doFinalization - Now that the module has been completely processed, emit + /// the Mach-O file to 'O'. + bool MachOWriter::doFinalization(Module &M) { + // Okay, the.text section has been completed, build the .data, .bss, and + // "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the header and load commands. + EmitHeaderAndLoadCommands(); + + // Emit the text and data sections. + EmitSections(); + + // Emit the relocation entry data for each section. + // FIXME: presumably this should be a virtual method, since different targets + // have different relocation types. + EmitRelocations(); + + // Emit the symbol table. + // FIXME: we don't handle debug info yet, we should probably do that. + EmitSymbolTable(); + + // Emit the string table for the sections we have. + EmitStringTable(); + + // We are done with the abstract symbols. + SectionList.clear(); + SymbolTable.clear(); + DynamicSymbolTable.clear(); + + // Release the name mangler object. + delete Mang; Mang = 0; + return false; + } + + void MachOWriter::EmitHeaderAndLoadCommands() { + // Step #0: Fill in the segment load command size, since we need it to figure + // out the rest of the header fields + MachOSegment SEG("", is64Bit); + SEG.nsects = SectionList.size(); + SEG.cmdsize = SEG.cmdSize(is64Bit) + + SEG.nsects * SectionList.begin()->cmdSize(is64Bit); + + // Step #1: calculate the number of load commands. We always have at least + // one, for the LC_SEGMENT load command, plus two for the normal + // and dynamic symbol tables, if there are any symbols. + Header.ncmds = SymbolTable.empty() ? 1 : 3; + + // Step #2: calculate the size of the load commands + Header.sizeofcmds = SEG.cmdsize; + if (!SymbolTable.empty()) + Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize; + + // Step #3: write the header to the file + // Local alias to shortenify coming code. + DataBuffer &FH = Header.HeaderData; + outword(FH, Header.magic); + outword(FH, Header.cputype); + outword(FH, Header.cpusubtype); + outword(FH, Header.filetype); + outword(FH, Header.ncmds); + outword(FH, Header.sizeofcmds); + outword(FH, Header.flags); + if (is64Bit) + outword(FH, Header.reserved); + + // Step #4: Finish filling in the segment load command and write it out + for (std::list<MachOSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) + SEG.filesize += I->size; + SEG.vmsize = SEG.filesize; + SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; + + outword(FH, SEG.cmd); + outword(FH, SEG.cmdsize); + outstring(FH, SEG.segname, 16); + outaddr(FH, SEG.vmaddr); + outaddr(FH, SEG.vmsize); + outaddr(FH, SEG.fileoff); + outaddr(FH, SEG.filesize); + outword(FH, SEG.maxprot); + outword(FH, SEG.initprot); + outword(FH, SEG.nsects); + outword(FH, SEG.flags); + + // Step #5: Write out the section commands for each section + for (std::list<MachOSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + I->offset = SEG.fileoff; // FIXME: separate offset + outstring(FH, I->sectname, 16); + outstring(FH, I->segname, 16); + outaddr(FH, I->addr); + outaddr(FH, I->size); + outword(FH, I->offset); + outword(FH, I->align); + outword(FH, I->reloff); + outword(FH, I->nreloc); + outword(FH, I->flags); + outword(FH, I->reserved1); + outword(FH, I->reserved2); + if (is64Bit) + outword(FH, I->reserved3); + } + + // Step #6: Emit LC_SYMTAB/LC_DYSYMTAB load commands + // FIXME: We'll need to scan over the symbol table and possibly do the sort + // here so that we can set the proper indices in the dysymtab load command for + // the index and number of external symbols defined in this module. + // FIXME: We'll also need to scan over all the symbols so that we can + // calculate the size of the string table. + // FIXME: add size of relocs + SymTab.symoff = SEG.fileoff + SEG.filesize; + SymTab.nsyms = SymbolTable.size(); + SymTab.stroff = SymTab.symoff + SymTab.nsyms * MachOSym::entrySize(); + SymTab.strsize = 10; + outword(FH, SymTab.cmd); + outword(FH, SymTab.cmdsize); + outword(FH, SymTab.symoff); + outword(FH, SymTab.nsyms); + outword(FH, SymTab.stroff); + outword(FH, SymTab.strsize); + + // FIXME: set DySymTab fields appropriately + outword(FH, DySymTab.cmd); + outword(FH, DySymTab.cmdsize); + outword(FH, DySymTab.ilocalsym); + outword(FH, DySymTab.nlocalsym); + outword(FH, DySymTab.iextdefsym); + outword(FH, DySymTab.nextdefsym); + outword(FH, DySymTab.iundefsym); + outword(FH, DySymTab.nundefsym); + outword(FH, DySymTab.tocoff); + outword(FH, DySymTab.ntoc); + outword(FH, DySymTab.modtaboff); + outword(FH, DySymTab.nmodtab); + outword(FH, DySymTab.extrefsymoff); + outword(FH, DySymTab.nextrefsyms); + outword(FH, DySymTab.indirectsymoff); + outword(FH, DySymTab.nindirectsyms); + outword(FH, DySymTab.extreloff); + outword(FH, DySymTab.nextrel); + outword(FH, DySymTab.locreloff); + outword(FH, DySymTab.nlocrel); + + O.write((char*)&FH[0], FH.size()); + } + + /// EmitSections - Now that we have constructed the file header and load + /// commands, emit the data for each section to the file. + void MachOWriter::EmitSections() { + for (std::list<MachOSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + O.write((char*)&I->SectionData[0], I->size); + } + } + + void MachOWriter::EmitRelocations() { + // FIXME: this should probably be a pure virtual function, since the + // relocation types and layout of the relocations themselves are target + // specific. + } + + /// EmitSymbolTable - Sort the symbols we encountered and assign them each a + /// string table index so that they appear in the correct order in the output + /// file. + void MachOWriter::EmitSymbolTable() { + // The order of the symbol table is: + // local symbols + // defined external symbols (sorted by name) + // undefined external symbols (sorted by name) + DataBuffer ST; + + // FIXME: enforce the above ordering, presumably by sorting by name, + // then partitioning twice. + unsigned stringIndex; + for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), + E = SymbolTable.end(); I != E; ++I) { + // FIXME: remove when we actually calculate these correctly + I->n_strx = 1; + StringTable.push_back(Mang->getValueName(I->GV)); + // Emit nlist to buffer + outword(ST, I->n_strx); + outbyte(ST, I->n_type); + outbyte(ST, I->n_sect); + outhalf(ST, I->n_desc); + outaddr(ST, I->n_value); + } + + O.write((char*)&ST[0], ST.size()); + } + + /// EmitStringTable - This method adds and emits a section for the Mach-O + /// string table. + void MachOWriter::EmitStringTable() { + // The order of the string table is: + // strings for external symbols + // strings for local symbols + // This is the symbol table, but backwards. This allows us to avoid a sorting + // the symbol table again; all we have to do is use a reverse iterator. + DataBuffer ST; + + // Write out a leading zero byte when emitting string table, for n_strx == 0 + // which means an empty string. + outbyte(ST, 0); + + for (std::vector<std::string>::iterator I = StringTable.begin(), + E = StringTable.end(); I != E; ++I) { + // FIXME: do not arbitrarily cap symbols to 16 characters + // FIXME: do something more efficient than outstring + outstring(ST, *I, 16); + } + O.write((char*)&ST[0], ST.size()); + } _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits