==== storm-lang-0.7.3/.dir-locals.el ====

(("doc" . ((fundamental-mode . ((indent-tabs-mode . nil) (tab-width . 4))))))

==== storm-lang-0.7.3/.gdbinit ====

# Place the following line in the file ~/.gdbinit to load this file.
# add-auto-load-safe-path ~/Projects/storm/.gdbinit

handle SIGSEGV nostop noprint

# Old signals:
handle SIGXFSZ nostop noprint
handle SIGXCPU nostop noprint

# New signals
handle SIG34 nostop noprint
handle SIG35 nostop noprint

==== storm-lang-0.7.3/.gitignore ====

# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.so.*

# Temporary files #
###################
*~

# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite
*.ncb
*.suo

# OS generated files #
######################
.DS_Store*
ehthumbs.db
Icon?
Thumbs.db
*.aps
*.user
debug
debug64
release
release64
release_*
slow
slow_*
*.ncb
*.suo
Tools/
build
build64
build_*
log/
ReadMe.txt
*.aps
\#*\#

# External code in other git repos.
External/
mymake/

# Generated code.
*/Gen/*
!*/Gen/.gitignore
!Core/Gen/*

# Documentation pack files for Storm.
root/doc
*_doc

# Database files (sqlite)
*.db

html/

==== storm-lang-0.7.3/.gitmodules ====

[submodule "mps"]
path = mps
url = ../mps.git
[submodule "Linux/backtrace"]
path = Linux/backtrace
url = ../linux/backtrace.git

==== storm-lang-0.7.3/.myproject ====

#Storm has a somewhat unusual concept of release and debug configurations.
#The default debug configuration is actually a release configuration,
#but with some debug flags added. This is because Storm is painfully slow
#without compiler optimizations, and when using other debug features
#such as the debug heap. There is a mode called "slow" that enables
#the actual debug mode (i.e. no optimizations etc.).

[project]
#Only build targets containing .mymake-files.
explicitTargets=yes

[project,release,!dist]
input+=Main
input+=Test

[project,release]
execute=no

[build,!slow,!release]
#Always 'release' mode, except when 'slow' is specified here.
all+=release
all+=storm_debug

[build,release]
#When compiling in release mode, use link-time code generation.
all+=release
all+=storm_release

[build,slow]
all+=storm_slow

# Forward build flags:
[build,dist]
all+=dist

[build,nostatic]
all+=nostatic

[build,noskia]
Gui+=noskia

[build,cairogl]
Gui+=cairogl

[build,nobacktrace]
Utils+=nobacktrace

[build,localmariadb]
SQL+=localmariadb

[build,branchprotection]
all+=branchprotection

[build,nouring]
all+=nouring

# Compatibility flag. For various backwards-compatibility fixes.
# Currently, we disable large file support in SQLite, since that causes issues on Ubuntu 18.04 LTS.
# We will phase this out eventually.
[build,compat]
all+=compat

[compat]
define+=STORM_COMPAT

[build]
#Regular libraries linked into Storm and/or any storm libraries.
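#(How to read the lists below, inferred from how the sections in this file are
# used: a line like 'Core+=lib' adds the build option 'lib' to the target
# 'Core', and sections tagged with that option, e.g. [pic,unix] or [stormpp],
# then apply to that target.)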
Core+=lib Code+=lib Compiler+=lib OS+=lib Shared+=lib Gc+=lib Utils+=lib SoundLib+=lib SoundLib+=extern_lib #Libraries that need the storm preprocessor to be run. Compiler+=stormpp TestLib+=stormpp Gui+=stormpp Graphics+=stormpp Sound+=stormpp SQL+=stormpp Crypto+=stormpp #Libraries linked into shared libraries loaded by Storm at runtime. TestLib+=sharedlib Gui+=sharedlib Graphics+=sharedlib Sound+=sharedlib SQL+=sharedlib Crypto+=sharedlib #Libraries that require position independent code on some platforms. If you intend to put the compiler itself #inside a shared library, add 'Compiler+=pic' and 'Gc+=pic' here. Core+=pic OS+=pic Shared+=pic Utils+=pic [deps] TestLib+=CppTypes Gui+=CppTypes Compiler+=CppTypes Sound+=CppTypes Graphics+=CppTypes SQL+=CppTypes Crypto+=CppTypes [deps,!dist] #To make sure everything is built. #When building for Debian we want a bit more control of what we are building. Compiler+=TestLib Compiler+=Gui Compiler+=Graphics Compiler+=Sound Compiler+=SQL Compiler+=Crypto #Selection GC to use. [build,mps] all+=mps [build,smm] all+=smm # Forward 64-bit flag if present. [build,windows,64] all+=64 #Add variables that indicate the GC choice. We add them to the entire project, even though #it is only necessary for Gc, Test and Main [mps] gcSuffix=_mps [smm] gcSuffix=_smm [!extern_lib] #Global build parameters pch=stdafx.h include+=./ include+=../ [storm_debug] #Turn on some runtime checks (implemented purely in Storm). define+=FAST_DEBUG [storm_debug] buildDir=build/ execDir=../debug/ [storm_debug,windows,64] buildDir=build64/ execDir=../debug64/ linkFlags+=/MACHINE:X64 [storm_debug,windows] #Generate pdb file. flags+=/Zi /Fd linkFlags+=/DEBUG /INCREMENTAL:NO /PDB:.pdb [unix] flags=-std=c++11 -pipe cflags+=-pipe flags+= [storm_debug,unix] #We do not need that aggressive optimizations... opt+=-O1 #Generate debug information. flags+=-g cflags+=-g [storm_release] buildDir=release/ execDir=../release/ [storm_release,windows,64] buildDir=release64/ execDir=../release64/ linkFlags+=/MACHINE:X64 [storm_release,windows] #Enable link-time code generation. Too slow to use regularly, but gives good performance! flags+=/GL linkFlags+=/LTCG [storm_release,lib,windows] #Need extra flag to the linker... link=lib /LTCG /nologo /OUT: [storm_release,unix] #We do not need O3 opt=-O2 [pic,unix] #All libraries need to be compiled with the -fPIC flag. At least on X86-64. flags+=-fPIC cflags+=-fPIC [storm_slow] buildDir=slow/ execDir=../slow/ [nostatic] define+=NOSTATIC_BUILD [windows] #Tell the Win32 API we're working with UTF16. define+=_UNICODE define+=UNICODE [windows,!64] #Compile asm files on X86 ext+=asm noIncludes+=*.asm compile+=*.asm:1!ml /c /nologo /Fo /safeseh /W3 /Zi [windows,64] #Compile asm files on X64 ext+=asm64 noIncludes+=*.asm64 compile+=*.asm64:1!ml64 /c /nologo /Fo /W3 /Zi /Ta [unix] flags+=-Wno-unknown-pragmas flags+=-Wno-reorder flags+=-Wno-terminate flags+=-Wno-unused-parameter flags+=-Wno-missing-field-initializers flags+=-Wno-pmf-conversions flags+=-Wno-switch flags+=-Wno-parentheses flags+=-Wno-unused-function flags+=-Wno-pragmas #Gives out of bounds warnings due to "dynamic arrays" at the end of structs. flags+=-Wno-stringop-overflow #We need to align functions to even addresses, otherwise they will be seen as vtable offsets. flags+=-falign-functions=2 #Do not export all symbols from .so-files. Storm assumes that functions and variables in different #modules are different variables and may thus contain different values. 
#This is not the default on UNIX systems, at least not when using GCC.
flags+=-fvisibility=hidden
cflags+=-fvisibility=hidden
linkFlags+=-pthread

# Note: We exclude ".s" since we want to use the preprocessor in general.
ext+=S
noIncludes+=*.s
noIncludes+=*.S
compile+=*.S:gcc -g -c -o

# Enable branch protection on ARM (PAC) if asked to
[unix,branchprotection]
flags+=-mbranch-protection=standard
cflags+=-mbranch-protection=standard

# Disable io_uring if desired. Ideally we would be able to fall back to "standard" operations if
# we find that io_uring is not available (e.g. in QEMU, or because they are disabled).
[unix,nouring]
define+=LINUX_NO_IO_URING

[stormpp]
stormpp=CppTypes
stormppUses=--use
stormProvides=./
stormUses=../Core/
stormppUsing=--using
packagePath=../root/
docName=doc
preBuild+= --template ../Core/Gen/CppTypes.cpp --out Gen/CppTypes.cpp --asm --doc
preBuildCreates+=Gen/CppTypes.cpp

[stormpp,windows,!64]
stormppAsmTemplate=../Core/Gen/CppVTables.VS_X86
stormppAsmOut=Gen/CppVTables.asm
preBuildCreates+=Gen/CppVTables.asm

[stormpp,windows,64]
stormppAsmTemplate=../Core/Gen/CppVTables.VS_X64
stormppAsmOut=Gen/CppVTables.asm64
preBuildCreates+=Gen/CppVTables.asm64

[stormpp,unix]
stormppAsmTemplate=../Core/Gen/CppVTables.GCC
stormppAsmOut=Gen/CppVTables.S
preBuildCreates+=Gen/CppVTables.S

[sharedlib]
packagePath=../root//
docName=_doc

[sharedlib,!unix]
postBuild+=if not exist "" mkdir
postBuild+=1!copy

[sharedlib,unix]
flags+=-fPIC
cflags+=-fPIC
linkFlags+=-Wl,-z,defs
postBuild+=mkdir -p
postBuild+=cp

[sharedlib,storm_debug]
libPrefix=Debug
[sharedlib,storm_debug,windows,64]
libPrefix=Debug64
[sharedlib,storm_release]
libPrefix=Release
[sharedlib,storm_release,windows,64]
libPrefix=Release64
[sharedlib,storm_slow]
libPrefix=Slow
[sharedlib,storm_slow,windows,64]
libPrefix=Slow64

# No prefix on Dist releases.
[sharedlib,storm_release,dist]
libPrefix=

#No PCH for c-files.
[windows]
compile+=*.c:1!cl /c /Fo
[unix]
compile+=*.c:gcc -Wno-unknown-pragmas -Wno-pragmas -std=c99 -O3 -Wno-maybe-uninitialized -c -o

==== storm-lang-0.7.3/Code/.mymake ====

[]
#Needed to mark this as a Mymake project.

==== storm-lang-0.7.3/Code/ActiveBlock.cpp ====

#include "stdafx.h"
#include "ActiveBlock.h"

namespace code {

	ActiveBlock::ActiveBlock(Block block, Nat activated, Label pos)
		: block(block), activated(activated), pos(pos) {}

}

==== storm-lang-0.7.3/Code/ActiveBlock.h ====

#pragma once
#include "Block.h"
#include "Label.h"

namespace code {
	STORM_PKG(core.asm);

	/**
	 * Generic data structure used in various back-ends to keep track of active blocks during code
	 * generation.
	 *
	 * Used by the X64 and Arm64 backends among others.
	 */
	class ActiveBlock {
		STORM_VALUE;
	public:
		ActiveBlock(Block block, Nat activated, Label pos);

		// Which block?
		Block block;

		// Which activation ID?
		Nat activated;

		// Where does the block start?
		Label pos;
	};

}

==== storm-lang-0.7.3/Code/Arena.cpp ====

#include "stdafx.h"
#include "Arena.h"
#include "Reg.h"
#include "X86/Arena.h"
#include "X64/Arena.h"
#include "Arm64/Arena.h"
#include "Core/Str.h"
#include "Listing.h"
#include "Binary.h"

namespace code {

	Arena::Arena() {}

	Ref Arena::external(const wchar *name, const void *ptr) const {
		return Ref(externalSource(name, ptr));
	}

	RefSource *Arena::externalSource(const wchar *name, const void *ptr) const {
		RefSource *src = new (this) StrRefSource(name);
		src->setPtr(ptr);
		return src;
	}

	Listing *Arena::transform(Listing *l) const {
		return transformInfo(l).listing;
	}

	void Arena::removeFnRegs(RegSet *from) const {
		from->remove(ptrA);
		from->remove(ptrB);
		from->remove(ptrC);
	}

	Instr *Arena::saveFnResultReg(Reg reg, Operand to) const {
		return mov(engine(), to, reg);
	}

	Instr *Arena::restoreFnResultReg(Reg reg, Operand from) const {
		return mov(engine(), reg, from);
	}

#if defined(X86) && defined(WINDOWS)

	Arena *arena(EnginePtr e) {
		return new (e.v) x86::Arena();
	}

#elif defined(X64) && defined(WINDOWS)

	Arena *arena(EnginePtr e) {
		return new (e.v) x64::WindowsArena();
	}

#elif defined(X64) && defined(POSIX)

	Arena *arena(EnginePtr e) {
		return new (e.v) x64::PosixArena();
	}

#elif defined(ARM64) && defined(POSIX)

	Arena *arena(EnginePtr e) {
		return new (e.v) arm64::Arena();
	}

#else
#error "Please note which is the default arena for your platform."
#endif

	Binary *codeBinaryImpl(GcCode *refs) {
		return (Binary *)refs->refs[0].pointer;
	}

	Binary *codeBinary(const void *fn) {
		// All backends do this.
		return codeBinaryImpl(runtime::codeRefs((void *)fn));
	}

	void Arena::updateEhInfo(const void *function, size_t offset, void *framePointer) {
		// Nothing to do in the default implementation.
	}

	// Note: the element types below were lost in extraction; they are restored here to
	// match how the members are used elsewhere in this file and in the Arm64 backend.
	Arena::Skeleton::Skeleton(Listing *listing)
		: listing(listing), currentBlock(0), currentActivation(0), accessMode(-1) {

		savedRegs = new (this) Array<Operand>();
		savedLocs = new (this) Array<Offset>();
		varOffsets = new (this) Array<Operand>();
		extraMetadata = new (this) Array<Offset>();
	}

	static Size makeSize(Nat size, Int currentOffset) {
		Size r(size);
		if ((size & 0x3) || (currentOffset & 0x3)) {
			return r.alignedAs(Size::sByte);
		} else if ((size & 0x7) || (currentOffset & 0x7)) {
			return r.alignedAs(Size::sInt);
		} else {
			return r.alignedAs(Size::sPtr);
		}
	}

	// State for keeping track of added variables.
	class VarState {
	public:
		// Create, initialize with the maximum offset.
		VarState(Listing *to, Int startOffset, Nat minAlign)
			: to(to), currentOffset(startOffset), minAlign(minAlign) {}

		// Add a new variable.
		Var add(FreeOpt freeOpt, Nat size, Listing::VarInfo *info, Operand free, Int offset) {
			if (size == 0) {
				// Emit this variable as a part of the previous one:
				size = Nat(currentOffset - offset);
				currentOffset = offset;

				Var v = to->createVar(to->root(), makeSize(size), free, freeOpt);
				to->varInfo(v, info);
				return v;
			} else {
				// Output any necessary padding:
				Nat alignedSz = roundUp(size, minAlign);
				output(offset + alignedSz, false);

				// Emit a variable now.
				currentOffset -= alignedSz;
				Var v = to->createVar(to->root(), makeSize(size), free, freeOpt);
				to->varInfo(v, info);
				return v;
			}
		}

		// Finish any remaining variables. Emit empty space until the specified offset.
		void done(Int targetOffset) {
			output(targetOffset, true);
		}

	private:
		Listing *to;
		Int currentOffset;
		Nat minAlign;

		void output(Int offset, Bool last) {
			Int size = currentOffset - offset;
			currentOffset = offset;
			if (size > 0) {
				Block parent = to->root();
				// For the last one: create a separate block to make the space usable for other things.
				if (last)
					parent = to->createBlock(to->root());
				to->createVar(parent, makeSize(Nat(size)));
			}
		}

		Size makeSize(Nat size) {
			return code::makeSize(size, currentOffset);
		}
	};

	Arena::Skeleton *Arena::frameSkeletonHead(Binary *binary) {
		Listing *l = new (this) Listing(binary->isMember(), binary->result());
		Array<TypeDesc *> *parameters = binary->params();
		for (Nat i = 0; i < parameters->count(); i++)
			l->createParam(parameters->at(i));

		return new (this) Skeleton(l);
	}

	static Nat toNat(Size s, Bool is64) {
		if (is64)
			return s.size64();
		else
			return s.size32();
	}

	static void frameSkeletonTailImpl(Binary *binary, Arena::Skeleton *result,
									Int maxOffset, Int minOffset, Nat minAlign, Bool is64) {
		Listing *l = result->listing;
		Array<Var> *paramVars = l->allParams();

		// Find variables that are active.
		GcArray<Binary::Block *> *blocks = binary->blockInfo();
		VarCleanup *cleanup = binary->cleanupInfo();

		// Collect all variables we need to process. Note that we traverse from the current node
		// towards the root, which is why we add nodes in reverse order.
		vector<std::pair<Nat, Nat>> ids;
		for (size_t i = result->currentBlock; i < blocks->count; i = blocks->v[i]->parent) {
			Binary::Block *current = blocks->v[i];
			for (Nat varId = Nat(current->count); varId > 0; varId--) {
				const Binary::Variable &v = current->vars[varId - 1];

				// Skip parameters entirely in this step. They are already handled.
				if (v.flags & Binary::Variable::sParamMask) {
					// However, we can take its metadata and add that.
					Nat paramId = (v.flags & Binary::Variable::sParamMask) >> Binary::Variable::sParamShift;
					paramId--;
					if (v.varInfo)
						l->varInfo(paramVars->at(paramId), v.varInfo);
					continue;
				}

				FreeOpt freeOpts = FreeOpt(v.flags & Binary::Variable::sFreeOptMask);
				if ((freeOpts & freeOnException) || v.varInfo)
					ids.push_back(std::make_pair(Nat(i), varId - 1));
			}
		}

		// Traverse the offsets and add them:
		VarState varState(l, maxOffset, minAlign);
		for (size_t i = ids.size(); i > 0; i--) {
			Binary::Block *current = blocks->v[ids[i - 1].first];
			const Binary::Variable &v = current->vars[ids[i - 1].second];
			const VarCleanup &c = cleanup[v.id];

			// Store the variable.
			Nat size = 0;
			if ((v.flags & freePtr) == 0) {
				if (v.flags & Binary::Variable::sPtr)
					size = toNat(Size::sPtr, is64);
				else if (v.flags & Binary::Variable::sByte)
					size = toNat(Size::sByte, is64);
				else if (v.flags & Binary::Variable::sInt)
					size = toNat(Size::sInt, is64);
				else if (v.flags & Binary::Variable::sLong)
					size = toNat(Size::sLong, is64);
			}

			FreeOpt freeOpts = FreeOpt(v.flags & Binary::Variable::sFreeOptMask);

			Operand freeOp;
			if (c.function) {
				// Find the function that was stored here:
				size_t offset = size_t(&c.function) - size_t(binary->address());
				Reference *found = binary->findReferenceByOffset(Nat(offset));
				if (found)
					freeOp = Operand(found);
			}

			if (c.activeAfter >= result->currentActivation) {
				// If it is active, clear its 'freeInactive' flag to make it immediately active.
				freeOpts &= ~freeInactive;
			}

			Var added = varState.add(freeOpts, size, v.varInfo, freeOp, c.offset);
			while (added.key() >= result->varOffsets->count())
				result->varOffsets->push(Operand());
			result->varOffsets->at(added.key()) = xRel(added.size(), ptrFrame, Offset(c.offset));
		}

		varState.done(minOffset);
	}

	void Arena::frameSkeletonTail(Binary *binary, Skeleton *result, Nat extraWords, Nat minAlign, Bool is64) {
		frameSkeletonTailImpl(binary, result,
							-Int(extraWords * toNat(Size::sPtr, is64)),
							Int(binary->stackOffset()),
							minAlign, is64);
	}

	void Arena::frameSkeletonTailBelow(Binary *binary, Skeleton *result,
									Nat extraBelow, Nat extraAbove, Nat minAlign, Bool is64) {
		frameSkeletonTailImpl(binary, result,
							Int(binary->stackSize() - extraAbove * toNat(Size::sPtr, is64)),
							Int(extraBelow * toNat(Size::sPtr, is64)),
							minAlign, is64);
	}

}

==== storm-lang-0.7.3/Code/Arena.h ====

#pragma once
#include "Core/TObject.h"
#include "Core/EnginePtr.h"
#include "Output.h"
#include "Operand.h"

namespace code {
	STORM_PKG(core.asm);

	class Listing;
	class Binary;
	class RegSet;
	class TypeDesc;
	class Instr;

	/**
	 * An arena represents a collection of compiled code and external references for some architecture.
	 *
	 * Abstract class; there is one instantiation for each supported platform.
	 */
	class Arena : public ObjectOn<Compiler> {
		STORM_ABSTRACT_CLASS;
	public:
		// Create an arena.
		Arena();

		// Create external references.
		Ref external(const wchar *name, const void *ptr) const;
		RefSource *externalSource(const wchar *name, const void *ptr) const;

		/**
		 * Transform and translate code into machine code.
		 */

		// Detailed info from the transform operation.
		class TransformInfo {
			STORM_VALUE;
		public:
			// The final listing.
			Listing *listing;

			// The layout of all local variables.
			Array<Offset> *varLayout;

			// Create.
			STORM_CTOR TransformInfo(Listing *listing, Array<Offset> *layout)
				: listing(listing), varLayout(layout) {}
		};

		// Transform the code in preparation for this backend's code generation. This is
		// backend-specific. 'owner' is the binary object that will be called to handle exceptions.
		virtual TransformInfo STORM_FN transformInfo(Listing *src) const ABSTRACT;
		virtual Listing *STORM_FN transform(Listing *src) const;

		// Translate a previously transformed listing into machine code for this arena.
		virtual void STORM_FN output(Listing *src, Output *to) const ABSTRACT;

		/**
		 * Create output objects for this backend.
		 */

		// Create an offset-computing output.
		virtual LabelOutput *STORM_FN labelOutput() const ABSTRACT;

		// Create a code-generating output based on sizes computed by a LabelOutput.
		virtual CodeOutput *STORM_FN codeOutput(Binary *owner, LabelOutput *size) const ABSTRACT;

		// Remove all registers not preserved during a function call on this platform. This
		// implementation removes ptrA, ptrB and ptrC, but other Arena implementations may want to
		// remove others as well.
		virtual void STORM_FN removeFnRegs(RegSet *from) const;

		// Get a list of registers that may contain the return value from a function.
		virtual RegSet *STORM_FN fnResultRegs() const ABSTRACT;
		virtual Instr *STORM_FN saveFnResultReg(Reg reg, Operand to) const;
		virtual Instr *STORM_FN restoreFnResultReg(Reg reg, Operand from) const;

		/**
		 * Other backend-specific things.
		 */

		// Create a function that calls another function (optionally with a pointer-sized parameter)
		// to figure out which function to actually call. Useful when implementing lazy compilation.
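		// (Sketch of the intended use, inferred from the Arm64 implementation of
		// 'redirect' below: a lazily compiled function initially points at a redirect;
		// the first call invokes 'fn', which produces the address of the real code,
		// and the redirect then restores the parameter registers and jumps to it.)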
		//
		// Calls 'fn' with 'param' (always pointer-sized or empty) to compute the
		// actual function to call. The actual function (as well as the 'function' implemented by
		// the redirect) takes params as defined by 'params' and returns 'result'.
		//
		// These redirect objects are *not* platform independent!
		virtual Listing *STORM_FN redirect(Bool member, TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand param) ABSTRACT;

		// Create a function that calls another (pre-determined) function and appends an 'EnginePtr'
		// object as the first parameter to the other function. Calling member functions in this
		// manner is not supported.
		virtual Listing *STORM_FN engineRedirect(TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand engine) ABSTRACT;

		/**
		 * Get the location of the first parameter for a function call. Assumes that a member function is called.
		 *
		 * The location is acquired in two steps: first, an implementation asks for the ID of the
		 * parameter location by calling the 'firstParamId(TypeDesc *)' function. This returns one
		 * out of several possible integers describing the parameter location. The number of
		 * possible values can be acquired by calling 'firstParamId(null)'.
		 *
		 * The ID can then be passed to 'firstParamLoc' to get an Operand describing the location.
		 *
		 * This scheme is used so that classes like VTableCalls can detect when two functions with
		 * different return values have the same vtable stub. This allows them to re-use the stubs.
		 */

		// Get the ID of the location of the first param.
		virtual Nat STORM_FN firstParamId(MAYBE(TypeDesc *) desc) ABSTRACT;

		// Access the location of the first parameter in a function call. The returned Operand is
		// always pointer-sized.
		virtual Operand STORM_FN firstParamLoc(Nat id) ABSTRACT;

		// Get a register that can safely be used to implement function dispatches.
		virtual Reg STORM_FN functionDispatchReg() ABSTRACT;

		/**
		 * Machine-specific parts of the ability to replace active functions.
		 */

		// Information about a created skeleton.
		// (Note: the element types of the arrays below were lost in extraction; they are
		// restored here to match how the members are used in Arena.cpp and the Arm64 backend.)
		class Skeleton : public Object {
			STORM_CLASS;
		public:
			// Saved non-volatile registers.
			Array<Operand> *savedRegs;

			// Location of the non-volatile registers, relative to the frame pointer.
			Array<Offset> *savedLocs;

			// Variable offsets we know about. Some may be empty.
			Array<Operand> *varOffsets;

			// Listing containing variables to make the stack frame compatible with the one in the Binary.
			Listing *listing;

			// Array of extra offsets that need to be restored during an update.
			Array<Offset> *extraMetadata;

			// Current block and activation.
			Nat currentBlock;
			Nat currentActivation;

			// How to access variables from a different stack frame of a larger size:
			//  < 0: use the same offsets as in the other stack frame.
			// >= 0: offsets are relative to 'n' bytes after the end of this frame.
			Int accessMode;

			// Create.
			STORM_CTOR Skeleton(Listing *listing);
		};

		// Create a skeleton Listing that has a variable layout that is binary compatible with the
		// code in a given Binary object. The function may also initialize the listing with dummy
		// writes to callee-saved registers in order to get the right layout.
		virtual Skeleton *STORM_FN compatibleFrameSkeleton(Binary *binary, Nat offset) ABSTRACT;

		// Update any exception information when replacing an active function.
		virtual void updateEhInfo(const void *function, size_t offset, void *framePointer);

		// Generate code to resize the stack frame of the current function to be as large as
		// required by 'newSize'.
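		// (Inferred usage: this works together with 'compatibleFrameSkeleton' when
		// replacing a function that is currently executing, so that the live stack
		// frame can be grown in place to the layout the recompiled code expects.)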
		virtual void STORM_FN resizeStackFrame(Listing *out, Reg tmpReg, Binary *newSize) ABSTRACT;

	protected:
		// Helper function for the first step of 'compatibleFrameSkeleton':
		// Creates a listing, and populates it from the binary.
		Skeleton *frameSkeletonHead(Binary *binary);

		// Helper function for the last step of 'compatibleFrameSkeleton':
		// Assumes that 'savedRegs' and 'savedLocs' have been filled in after the first step.
		// Also expects 'extraWords' indicating the number of words of "overhead" that is allocated
		// on the stack by the backend, and that 'currentBlock' and 'currentActivation' are
		// populated. 'minAlign' is the minimum alignment imposed by the stack layout.
		void frameSkeletonTail(Binary *binary, Skeleton *skeleton, Nat extraWords, Nat minAlign, Bool is64);

		// Version of 'frameSkeletonTail' for architectures where the frame pointer is placed
		// after the variables rather than before them (e.g. ARM).
		void frameSkeletonTailBelow(Binary *binary, Skeleton *skeleton, Nat extraBelow, Nat extraAbove, Nat minAlign, Bool is64);
	};

	// Create an arena for this platform.
	Arena *STORM_FN arena(EnginePtr e);

	// Extract the Binary associated with a function. This is only valid for code generated with the current backend.
	// 'fn' is expected to be a pointer to the start of a code allocation.
	Binary *codeBinary(const void *fn);
	Binary *codeBinaryImpl(GcCode *refs);

}

==== storm-lang-0.7.3/Code/Arm64/Arena.cpp ====

#include "stdafx.h"
#include "Arena.h"
#include "Asm.h"
#include "AsmOut.h"
#include "Output.h"
#include "Code/Listing.h"
#include "Code/Output.h"
#include "RemoveInvalid.h"
#include "Layout.h"
#include "Params.h"
#include "Code/Binary.h"
#include "Code/FnState.h"
#include "Code/Exception.h"
#include "Code/PosixEh/StackInfo.h"
#include "Gc/DwarfTable.h"

namespace code {
	namespace arm64 {

		Arena::Arena() {}

		Arena::TransformInfo Arena::transformInfo(Listing *l) const {
#if defined(POSIX) && defined(ARM64)
			code::eh::activatePosixInfo();
#endif
			// Remove unsupported OP-codes, replacing them with their equivalents.
			l = code::transform(l, this, new (this) RemoveInvalid());

			// Expand variables and function calls as well as function prolog and epilog.
			Layout *layout = new (this) Layout();
			l = code::transform(l, this, layout);

			return TransformInfo(l, layout->layout);
		}

		void Arena::output(Listing *src, Output *to) const {
			code::arm64::output(src, to);
			to->finish();
		}

		LabelOutput *Arena::labelOutput() const {
			return new (this) LabelOutput(8);
		}

		CodeOutput *Arena::codeOutput(Binary *owner, LabelOutput *size) const {
			return new (this) CodeOut(owner, size->offsets, size->size, size->refs);
		}

		void Arena::removeFnRegs(RegSet *from) const {
			for (size_t i = 0; i < fnDirtyCount; i++)
				from->remove(fnDirtyRegs[i]);
		}

		RegSet *Arena::fnResultRegs() const {
			RegSet *result = new (this) RegSet();
			result->put(xr(0));
			result->put(xr(1));
			result->put(dr(0));
			result->put(dr(1));
			result->put(dr(2));
			result->put(dr(3));
			return result;
		}

		Listing *Arena::redirect(Bool member, TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand param) {
			Listing *l = new (this) Listing(this);

			// Generate a layout of all parameters so we can properly restore them later.
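			// (Assumption, based on the AAPCS64 calling convention: the Params object
			// models the integer argument registers x0-x7, the vector registers d0-d7,
			// and any stack slots; only the register part matters for this stub.)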
			Params *layout = layoutParams(result, params);
			Result res = layout->result();

			// Note: We want to use the 'prolog' and 'epilog' functionality so that exceptions from
			// 'fn' are able to propagate through this stub properly.
			*l << prolog();

			// Store the registers used for parameters inside variables on the stack.
			Array<Var> *vars = new (this) Array<Var>(layout->registerCount(), Var());
			for (Nat i = 0; i < layout->registerCount(); i++) {
				if (layout->registerParam(i) != Param()) {
					Var &v = vars->at(i);
					v = l->createVar(l->root(), Size::sLong);
					*l << mov(v, asSize(layout->registerSrc(i), Size::sLong));
				}
			}

			// If the result is in memory, we need to save/restore x8 as well!
			Var resVar;
			if (res.memoryRegister() != noReg) {
				resVar = l->createVar(l->root(), Size::sPtr);
				*l << mov(resVar, ptrr(8));
			}

			// Call 'fn' to obtain the actual function to call.
			if (!param.empty())
				*l << fnParam(ptrDesc(engine()), param);
			*l << fnCall(fn, member, ptrDesc(engine()), ptrA);

			// Save the output from x0 to another register, otherwise parameters will overwrite it. x17 is good.
			*l << mov(ptrr(17), ptrA);

			// Restore the registers.
			for (Nat i = 0; i < layout->registerCount(); i++) {
				Var v = vars->at(i);
				if (v != Var())
					*l << mov(asSize(layout->registerSrc(i), Size::sLong), v);
			}

			if (res.memoryRegister() != noReg) {
				*l << mov(ptrr(8), resVar);
			}

			// Note: The epilog will preserve all registers in this case, since there are no destructors to call!
			*l << epilog();
			*l << jmp(ptrr(17));

			return l;
		}

		static Reg nextIntReg(Params *params, Nat &id) {
			while (id > 0) {
				Reg r = params->registerSrc(--id);
				if (r == noReg || isVectorReg(r))
					continue;
				if (params->registerParam(id) == Param())
					continue;
				return r;
			}
			return noReg;
		}

		Listing *Arena::engineRedirect(TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand engine) {
			Listing *l = new (this) Listing(this);

			// Examine parameters to see what we need to do. Aarch64 is a bit tricky since some
			// register usage is "aligned" to even numbers. For this reason, we produce two layouts
			// and "diff" them.
			Params *called = new (this) Params();
			Params *toCall = new (this) Params();
			toCall->add(0, Primitive(primitive::pointer, Size::sPtr, Offset()));
			for (Nat i = 0; i < params->count(); i++) {
				called->add(i + 1, params->at(i));
				toCall->add(i + 1, params->at(i));
			}

			if (toCall->stackCount() > 0 || called->stackCount() > 0)
				throw new (this) InvalidValue(S("Can not create an engine redirect for this function. ")
											S("It has too many (integer) parameters."));

			// Traverse backwards to ensure we don't overwrite anything.
			Nat calledId = called->registerCount();
			Nat toCallId = toCall->registerCount();
			while (true) {
				// Find the next source register:
				Reg srcReg = nextIntReg(called, calledId);
				Reg destReg = nextIntReg(toCall, toCallId);
				if (srcReg == noReg)
					break;
				assert(destReg, L"Internal inconsistency when creating a redirect stub!");

				*l << mov(destReg, srcReg);
			}

			// Now we can simply put the engine pointer in x0 and jump to the function we need to call.
			*l << mov(ptrr(0), engine);
			*l << jmp(fn);

			return l;
		}

		Nat Arena::firstParamId(MAYBE(TypeDesc *) desc) {
			if (!desc)
				return 1;

			return 0;
		}

		Operand Arena::firstParamLoc(Nat id) {
			return ptrr(0);
		}

		Reg Arena::functionDispatchReg() {
			return ptrr(17); // We can also use x16. x17 is nice as we use that elsewhere.
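			// (x16 and x17 are IP0/IP1 in AAPCS64: intra-procedure-call scratch registers
			// that linker veneers may clobber, so a dispatch stub is free to use them.)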
		}

		Arena::Skeleton *Arena::compatibleFrameSkeleton(Binary *binary, Nat offset) {
			Arena::Skeleton *result = frameSkeletonHead(binary);

			Array<Operand> *preservedRegs = result->savedRegs;
			Array<Offset> *preservedLocs = result->savedLocs;

			// Figure out which registers were spilled in the prolog:
			{
				FDE *desc = dwarfTable().find(binary->address());
				if (desc)
					code::dwarf::findPreservedRegs(preservedRegs, preservedLocs, desc, &fromDwarfRegister, dataAlignment);

				// Remove x30 from 'preservedRegs' - we don't bother with the link register.
				for (Nat i = 0; i < preservedRegs->count(); i++) {
					if (same(preservedRegs->at(i).reg(), ptrr(30))) {
						preservedRegs->remove(i);
						preservedLocs->remove(i);
						break;
					}
				}
			}

			Nat wordsBelow = 2; // For the return address + old frame pointer.
			Nat wordsAbove = preservedRegs->count();
			{
				code::Params *layout = layoutParams(binary->result(), binary->params());
				for (Nat i = 0; i < layout->registerCount(); i++)
					if (layout->registerParam(i).any())
						wordsAbove++;
			}

			result->accessMode = Int(wordsBelow) * Offset::sPtr.current();

			// Find the current block and active piece:
			Nat active = findFunctionState(binary->address(), offset);
			decodeFnState(active, result->currentBlock, result->currentActivation);

			frameSkeletonTailBelow(binary, result, wordsBelow, wordsAbove, Size::sPtr.current(), true);

			return result;
		}

		void Arena::resizeStackFrame(Listing *out, Reg tmpReg, Binary *newSz) {
			tmpReg = asSize(tmpReg, Size::sPtr);

			// Load the return address and old fp into registers.
			*out << mov(ptrr(29), ptrRel(ptrStack));
			*out << mov(ptrr(30), ptrRel(ptrStack, Offset::sPtr));

			// Adjust the stack pointer. First, figure out the size of the stack.
			*out << mov(tmpReg, ptrRel(out->meta()));
			*out << band(tmpReg, ptrConst(~Nat(0x1))); // Note: This is large enough. Stacks are typically < 4GiB.

			// Compute how to adjust the stack pointer to fit the new size.
			*out << sub(tmpReg, ptrConst(Nat(newSz->stackSize())));

			// Update the stack pointer.
			*out << add(ptrStack, tmpReg);

			// Store back the return address and old fp.
			*out << mov(ptrRel(ptrStack), ptrr(29));
			*out << mov(ptrRel(ptrStack, Offset::sPtr), ptrr(30));

			// Update the frame pointer.
			*out << mov(ptrFrame, ptrStack);
		}

	}
}

==== storm-lang-0.7.3/Code/Arm64/Arena.h ====

#pragma once
#include "../Arena.h"

namespace code {
	namespace arm64 {
		STORM_PKG(core.asm.arm64);

		/**
		 * Arena for Arm64 (Aarch64), for UNIX platforms (Windows might be the same).
		 */
		class Arena : public code::Arena {
			STORM_CLASS;
		public:
			// Create.
			STORM_CTOR Arena();

			/**
			 * Transform.
			 */

			virtual code::Arena::TransformInfo STORM_FN transformInfo(Listing *src) const;

			virtual void STORM_FN output(Listing *src, Output *to) const;

			/**
			 * Outputs.
			 */

			virtual LabelOutput *STORM_FN labelOutput() const;

			virtual CodeOutput *STORM_FN codeOutput(Binary *owner, LabelOutput *size) const;

			/**
			 * Registers.
			 */

			virtual void STORM_FN removeFnRegs(RegSet *from) const;
			virtual RegSet *STORM_FN fnResultRegs() const;

			/**
			 * Misc.
			 */

			virtual Listing *STORM_FN redirect(Bool member, TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand param);
			virtual Listing *STORM_FN engineRedirect(TypeDesc *result, Array<TypeDesc *> *params, Ref fn, Operand engine);
			virtual Nat STORM_FN firstParamId(MAYBE(TypeDesc *) desc);
			virtual Operand STORM_FN firstParamLoc(Nat id);
			virtual Reg STORM_FN functionDispatchReg();

			/**
			 * Replacing functions.
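			 *
			 * (These mirror the hooks declared in code::Arena: 'compatibleFrameSkeleton'
			 * reconstructs the stack layout of already-running code so that a recompiled
			 * function can take over an existing frame, and 'resizeStackFrame' grows the
			 * live frame to the new size.)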
			 */

			virtual code::Arena::Skeleton *STORM_FN compatibleFrameSkeleton(Binary *binary, Nat offset);
			virtual void STORM_FN resizeStackFrame(Listing *out, Reg tmpReg, Binary *newSz);
		};

	}
}

==== storm-lang-0.7.3/Code/Arm64/Asm.cpp ====

#include "stdafx.h"
#include "Asm.h"
#include "../Listing.h"
#include "../Exception.h"

namespace code {
	namespace arm64 {

		// We map registers as follows:
		// ptrStack (1) <-> sp
		// ptrFrame (2) <-> x29
		// ptrA (3) <-> x0
		// ptrB (4) <-> x1
		// ptrC (5) <-> x2
		// 0x?30..0x?3F <-> x3..x18
		// 0x?40..0x?4F <-> x19..x28,x30,xzr,pc
		// 0x?50..0x?5F <-> q0..q15
		// 0x?60..0x?6F <-> q16..q31

		// Arm integer register to Storm register.
		static Nat armIntToStorm(Nat arm) {
			if (arm <= 2)
				return 0x003 + arm;
			else if (arm <= 18)
				return 0x030 + arm - 3;
			else if (arm <= 28)
				return 0x040 + arm - 19;
			else if (arm == 29)
				return ptrFrame;
			else if (arm == 30)
				return 0x04A;
			else if (arm == 31)
				return 0x04B;
			else if (arm == 32)
				return ptrStack;
			else if (arm == 33)
				return 0x04C;
			else
				return noReg;
		}

		// Storm register number to Arm integer register.
		static Nat stormToArmInt(Reg stormReg) {
			Nat storm = stormReg & 0xFF;
			Nat type = storm >> 4;
			if (storm == 0x01) {
				return 32; // sp
			} else if (storm == 0x2) {
				// Reg. 29 is the frame ptr.
				return 29;
			} else if (type == 0x0) {
				return storm - 0x3;
			} else if (type == 0x3) {
				return (storm & 0xF) + 3;
			} else if (type == 0x4) {
				if (storm < 0x4A)
					return storm - 0x40 + 19;
				else if (storm == 0x4A)
					return 30;
				else if (storm == 0x4B)
					return 31; // xzr
				else if (storm == 0x4C)
					return 33; // pc
			}
			return -1;
		}

		Reg xr(Nat id) { return Reg(armIntToStorm(id) | 0x800); }
		Reg wr(Nat id) { return Reg(armIntToStorm(id) | 0x400); }
		Reg ptrr(Nat id) { return Reg(armIntToStorm(id) | 0x000); }
		Reg dr(Nat id) { return Reg(0x850 + id); }
		Reg sr(Nat id) { return Reg(0x450 + id); }
		Reg br(Nat id) { return Reg(0x150 + id); }

		const Reg pc = Reg(0x04C);
		const Reg sp = ptrStack;
		const Reg pzr = Reg(0x04B);
		const Reg xzr = Reg(0x84B);
		const Reg zr = Reg(0x44B);

		Bool isIntReg(Reg r) {
			Nat cat = r & 0x0F0;
			return cat == 0x000 || cat == 0x030 || cat == 0x040;
		}

		Bool isVectorReg(Reg r) {
			Nat cat = r & 0x0F0;
			return cat == 0x050 || cat == 0x060;
		}

		Nat intRegNumber(Reg r) {
			return stormToArmInt(r);
		}

		Nat vectorRegNumber(Reg r) {
			Nat z = Nat(r) & 0xFF;
			if (z < 0x50 || z > 0x6F)
				return -1;
			return z - 0x50;
		}

#define ARM_REG_SPECIAL(NR, NAME)		\
		if (number == NR) {				\
			if (size == 0) {			\
				return S("px") S(NAME);	\
			} else if (size == 4) {		\
				return S("w") S(NAME);	\
			} else if (size == 8) {		\
				return S("x") S(NAME);	\
			} else if (size == 1) {		\
				return S("b") S(NAME);	\
			}							\
		}

#define ARM_REG_CASE(NR)				\
		ARM_REG_SPECIAL(NR, #NR)

#define ARM_VEC(NR)						\
		if (number == NR) {				\
			if (size == 1) {			\
				return S("b") S(#NR);	\
			} else if (size == 4) {		\
				return S("s") S(#NR);	\
			} else if (size == 8) {		\
				return S("d") S(#NR);	\
			} else {					\
				return S("q") S(#NR) S("(invalid)"); \
			}							\
		}

		const wchar *nameArm64(Reg r) {
			Nat size = r >> 8;
			if (isIntReg(r)) {
				Nat number = stormToArmInt(r);
				ARM_REG_CASE(0); ARM_REG_CASE(1); ARM_REG_CASE(2); ARM_REG_CASE(3);
				ARM_REG_CASE(4); ARM_REG_CASE(5); ARM_REG_CASE(6); ARM_REG_CASE(7);
				ARM_REG_CASE(8); ARM_REG_CASE(9); ARM_REG_CASE(10); ARM_REG_CASE(11);
				ARM_REG_CASE(12); ARM_REG_CASE(13); ARM_REG_CASE(14); ARM_REG_CASE(15);
				ARM_REG_CASE(16); ARM_REG_CASE(17); ARM_REG_CASE(18); ARM_REG_CASE(19);
				ARM_REG_CASE(20); ARM_REG_CASE(21); ARM_REG_CASE(22); ARM_REG_CASE(23);
				ARM_REG_CASE(24); ARM_REG_CASE(25); ARM_REG_CASE(26); ARM_REG_CASE(27);
				ARM_REG_CASE(28); ARM_REG_CASE(29); ARM_REG_CASE(30);
				ARM_REG_SPECIAL(31, "zr");
				if (number == 33)
					return S("pc");
			} else if (isVectorReg(r)) {
				Nat number = vectorRegNumber(r);
				ARM_VEC(0); ARM_VEC(1); ARM_VEC(2); ARM_VEC(3);
				ARM_VEC(4); ARM_VEC(5); ARM_VEC(6); ARM_VEC(7);
				ARM_VEC(8); ARM_VEC(9); ARM_VEC(10); ARM_VEC(11);
				ARM_VEC(12); ARM_VEC(13); ARM_VEC(14); ARM_VEC(15);
				ARM_VEC(16); ARM_VEC(17); ARM_VEC(18); ARM_VEC(19);
				ARM_VEC(20); ARM_VEC(21); ARM_VEC(22); ARM_VEC(23);
				ARM_VEC(24); ARM_VEC(25); ARM_VEC(26); ARM_VEC(27);
				ARM_VEC(28); ARM_VEC(29); ARM_VEC(30); ARM_VEC(31);
			}
			return null;
		}

		Nat condArm64(CondFlag flag) {
			switch (flag) {
			case ifAlways: return 0xE;
			case ifNever: return 0xF;
			case ifOverflow: return 0x6;
			case ifNoOverflow: return 0x7;
			case ifEqual: return 0x0;
			case ifNotEqual: return 0x1;
				// Unsigned comparison:
			case ifBelow: return 0x3;
			case ifBelowEqual: return 0x9;
			case ifAboveEqual: return 0x2;
			case ifAbove: return 0x8;
				// Signed comparison:
			case ifLess: return 0xB;
			case ifLessEqual: return 0xD;
			case ifGreaterEqual: return 0xA;
			case ifGreater: return 0xC;
				// Float comparison:
			case ifFBelow: return 0x3;
			case ifFBelowEqual: return 0x9;
			case ifFAboveEqual: return 0xA;
			case ifFAbove: return 0xC;
			}
			return 0xE;
		}

		Reg unusedReg(RegSet *used) {
			Reg r = unusedRegUnsafe(used);
			if (r == noReg)
				throw new (used) InvalidValue(S("We should not run out of registers on ARM64."));
			return r;
		}

		Reg unusedReg(RegSet *used, Size size) {
			return asSize(unusedReg(used), size);
		}

		Reg unusedRegUnsafe(RegSet *used) {
			// Note: x18 is skipped; it is the platform register in AAPCS64.
			static const Reg candidates[] = {
				ptrr(0), ptrr(1), ptrr(2), ptrr(3), ptrr(4), ptrr(5), ptrr(6), ptrr(7),
				ptrr(8), ptrr(9), ptrr(10), ptrr(11), ptrr(12), ptrr(13), ptrr(14), ptrr(15),
				ptrr(16), ptrr(17), ptrr(19), ptrr(20), ptrr(21), ptrr(22), ptrr(23), ptrr(24),
				ptrr(25), ptrr(26), ptrr(27), ptrr(28),
			};

			for (Nat i = 0; i < ARRAY_COUNT(candidates); i++)
				if (!used->has(candidates[i]))
					return candidates[i];

			return noReg;
		}

		Reg unusedVectorReg(RegSet *used) {
			for (Nat i = 0; i < 32; i++) {
				Reg r = sr(i);
				if (!used->has(r))
					return r;
			}

			throw new (used) InvalidValue(S("Out of vector registers!"));
		}

		Reg unusedVectorReg(RegSet *used, Size size) {
			return asSize(unusedVectorReg(used), size);
		}

		static const Reg dirtyRegs[] = {
			ptrr(0), ptrr(1), ptrr(2), ptrr(3), ptrr(4), ptrr(5), ptrr(6), ptrr(7),
			ptrr(8), ptrr(9), ptrr(10), ptrr(11), ptrr(12), ptrr(13), ptrr(14), ptrr(15),
			ptrr(16), ptrr(17),
			// Not technically in the set of dirty registers, but we always save them anyway to
			// make sure they are not saved/preserved twice.
			ptrFrame, ptrr(30),
			dr(0), dr(1), dr(2), dr(3), dr(4), dr(5), dr(6), dr(7),
		};

		const Reg *fnDirtyRegs = dirtyRegs;
		const size_t fnDirtyCount = ARRAY_COUNT(dirtyRegs);

		Reg preserveRegInReg(Reg reg, RegSet *used, Listing *dest) {
			Reg targetReg = noReg;
			if (isIntReg(reg)) {
				for (Nat i = 19; i < 29; i++) {
					if (used->has(ptrr(i)))
						continue;
					targetReg = ptrr(i);
					break;
				}
			} else {
				for (Nat i = 8; i < 16; i++) {
					if (used->has(dr(i)))
						continue;
					targetReg = dr(i);
					break;
				}
			}

			used->remove(reg);

			if (targetReg != noReg) {
				targetReg = asSize(targetReg, size(reg));
				used->put(targetReg);
				*dest << mov(targetReg, reg);
				return targetReg;
			}

			return noReg;
		}

		Operand preserveReg(Reg reg, RegSet *used, Listing *dest, Block block) {
			Reg targetReg = preserveRegInReg(reg, used, dest);
			if (targetReg != noReg)
				return targetReg;

			// Store on the stack.
			Var to = dest->createVar(block, size(reg));
			*dest << mov(to, reg);
			return to;
		}

		// Get a pointer-sized offset into whatever "operand" represents.
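		// For example (illustrative): if 'op' is the memory operand [x20+8], then
		// opPtrOffset(op, 8) yields the pointer-sized operand [x20+16]; for a plain
		// register operand, only offset 0 is meaningful.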
		Operand opPtrOffset(Operand op, Nat offset) {
			return opOffset(Size::sPtr, op, offset);
		}

		Operand opOffset(Size sz, Operand op, Nat offset) {
			switch (op.type()) {
			case opRelative:
				return xRel(sz, op.reg(), op.offset() + Offset(offset));
			case opVariable:
				return xRel(sz, op.var(), op.offset() + Offset(offset));
			case opRegister:
				if (offset == 0)
					return asSize(op.reg(), sz);
				assert(false, L"Offsets into registers are not supported.");
				break;
			default:
				assert(false, L"Unsupported operand passed to 'opOffset'!");
			}
			return Operand();
		}

		void inlineMemcpy(Listing *dest, Operand to, Operand from, Reg tmpA, Reg tmpB) {
			Nat size = from.size().size64();

			if (size <= 8) {
				*dest << mov(asSize(tmpA, from.size()), from);
				*dest << mov(to, asSize(tmpA, from.size()));
				return;
			}

			// Make them pointer-sized.
			tmpA = asSize(tmpA, Size::sPtr);
			tmpB = asSize(tmpB, Size::sPtr);

			Nat offset = 0;
			while (offset + 16 <= size) {
				// The backend will make this into a double-load.
				*dest << mov(tmpA, opPtrOffset(from, offset));
				*dest << mov(tmpB, opPtrOffset(from, offset + 8));
				// The backend will make this into a double-store.
				*dest << mov(opPtrOffset(to, offset), tmpA);
				*dest << mov(opPtrOffset(to, offset + 8), tmpB);
				offset += 16;
			}

			// Copy the remaining 8 bytes (up to machine alignment, typically OK).
			if (offset < size) {
				*dest << mov(tmpA, opPtrOffset(from, offset));
				*dest << mov(opPtrOffset(to, offset), tmpA);
			}
		}

		void inlineSlowMemcpy(Listing *dest, Operand to, Operand from, Reg tmpReg) {
			Nat size = from.size().size64();

			if (size <= 8) {
				*dest << mov(asSize(tmpReg, from.size()), from);
				*dest << mov(to, asSize(tmpReg, from.size()));
				return;
			}

			tmpReg = asSize(tmpReg, Size::sPtr);

			Nat offset = 0;
			while (offset < size) {
				*dest << mov(tmpReg, opPtrOffset(from, offset));
				*dest << mov(opPtrOffset(to, offset), tmpReg);
				offset += 8;
			}
		}

		Nat encodeBitmask(Word bitmask, bool use64) {
			if (!use64) {
				// Pretend that the bitmask was 64-bit by mirroring the existing data. That makes
				// the algorithm the same for both cases (until we encode the result).
				bitmask = (bitmask & 0xFFFFFFFF) | (bitmask << 32);
			}

			// If it is all ones or all zeroes, we can't encode it (that value is reserved).
			if (bitmask == 0 || ~bitmask == 0)
				return 0;

			// Shift it to the right until we have a one in the least significant position, and a
			// zero in the most significant position.
			Nat shift = 0;
			while ((bitmask & 0x1) != 1 || (bitmask >> 63) != 0) {
				// This is a rotate right operation.
				bitmask = ((bitmask & 0x1) << 63) | (bitmask >> 1);
				shift++;
			}

			// Count the number of ones in the sequence.
			Nat ones = 0;
			for (Word mask = bitmask; mask & 0x1; mask >>= 1)
				ones++;

			// Try different possible pattern lengths.
			for (Nat length = 2; length <= 64; length *= 2) {
				if (length <= ones)
					continue;

				Word pattern = (Word(1) << ones) - 1;
				for (Nat offset = length; offset < 64; offset *= 2)
					pattern |= pattern << offset;

				if (pattern == bitmask) {
					// Found it! Encode its representation.
					Nat immr = length - shift;
					Nat imms = (Nat(0x80) - (length * 2)) | (ones - 1);
					imms ^= 0x40; // the N bit is inverted. Note: due to our setup at the start, the
					// N bit will never be set when we are in 32-bit mode.
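					// Worked example (for illustration): bitmask 0xFF00FF00FF00FF00 is
					// the 16-bit element 0xFF00 repeated, so ones = 8, length = 16 and
					// shift = 8, giving immr = 8 and imms = 0x27 after the XOR above;
					// the line below then packs this into N:immr:imms = 0x227.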
					return ((imms & 0x40) << 6) | (immr << 6) | (imms & 0x3F);
				}
			}

			return 0;
		}

		Bool allOnes(Word mask, bool use64) {
			if (!use64)
				mask |= mask << 32;
			return ~mask == 0;
		}

	}
}

==== storm-lang-0.7.3/Code/Arm64/Asm.h ====

#pragma once
#include "Code/Reg.h"
#include "Code/Output.h"
#include "Code/Operand.h"
#include "Code/CondFlag.h"

namespace code {
	class Listing;
	class TypeDesc;

	namespace arm64 {
		STORM_PKG(core.asm.arm64);

		/**
		 * ARM64-specific registers.
		 *
		 * Since registers are numbered, we don't make constants for all of them.
		 */

		Reg xr(Nat id);
		Reg wr(Nat id);
		Reg ptrr(Nat id);
		Reg dr(Nat id); // Doubles
		Reg sr(Nat id); // Singles
		Reg br(Nat id); // Bytes

		extern const Reg pc;  // Program counter (for addressing).
		extern const Reg sp;  // Stack pointer (always 64-bit).
		extern const Reg pzr; // Zero register (ptr).
		extern const Reg xzr; // Zero register.
		extern const Reg zr;  // Zero register (32-bit).

		// Check if a register is an integer register.
		Bool isIntReg(Reg r);

		// Check if a register is a vector register.
		Bool isVectorReg(Reg r);

		// Arm integer register number for a register. Returns "out-of-bounds" values for pc, etc.
		Nat intRegNumber(Reg r);

		// Arm register number for reals.
		Nat vectorRegNumber(Reg r);

		// Register name.
		const wchar *nameArm64(Reg r);

		// Condition code for ARM.
		Nat condArm64(CondFlag flag);

		// Registers clobbered by function calls.
		extern const Reg *fnDirtyRegs;
		extern const size_t fnDirtyCount;

		// Get an unused register.
		Reg unusedReg(RegSet *used);

		// Get an unused register (as above), but specify the desired size.
		Reg unusedReg(RegSet *used, Size size);

		// Get an unused register; don't throw if none is available.
		Reg unusedRegUnsafe(RegSet *used);

		// Get an unused fp register.
		Reg unusedVectorReg(RegSet *used);
		Reg unusedVectorReg(RegSet *used, Size size);

		// Preserve a register by saving it to a register that is safe through function
		// calls. Returns the new location of the operand. It could be in memory.
		// Note: The RegSet is *updated* to match the new register allocation.
		Operand preserveReg(Reg reg, RegSet *used, Listing *dest, Block block);

		// As above, but attempts to preserve a register inside a new register. May fail.
		Reg preserveRegInReg(Reg reg, RegSet *used, Listing *dest);

		// Perform a memcpy operation of a fixed size. Uses the two specified registers as
		// temporaries (ARM has load pair and store pair). Copies up to 7 bytes beyond the specified
		// location (i.e., copies a multiple of 8 bytes).
		void inlineMemcpy(Listing *dest, Operand to, Operand from, Reg tmpA, Reg tmpB);

		// Slower version of the above, only able to use one register. Avoid if possible.
		void inlineSlowMemcpy(Listing *dest, Operand to, Operand from, Reg tmpReg);

		// Get a pointer-sized offset into whatever "operand" represents.
		Operand opPtrOffset(Operand op, Nat offset);
		Operand opOffset(Size sz, Operand op, Nat offset);

		// Encode a bitmask. Returns 12 bits N, immr, imms (in that order) if possible. Otherwise,
		// returns 0 (which is not a valid encoding). The N bit is only used if a 64-bit bitmask is required.
		Nat encodeBitmask(Word bitmask, bool use64);

		// Check if the word is all ones, taking into account if the value is 64-bit or not.
		Bool allOnes(Word mask, bool use64);

	}
}

==== storm-lang-0.7.3/Code/Arm64/AsmOut.cpp ====

#include "stdafx.h"
#include "AsmOut.h"
#include "Asm.h"
#include "../OpTable.h"
#include "../Exception.h"
#include "Utils/Cache.h"

namespace code {
	namespace arm64 {

		// Get the register number, where 31 = sp.
		static Nat intRegSP(Reg reg) {
			Nat r = intRegNumber(reg);
			if (r < 31)
				return r;
			if (r == 32)
				return 31;
			throw new (runtime::someEngine()) InternalError(S("Can not use this register with this op-code."));
		}

		// Get the register number, where 31 = zr.
		static Nat intRegZR(Reg reg) {
			Nat r = intRegNumber(reg);
			if (r < 32)
				return r;
			throw new (runtime::someEngine()) InternalError(S("Can not use this register with this op-code."));
		}

		// Get the fp register number.
		static Nat fpReg(Reg reg) {
			return vectorRegNumber(reg);
		}

		// Good reference for instruction encoding:
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding?lang=en

		// Check if a value fits in 6-bit signed.
		static Bool isImm6S(Long value) {
			return value >= -0x20 && value <= 0x1F;
		}
		static void checkImm6S(RootObject *e, Long value) {
			if (!isImm6S(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large signed 6-bit immediate value: ") << value));
		}

		// Check if a value fits in 6-bit unsigned.
		static Bool isImm6U(Word value) {
			return value <= 0x3F;
		}
		static void checkImm6U(RootObject *e, Long value) {
			if (!isImm6U(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 6-bit immediate value: ") << value));
		}

		// Check if a value fits in 7-bit signed.
		static Bool isImm7S(Long value) {
			return value >= -0x40 && value <= 0x3F;
		}
		static void checkImm7S(RootObject *e, Long value) {
			if (!isImm7S(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large signed 7-bit immediate value: ") << value));
		}

		// Check if a value fits in 7-bit unsigned.
		static Bool isImm7U(Word value) {
			return value <= 0x7F;
		}
		static void checkImm7U(RootObject *e, Word value) {
			if (!isImm7U(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 7-bit immediate value: ") << value));
		}

		// Check if a value fits in 9-bit signed.
		static Bool isImm9S(Long value) {
			return value >= -0x100 && value <= 0xFF;
		}
		static void checkImm9S(RootObject *e, Long value) {
			if (!isImm9S(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large signed 9-bit immediate value: ") << value));
		}

		// Check if a value fits in 9-bit unsigned.
		static Bool isImm9U(Word value) {
			return value <= 0x1FF;
		}
		static void checkImm9U(RootObject *e, Word value) {
			if (!isImm9U(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 9-bit immediate value: ") << value));
		}

		// Check if a value fits in 12-bit signed.
		static Bool isImm12S(Long value) {
			return value >= -0x800 && value <= 0x7FF;
		}
		static void checkImm12S(RootObject *e, Long value) {
			if (!isImm12S(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large signed 12-bit immediate value: ") << value));
		}

		// Check if a value fits in 12-bit unsigned.
		static Bool isImm12U(Word value) {
			return value <= 0xFFF;
		}
		static void checkImm12U(RootObject *e, Word value) {
			if (!isImm12U(value))
				throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 12-bit immediate value: ") << value));
		}

		// Check if a value fits in 19-bit signed.
static Bool isImm19S(Long value) { return value >= -0x40000 && value <= 0x3FFFF; } static void checkImm19S(RootObject *e, Long value) { if (!isImm19S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 19-bit immediate value: ") << value)); } // Check if value fits in 19-bit unsigned. static Bool isImm19U(Word value) { return value <= 0x7FFFF; } static void checkImm19U(RootObject *e, Long value) { if (!isImm19U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 19-bit immediate value: ") << value)); } // Check if value fits in 26-bit signed. static Bool isImm26S(Long value) { return value >= -0x02000000 && value <= 0x03FFFFFF; } static void checkImm26S(RootObject *e, Long value) { if (!isImm26S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 26-bit immediate value: ") << value)); } // Put data instructions. 2 registers, 12-bit unsigned immediate. static inline void putData2(Output *to, Nat op, Nat rDest, Nat rSrc, Word imm) { checkImm12U(to, imm); Nat instr = (op << 22) | rDest | (rSrc << 5) | ((imm & 0xFFF) << 10); to->putInt(instr); } // Put data instructions. 3 registers, and a 6-bit unsigned immediate. Some instructions allow 'rb' to be shifted. static inline void putData3(Output *to, Nat op, Nat rDest, Nat ra, Nat rb, Word imm) { checkImm6U(to, imm); Nat instr = (op << 21) | rDest | (rb << 16) | (ra << 5) | ((imm & 0x3F) << 10); to->putInt(instr); } // Put 3-input data instructions. 4 registers, no immediate (modifier is labelled oO in the // docs, part of OP-code for some instructions it seems). static inline void putData4a(Output *to, Nat op, Bool modifier, Nat rDest, Nat ra, Nat rb, Nat rc) { Nat instr = (op << 21) | rDest | (rc << 10) | (rb << 16) | (ra << 5) | (Nat(modifier) << 15); to->putInt(instr); } // Same as putData4a, except that the 'modifier' is in another place. Labelled o1 in the docs. static inline void putData4b(Output *to, Nat op, Bool modifier, Nat rDest, Nat ra, Nat rb, Nat rc) { Nat instr = (op << 21) | rDest | (rc << 16) | (rb << 11) | (ra << 5) | (Nat(modifier) << 10); to->putInt(instr); } // Put a bitmask operation (those that have N, immr, imms in them). The bitmask will be // encoded, and size will be added if needed. static inline void putBitmask(Output *to, Nat op, Nat rSrc, Nat rDest, Bool is64, Word bitmask) { Nat encBitmask = encodeBitmask(bitmask, is64); if (encBitmask == 0) { StrBuf *msg = new (to) StrBuf(); *msg << S("It is not possible to encode the value ") << hex(bitmask) << S(" as a bitmask. It should have been removed by an earlier pass."); throw new (to) InvalidValue(msg->toS()); } Nat instr = (op << 23) | rDest | (rSrc << 5) | (encBitmask << 10); if (is64) instr |= Nat(1) << 31; to->putInt(instr); } // Put instructions for loads and stores: 3 registers and a 7-bit signed immediate. static inline void putLoadStoreS(Output *to, Nat op, Nat base, Nat r1, Nat r2, Long imm) { checkImm7S(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | (r2 << 10) | ((imm & 0x7F) << 15); to->putInt(instr); } // Put instructions for loads and stores: 3 registers and a 7-bit unsigned immediate. static inline void putLoadStoreU(Output *to, Nat op, Nat base, Nat r1, Nat r2, Word imm) { checkImm7U(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | (r2 << 10) | ((imm & 0x7F) << 15); to->putInt(instr); } // Put a "mid-sized" load/store (for negative offsets, mainly): 2 registers and 9-bit immediate. 
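		// (These 9-bit forms correspond to the unscaled LDUR/STUR family: unlike the
		// scaled 12-bit forms, the immediate is a raw byte offset, which is why the
		// callers below multiply the element offset back up before using them.)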
static inline void putLoadStoreMidS(Output *to, Nat op, Nat base, Nat r1, Long imm) { checkImm9S(to, imm); Nat instr = (op << 21) | r1 | (base << 5) | ((0x1FF & imm) << 12); to->putInt(instr); } // Put a "large" load/store (for bytes, mainly): 2 registers and 12-bit immediate. static inline void putLoadStoreLargeS(Output *to, Nat op, Nat base, Nat r1, Long imm) { checkImm12S(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | ((0xFFF & imm) << 10); to->putInt(instr); } // Put a "large" load/store (for bytes, mainly): 2 registers and 12-bit immediate. static inline void putLoadStoreLargeU(Output *to, Nat op, Nat base, Nat r1, Word imm) { checkImm12U(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | ((0xFFF & imm) << 10); to->putInt(instr); } // Put a load/store with 19-bit immediate offset from PC. static inline void putLoadStoreImm(Output *to, Nat op, Nat reg, Long imm) { checkImm19S(to, imm); Nat instr = (op << 24) | reg | ((0x7FFFF & imm) << 5); to->putInt(instr); } // Load a "long" constant into a register. Uses the table of references to store the data. static inline void loadLongConst(Output *to, Nat reg, Ref value) { // Emit "ldr" with literal, make the literal refer to location in the table after the code block. putLoadStoreImm(to, 0x58, reg, 0); to->markGc(GcCodeRef::relativeHereImm19, 4, value); } static inline void loadLongConst(Output *to, Nat reg, RootObject *obj) { // Emit "ldr" with literal, make the literal refer to location in the table after the code block. putLoadStoreImm(to, 0x58, reg, 0); to->markGc(GcCodeRef::relativeHereImm19, 4, (Word)obj); } void nopOut(Output *to, Instr *instr) { // According to ARM manual. to->putInt(0xD503201F); } void prologOut(Output *to, Instr *instr) { #if defined(ARM_USE_PAC) // PACIASP - to "sign" the pointer using current value of SP. to->putInt(0xD503233F); to->markReturnAuth(); #endif Offset stackSize = instr->src().offset(); Int scaled = stackSize.v64() / 8; if (isImm7S(scaled)) { // Small enough: we can do the modifications in the store operation: // - stp x29, x30, [sp, -stackSize]! putLoadStoreS(to, 0x2A6, 31, 29, 30, -scaled); // New offset for stack: to->setFrameOffset(stackSize); // The fact that registers have been preserved is done after the if-stmt as it is the same for all cases. } else if (scaled <= 0xFFFF) { // Too large. Load value into register (inter-procedure clobbered x16 is good for this). // - mov x16, # to->putInt(0xD2800000 | ((scaled & 0xFFFF) << 5) | 16); // - sub sp, sp, (x16 << 3) putData3(to, 0x659, 31, 31, 16, 3 << 3 | 3); // flags mean shift 3 steps, treat as unsigned 64-bit // CFA offset is now different: to->setFrameOffset(stackSize); // - stp x29, x30, [sp] putLoadStoreU(to, 0x2A4, 31, 29, 30, 0); } else { // Note: In reality, the stack size is likely a bit smaller since we are limited by // pointer offsets, since they are limited to 14 bytes (=16 KiB). throw new (to) InvalidValue(S("Too large stack size for Arm64!")); } // We have now saved the stack pointer and the return pointer: to->markSaved(xr(29), -stackSize); to->markSaved(xr(30), -(stackSize - Size::sPtr)); // - mov x29, sp # create stack frame putData2(to, 0x244, 29, 31, 0); // Now: use x29 as the frame register instead! 
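			// (Summary, for the common small-frame case: the sequence emitted above is
			// effectively "stp x29, x30, [sp, #-frameSize]!" followed by "mov x29, sp",
			// plus the unwind-info marks recorded along the way.)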
	to->setFrameRegister(xr(29));
}

void epilogOut(Output *to, Instr *instr) {
	Offset stackSize = instr->src().offset();
	Int scaled = stackSize.v64() / 8;
	if (isImm7S(scaled)) {
		// We emit:
		// - ldp x29, x30, [sp], stackSize
		putLoadStoreS(to, 0x2A3, 31, 29, 30, scaled);
	} else if (scaled <= 0xFFFF) {
		// The inverse of the prolog is:
		// - ldp x29, x30, [sp]
		putLoadStoreU(to, 0x2A5, 31, 29, 30, 0);
		// - mov x16, #<scaled>
		to->putInt(0xD2800000 | ((scaled & 0xFFFF) << 5) | 16);
		// - add sp, sp, (x16 << 3)
		putData3(to, 0x459, 31, 31, 16, 3 << 3 | 3); // flags mean shift 3 steps, treat as unsigned 64-bit
	} else {
		throw new (to) InvalidValue(S("Too large stack size for Arm64!"));
	}

#if defined(ARM_USE_PAC)
	// AUTIASP
	to->putInt(0xD50323BF);
#endif

	// Note: No DWARF metadata here, since this could be an early return.
}

bool loadOut(Output *to, Instr *instr, MAYBE(Instr *) next) {
	Reg baseReg = instr->src().reg();
	Int offset = instr->src().offset().v64();
	Int opSize = instr->dest().size().size64();
	Reg dest1 = instr->dest().reg();
	Reg dest2 = noReg;
	Bool isDynamic = instr->src().hasOffsetRef();
	Bool intReg = isIntReg(dest1);

	// Bytes are special:
	if (opSize == 1) {
		if (offset < 0)
			putLoadStoreMidS(to, intReg ? 0x1C2 : 0x1E2, intRegSP(baseReg), intReg ? intRegZR(dest1) : fpReg(dest1), offset);
		else
			putLoadStoreLargeU(to, intReg ? 0x0E5 : 0x0F5, intRegSP(baseReg), intReg ? intRegZR(dest1) : fpReg(dest1), offset);
		if (isDynamic)
			to->markOffset(ArmOffsetUpdater::tLoadStore, instr->src().offsetRef());
		return false;
	}

	// Look at "next" to see if we can merge it with this instruction.
	if (next && next->dest().type() == opRegister && next->src().type() == opRelative) {
		if (same(next->src().reg(), baseReg) && Int(next->dest().size().size64()) == opSize) {
			// We don't support offset references. They would invalidate the tight size limits we rely on.
			if (!isDynamic && !next->src().hasOffsetRef()) {
				// Note: It is undefined to load into the same register twice. It might also
				// break the semantics when turning:
				// - ldr x0, [x0]
				// - ldr x0, [x0+8]
				// into
				// - ldp x0, x0, [x0]
				if (!same(next->dest().reg(), dest1) && isIntReg(next->dest().reg()) == intReg) {
					// Look at the offsets; if they are next to each other, we can merge them.
					Int off = next->src().offset().v64();
					if (off == offset + opSize && isImm7S(offset / opSize)) {
						dest2 = next->dest().reg();
					} else if (off == offset - opSize && isImm7S(off / opSize)) {
						// Put the second one first.
						dest2 = dest1;
						dest1 = next->dest().reg();
						offset = off;
					}
				}
			}
		}
	}

	if (offset % opSize)
		throw new (to) InvalidValue(S("Memory access on Arm must be aligned!"));
	offset /= opSize;

	// Interestingly enough, ldp takes a signed offset but ldr takes an unsigned offset.
	if (dest2 != noReg) {
		if (intReg)
			putLoadStoreS(to, opSize == 4 ? 0x0A5 : 0x2A5, intRegSP(baseReg), intRegZR(dest1), intRegZR(dest2), offset);
		else
			putLoadStoreS(to, opSize == 4 ? 0x0B5 : 0x1B5, intRegSP(baseReg), fpReg(dest1), fpReg(dest2), offset);
	} else if (offset < 0) {
		// Here: we need to use 'ldur' instead. That one is unscaled.
		if (intReg)
			putLoadStoreMidS(to, opSize == 4 ? 0x5C2 : 0x7C2, intRegSP(baseReg), intRegZR(dest1), offset * opSize);
		else
			putLoadStoreMidS(to, opSize == 4 ? 0x5E2 : 0x7E2, intRegSP(baseReg), fpReg(dest1), offset * opSize);
	} else {
		if (intReg)
			putLoadStoreLargeU(to, opSize == 4 ? 0x2E5 : 0x3E5, intRegSP(baseReg), intRegZR(dest1), offset);
		else
			putLoadStoreLargeU(to, opSize == 4 ? 0x2F5 : 0x3F5, intRegSP(baseReg), fpReg(dest1), offset);
	}

	if (isDynamic)
		to->markOffset(ArmOffsetUpdater::tLoadStore, instr->src().offsetRef());

	return dest2 != noReg;
}

bool storeOut(Output *to, Instr *instr, MAYBE(Instr *) next) {
	Reg baseReg = instr->dest().reg();
	Int offset = instr->dest().offset().v64();
	Int opSize = instr->src().size().size64();
	Reg src1 = instr->src().reg();
	Reg src2 = noReg;
	Bool isDynamic = instr->dest().hasOffsetRef();
	Bool intReg = isIntReg(src1);

	// Bytes are special:
	if (opSize == 1) {
		if (offset < 0)
			putLoadStoreMidS(to, intReg ? 0x1C0 : 0x1E0, intRegSP(baseReg), intReg ? intRegZR(src1) : fpReg(src1), offset);
		else
			putLoadStoreLargeU(to, intReg ? 0x0E4 : 0x0F4, intRegSP(baseReg), intReg ? intRegZR(src1) : fpReg(src1), offset);
		if (isDynamic)
			to->markOffset(ArmOffsetUpdater::tLoadStore, instr->dest().offsetRef());
		return false;
	}

	// Look at "next" to see if we can merge it with this instruction.
	if (next && next->src().type() == opRegister && next->dest().type() == opRelative) {
		if (same(next->dest().reg(), baseReg) && Int(next->src().size().size64()) == opSize) {
			// We don't support offset references. They would invalidate the tight size limits we rely on.
			if (!isDynamic && !next->dest().hasOffsetRef()) {
				// Note: Contrary to the load instruction, it is well-defined to store the same
				// register multiple times.
				if (isIntReg(next->src().reg()) == intReg) {
					// Look at the offsets; if they are next to each other, we can merge them.
					Int off = next->dest().offset().v64();
					if (off == offset + opSize && isImm7S(offset / opSize)) {
						src2 = next->src().reg();
					} else if (off == offset - opSize && isImm7S(off / opSize)) {
						// Put the second one first.
						src2 = src1;
						src1 = next->src().reg();
						offset = off;
					}
				}
			}
		}
	}

	if (offset % opSize)
		throw new (to) InvalidValue(S("Memory access on Arm must be aligned!"));
	offset /= opSize;

	// Interestingly enough, stp takes a signed offset while str takes an unsigned offset...
	if (src2 != noReg) {
		if (intReg)
			putLoadStoreS(to, opSize == 4 ? 0x0A4 : 0x2A4, intRegSP(baseReg), intRegZR(src1), intRegZR(src2), offset);
		else
			putLoadStoreS(to, opSize == 4 ? 0x0B4 : 0x1B4, intRegSP(baseReg), fpReg(src1), fpReg(src2), offset);
	} else if (offset < 0) {
		// Here: we need to use 'stur' instead.
		if (intReg)
			putLoadStoreMidS(to, opSize == 4 ? 0x5C0 : 0x7C0, intRegSP(baseReg), intRegZR(src1), offset * opSize);
		else
			putLoadStoreMidS(to, opSize == 4 ? 0x5E0 : 0x7E0, intRegSP(baseReg), fpReg(src1), offset * opSize);
	} else {
		if (intReg)
			putLoadStoreLargeU(to, opSize == 4 ? 0x2E4 : 0x3E4, intRegSP(baseReg), intRegZR(src1), offset);
		else
			putLoadStoreLargeU(to, opSize == 4 ? 0x2F4 : 0x3F4, intRegSP(baseReg), fpReg(src1), offset);
	}

	if (isDynamic)
		to->markOffset(ArmOffsetUpdater::tLoadStore, instr->dest().offsetRef());

	return src2 != noReg;
}

void regRegMove(Output *to, Reg dest, Reg src) {
	Bool intSrc = isIntReg(src);
	Bool intDst = isIntReg(dest);
	if (intSrc && intDst) {
		if (src == ptrStack || dest == ptrStack)
			putData2(to, 0x244, intRegSP(dest), intRegSP(src), 0);
		else if (size(src).size64() > 4)
			putData3(to, 0x550, intRegZR(dest), 31, intRegZR(src), 0);
		else
			putData3(to, 0x150, intRegZR(dest), 31, intRegZR(src), 0);
	} else if (!intSrc && !intDst) {
		if (size(src).size64() > 4)
			to->putInt(0x1E604000 | (fpReg(src) << 5) | fpReg(dest)); // fmov Dd, Dn
		else
			to->putInt(0x1E204000 | (fpReg(src) << 5) | fpReg(dest)); // fmov Sd, Sn
	} else if (intSrc) {
		if (size(src).size64() > 4)
			to->putInt(0x9E670000 | (intRegZR(src) << 5) | fpReg(dest)); // fmov Dd, Xn
		else
			to->putInt(0x1E270000 | (intRegZR(src) << 5) | fpReg(dest)); // fmov Sd, Wn
	} else if (intDst) {
		if (size(src).size64() > 4)
			to->putInt(0x9E660000 | (fpReg(src) << 5) | intRegZR(dest)); // fmov Xd, Dn
		else
			to->putInt(0x1E260000 | (fpReg(src) << 5) | intRegZR(dest)); // fmov Wd, Sn
	}
}

// Helper to load an immediate value into a register.
static void loadImm(Output *to, Word imm, Reg dest) {
	if (imm <= 0xFFFF) {
		// Small enough to use MOVZ.
		to->putInt(0xD2800000 | ((0xFFFF & imm) << 5) | intRegZR(dest));
	} else if (~imm <= 0xFFFF) {
		// Store the inverse using MOVN.
		to->putInt(0x92800000 | ((0xFFFF & ~imm) << 5) | intRegZR(dest));
	} else {
		throw new (to) InvalidValue(TO_S(to, S("Too large immediate to load: ") << imm));
	}
}

// Special version, called directly when consecutive mov instructions may be mergeable.
// Returns "true" if we managed to merge "next" into this instruction. We know that
// "next" is a mov op if it is non-null.
bool movOut(Output *to, Instr *instr, MAYBE(Instr *) next) {
	switch (instr->dest().type()) {
	case opRegister:
		// Fall through to the next switch statement.
		break;
	case opRelative:
		if (instr->src().type() != opRegister)
			throw new (to) InvalidValue(TO_S(to, S("Invalid source for store operation on ARM: ") << instr->src()));
		return storeOut(to, instr, next);
	default:
		throw new (to) InvalidValue(TO_S(to, S("Invalid destination for move operation: ") << instr));
	}

	// dest is a register!
	Reg destReg = instr->dest().reg();
	Operand src = instr->src();
	switch (src.type()) {
	case opRegister:
		regRegMove(to, destReg, src.reg());
		return false;
	case opRelative:
		return loadOut(to, instr, next);
	case opReference:
		// Must be a pointer. Also, dest must be a register.
		loadLongConst(to, intRegZR(destReg), src.ref());
		return false;
	case opObjReference:
		// Must be a pointer, and dest must be a register.
		loadLongConst(to, intRegZR(destReg), src.object());
		return false;
	case opConstant:
		// Note: No difference between the nat and word versions.
		loadImm(to, src.constant(), destReg);
		return false;
	case opOffReference:
		loadImm(to, src.offsetRef().offset().v64(), destReg);
		to->markOffset(ArmOffsetUpdater::tImm, src.offsetRef());
		return false;
	case opLabel:
	case opRelativeLbl: {
		Int offset = to->offset(src.label()) + src.offset().v64() - to->tell();
		Bool large = size(destReg).size64() > 4;
		if (isIntReg(destReg)) {
			putLoadStoreImm(to, large ? 0x58 : 0x18, intRegZR(destReg), offset / 4);
		} else {
			putLoadStoreImm(to, large ? 0x5C : 0x1C, fpReg(destReg), offset / 4);
		}
		return false;
	}
	default:
		throw new (to) InvalidValue(TO_S(to, S("Invalid source for move operation: ") << instr));
	}
}

void movOut(Output *to, Instr *instr) {
	movOut(to, instr, null);
}

void shadowMovOut(Output *to, Instr *instr) {
	movOut(to, instr);
}

void leaOut(Output *to, Instr *instr) {
	Operand destOp = instr->dest();
	if (destOp.type() != opRegister)
		throw new (to) InvalidValue(S("Destination of lea should have been transformed to a register."));
	Nat dest = intRegZR(destOp.reg());

	Operand src = instr->src();
	switch (src.type()) {
	case opRelative:
		// Note: The immediates are not sign-extended, so we need to be careful about the sign.
		if (src.offset().v64() > 0) {
			// add:
			putData2(to, 0x244, dest, intRegZR(src.reg()), src.offset().v64());
		} else {
			// sub:
			putData2(to, 0x344, dest, intRegZR(src.reg()), -src.offset().v64());
		}
		if (src.hasOffsetRef())
			to->markOffset(ArmOffsetUpdater::tLea, src.offsetRef());
		break;
	case opReference:
		// This means: load the refSource itself instead of loading from the pointer.
		loadLongConst(to, dest, src.refSource());
		break;
	default:
		throw new (to) InvalidValue(TO_S(to, S("Unsupported source operand for lea: ") << src));
	}
}

void callOut(Output *to, Instr *instr) {
	// Note: We need to use x17 for temporary values. This is assumed by the code in Gc/CodeArm64.cpp.
	Int offset;
	Operand target = instr->src();
	switch (target.type()) {
	case opReference:
		// Load the address into x17.
		putLoadStoreImm(to, 0x58, 17, 0);
		// blr x17
		to->putInt(0xD63F0220);
		// Mark it accordingly.
		to->markGc(GcCodeRef::jump, 8, target.ref());
		break;
	case opRegister:
		to->putInt(0xD63F0000 | (intRegZR(target.reg()) << 5));
		break;
	case opRelative:
		// Split into two op-codes: a load and a register call.
		offset = target.offset().v64();
		if (offset >= 0) {
			Nat uOffset = Nat(offset) / 8;
			putLoadStoreLargeU(to, 0x3E5, intRegSP(target.reg()), 17, uOffset);
		} else {
			putLoadStoreMidS(to, 0x7C2, intRegSP(target.reg()), 17, offset);
		}
		if (target.hasOffsetRef())
			to->markOffset(ArmOffsetUpdater::tLoadStore, target.offsetRef());
		// blr x17
		to->putInt(0xD63F0220);
		break;
	default:
		assert(false, L"Unsupported call target!");
		break;
	}
}

void retOut(Output *to, Instr *) {
	to->putInt(0xD65F03C0); // ret
}

void jmpCondOut(Output *to, CondFlag cond, const Operand &target) {
	Int offset;
	switch (target.type()) {
	case opLabel:
		offset = Int(to->offset(target.label())) - Int(to->tell());
		offset /= 4;
		checkImm19S(to, offset);
		to->putInt(0x54000000 | ((Nat(offset) & 0x7FFFF) << 5) | condArm64(cond));
		break;
	default:
		assert(false, L"Unsupported target for conditional branches.");
		break;
	}
}

void jmpOut(Output *to, Instr *instr) {
	CondFlag cond = instr->src().condFlag();
	Operand target = instr->dest();
	Int offset;

	if (cond == ifNever)
		return;
	if (cond != ifAlways) {
		// Conditional jumps are special; handle them separately.
		jmpCondOut(to, cond, target);
		return;
	}

	// Note: We need to use x17 for temporary values for long jumps. This is assumed by the
	// code in Gc/CodeArm64.cpp.
	switch (target.type()) {
	case opReference:
		// Load the address into x17.
		putLoadStoreImm(to, 0x58, 17, 0);
		// br x17
		to->putInt(0xD61F0220);
		// Mark it accordingly.
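		// (The 8-byte GcCodeRef::jump mark covers both the ldr literal and the br, presumably
		// so that the reference table entry can be patched if the jump target moves.)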
		to->markGc(GcCodeRef::jump, 8, target.ref());
		break;
	case opRegister:
		to->putInt(0xD61F0000 | (intRegZR(target.reg()) << 5));
		break;
	case opRelative:
		offset = target.offset().v64();
		if (offset >= 0) {
			Nat uOffset = Nat(offset) / 8;
			putLoadStoreLargeU(to, 0x3E5, intRegSP(target.reg()), 17, uOffset);
		} else {
			putLoadStoreMidS(to, 0x7C2, intRegSP(target.reg()), 17, offset);
		}
		if (target.hasOffsetRef())
			to->markOffset(ArmOffsetUpdater::tLoadStore, target.offsetRef());
		// br x17
		to->putInt(0xD61F0220);
		break;
	case opLabel:
		to->markJump(target.label());
		offset = Int(to->offset(target.label())) - Int(to->tell());
		offset /= 4;
		checkImm26S(to, offset);
		to->putInt(0x14000000 | (Nat(offset) & 0x03FFFFFF));
		break;
	default:
		assert(false, L"Unsupported jump target!");
		break;
	}
}

// Generic output of data instructions that use 12-bit immediates or registers. Assumes that
// the high bit of the op-code is the size bit.
static void data12Out(Output *to, Instr *instr, Nat opImm, Nat opReg, Nat opSpReg) {
	assert(instr->dest().type() == opRegister, L"Destinations for data operations should have been transformed into registers.");
	if (instr->src().size().size64() >= 8) {
		opImm |= 0x200;
		opReg |= 0x400;
		opSpReg |= 0x400;
	}

	Reg destReg = instr->dest().reg();
	switch (instr->src().type()) {
	case opRegister:
		if (same(destReg, ptrStack)) {
			// Note: "imm" is really option + imm3 here.
			putData3(to, opSpReg, 31, 31, intRegZR(instr->src().reg()), 0x18);
		} else {
			Nat dest = intRegZR(destReg);
			putData3(to, opReg, dest, dest, intRegZR(instr->src().reg()), 0);
		}
		break;
	case opConstant: {
		Nat dest = intRegSP(destReg);
		putData2(to, opImm, dest, dest, instr->src().constant());
		break;
	}
	default:
		assert(false, L"Unsupported source for data operation.");
		break;
	}
}

void addOut(Output *to, Instr *instr) {
	data12Out(to, instr, 0x044, 0x058, 0x059);
}

void subOut(Output *to, Instr *instr) {
	data12Out(to, instr, 0x144, 0x258, 0x259);
}

void cmpOut(Output *to, Instr *instr) {
	assert(instr->dest().type() == opRegister, L"Src and dest for cmp should have been transformed into registers.");
	Nat dest = intRegZR(instr->dest().reg());
	Nat opImm = 0x1C4;
	Nat opReg = 0x358;
	if (instr->src().size().size64() >= 8) {
		opImm |= 0x200;
		opReg |= 0x400;
	}

	switch (instr->src().type()) {
	case opRegister:
		putData3(to, opReg, 31, dest, intRegSP(instr->src().reg()), 0);
		break;
	case opConstant:
		putData2(to, opImm, 31, dest, instr->src().constant());
		break;
	default:
		assert(false, L"Unsupported source for data operation.");
		break;
	}
}

void setCondOut(Output *to, Instr *instr) {
	Nat dest = intRegZR(instr->dest().reg());
	CondFlag cond = instr->src().condFlag();
	// Note: The condition codes have no encoding for "never", and since we need to invert
	// the condition for the csinc below, "always" has no usable encoding either. We
	// therefore special-case both always and never with a plain mov of 1 or 0.
	if (cond == ifAlways) {
		to->putInt(0xD2800020 | dest);
	} else if (cond == ifNever) {
		to->putInt(0xD2800000 | dest);
	} else {
		to->putInt(0x1A9F07E0 | dest | (condArm64(inverse(instr->src().condFlag())) << 12));
	}
}

void mulOut(Output *to, Instr *instr) {
	// Everything has to be in registers here.
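	// (For reference: Arm64 has no multiply-with-immediate, and "mul" is an alias of madd
	// with the zero register as the addend, which is why rc is 31 in the putData4a call below.)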
	Operand src = instr->src();
	Operand dest = instr->dest();
	Nat op = 0x0D8;
	if (src.size().size64() >= 8)
		op |= 0x400;
	Nat destReg = intRegZR(dest.reg());
	putData4a(to, op, false, destReg, destReg, intRegZR(src.reg()), 31);
}

static void divOut(Output *to, Instr *instr, Bool sign) {
	Operand src = instr->src();
	Operand dest = instr->dest();
	Nat op = 0x0D6;
	if (src.size().size64() >= 8)
		op |= 0x400;
	Nat destReg = intRegZR(dest.reg());
	putData4b(to, op, sign, destReg, destReg, 0x1, intRegZR(src.reg()));
}

void idivOut(Output *to, Instr *instr) {
	divOut(to, instr, true);
}

void udivOut(Output *to, Instr *instr) {
	divOut(to, instr, false);
}

static void clampSize(Output *to, Reg reg, Nat size) {
	if (size == 1) {
		// This is AND with the encoded bitmask 0xFF.
		to->putInt(0x92401C00 | (intRegSP(reg) << 5) | intRegZR(reg));
	} else if (size == 4) {
		// This is AND with the encoded bitmask 0xFFFFFFFF.
		to->putInt(0x92407C00 | (intRegSP(reg) << 5) | intRegZR(reg));
	} else {
		// No need to clamp larger values.
	}
}

void icastOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat srcSize = src.size().size64();
	Nat dstSize = dst.size().size64();

	if (!isIntReg(dst.reg()))
		throw new (to) InvalidValue(S("Cannot sign-extend floating point registers."));

	// Source is either a register or a memory reference.
	if (src.type() == opRelative) {
		// Use a suitable load instruction.
		Int offset = instr->src().offset().v64();
		if (srcSize == 1) {
			// Insn: ldrsb
			Nat op = 0x0E6;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset);
		} else if (srcSize == 4) {
			// Insn: ldrsw
			Nat op = 0x2E6;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 4);
		} else {
			// This is a regular load.
			Nat op = 0x3E5;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 8);
		}

		// Maybe clamp to a smaller size.
		if (srcSize > dstSize)
			clampSize(to, dst.reg(), dstSize);
	} else if (src.type() == opRegister) {
		// Sign extend to 64 bits:
		if (srcSize == 1) {
			// Insn: sxtb
			Nat op = 0x93401C00 | (intRegZR(src.reg()) << 5) | intRegZR(dst.reg());
			to->putInt(op);
		} else if (srcSize == 4) {
			// Insn: sxtw
			Nat op = 0x93407C00 | (intRegZR(src.reg()) << 5) | intRegZR(dst.reg());
			to->putInt(op);
		}
		clampSize(to, dst.reg(), dstSize);
	}
}

void ucastOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat srcSize = src.size().size64();
	Nat dstSize = dst.size().size64();
	Bool intDst = isIntReg(dst.reg());

	// Source is either a register or a memory reference.
	if (src.type() == opRelative) {
		// Use a suitable load instruction.
		Int offset = instr->src().offset().v64();
		Nat dstEnc = intDst ? intRegZR(dst.reg()) : fpReg(dst.reg());
		if (srcSize == 1) {
			Nat op = intDst ? 0x0E5 : 0x0F5;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), dstEnc, offset);
		} else if (srcSize == 4) {
			Nat op = intDst ? 0x2E5 : 0x2F5;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), dstEnc, offset / 4);
		} else {
			Nat op = intDst ? 0x3E5 : 0x3F5;
			putLoadStoreLargeU(to, op, intRegSP(src.reg()), dstEnc, offset / 8);
		}

		// Maybe clamp to a smaller size.
		if (srcSize > dstSize)
			clampSize(to, dst.reg(), dstSize);
	} else if (src.type() == opRegister) {
		// Make sure that the upper bits are zero. Just move the register if needed, then
		// clamp as necessary.
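		// (Note: writing a W register zero-extends into the full X register on AArch64, so
		// the 32-bit register move already clears the upper bits; the explicit AND in
		// clampSize covers byte-sized values and the case where no move is emitted.)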
		if (!same(src.reg(), dst.reg())) {
			regRegMove(to, dst.reg(), src.reg());
		}
		clampSize(to, dst.reg(), dstSize);
	}
}

void borOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Word op = src.constant();
		if (op == 0) {
			// Or with zero is a no-op. Don't do anything.
		} else if (allOnes(op, is64)) {
			// Fill the target with all ones. We use orn <dst>, zr, zr for this.
			putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, 31, 0);
		} else {
			putBitmask(to, 0x64, dstReg, dstReg, is64, op);
		}
	} else {
		Nat opCode = is64 ? 0x550 : 0x150;
		putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
	}
}

void bandOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Word op = src.constant();
		if (op == 0) {
			// And with zero always gives zero. Simply emit a mov instruction instead
			// (technically an orr <dst>, zr, zr).
			putData3(to, 0x550, dstReg, 31, 31, 0);
		} else if (allOnes(op, is64)) {
			// And with all ones is a no-op. Don't emit any code.
		} else {
			putBitmask(to, 0x24, dstReg, dstReg, is64, op);
		}
	} else {
		Nat opCode = is64 ? 0x450 : 0x050;
		putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
	}
}

void testOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Word op = src.constant();
		// tst is ands with the zero register as the destination.
		putBitmask(to, 0xE4, dstReg, 31, is64, op);
	} else {
		Nat opCode = is64 ? 0x750 : 0x350;
		putData3(to, opCode, 31, dstReg, intRegZR(src.reg()), 0);
	}
}

void bxorOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Word op = src.constant();
		if (op == 0) {
			// XOR with zero is a no-op.
		} else if (allOnes(op, is64)) {
			// XOR with all ones is simply a bitwise NOT. Use orn <dst>, zr, <dst> instead.
			putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, dstReg, 0);
		} else {
			putBitmask(to, 0xA4, dstReg, dstReg, is64, op);
		}
	} else {
		Nat opCode = is64 ? 0x650 : 0x250;
		putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
	}
}

void bnotOut(Output *to, Instr *instr) {
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	// This is orn <dst>, zr, <dst>.
	putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, dstReg, 0);
}

void shlOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Nat shift = Nat(src.constant());
		if (shift == 0) {
			// Nothing to do.
		} else if (shift >= Nat(is64 ? 64 : 32)) {
			// Saturated shift. Simply move 0 to the register.
			putData3(to, 0x550, dstReg, 31, 31, 0);
		} else {
			Nat opCode = 0x53000000 | dstReg | (dstReg << 5);
			if (is64)
				opCode |= 0x80400000;
			// immr
			opCode |= ((~shift + 1) & (is64 ? 0x3F : 0x1F)) << 16;
			// imms
			opCode |= (((is64 ? 63 : 31) - shift) & 0x3F) << 10;
			to->putInt(opCode);
		}
	} else {
		putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x08);
	}
}

void shrOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Nat shift = Nat(src.constant());
		if (shift == 0) {
			// Nothing to do.
		} else if (shift >= Nat(is64 ? 64 : 32)) {
			// Saturated shift. Simply move 0 to the register.
			putData3(to, 0x550, dstReg, 31, 31, 0);
		} else {
			Nat opCode = 0x53000000 | dstReg | (dstReg << 5);
			if (is64)
				opCode |= 0x80400000;
			// immr
			opCode |= shift << 16;
			// imms
			opCode |= (is64 ? 63 : 31) << 10;
			to->putInt(opCode);
		}
	} else {
		putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x09);
	}
}

void sarOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	Operand dst = instr->dest();
	Nat dstReg = intRegZR(dst.reg());
	bool is64 = dst.size().size64() > 4;
	if (src.type() == opConstant) {
		Nat bits = is64 ? 64 : 32;
		Nat shift = Nat(src.constant());
		if (shift > bits - 1)
			shift = bits - 1;
		if (shift == 0) {
			// Nothing to do.
		} else {
			Nat opCode = 0x13000000 | dstReg | (dstReg << 5);
			if (is64)
				opCode |= 0x80400000;
			// immr
			opCode |= shift << 16;
			// imms
			opCode |= (bits - 1) << 10;
			to->putInt(opCode);
		}
	} else {
		putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x0A);
	}
}

void preserveOut(Output *to, Instr *instr) {
	to->markSaved(instr->src().reg(), instr->dest().offset());
}

static void fpOut(Output *to, Instr *instr, Nat op) {
	Operand dest = instr->dest();
	Bool is64 = dest.size().size64() > 4;
	Nat baseOp = 0x0F1;
	if (is64)
		baseOp |= 0x2; // sets ftype to 0x1
	Nat destReg = fpReg(dest.reg());
	putData3(to, baseOp, destReg, destReg, fpReg(instr->src().reg()), op);
}

void faddOut(Output *to, Instr *instr) {
	fpOut(to, instr, 0x0A);
}

void fsubOut(Output *to, Instr *instr) {
	fpOut(to, instr, 0x0E);
}

void fnegOut(Output *to, Instr *instr) {
	Operand dest = instr->dest();
	Bool is64 = dest.size().size64() > 4;
	Nat op = 0x1E214000;
	if (is64)
		op |= Nat(1) << 22;
	op |= fpReg(dest.reg());
	op |= fpReg(instr->src().reg()) << 5;
	to->putInt(op);
}

void fmulOut(Output *to, Instr *instr) {
	fpOut(to, instr, 0x02);
}

void fdivOut(Output *to, Instr *instr) {
	fpOut(to, instr, 0x06);
}

void fcmpOut(Output *to, Instr *instr) {
	// Note: This op-code supports comparing to the literal zero. We don't emit that op-code though...
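	// (If it were ever needed, a literal-zero source operand could be detected here; the
	// zero-compare form differs only in the low opcode bits of the instruction.)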
	Operand src = instr->src();
	Operand dest = instr->dest();
	Bool is64 = dest.size().size64() > 4;
	Nat baseOp = 0x0F1;
	if (is64)
		baseOp |= Nat(1) << 1;
	putData3(to, baseOp, 0x0, fpReg(dest.reg()), fpReg(src.reg()), 0x08);
}

void fcastOut(Output *to, Instr *instr) {
	Bool in64 = instr->src().size().size64() > 4;
	Bool out64 = instr->dest().size().size64() > 4;
	if (in64 == out64) {
		// Just emit a mov instruction:
		regRegMove(to, instr->dest().reg(), instr->src().reg());
		return;
	}

	Nat op = 0x1E224000;
	if (in64)
		op |= Nat(1) << 22;
	if (out64)
		op |= Nat(1) << 15;
	op |= fpReg(instr->dest().reg());
	op |= fpReg(instr->src().reg()) << 5;
	to->putInt(op);
}

static void fromFloat(Output *to, Instr *instr, Nat op) {
	Bool in64 = instr->src().size().size64() > 4;
	Bool out64 = instr->dest().size().size64() > 4;
	if (in64)
		op |= Nat(1) << 22;
	if (out64)
		op |= Nat(1) << 31;
	op |= intRegZR(instr->dest().reg());
	op |= fpReg(instr->src().reg()) << 5;
	to->putInt(op);
}

void fcastiOut(Output *to, Instr *instr) {
	fromFloat(to, instr, 0x1E380000);
}

void fcastuOut(Output *to, Instr *instr) {
	fromFloat(to, instr, 0x1E390000);
}

static void toFloat(Output *to, Instr *instr, Nat op) {
	Bool in64 = instr->src().size().size64() > 4;
	Bool out64 = instr->dest().size().size64() > 4;
	if (out64)
		op |= Nat(1) << 22;
	if (in64)
		op |= Nat(1) << 31;
	op |= fpReg(instr->dest().reg());
	op |= intRegZR(instr->src().reg()) << 5;
	to->putInt(op);
}

void icastfOut(Output *to, Instr *instr) {
	toFloat(to, instr, 0x1E220000);
}

void ucastfOut(Output *to, Instr *instr) {
	toFloat(to, instr, 0x1E230000);
}

void datOut(Output *to, Instr *instr) {
	Operand src = instr->src();
	switch (src.type()) {
	case opLabel:
		to->putAddress(src.label());
		break;
	case opReference:
		to->putAddress(src.ref());
		break;
	case opObjReference:
		to->putObject(src.object());
		break;
	case opConstant:
		to->putSize(src.constant(), src.size());
		break;
	default:
		assert(false, L"Unsupported type for 'dat'.");
		break;
	}
}

void lblOffsetOut(Output *to, Instr *instr) {
	to->putOffset(instr->src().label());
}

void alignOut(Output *to, Instr *instr) {
	to->align(Nat(instr->src().constant()));
}

void locationOut(Output *, Instr *) {
	// We don't save location data in the generated code.
}

void metaOut(Output *, Instr *) {
	// We don't save metadata in the generated code.
}

#define OUTPUT(x) { op::x, &x ## Out }

typedef void (*OutputFn)(Output *to, Instr *instr);

// Note: "mov" is special: we try to merge consecutive mov operations.
const OpEntry<OutputFn> outputMap[] = {
	OUTPUT(nop),
	OUTPUT(prolog),
	OUTPUT(epilog),
	OUTPUT(mov),
	// Note: shadowMov is rare, so we don't bother merging it like 'mov'.
	OUTPUT(shadowMov),
	OUTPUT(lea),
	OUTPUT(call),
	OUTPUT(ret),
	OUTPUT(jmp),
	OUTPUT(sub),
	OUTPUT(add),
	OUTPUT(cmp),
	OUTPUT(setCond),
	OUTPUT(mul),
	OUTPUT(idiv),
	OUTPUT(udiv),
	OUTPUT(icast),
	OUTPUT(ucast),
	OUTPUT(band),
	OUTPUT(bor),
	OUTPUT(bxor),
	OUTPUT(bnot),
	OUTPUT(test),
	OUTPUT(shl),
	OUTPUT(shr),
	OUTPUT(sar),
	OUTPUT(fadd),
	OUTPUT(fsub),
	OUTPUT(fneg),
	OUTPUT(fmul),
	OUTPUT(fdiv),
	OUTPUT(fcmp),
	OUTPUT(fcast),
	OUTPUT(fcasti),
	OUTPUT(fcastu),
	OUTPUT(icastf),
	OUTPUT(ucastf),
	OUTPUT(preserve),
	OUTPUT(dat),
	OUTPUT(lblOffset),
	OUTPUT(align),
	OUTPUT(location),
	OUTPUT(meta),
};

bool empty(Array