NAME
AED_x86_get_assembler,
AED_x86_encoder_alloc,
AED_x86_encoder_free,
AED_x86_encoder_reset,
AED_x86_encoder_encode_labels,
AED_x86_encoder_get_buffer,
AED_x86_encoder_get_buffer_length,
AED_x86_encoder_has_error,
AED_x86_encoder_get_error —
aed x86 encoder functions
SYNOPSIS
/* -laed */
#include <aed/x86.h>
typedef void *
AED_alloc(void *ptr,
size_t old_size, size_t
new_size, void *opaque);
typedef void
AED_free(void *ptr,
size_t size, void *opaque);
const AED_x86_assembler *
AED_x86_get_assembler(void);
AED_x86_encoder *
AED_x86_encoder_alloc(uint32_t
flags, AED_alloc
alloc, AED_free
free, void
*opaque);
void
AED_x86_encoder_free(AED_x86_encoder
*ec);
void
AED_x86_encoder_reset(AED_x86_encoder
*ec);
int
AED_x86_encoder_encode_labels(AED_x86_encoder
*ec, size_t
offset);
const uint8_t *
AED_x86_encoder_get_buffer(const
AED_x86_encoder *ec);
size_t
AED_x86_encoder_get_buffer_length(const
AED_x86_encoder *ec);
int
AED_x86_encoder_has_error(const
AED_x86_encoder *ec);
const char *
AED_x86_encoder_get_error(const
AED_x86_encoder *ec);
DESCRIPTION
The aed x86 encoder provides an API used to encode instructions targeting the x86 instruction set architecture in 64-bit mode. The API is divided into two cooperating parts: the encoder and the assembler. The encoder performs the encoding and populates the instruction buffer with the encoded instructions. The assembler exposes each encodable instruction as a function and instructs the encoder to perform the actual encoding.
Below is a complete usage example showing how to encode a routine that calculates the sum of an array of 32-bit integers. Throughout the examples in this manual, the encoder is referred to as ec and the assembler as a.
/* Jump labels. */
enum {
Ldone,
Lloop,
};
AED_x86_encoder *ec = AED_x86_encoder_alloc(0, NULL, NULL, NULL);
const AED_x86_assembler *a = AED_x86_get_assembler();
a->xor(ec, a->eax, a->eax);
a->xor(ec, a->ecx, a->ecx);
a->test(ec, a->esi, a->esi);
a->jz(ec, a->relative(ec, 8, Ldone));
a->label(ec, Lloop);
a->add(ec, a->eax, a->memory(ec, a->rdi, a->rcx, /*scale=*/4, /*disp=*/0));
a->inc(ec, a->ecx);
a->dec(ec, a->esi);
a->jnz(ec, a->relative(ec, 8, Lloop));
a->label(ec, Ldone);
a->ret(ec);
AED_x86_encoder_encode_labels(ec, 0);
if (AED_x86_encoder_has_error(ec))
errx(1, "%s", AED_x86_encoder_get_error(ec));
AED_x86_encoder_free(ec);
Encoder basics
The
AED_x86_encoder_alloc()
function allocates a new encoder. All memory allocations performed by the
encoder are done through the alloc callback which
defaults to
realloc(3) when passing NULL. The
encoder frees memory using the free callback which
defaults to
free(3) when passing NULL. See the
Custom memory allocator
section for details on how to manage memory.
The flags may be any combination of the following:
- AED_X86_MACHINE_LATEST
- AED_X86_MACHINE_VIA
- AED_X86_MACHINE_NEHALEM
- AED_X86_MACHINE_SKY_LAKE
- AED_X86_MACHINE_COMET_LAKE
- AED_X86_MACHINE_GRANITE_RAPIDS
- AED_X86_MACHINE_DIAMOND_RAPIDS
- AED_X86_MACHINE_CLEARWATER_FOREST
- AED_X86_MACHINE_PANTHER_LAKE
- AED_X86_MACHINE_NOVA_LAKE
- Microarchitecture to target, defaults to
AED_X86_MACHINE_LATEST.
- AED_X86_MACHINE_NATIVE
- Favor the microarchitecture of the host processor. If the
microarchitecture cannot be detected, a conservative default of
AED_X86_MACHINE_NEHALEM is favored.
The
AED_x86_encoder_free()
function frees the encoder and all its associated memory.
The
AED_x86_encoder_reset()
function resets the instruction buffer, effectively discarding any
previously encoded instruction(s).
The
AED_x86_encoder_get_buffer()
function returns the instruction buffer, including all encoded
instruction(s).
The
AED_x86_encoder_get_buffer_length()
function returns the length of the instruction buffer as obtained through
AED_x86_encoder_get_buffer().
The
AED_x86_encoder_has_error()
function returns non-zero if an error was encountered during encoding.
The
AED_x86_encoder_get_error()
function returns a human readable representation of the last encountered
error during encoding. Note that this function will never return
NULL, even if an error is absent.
Encoding instructions
Encoding instructions is done using the assembler accessible
through the
AED_x86_get_assembler()
function. The assembler is a stateless singleton object providing functions
used to encode instructions and operands.
Although omitted from the struct definition below, for each encodable instruction the assembler exposes a function used to encode the same instruction named after the instruction mnemonic. Instruction operands are expressed in Intel®-like syntax in which the first operand denotes the destination and all subsequent ones denote source operands.
If an instruction comes in many forms accepting a different number of operands, the form with the fewest operands will be named after the instruction mnemonic and all following forms suffixed with an integer denoting the number of operands. This pattern is often seen among instructions offering encodings with non-destructive destination operands, such as the ADD instruction.
size_t (*add)(AED_x86_encoder *ec, uintptr_t op0, uintptr_t op1);
size_t (*add3)(AED_x86_encoder *ec, uintptr_t op0, uintptr_t op1, uintptr_t op2);
Note that some instruction encoder functions cannot be named after the mnemonic. See the C and C++ gotchas section for details.
typedef struct AED_x86_assembler {
/* Operand functions. */
uintptr_t (*immediate)(AED_x86_encoder *ec, int64_t imm);
uintptr_t (*memory)(AED_x86_encoder *ec, uintptr_t base, uintptr_t index,
uint8_t scale, int64_t disp);
uintptr_t (*offset)(AED_x86_encoder *ec, uint64_t offset);
uintptr_t (*relative)(AED_x86_encoder *ec, uint8_t size, int label);
uintptr_t (*rip_relative)(AED_x86_encoder *ec, uint8_t size, int label);
/* AVX-512 operand functions. */
uintptr_t (*broadcast)(void);
uintptr_t (*sae)(void);
uintptr_t (*rn_sae)(void);
uintptr_t (*rd_sae)(void);
uintptr_t (*ru_sae)(void);
uintptr_t (*rz_sae)(void);
uintptr_t (*opmask)(uint8_t opmask, uint8_t z);
/* Label functions. */
void (*label)(AED_x86_encoder *ec, int label);
void (*label_at_offset)(AED_x86_encoder *ec, int label, size_t offset);
/* Memory size functions. */
uintptr_t (*byte)(void);
uintptr_t (*word)(void);
uintptr_t (*dword)(void);
uintptr_t (*qword)(void);
uintptr_t (*xword)(void);
uintptr_t (*yword)(void);
uintptr_t (*zword)(void);
/* Alignment functions. */
size_t (*align)(AED_x86_encoder *ec, uint32_t alignment);
size_t (*fill)(AED_x86_encoder *ec, uint32_t length);
/* Explicit prefix functions. */
const AED_x86_assembler *(*evex)(AED_x86_encoder *ec);
const AED_x86_assembler *(*rex2)(AED_x86_encoder *ec);
/* Segment override functions. */
const AED_x86_assembler *(*es)(AED_x86_encoder *ec);
const AED_x86_assembler *(*cs)(AED_x86_encoder *ec);
const AED_x86_assembler *(*ss)(AED_x86_encoder *ec);
const AED_x86_assembler *(*ds)(AED_x86_encoder *ec);
const AED_x86_assembler *(*fs)(AED_x86_encoder *ec);
const AED_x86_assembler *(*gs)(AED_x86_encoder *ec);
/* General purpose registers. */
uintptr_t al, ax, eax, rax;
uintptr_t cl, cx, ecx, rcx;
uintptr_t dl, dx, edx, rdx;
uintptr_t bl, bx, ebx, rbx;
uintptr_t ah, sp, esp, rsp;
uintptr_t ch, bp, ebp, rbp;
uintptr_t dh, si, esi, rsi;
uintptr_t bh, di, edi, rdi;
uintptr_t r8b, r8w, r8d, r8;
uintptr_t r9b, r9w, r9d, r9;
uintptr_t r10b, r10w, r10d, r10;
uintptr_t r11b, r11w, r11d, r11;
uintptr_t r12b, r12w, r12d, r12;
uintptr_t r13b, r13w, r13d, r13;
uintptr_t r14b, r14w, r14d, r14;
uintptr_t r15b, r15w, r15d, r15;
uintptr_t r16b, r16w, r16d, r16;
uintptr_t r17b, r17w, r17d, r17;
uintptr_t r18b, r18w, r18d, r18;
uintptr_t r19b, r19w, r19d, r19;
uintptr_t r20b, r20w, r20d, r20;
uintptr_t r21b, r21w, r21d, r21;
uintptr_t r22b, r22w, r22d, r22;
uintptr_t r23b, r23w, r23d, r23;
uintptr_t r24b, r24w, r24d, r24;
uintptr_t r25b, r25w, r25d, r25;
uintptr_t r26b, r26w, r26d, r26;
uintptr_t r27b, r27w, r27d, r27;
uintptr_t r28b, r28w, r28d, r28;
uintptr_t r29b, r29w, r29d, r29;
uintptr_t r30b, r30w, r30d, r30;
uintptr_t r31b, r31w, r31d, r31;
uintptr_t eip, rip;
uintptr_t spl, bpl, sil, dil;
/* MMX registers. */
uintptr_t mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
/* AMX registers. */
uintptr_t tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7;
/* XMM registers. */
uintptr_t xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31;
/* YMM registers. */
uintptr_t ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7,
ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23,
ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31;
/* ZMM registers. */
uintptr_t zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7,
zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15,
zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23,
zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
/* AVX-512 opmask registers. */
uintptr_t k0, k1, k2, k3, k4, k5, k6, k7;
} AED_x86_assembler;
Choice of encoding
By default, all instruction encoder functions favor the shortest possible encoding with respect to the given operands.
/* VEX prefix will be favored. */
a->vmovsd(ec, a->xmm0, a->xmm0);
/* EVEX prefix is required due to addressing xmm16. */
a->vmovsd(ec, a->xmm16, a->xmm0);
The desired encoding can be explicitly stated using the
evex() and
rex2()
assembler functions. These functions return the assembler, allowing the call
to be chained with the instruction whose encoding is being enforced.
/* Force EVEX encoding. */
a->evex(ec)->andn(ec, a->rax, a->rcx, a->rdx);
/* Force REX2 encoding. */
a->rex2(ec)->mov(ec, a->rax, a->rcx);
Encoding registers
The assembler has dedicated fields for all supported registers, which can be used to encode register operands.
a->push2(ec, a->rax, a->rcx);
Encoding immediates
The
immediate()
assembler function is used to encode immediate operands.
a->jmp(ec, a->immediate(ec, 0xff));
Encoding memory operands
The
memory()
assembler function is used to encode operands addressing memory. The
base argument is mandatory and must refer to a general
purpose register provided by the assembler. The index
argument is optional and must either refer to a general purpose register
provided by the assembler or be omitted by passing zero. The
index register can optionally be scaled using the
scale argument which treats 2, 4 and 8 as valid
scale factors. Passing a scale of zero disables scaling. The
disp argument is optional and denotes the memory
displacement. Passing a disp of zero omits the
displacement.
/* Move dword from [rbx] into eax. */
a->mov(ec, a->eax, a->memory(ec, a->rbx, /*index=*/0, /*scale=*/0, /*disp=*/0));
/* Move dword from [rbx + 4 * rcx + 0x10] into eax. */
a->mov(ec, a->eax, a->memory(ec, a->rbx, a->rcx, /*scale=*/4, /*disp=*/0x10));
The encoder infers the memory size from the given
operands. Depending on the instruction, this is not always possible, causing
an ambiguity requiring the memory size to be explicitly stated using either
the byte(),
word(),
dword(),
qword(),
xword(),
yword(),
or
zword()
assembler functions.
a->vcvtph2hf8(ec, a->xmm0,
a->memory(ec, a->rax, /*index=*/0, /*scale=*/0, /*disp=*/0) + a->yword());
Encoding VSIB memory operands
The memory() assembler function is also
used to encode VSIB memory addressing. The index
argument is required to refer to a vector register provided by the
assembler.
a->vpgatherdd(ec, a->xmm0,
a->memory(ec, a->r8, a->xmm15, /*scale=*/0, /*disp=*/0));
Encoding segment offset operands
The
offset()
assembler function is used to encode operands addressing memory using a
segment relative offset.
/* Move dword from data segment at offset 0xdeadbeef into eax. */
a->mov(ec, a->eax, a->offset(ec, 0xdeadbeef));
Encoding segment overrides
The
es(),
cs(),
ss(),
ds(),
fs() and
gs()
assembler functions are used to encode the memory segment. These functions
return the assembler, allowing the call to be chained with the instruction
that the segment override applies to.
/* Move dword from fs:[rcx] into eax. */
a->fs(ec)->mov(ec, a->eax, a->memory(ec, a->rcx, /*index=*/0, /*scale=*/0, /*disp=*/0));
Encoding jump labels
The
label()
assembler function associates the current instruction buffer offset with
label, allowing instructions to encode operands
referring to the same offset using the
relative()
assembler function and by passing the same label. The
size argument must be either 8, 16 or 32 and
represents the number of bits required to express the relative offset
between the instruction and the label.
The effective relative offsets
for such operands are encoded by the
AED_x86_encoder_encode_labels()
function, intended to be called after encoding the final instruction. The
offset argument can be used when the encoded
instructions are expected to reside at a certain offset in memory, which
affects the effective relative offsets. The
AED_x86_encoder_encode_labels() function returns
non-zero on success and zero on error. Errors can be further diagnosed using
AED_x86_encoder_get_error().
/* Label for loop. */
const int Lloop = 0;
/* Loop until ecx reaches zero. */
a->label(ec, Lloop);
a->dec(ec, a->ecx);
a->jnz(ec, a->relative(ec, 8, Lloop));
AED_x86_encoder_encode_labels(ec, 0);
Instead of associating the current
instruction buffer offset with a label, an explicit offset can be defined
using the
label_at_offset()
assembler function. This is intended to be used when the encoded instructions
are expected to reside at a certain offset within memory.
/* Label for global. */
const int Lglobal = 0;
/* Position Lglobal at offset 0x1000. */
a->label_at_offset(ec, Lglobal, 0x1000);
Encoding RIP-relative addressing
Labels can be used to encode RIP-relative addressing using the
rip_relative()
assembler function. The size argument must be either 8
or 32 and represents the number of bits required to express the relative
offset between the instruction pointer and the label.
/* Label for global accessed through RIP-relative addressing. */
const int Lglobal = 0;
/* Position Lglobal at offset 0x1000. */
a->label_at_offset(ec, Lglobal, 0x1000);
/* Move Lglobal to register, instruction expected to reside at
 * offset 0x2000. */
a->mov(ec, a->rax, a->rip_relative(ec, 32, Lglobal));
AED_x86_encoder_encode_labels(ec, 0x2000);
Encoding alignment
The
align()
assembler function aligns the instruction buffer to the next multiple of
alignment using as few NOP instructions as
possible.
/* Align the instruction buffer to the next multiple of 16. */
a->align(ec, 16);
The
fill()
assembler function fills the instruction buffer with as few NOP instructions
as possible that fit within length.
/* Fill the instruction buffer with as few NOP instructions as
 * possible that fit within 16 bytes. */
a->fill(ec, 16);
Encoding AVX-512 instructions
The
broadcast()
assembler function can be used to turn a memory operand into a broadcast in
which the element loaded from memory will be broadcast to all other
elements.
a->vaddpd(ec, a->zmm0, a->zmm0,
a->memory(ec, a->rax, /*index=*/0, /*scale=*/0, /*disp=*/0) + a->broadcast());
The
opmask()
assembler function can be used to annotate a destination register operand as
using opmask as the opmask register. A non-zero
z argument enables zeroing-masking; otherwise,
merging-masking is favored.
a->vmovupd(ec, a->zmm0 + a->opmask(7, /*z=*/0), a->zmm1);
The
sae() assembler
function can be used to enable Suppress All Exceptions (SAE). Note that
sae() can only be added to source register operands,
not to destination operands.
a->vgetmantps(ec, a->zmm2, a->zmm1 + a->sae(), a->immediate(ec, 0));
The desired rounding control can be defined using the
rn_sae(),
rd_sae(),
ru_sae(),
and
rz_sae()
assembler functions. Note that these functions can only be added to source
register operands, not to destination operands.
a->vcvtsd2si(ec, a->rax, a->xmm0 + a->rn_sae());
C and C++ gotchas
Since int is a reserved keyword in C, the assembler cannot expose
an instruction encoder function named
int()
for the INT instruction. Instead, the function is suffixed with an
underscore.
a->int_(ec, a->immediate(ec, 3));
In C++, the following additional instructions conflict with reserved identifiers and thus must also be suffixed with an underscore: and, not, or and xor.
a->xor_(ec, a->rax, a->rax);
Custom memory allocator
The memory allocated by the encoder can be managed by supplying
implementations to the
AED_x86_encoder_alloc()
function using the alloc and
free arguments.
- The alloc argument must conform to
realloc(3)-like semantics. If the given
ptr is not
NULL, the returned memory address must contain old_size number of bytes copied from ptr. The returned memory address must always have a capacity of new_size number of bytes.
- The free argument is guaranteed never to be given a
NULL ptr argument.
- The opaque argument is passed as-is to both callbacks.
static void *
encoder_alloc(void *ptr, size_t old_size, size_t new_size, void *arg)
{
return realloc(ptr, new_size);
}
static void
encoder_free(void *ptr, size_t size, void *arg)
{
free(ptr);
}
AED_x86_encoder *ec = AED_x86_encoder_alloc(0, encoder_alloc, encoder_free, NULL);
RETURN VALUES
The return value of all instruction encoder functions,
align() and fill() is
interpreted as follows:
- > 0
- Instruction successfully encoded. Returns the length of the encoded instruction.
- = 0
- Failed to encode instruction. Errors can be further diagnosed using
AED_x86_encoder_get_error().
AUTHORS
Anton Lindqvist <anton@basename.se>