QEMU RISCV TCG 详解十一 - RISCV CPU 状态在TCG Variables中的表示（Representation）

在前文《RISCV ISA -> TCG Ops》中，TCG IR 操作对象是 Variables，其中 Fixed Global Variable（TEMP_FIXED），与 Global Variable（TEMP_GLOBAL）比较特殊，是用于指代 CPUState 以及特定CPU 的状态的。本文就梳理清楚，在 TCG IR 中，这些 Variable 具体定义及其实现方式。

首先，整个目标CPU的状态存在 CPUState *cpu 中，也就是执行主循环传入的 cpu_exec_loop(CPUState *cpu, SyncClocks *sc)。最终溯源到在创建 QEMU Machine 的时候，创建的CPU，也就是 RISCVCPU:CPUState:DeviceState。其包含了，CPURISCVState env，继而包括了 RISCV CPU 的通用寄存器（General Purpose Registers / GPRs）以及状态寄存器（Control and Status Registers / CSRs）。

那么，TCG IR 中的 Global Variables 需要通过指针指向 RISCV CPU 的 GPRs；而 CSRs 则由 RISCV 的 csr 指令进行读写，在 TCG IR 中通过调用 RISCV CPU 中定义的函数来实现，这部分会在后续文章中详细讲解。本文只关注 GPR 等能够在 TCG IR 中直接通过 Global Variables 访问的寄存器。

Global Variables 是通过记录 RISCV CPU GPRs 相对于 cpu_env 的偏移量（offset）来进行寻址的。因此，首先要确定 cpu_env 是怎么从 CPUState *cpu 进入到 TCG IR 中的。

在 TCG IR 中，cpu_env 即上述的 CPURISCVState env，由 TCGv_env tcg_env 指代。tcg_env 是一个 C 语言的全局变量（global variable），其定义如下：

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
typedef TCGv_ptr TCGv_env;
TCGv_env tcg_env;

static void tcg_context_init(unsigned max_threads){
TCGContext *s = &tcg_init_ctx;
TCGTemp *ts;
…
ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
tcg_env = temp_tcgv_ptr(ts);}
/*
 * Allocate a global temp bound to the host register @reg.
 *
 * @s:    TCG context in which the temp is allocated
 * @type: TCG type stored as both base_type and type of the temp
 * @reg:  host register assigned to the temp and added to the reserved set
 * @name: name stored in the temp (the pointer is kept, not copied)
 *
 * Returns the newly allocated TCGTemp, with kind TEMP_FIXED.
 */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
TCGReg reg, const char *name){
TCGTemp *ts;
/* Anything other than I32 is accepted only when host registers are 64-bit. */
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
ts = tcg_global_alloc(s);
ts->base_type = type;
ts->type = type;
/* Overrides the TEMP_GLOBAL kind that tcg_global_alloc() just set. */
ts->kind = TEMP_FIXED;
ts->reg = reg;
ts->name = name;
/* Record @reg in the context's reserved register set. */
tcg_regset_set_reg(s->reserved_regs, reg);
return ts;}
static TCGTemp *tcg_global_alloc(TCGContext *s){
TCGTemp *ts;
…
s->nb_globals++;
ts = tcg_temp_alloc(s);
ts->kind = TEMP_GLOBAL;
return ts;}
static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t){
return (TCGv_ptr)temp_tcgv_i32(t);}
/*
 * Convert a TCGTemp pointer into its TCGv_i32 handle.
 *
 * The handle is not a pointer to the temp: it is the offset of @t from
 * the current TCGContext (tcg_ctx), cast to TCGv_i32.
 */
static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
{
    /* Result unused; called only for the check noted in the original comment. */
    (void)temp_idx(t); /* trigger embedded assert */
    return (TCGv_i32)((void *)t - (void *)tcg_ctx);
}
/* Return the position of @ts within the tcg_ctx->temps array. */
static inline size_t temp_idx(TCGTemp *ts)
{
    return ts - tcg_ctx->temps;
}

就是说，tcg_env 并不是直接指向描述 cpu_env 的 TCGTemp，记为 TCGTemp cpu_env，而是记录了其与 tcg_ctx 的偏移量。而 TCGTemp cpu_env 描述了 TCG_AREG0 寄存器指向了 CPURISCVState env。

在TCG 的 X86后端中，定义了 TCG_AREG0 由 EBP 来存放，即：

// qemu/tcg/i386/tcg-target.h
typedef enum {
…
TCG_AREG0 = TCG_REG_EBP,
TCG_REG_CALL_STACK = TCG_REG_ESP
} TCGReg;

并在 X86 生成 TB 代码时，在 TB prologue 中，设定，如下：

static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc){
…
cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit);
…}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
vaddr pc, TranslationBlock **last_tb,
int *tb_exit){
…
tb = cpu_tb_exec(cpu, tb, tb_exit);
…}
/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
* Disable CFI checks.
* TCG creates binary blobs at runtime, with the transformed code.
* A TB is a blob of binary code, created at runtime and called with an
* indirect function call. Since such function did not exist at compile time,
* the CFI runtime has no way to verify its signature and would fail.
* TCG is not considered a security-sensitive part of QEMU so this does not
* affect the impact of CFI in environment with high security requirements
*/
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) {
…
ret = tcg_qemu_tb_exec(cpu_env(cpu), tb_ptr);
…}
void tcg_prologue_init(void) {
…
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
…
/* Generate the prologue. */
tcg_target_qemu_prologue(s);
…}
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s) {
…
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
/* jmp *tb. */
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
…}

对应的输出有：

PROLOGUE: [size=45]
…
// %rdi used to pass 1st argument to functions, store cpu_env(cpu)
// %rsi used to pass 2nd argument to functions, point to tb code buffer
0x7d0f441b300a: 48 8b ef movq %rdi, %rbp
0x7d0f441b300d: 48 81 c4 78 fb ff ff addq $-0x488, %rsp
0x7d0f441b3014: ff e6 jmpq *%rsi
…

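其中 tcg_qemu_tb_exec(cpu_env(cpu), tb_ptr) 的第一个参数 cpu_env(cpu) 按调用约定经由 %rdi 传入，prologue 中的 movq %rdi, %rbp 再将其存入 %rbp（即 TCG_AREG0）；第二个参数 tb_ptr 经由 %rsi 传入，并通过 jmpq *%rsi 跳转到 TB 代码执行。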

那么，找到了 cpu_env 后，其它 TCG IR 直接使用的寄存器的定义如下：

/* global register indices */
static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
static TCGv load_res;
static TCGv load_val;
…
void riscv_translate_init(void){
int i;
/*
* cpu_gpr[0] is a placeholder for the zero register. Do not use it.
* Use the gen_set_gpr and get_gpr helper functions when accessing regs,
* unless you specifically block reads/writes to reg 0.
*/
cpu_gpr[0] = NULL;
cpu_gprh[0] = NULL;
for (i = 1; i < 32; i++) {
cpu_gpr[i] = tcg_global_mem_new(tcg_env,
offsetof(CPURISCVState, gpr[i]), riscv_int_regnames[i]);
cpu_gprh[i] = tcg_global_mem_new(tcg_env,
offsetof(CPURISCVState, gprh[i]), riscv_int_regnamesh[i]);
}
for (i = 0; i < 32; i++) {
cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env,
offsetof(CPURISCVState, fpr[i]), riscv_fpr_regnames[i]);
}
cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc");
cpu_vl = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vl), "vl");
cpu_vstart = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vstart),
"vstart");
load_res = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_res),
"load_res");
load_val = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_val),
"load_val");}

都是通过 tcg_global_mem_new() 函数来定义。以 static TCGv cpu_pc 为例，有：

__thread TCGContext *tcg_ctx;
typedef struct TCGv_i64_d *TCGv_i64;
typedef TCGv_i64 TCGv;
static TCGv cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc");
TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name){
TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
return temp_tcgv_i64(ts);}
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
const char *name, TCGType type) {
TCGContext *s = tcg_ctx;
// In this case, base_ts == TCGTemp* cpu_env.
TCGTemp *base_ts = tcgv_ptr_temp(base);
TCGTemp *ts = tcg_global_alloc(s);
int indirect_reg = 0;
switch (base_ts->kind) {
case TEMP_FIXED:
break;
…}
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
…
} else {
ts->base_type = type;
ts->type = type;
ts->indirect_reg = indirect_reg;
ts->mem_allocated = 1;
ts->mem_base = base_ts;
ts->mem_offset = offset;
ts->name = name;
}
return ts;
}
static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v){
return tcgv_i32_temp((TCGv_i32)v);}
static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t){
return (TCGv_i64)temp_tcgv_i32(t);}
/*
* Using the offset of a temporary, relative to TCGContext, rather than
* its index means that we don't use 0. That leaves offset 0 free for
* a NULL representation without having to leave index 0 unused.
*/
static inline TCGTemp *tcgv_i32_temp(TCGv_i32 v){
return (void *)tcg_ctx + (uintptr_t)v;}
/*
 * Inverse of tcgv_i32_temp(): turn a TCGTemp pointer into the TCGv_i32
 * handle, i.e. the offset of @t from tcg_ctx cast to TCGv_i32.
 */
static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
{
    /* Result unused; called only for the check noted in the original comment. */
    (void)temp_idx(t); /* trigger embedded assert */
    return (TCGv_i32)((void *)t - (void *)tcg_ctx);
}

那么，这里的 static TCGv cpu_pc 记录了 TCGTemp* cpu_pc 与 __thread TCGContext *tcg_ctx 的偏移量（offset）。

也就是 TCGv_* 与 TCGTemp* 的转换需要经过其与tcg_ctx的偏移量，而 TCGv_* 就是记录了对应的偏移量，而非 TCGTemp* 指针。而对应的 TCGTemp* cpu_pc 则记录了 pc 寄存器与 cpu_env 的偏移量。

那么，回顾前文的 auipc t0,0 的转译，有

/*
 * Translate AUIPC: emit TCG ops that compute pc + a->imm and store the
 * result into GPR a->rd. Always returns true.
 */
static bool trans_auipc(DisasContext *ctx, arg_auipc *a){
// target_pc <- TCGv used as the destination for a->rd (from dest_gpr)
TCGv target_pc = dest_gpr(ctx, a->rd);
// target_pc <- pc + a->imm
gen_pc_plus_diff(target_pc, ctx, a->imm);
// a->rd <- target_pc
gen_set_gpr(ctx, a->rd, target_pc);
return true;}
static TCGv dest_gpr(DisasContext *ctx, int reg_num){
if (reg_num == 0 || get_olen(ctx) < TARGET_LONG_BITS) {
return tcg_temp_new();
}
return cpu_gpr[reg_num];}
/*
 * Abridged excerpt ("…" marks code omitted by the article).
 *
 * Emit TCG ops that place ctx->base.pc_next + @diff into @target.
 * When the TB has CF_PCREL set, the value is formed as cpu_pc plus the
 * difference between that destination and ctx->pc_save.
 */
static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
                             target_long diff){
    target_ulong dest = ctx->base.pc_next + diff;
    …
    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
    …} else {…}}
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2){
/* some cases can be optimized here */
if (arg2 == 0) {
tcg_gen_mov_i64(ret, arg1);
} else if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
} else {
tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1),
tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));}}
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg){
if (ret == arg) {return;}
if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_op2_i64(INDEX_op_mov, ret, arg);
} else {…}}
static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2){
tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2));}
TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2){
TCGOp *op = tcg_emit_op(opc, 2);
TCGOP_TYPE(op) = type;
op->args[0] = a1;
op->args[1] = a2;
return op;}
TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs){
TCGOp *op = tcg_op_alloc(opc, nargs);
if (tcg_ctx->emit_before_op) {
QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
} else {
QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
}
return op;}
static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t){
if (reg_num != 0) {
switch (get_ol(ctx)) {
…
case MXL_RV64:
case MXL_RV128:
tcg_gen_mov_tl(cpu_gpr[reg_num], t);
break;
…}
…}}
static inline TCGArg tcgv_i64_arg(TCGv_i64 v){
return temp_arg(tcgv_i64_temp(v));}
static inline TCGArg temp_arg(TCGTemp *ts) {
return (uintptr_t)ts;}

在生成对应的 struct TCGOp 时，会将 TCGv_* 转变为 TCGArg。而 TCGArg 实际上就是以整数（uintptr_t）形式保存的 TCGTemp* 指针值（见 temp_arg()）。因此，struct TCGOp 的 args[] 中记录的就是 TCGTemp*。也就是说，可以直接通过 op->args[index] 来访问 TCG IR 操作的操作对象（Operands），如 TCGTemp* cpu_pc，TCGTemp* cpu_gpr_x5/t0 等。

其中TCG IR 中的一个操作由 struct TCGOp 定义，如下：

// TCG IR Operations
struct TCGOp {
TCGOpcode opc : 8;
unsigned nargs : 8;
/* Parameters for this opcode. See below. */
unsigned param1 : 8;
unsigned param2 : 8;
/* Lifetime data of the operands. */
TCGLifeData life;
/* Next and previous opcodes. */
QTAILQ_ENTRY(TCGOp) link;
/* Register preferences for the output(s). */
TCGRegSet output_pref[2];
/* Arguments for the opcode. */
TCGArg args[];};

typedef enum TCGOpcode {
#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
#include "tcg/tcg-opc.h"
#undef DEF
NB_OPS,
} TCGOpcode;

// qemu/include/tcg/tcg-opc.h
…
/*
* DEF(name, oargs, iargs, cargs, flags)
*/
/* predefined ops */
DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
/* variable number of parameters */
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT)
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT)
…

由此，可以了解到，在 TCG IR 中，是如何对 RISCV CPU 的寄存器进行访问。关键点在于对 struct ArchCPU* cpu，struct CPUArchState* env，TCGv_*，TCGTemp*，TCGArg 的理解。

QEMU RISCV TCG 详解十一 -- RISCV CPU 状态在TCG Variables中的表示（Representation）

相关推荐

评论抢沙发

评论前必须登录！

热门标签

置顶推荐

热门文章

最新文章