Improve insn selection for helper calls. Attempt to evaluate arguments
into the real register that is mandated by the ABI instead of evaluating
it in a virtual register and then move the result.
Observed savings in insns between 0.5% and 1.4%. 
Probably an overrated optimization given current helper functions which
rarely take more than one argument.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2326 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host_s390_isel.c b/priv/host_s390_isel.c
index 4add4c6..4d85a0f 100644
--- a/priv/host_s390_isel.c
+++ b/priv/host_s390_isel.c
@@ -423,19 +423,72 @@
 }
 
 
+/* If EXPR can be evaluated with a single s390_insn, do so and assign the
+   value to register DST. If not, return NULL. */
+static s390_insn *
+s390_isel_single_insn_for_int_expr(ISelEnv *env, HReg dst, IRExpr *expr)
+{
+   switch (expr->tag) {
+   case Iex_Const: {
+      ULong value;
+      const IRConst *con = expr->Iex.Const.con;
+
+      /* Bitwise copy of the value. No sign/zero-extension */
+      switch (con->tag) {
+      case Ico_U64: value = con->Ico.U64; break;
+      case Ico_U32: value = con->Ico.U32; break;
+      case Ico_U16: value = con->Ico.U16; break;
+      case Ico_U8:  value = con->Ico.U8;  break;
+      default:      vpanic("s390_isel_single_insn_for_int_expr: invalid constant");
+      }
+
+      return s390_insn_load_immediate(8, dst, value);
+   }
+
+   case Iex_RdTmp: {
+      HReg src = lookupIRTemp(env, expr->Iex.RdTmp.tmp);
+      return s390_insn_move(8, dst, src);
+   }
+
+   case Iex_Get: {
+      UInt size = sizeofIRType(typeOfIRExpr(env->type_env, expr));
+      s390_amode *am = s390_amode_for_guest_state(expr->Iex.Get.offset);
+
+      return s390_insn_load(size, dst, am);
+   }
+
+   case Iex_Load: {
+      UInt size = sizeofIRType(typeOfIRExpr(env->type_env, expr));
+      s390_amode *am = s390_isel_amode(env, expr->Iex.Load.addr);
+
+      vassert(expr->Iex.Load.end == Iend_BE);
+
+      return s390_insn_load(size, dst, am);
+   }
+
+   default:
+      /* too complex */
+      return NULL;
+   }
+}
+
+
 /* Call a helper (clean or dirty)
    Arguments must satisfy the following conditions:
+
    (a) they are expressions yielding an integer result
    (b) there can be no more than S390_NUM_GPRPARMS arguments
-       guard is a Ity_Bit expression indicating whether or not the
-       call happens.  If guard==NULL, the call is unconditional.
+
+   guard is a Ity_Bit expression indicating whether or not the
+   call happens.  If guard == NULL, the call is unconditional.
 */
 static void
 doHelperCall(ISelEnv *env, Bool passBBP, IRExpr *guard,
              IRCallee *callee, IRExpr **args)
 {
-   UInt n_args, i, argreg, size;
+   UInt n_args, i, argreg;
    ULong target;
+   s390_insn *insn_for_arg[S390_NUM_GPRPARMS];
    HReg tmpregs[S390_NUM_GPRPARMS];
    s390_cc_t cc;
 
@@ -447,23 +500,6 @@
       vpanic("doHelperCall: too many arguments");
    }
 
-   /* This is the "slow scheme". fixs390: implement the fast one */
-   argreg = 0;
-
-   /* If we need the guest state pointer put it in a temporary arg reg */
-   if (passBBP) {
-      tmpregs[argreg] = newVRegI(env);
-      addInstr(env, s390_insn_move(sizeof(ULong), tmpregs[argreg],
-                                   s390_hreg_guest_state_pointer()));
-      argreg++;
-   }
-
-   /* Compute the function arguments into a temporary register each */
-   for (i = 0; i < n_args; i++) {
-      tmpregs[argreg] = s390_isel_int_expr(env, args[i]);
-      argreg++;
-   }
-
    /* Compute the condition */
    cc = S390_CC_ALWAYS;
    if (guard) {
@@ -476,13 +512,66 @@
       }
    }
 
-   /* Move the args to the final register */
-   for (i = 0; i < argreg; i++) {
-      HReg finalreg;
+   /* Evaluate the function arguments for the helper call.
+      First, as the helper function is written in C, we are free to evaluate
+      argumeqnts in any order we like, as the evaluation order is unspecified
+      by the C standard.
 
-      finalreg = make_gpr(s390_gprno_from_arg_index(i));
-      size = sizeofIRType(Ity_I64);
-      addInstr(env, s390_insn_move(size, finalreg, tmpregs[i]));
+      Secondly, if possible, we want to evaluate the arguments such that its
+      value ends up in the register that is mandated by the ABI for this 
+      argument. The idea is to avoid an additional move insn from a virtual 
+      to a real register. Currently, the register allocator is not very good
+      at eliminating those.
+
+      The complication is that we need to make sure that real registers do not
+      get overwritten while evaluating the arguments. Consider a helper call 
+      with two arguments. The first argument is an integer constant and the 
+      second argument is a complex expression. If we evaluate the 1st argument
+      and load the constant into r2 (as mandated by ABI) and the 2nd argument
+      expression contains a helper call with arguments, then evaluation of the
+      2nd argument will overwrite the contents of r2. Not so good.  Therefore,
+      we first evaluate all those arguments which are complex and assign their
+      value to a virtual register. Then in a second pass we evaluate those
+      arguments where the value can be assigned to the ABI destination register
+      directly. There is no danger now that those can be overwritten. This is
+      what happens conceptually. The implementation is slightly different. */
+
+   argreg = 0;
+
+   /* If we need the guest state pointer put it into the correct register */
+   if (passBBP) {
+      insn_for_arg[argreg] =
+         s390_insn_move(sizeof(ULong),
+                        make_gpr(s390_gprno_from_arg_index(argreg)),
+                        s390_hreg_guest_state_pointer());
+      argreg++;
+   }
+
+   /* Compute the function arguments. Issue insns for complex arguments.
+      Store their value in a temporary register each. For arguments that
+      can be evaluated in their ABI destination register, generate the
+      insn but do not issue it yet. */
+   for (i = 0; i < n_args; i++) {
+      HReg dst = make_gpr(s390_gprno_from_arg_index(argreg));
+
+      insn_for_arg[argreg] =
+         s390_isel_single_insn_for_int_expr(env, dst, args[i]);
+
+      if (insn_for_arg[argreg] == NULL) {
+         tmpregs[argreg] = s390_isel_int_expr(env, args[i]);
+      }
+      argreg++;
+   }
+
+   /* Assign values to ABI registers */
+   for (i = 0; i < argreg; i++) {
+      if (insn_for_arg[i]) {
+         addInstr(env, insn_for_arg[i]);  /* issue insn */
+      } else {
+         /* Move value to ABI register */
+         HReg finalreg = make_gpr(s390_gprno_from_arg_index(i));
+         addInstr(env, s390_insn_move(8, finalreg, tmpregs[i]));
+      }
    }
 
    target = Ptr_to_ULong(callee->addr);