blob: d4a5cc14dee7c30fa25bd4c022d39ea78ea624eb [file] [log] [blame]
Thomas Heller8bdf81d2008-03-04 20:09:11 +00001#ifdef __x86_64__
2
3/* -----------------------------------------------------------------------
4 x86-ffi64.c - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
5
6 x86-64 Foreign Function Interface
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 ``Software''), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 OTHER DEALINGS IN THE SOFTWARE.
26 ----------------------------------------------------------------------- */
27
28#include <ffi.h>
29#include <ffi_common.h>
30
31#include <stdlib.h>
32#include <stdarg.h>
33
34#define MAX_GPR_REGS 6
35#define MAX_SSE_REGS 8
36
37typedef struct RegisterArgs {
38 /* Registers for argument passing. */
39 UINT64 gpr[MAX_GPR_REGS];
40 __int128_t sse[MAX_SSE_REGS];
41} RegisterArgs;
42
/* Low-level call routine, defined outside this file.  As invoked by
   ffi_call():
     args     - start of the argument block: a RegisterArgs image followed
                by the arguments that are passed in memory;
     bytes    - size of that block (sizeof(RegisterArgs) + cif->bytes);
     flags    - cif->flags, as computed by ffi_prep_cif_machdep();
     raddr    - the caller's return value buffer (rvalue);
     fnaddr   - the function to call;
     ssecount - number of SSE slots filled in the RegisterArgs image.  */
extern void
ffi_call_unix64(
	void*			args,
	unsigned long	bytes,
	unsigned		flags,
	void*			raddr,
	void			(*fnaddr)(),
	unsigned		ssecount);
51
/* All references to register classes here are identical to the code in
   gcc/config/i386/i386.c.  Do *not* change one without the other.  */
54
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half contains padding).  */
enum x86_64_reg_class
{
	/* Nothing classified in this eightbyte (yet).  */
	X86_64_NO_CLASS,

	/* Passed in a general-purpose register.  INTEGERSI marks a value
	   that fits in the low 32 bits of its eightbyte.  */
	X86_64_INTEGER_CLASS,
	X86_64_INTEGERSI_CLASS,

	/* Passed in an SSE register.  SSESF is a lone float, SSEDF a double.
	   SSE_CLASS_P() relies on SSE..SSEUP being contiguous and in this
	   order; do not insert other enumerators between them.  */
	X86_64_SSE_CLASS,
	X86_64_SSESF_CLASS,
	X86_64_SSEDF_CLASS,
	X86_64_SSEUP_CLASS,

	/* x87 classes, used here for long double (X87 followed by X87UP).  */
	X86_64_X87_CLASS,
	X86_64_X87UP_CLASS,
	X86_64_COMPLEX_X87_CLASS,

	/* Passed in memory.  */
	X86_64_MEMORY_CLASS
};
76
/* Capacity of the per-argument class arrays used throughout this file.  */
#define MAX_CLASSES	4

/* Nonzero if X is one of the SSE classes (SSE, SSESF, SSEDF or SSEUP).
   Relies on those enumerators being contiguous in x86_64_reg_class.
   X is evaluated twice, so it must not have side effects.  */
#define SSE_CLASS_P(X) \
	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
79
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of an incoming
   argument by register class and to assign registers accordingly.  */
83
84/* Return the union class of CLASS1 and CLASS2.
85 See the x86-64 PS ABI for details. */
86static enum x86_64_reg_class
87merge_classes(
88 enum x86_64_reg_class class1,
89 enum x86_64_reg_class class2)
90{
91 /* Rule #1: If both classes are equal, this is the resulting class. */
92 if (class1 == class2)
93 return class1;
94
95 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
96 the other class. */
97 if (class1 == X86_64_NO_CLASS)
98 return class2;
99
100 if (class2 == X86_64_NO_CLASS)
101 return class1;
102
103 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
104 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
105 return X86_64_MEMORY_CLASS;
106
107 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
108 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
109 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
110 return X86_64_INTEGERSI_CLASS;
111
112 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
113 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
114 return X86_64_INTEGER_CLASS;
115
116 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
117 MEMORY is used. */
118 if (class1 == X86_64_X87_CLASS
119 || class1 == X86_64_X87UP_CLASS
120 || class1 == X86_64_COMPLEX_X87_CLASS
121 || class2 == X86_64_X87_CLASS
122 || class2 == X86_64_X87UP_CLASS
123 || class2 == X86_64_COMPLEX_X87_CLASS)
124 return X86_64_MEMORY_CLASS;
125
126 /* Rule #6: Otherwise class SSE is used. */
127 return X86_64_SSE_CLASS;
128}
129
130/* Classify the argument of type TYPE and mode MODE.
131 CLASSES will be filled by the register class used to pass each word
132 of the operand. The number of words is returned. In case the parameter
133 should be passed in memory, 0 is returned. As a special case for zero
134 sized containers, classes[0] will be NO_CLASS and 1 is returned.
135
136 See the x86-64 PS ABI for details. */
137
138static int
139classify_argument(
140 ffi_type* type,
141 enum x86_64_reg_class classes[],
142 size_t byte_offset)
143{
144 switch (type->type)
145 {
146 case FFI_TYPE_UINT8:
147 case FFI_TYPE_SINT8:
148 case FFI_TYPE_UINT16:
149 case FFI_TYPE_SINT16:
150 case FFI_TYPE_UINT32:
151 case FFI_TYPE_SINT32:
152 case FFI_TYPE_UINT64:
153 case FFI_TYPE_SINT64:
154 case FFI_TYPE_POINTER:
155 if (byte_offset + type->size <= 4)
156 classes[0] = X86_64_INTEGERSI_CLASS;
157 else
158 classes[0] = X86_64_INTEGER_CLASS;
159
160 return 1;
161
162 case FFI_TYPE_FLOAT:
163 if (byte_offset == 0)
164 classes[0] = X86_64_SSESF_CLASS;
165 else
166 classes[0] = X86_64_SSE_CLASS;
167
168 return 1;
169
170 case FFI_TYPE_DOUBLE:
171 classes[0] = X86_64_SSEDF_CLASS;
172 return 1;
173
174 case FFI_TYPE_LONGDOUBLE:
175 classes[0] = X86_64_X87_CLASS;
176 classes[1] = X86_64_X87UP_CLASS;
177 return 2;
178
179 case FFI_TYPE_STRUCT:
180 {
181 ffi_type** ptr;
182 int i;
183 enum x86_64_reg_class subclasses[MAX_CLASSES];
184 const int UNITS_PER_WORD = 8;
185 int words =
186 (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
187
188 /* If the struct is larger than 16 bytes, pass it on the stack. */
189 if (type->size > 16)
190 return 0;
191
192 for (i = 0; i < words; i++)
193 classes[i] = X86_64_NO_CLASS;
194
195 /* Merge the fields of structure. */
196 for (ptr = type->elements; *ptr != NULL; ptr++)
197 {
198 byte_offset = ALIGN(byte_offset, (*ptr)->alignment);
199
200 int num = classify_argument(*ptr, subclasses, byte_offset % 8);
201
202 if (num == 0)
203 return 0;
204
205 int pos = byte_offset / 8;
206
207 for (i = 0; i < num; i++)
208 {
209 classes[i + pos] =
210 merge_classes(subclasses[i], classes[i + pos]);
211 }
212
213 byte_offset += (*ptr)->size;
214 }
215
216 /* Final merger cleanup. */
217 for (i = 0; i < words; i++)
218 {
219 /* If one class is MEMORY, everything should be passed in
220 memory. */
221 if (classes[i] == X86_64_MEMORY_CLASS)
222 return 0;
223
224 /* The X86_64_SSEUP_CLASS should be always preceded by
225 X86_64_SSE_CLASS. */
226 if (classes[i] == X86_64_SSEUP_CLASS
227 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
228 classes[i] = X86_64_SSE_CLASS;
229
230 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
231 if (classes[i] == X86_64_X87UP_CLASS
232 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
233 classes[i] = X86_64_SSE_CLASS;
234 }
235
236 return words;
237 }
238
239 default:
240 FFI_ASSERT(0);
241 }
242
243 return 0; /* Never reached. */
244}
245
246/* Examine the argument and return set number of register required in each
247 class. Return zero if parameter should be passed in memory, otherwise
248 the number of registers. */
249static int
250examine_argument(
251 ffi_type* type,
252 enum x86_64_reg_class classes[MAX_CLASSES],
253 _Bool in_return,
254 int* pngpr,
255 int* pnsse)
256{
257 int n = classify_argument(type, classes, 0);
258 int ngpr = 0;
259 int nsse = 0;
260 int i;
261
262 if (n == 0)
263 return 0;
264
265 for (i = 0; i < n; ++i)
266 {
267 switch (classes[i])
268 {
269 case X86_64_INTEGER_CLASS:
270 case X86_64_INTEGERSI_CLASS:
271 ngpr++;
272 break;
273
274 case X86_64_SSE_CLASS:
275 case X86_64_SSESF_CLASS:
276 case X86_64_SSEDF_CLASS:
277 nsse++;
278 break;
279
280 case X86_64_NO_CLASS:
281 case X86_64_SSEUP_CLASS:
282 break;
283
284 case X86_64_X87_CLASS:
285 case X86_64_X87UP_CLASS:
286 case X86_64_COMPLEX_X87_CLASS:
287 return in_return != 0;
288
289 default:
290 abort();
291 }
292 }
293
294 *pngpr = ngpr;
295 *pnsse = nsse;
296
297 return n;
298}
299
300/* Perform machine dependent cif processing. */
301ffi_status
302ffi_prep_cif_machdep(
303 ffi_cif* cif)
304{
305 int gprcount = 0;
306 int ssecount = 0;
307 int flags = cif->rtype->type;
308 int i, avn, n, ngpr, nsse;
309 enum x86_64_reg_class classes[MAX_CLASSES];
310 size_t bytes;
311
312 if (flags != FFI_TYPE_VOID)
313 {
314 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
315
316 if (n == 0)
317 {
318 /* The return value is passed in memory. A pointer to that
319 memory is the first argument. Allocate a register for it. */
320 gprcount++;
321
322 /* We don't have to do anything in asm for the return. */
323 flags = FFI_TYPE_VOID;
324 }
325 else if (flags == FFI_TYPE_STRUCT)
326 {
327 /* Mark which registers the result appears in. */
328 _Bool sse0 = SSE_CLASS_P(classes[0]);
329 _Bool sse1 = n == 2 && SSE_CLASS_P(classes[1]);
330
331 if (sse0 && !sse1)
332 flags |= 1 << 8;
333 else if (!sse0 && sse1)
334 flags |= 1 << 9;
335 else if (sse0 && sse1)
336 flags |= 1 << 10;
337
338 /* Mark the true size of the structure. */
339 flags |= cif->rtype->size << 12;
340 }
341 }
342
343 /* Go over all arguments and determine the way they should be passed.
344 If it's in a register and there is space for it, let that be so. If
345 not, add it's size to the stack byte count. */
346 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
347 {
348 if (examine_argument(cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
349 || gprcount + ngpr > MAX_GPR_REGS
350 || ssecount + nsse > MAX_SSE_REGS)
351 {
352 long align = cif->arg_types[i]->alignment;
353
354 if (align < 8)
355 align = 8;
356
357 bytes = ALIGN(bytes, align);
358 bytes += cif->arg_types[i]->size;
359 }
360 else
361 {
362 gprcount += ngpr;
363 ssecount += nsse;
364 }
365 }
366
367 if (ssecount)
368 flags |= 1 << 11;
369
370 cif->flags = flags;
371 cif->bytes = bytes;
372
373 return FFI_OK;
374}
375
376void
377ffi_call(
378 ffi_cif* cif,
379 void (*fn)(),
380 void* rvalue,
381 void** avalue)
382{
383 enum x86_64_reg_class classes[MAX_CLASSES];
384 char* stack;
385 char* argp;
386 ffi_type** arg_types;
387 int gprcount, ssecount, ngpr, nsse, i, avn;
388 _Bool ret_in_memory;
389 RegisterArgs* reg_args;
390
391 /* Can't call 32-bit mode from 64-bit mode. */
392 FFI_ASSERT(cif->abi == FFI_UNIX64);
393
394 /* If the return value is a struct and we don't have a return value
395 address then we need to make one. Note the setting of flags to
396 VOID above in ffi_prep_cif_machdep. */
397 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
398 && (cif->flags & 0xff) == FFI_TYPE_VOID);
399
400 if (rvalue == NULL && ret_in_memory)
401 rvalue = alloca (cif->rtype->size);
402
403 /* Allocate the space for the arguments, plus 4 words of temp space. */
404 stack = alloca(sizeof(RegisterArgs) + cif->bytes + 4 * 8);
405 reg_args = (RegisterArgs*)stack;
406 argp = stack + sizeof(RegisterArgs);
407
408 gprcount = ssecount = 0;
409
410 /* If the return value is passed in memory, add the pointer as the
411 first integer argument. */
412 if (ret_in_memory)
413 reg_args->gpr[gprcount++] = (long) rvalue;
414
415 avn = cif->nargs;
416 arg_types = cif->arg_types;
417
418 for (i = 0; i < avn; ++i)
419 {
420 size_t size = arg_types[i]->size;
421 int n;
422
423 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
424
425 if (n == 0
426 || gprcount + ngpr > MAX_GPR_REGS
427 || ssecount + nsse > MAX_SSE_REGS)
428 {
429 long align = arg_types[i]->alignment;
430
431 /* Stack arguments are *always* at least 8 byte aligned. */
432 if (align < 8)
433 align = 8;
434
435 /* Pass this argument in memory. */
436 argp = (void *) ALIGN (argp, align);
437 memcpy (argp, avalue[i], size);
438 argp += size;
439 }
440 else
441 { /* The argument is passed entirely in registers. */
442 char *a = (char *) avalue[i];
443 int j;
444
445 for (j = 0; j < n; j++, a += 8, size -= 8)
446 {
447 switch (classes[j])
448 {
449 case X86_64_INTEGER_CLASS:
450 case X86_64_INTEGERSI_CLASS:
451 reg_args->gpr[gprcount] = 0;
452 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
453 gprcount++;
454 break;
455
456 case X86_64_SSE_CLASS:
457 case X86_64_SSEDF_CLASS:
458 reg_args->sse[ssecount++] = *(UINT64 *) a;
459 break;
460
461 case X86_64_SSESF_CLASS:
462 reg_args->sse[ssecount++] = *(UINT32 *) a;
463 break;
464
465 default:
466 abort();
467 }
468 }
469 }
470 }
471
472 ffi_call_unix64 (stack, cif->bytes + sizeof(RegisterArgs),
473 cif->flags, rvalue, fn, ssecount);
474}
475
476extern void ffi_closure_unix64(void);
477
478ffi_status
479ffi_prep_closure(
480 ffi_closure* closure,
481 ffi_cif* cif,
482 void (*fun)(ffi_cif*, void*, void**, void*),
483 void* user_data)
484{
485 if (cif->abi != FFI_UNIX64)
486 return FFI_BAD_ABI;
487
488 volatile unsigned short* tramp =
489 (volatile unsigned short*)&closure->tramp[0];
490
491 tramp[0] = 0xbb49; /* mov <code>, %r11 */
492 *(void* volatile*)&tramp[1] = ffi_closure_unix64;
493 tramp[5] = 0xba49; /* mov <data>, %r10 */
494 *(void* volatile*)&tramp[6] = closure;
495
496 /* Set the carry bit if the function uses any sse registers.
497 This is clc or stc, together with the first byte of the jmp. */
498 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
499 tramp[11] = 0xe3ff; /* jmp *%r11 */
500
501 closure->cif = cif;
502 closure->fun = fun;
503 closure->user_data = user_data;
504
505 return FFI_OK;
506}
507
508int
509ffi_closure_unix64_inner(
510 ffi_closure* closure,
511 void* rvalue,
512 RegisterArgs* reg_args,
513 char* argp)
514{
515 ffi_cif* cif = closure->cif;
516 void** avalue = alloca(cif->nargs * sizeof(void *));
517 ffi_type** arg_types;
518 long i, avn;
519 int gprcount = 0;
520 int ssecount = 0;
521 int ngpr, nsse;
522 int ret;
523
524 ret = cif->rtype->type;
525
526 if (ret != FFI_TYPE_VOID)
527 {
528 enum x86_64_reg_class classes[MAX_CLASSES];
529 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
530
531 if (n == 0)
532 {
533 /* The return value goes in memory. Arrange for the closure
534 return value to go directly back to the original caller. */
535 rvalue = (void *) reg_args->gpr[gprcount++];
536
537 /* We don't have to do anything in asm for the return. */
538 ret = FFI_TYPE_VOID;
539 }
540 else if (ret == FFI_TYPE_STRUCT && n == 2)
541 {
542 /* Mark which register the second word of the structure goes in. */
543 _Bool sse0 = SSE_CLASS_P (classes[0]);
544 _Bool sse1 = SSE_CLASS_P (classes[1]);
545
546 if (!sse0 && sse1)
547 ret |= 1 << 8;
548 else if (sse0 && !sse1)
549 ret |= 1 << 9;
550 }
551 }
552
553 avn = cif->nargs;
554 arg_types = cif->arg_types;
555
556 for (i = 0; i < avn; ++i)
557 {
558 enum x86_64_reg_class classes[MAX_CLASSES];
559 int n;
560
561 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
562
563 if (n == 0
564 || gprcount + ngpr > MAX_GPR_REGS
565 || ssecount + nsse > MAX_SSE_REGS)
566 {
567 long align = arg_types[i]->alignment;
568
569 /* Stack arguments are *always* at least 8 byte aligned. */
570 if (align < 8)
571 align = 8;
572
573 /* Pass this argument in memory. */
574 argp = (void *) ALIGN (argp, align);
575 avalue[i] = argp;
576 argp += arg_types[i]->size;
577 }
578
579#if !defined(X86_DARWIN)
580 /* If the argument is in a single register, or two consecutive
581 registers, then we can use that address directly. */
582 else if (n == 1 || (n == 2 &&
583 SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
584 {
585 // The argument is in a single register.
586 if (SSE_CLASS_P (classes[0]))
587 {
588 avalue[i] = &reg_args->sse[ssecount];
589 ssecount += n;
590 }
591 else
592 {
593 avalue[i] = &reg_args->gpr[gprcount];
594 gprcount += n;
595 }
596 }
597#endif
598
599 /* Otherwise, allocate space to make them consecutive. */
600 else
601 {
602 char *a = alloca (16);
603 int j;
604
605 avalue[i] = a;
606
607 for (j = 0; j < n; j++, a += 8)
608 {
609 if (SSE_CLASS_P (classes[j]))
610 memcpy (a, &reg_args->sse[ssecount++], 8);
611 else
612 memcpy (a, &reg_args->gpr[gprcount++], 8);
613 }
614 }
615 }
616
617 /* Invoke the closure. */
618 closure->fun (cif, rvalue, avalue, closure->user_data);
619
620 /* Tell assembly how to perform return type promotions. */
621 return ret;
622}
623
Ronald Oussoren16766d72009-09-20 18:54:16 +0000624#endif /* __x86_64__ */