/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17#include "cache.h"
18
/* Exported name of the routine; a wrapper may pre-define MEMSET to
   build the same code under a different symbol.  */
#ifndef MEMSET
# define MEMSET		android_memset16
#endif

/* L(label): spell LABEL with a ".L" prefix so it stays an
   assembler-local symbol.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n): align the location counter to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY(name): open a global function symbol aligned to 16 bytes and
   start its CFI region.  Must be paired with END(name).  */
#ifndef ENTRY
# define ENTRY(name)			\
	.globl name;			\
	.type name, @function;		\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif
53
/* One jump-table slot: the distance from the table base B to the
   target label I.  The tables emit it with .int.  */
#define JMPTBL(I, B)	I - B

/* Branch to an entry in a jump table of base-relative offsets.
     TABLE  label of the table.
     INDEX  64-bit register holding the entry number; it is overwritten
            with the absolute address of the target before the jump.
     SCALE  scale applied to INDEX when addressing the table.
   Clobbers %r11, which holds the table base.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	leaq	TABLE(%rip), %r11;			\
	movslq	(%r11, INDEX, SCALE), INDEX;		\
	leaq	(%r11, INDEX), INDEX;			\
	jmp	*INDEX
64
/*
 * MEMSET entry point.
 *   In:  rdi = destination address
 *        si  = 16-bit pattern
 *        rdx = count; it is halved below to get the number of 16-bit
 *              words, so it is presumably a byte count (confirm against
 *              the C prototype).
 *   Uses: rcx (pattern), rsi, r11, xmm0 as scratch.
 * Counts below 32 words are finished by a jump into the write_Nwords
 * tails; larger counts go to L(32wordsormore).
 */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	/* Replicate the 16-bit pattern into both halves of ECX.  */
	movzwl	%si, %ecx
	shll	$16, %esi
	orl	%esi, %ecx
	shrq	$1, %rdx		/* rdx = number of 16-bit words */

	cmpq	$32, %rdx
	jae	L(32wordsormore)

L(write_less32words):
	/* Point rdi one past the last word; the tails store at negative
	   offsets from it.  */
	leaq	(%rdi, %rdx, 2), %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words), %rdx, 4)
80
/* Dispatch table for counts of 0..31 words.  Slot N holds the offset of
   L(write_Nwords) relative to the start of the table.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
#define WORDS_SLOT(n)	JMPTBL (L(write_##n##words), L(table_less32words))
L(table_less32words):
	.int	WORDS_SLOT(0),  WORDS_SLOT(1),  WORDS_SLOT(2),  WORDS_SLOT(3)
	.int	WORDS_SLOT(4),  WORDS_SLOT(5),  WORDS_SLOT(6),  WORDS_SLOT(7)
	.int	WORDS_SLOT(8),  WORDS_SLOT(9),  WORDS_SLOT(10), WORDS_SLOT(11)
	.int	WORDS_SLOT(12), WORDS_SLOT(13), WORDS_SLOT(14), WORDS_SLOT(15)
	.int	WORDS_SLOT(16), WORDS_SLOT(17), WORDS_SLOT(18), WORDS_SLOT(19)
	.int	WORDS_SLOT(20), WORDS_SLOT(21), WORDS_SLOT(22), WORDS_SLOT(23)
	.int	WORDS_SLOT(24), WORDS_SLOT(25), WORDS_SLOT(26), WORDS_SLOT(27)
	.int	WORDS_SLOT(28), WORDS_SLOT(29), WORDS_SLOT(30), WORDS_SLOT(31)
#undef WORDS_SLOT
	.popsection
117
/* Tails for word counts that are a multiple of 4 (28, 24, ..., 4, 0).
   rdi points one past the end of the fill; each label stores the two
   dwords (4 words) furthest from rdi and falls through to the next.  */
	ALIGN (4)
L(write_28words):
	movl	%ecx, -56(%rdi)
	movl	%ecx, -52(%rdi)
L(write_24words):
	movl	%ecx, -48(%rdi)
	movl	%ecx, -44(%rdi)
L(write_20words):
	movl	%ecx, -40(%rdi)
	movl	%ecx, -36(%rdi)
L(write_16words):
	movl	%ecx, -32(%rdi)
	movl	%ecx, -28(%rdi)
L(write_12words):
	movl	%ecx, -24(%rdi)
	movl	%ecx, -20(%rdi)
L(write_8words):
	movl	%ecx, -16(%rdi)
	movl	%ecx, -12(%rdi)
L(write_4words):
	movl	%ecx, -8(%rdi)
	movl	%ecx, -4(%rdi)
L(write_0words):
	ret
142
/* Tails for word counts of the form 4k+1 (29, 25, ..., 5, 1).
   rdi points one past the end of the fill; each label stores 4 words
   as two dwords and falls through.  The final single word is written
   from CX.  */
	ALIGN (4)
L(write_29words):
	movl	%ecx, -58(%rdi)
	movl	%ecx, -54(%rdi)
L(write_25words):
	movl	%ecx, -50(%rdi)
	movl	%ecx, -46(%rdi)
L(write_21words):
	movl	%ecx, -42(%rdi)
	movl	%ecx, -38(%rdi)
L(write_17words):
	movl	%ecx, -34(%rdi)
	movl	%ecx, -30(%rdi)
L(write_13words):
	movl	%ecx, -26(%rdi)
	movl	%ecx, -22(%rdi)
L(write_9words):
	movl	%ecx, -18(%rdi)
	movl	%ecx, -14(%rdi)
L(write_5words):
	movl	%ecx, -10(%rdi)
	movl	%ecx, -6(%rdi)
L(write_1words):
	movw	%cx, -2(%rdi)
	ret
168
/* Tails for word counts of the form 4k+2 (30, 26, ..., 6, 2).
   rdi points one past the end of the fill; each label stores 4 words
   as two dwords and falls through.  The final two words are written
   as one dword.  */
	ALIGN (4)
L(write_30words):
	movl	%ecx, -60(%rdi)
	movl	%ecx, -56(%rdi)
L(write_26words):
	movl	%ecx, -52(%rdi)
	movl	%ecx, -48(%rdi)
L(write_22words):
	movl	%ecx, -44(%rdi)
	movl	%ecx, -40(%rdi)
L(write_18words):
	movl	%ecx, -36(%rdi)
	movl	%ecx, -32(%rdi)
L(write_14words):
	movl	%ecx, -28(%rdi)
	movl	%ecx, -24(%rdi)
L(write_10words):
	movl	%ecx, -20(%rdi)
	movl	%ecx, -16(%rdi)
L(write_6words):
	movl	%ecx, -12(%rdi)
	movl	%ecx, -8(%rdi)
L(write_2words):
	movl	%ecx, -4(%rdi)
	ret
194
/* Tails for word counts of the form 4k+3 (31, 27, ..., 7, 3).
   rdi points one past the end of the fill; each label stores 4 words
   as two dwords and falls through.  The final three words are written
   as one dword plus one word.  */
	ALIGN (4)
L(write_31words):
	movl	%ecx, -62(%rdi)
	movl	%ecx, -58(%rdi)
L(write_27words):
	movl	%ecx, -54(%rdi)
	movl	%ecx, -50(%rdi)
L(write_23words):
	movl	%ecx, -46(%rdi)
	movl	%ecx, -42(%rdi)
L(write_19words):
	movl	%ecx, -38(%rdi)
	movl	%ecx, -34(%rdi)
L(write_15words):
	movl	%ecx, -30(%rdi)
	movl	%ecx, -26(%rdi)
L(write_11words):
	movl	%ecx, -22(%rdi)
	movl	%ecx, -18(%rdi)
L(write_7words):
	movl	%ecx, -14(%rdi)
	movl	%ecx, -10(%rdi)
L(write_3words):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret
221
/* Large fill (32 words or more).  On entry rdx is the word count, ecx
   holds the pattern in both halves and rdi is the destination.  This
   part brings rdi to a 16-byte boundary and then picks between the
   128-byte store loops and the aligned tail table.  */
	ALIGN (4)
L(32wordsormore):
	shl	$1, %rdx		/* back to bytes; always even */
	test	$0x01, %edi
	jz	L(aligned2bytes)
	/* Odd destination: cover the first and last 4 bytes with
	   unaligned dword stores, then shrink the region by one byte at
	   each end so it starts on an even address.  The pattern is
	   rotated by 8 bits to match the shifted start.  */
	mov	%ecx, (%rdi)
	mov	%ecx, -4(%rdi, %rdx)
	sub	$2, %rdx
	add	$1, %rdi
	rol	$8, %ecx
L(aligned2bytes):
	/* Fill xmm0 with the pattern.  */
	movd	%ecx, %xmm0
	pshufd	$0, %xmm0, %xmm0

	testl	$0xf, %edi
	jz	L(aligned_16)
	/* rdi is not 16-byte aligned and rdx >= 62 bytes.  Store 16
	   unaligned bytes, round rdi up to the next 16-byte boundary and
	   take the skipped bytes off rdx (rsi = old rdi - new rdi, a
	   negative value).  */
	movdqu	%xmm0, (%rdi)
	mov	%rdi, %rsi
	and	$-16, %rdi
	add	$16, %rdi
	sub	%rdi, %rsi
	add	%rsi, %rdx

	ALIGN (4)
L(aligned_16):
	/* rdx is a byte count, so compare unsigned.  A signed branch
	   would send a count with bit 63 set into the tail table with
	   an out-of-range index.  */
	cmp	$128, %rdx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* Fewer than 128 bytes left: point rdi past the end and
	   dispatch on the remaining word count.  */
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
256
/* Fill of 128 bytes or more with rdi 16-byte aligned, xmm0 holding the
   pattern and rdx the remaining byte count.  Counts above
   SHARED_CACHE_SIZE go to the non-temporal loop; the rest use the
   4x-unrolled loop below, which stores 128 bytes per step and leaves
   fewer than 128 bytes for the tail table.
   All branches on rdx are unsigned because it is a byte count.
   NOTE(review): assumes SHARED_CACHE_SIZE (from cache.h) is a positive
   value that fits a 32-bit immediate -- confirm.  */

/* Store 128 bytes of pattern at rdi with aligned stores and advance rdi.
   Leaves the flags untouched.  */
#define STORE_128_ALIGNED			\
	movdqa	%xmm0, (%rdi);			\
	movdqa	%xmm0, 0x10(%rdi);		\
	movdqa	%xmm0, 0x20(%rdi);		\
	movdqa	%xmm0, 0x30(%rdi);		\
	movdqa	%xmm0, 0x40(%rdi);		\
	movdqa	%xmm0, 0x50(%rdi);		\
	movdqa	%xmm0, 0x60(%rdi);		\
	movdqa	%xmm0, 0x70(%rdi);		\
	lea	128(%rdi), %rdi

	ALIGN (4)
L(128bytesormore):
	cmp	$SHARED_CACHE_SIZE, %rdx
	ja	L(128bytesormore_nt)

L(128bytesormore_normal):
	sub	$128, %rdx
	STORE_128_ALIGNED
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	STORE_128_ALIGNED
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	STORE_128_ALIGNED
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	STORE_128_ALIGNED
	cmp	$128, %rdx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Fewer than 128 bytes left: point rdi past the end and
	   dispatch on the remaining word count.  */
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
#undef STORE_128_ALIGNED
319
/* Non-temporal variant of the 128-byte loop, taken when the remaining
   count exceeds SHARED_CACHE_SIZE.  rdi is 16-byte aligned, xmm0 holds
   the pattern and rdx the remaining byte count (at least 128 on
   entry).  */
	ALIGN (4)
L(128bytesormore_nt):
	sub	$128, %rdx
	movntdq	%xmm0, (%rdi)
	movntdq	%xmm0, 0x10(%rdi)
	movntdq	%xmm0, 0x20(%rdi)
	movntdq	%xmm0, 0x30(%rdi)
	movntdq	%xmm0, 0x40(%rdi)
	movntdq	%xmm0, 0x50(%rdi)
	movntdq	%xmm0, 0x60(%rdi)
	movntdq	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	/* rdx is a byte count, so compare unsigned.  */
	cmp	$128, %rdx
	jae	L(128bytesormore_nt)

	/* Order the non-temporal stores ahead of the ordinary stores
	   made by the tail.  */
	sfence
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
339
/* Dispatch table for aligned tails of 0..126 bytes, indexed by the
   remaining word count.  Slot K holds the offset of
   L(aligned_16_<2K>bytes) relative to the start of the table.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
#define TAIL_SLOT(n)	JMPTBL (L(aligned_16_##n##bytes), L(table_16_128bytes))
L(table_16_128bytes):
	.int	TAIL_SLOT(0),   TAIL_SLOT(2),   TAIL_SLOT(4),   TAIL_SLOT(6)
	.int	TAIL_SLOT(8),   TAIL_SLOT(10),  TAIL_SLOT(12),  TAIL_SLOT(14)
	.int	TAIL_SLOT(16),  TAIL_SLOT(18),  TAIL_SLOT(20),  TAIL_SLOT(22)
	.int	TAIL_SLOT(24),  TAIL_SLOT(26),  TAIL_SLOT(28),  TAIL_SLOT(30)
	.int	TAIL_SLOT(32),  TAIL_SLOT(34),  TAIL_SLOT(36),  TAIL_SLOT(38)
	.int	TAIL_SLOT(40),  TAIL_SLOT(42),  TAIL_SLOT(44),  TAIL_SLOT(46)
	.int	TAIL_SLOT(48),  TAIL_SLOT(50),  TAIL_SLOT(52),  TAIL_SLOT(54)
	.int	TAIL_SLOT(56),  TAIL_SLOT(58),  TAIL_SLOT(60),  TAIL_SLOT(62)
	.int	TAIL_SLOT(64),  TAIL_SLOT(66),  TAIL_SLOT(68),  TAIL_SLOT(70)
	.int	TAIL_SLOT(72),  TAIL_SLOT(74),  TAIL_SLOT(76),  TAIL_SLOT(78)
	.int	TAIL_SLOT(80),  TAIL_SLOT(82),  TAIL_SLOT(84),  TAIL_SLOT(86)
	.int	TAIL_SLOT(88),  TAIL_SLOT(90),  TAIL_SLOT(92),  TAIL_SLOT(94)
	.int	TAIL_SLOT(96),  TAIL_SLOT(98),  TAIL_SLOT(100), TAIL_SLOT(102)
	.int	TAIL_SLOT(104), TAIL_SLOT(106), TAIL_SLOT(108), TAIL_SLOT(110)
	.int	TAIL_SLOT(112), TAIL_SLOT(114), TAIL_SLOT(116), TAIL_SLOT(118)
	.int	TAIL_SLOT(120), TAIL_SLOT(122), TAIL_SLOT(124), TAIL_SLOT(126)
#undef TAIL_SLOT
	.popsection
408
/* Aligned tails for remainders that are a multiple of 16 bytes
   (112, 96, ..., 16, 0).  rdi points one past the end of the fill and
   the dispatchers reach here with (rdi - remainder) 16-byte aligned,
   so every movdqa target is aligned.  Each label stores one 16-byte
   chunk and falls through.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%rdi)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%rdi)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%rdi)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%rdi)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%rdi)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%rdi)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%rdi)
L(aligned_16_0bytes):
	ret
426
/* Aligned tails for remainders of 16k+2 bytes (114, 98, ..., 18, 2).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 2 bytes are written from CX.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%rdi)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%rdi)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%rdi)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%rdi)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%rdi)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%rdi)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%rdi)
L(aligned_16_2bytes):
	movw	%cx, -2(%rdi)
	ret
445
/* Aligned tails for remainders of 16k+4 bytes (116, 100, ..., 20, 4).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 4 bytes are written from ECX.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%rdi)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%rdi)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%rdi)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%rdi)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%rdi)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%rdi)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%rdi)
L(aligned_16_4bytes):
	movl	%ecx, -4(%rdi)
	ret
464
/* Aligned tails for remainders of 16k+6 bytes (118, 102, ..., 22, 6).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 6 bytes are written as a dword plus a word.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%rdi)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%rdi)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%rdi)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%rdi)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%rdi)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%rdi)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%rdi)
L(aligned_16_6bytes):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret
484
/* Aligned tails for remainders of 16k+8 bytes (120, 104, ..., 24, 8).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 8 bytes are written from the low half of xmm0.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%rdi)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%rdi)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%rdi)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%rdi)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%rdi)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%rdi)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%rdi)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%rdi)
	ret
503
/* Aligned tails for remainders of 16k+10 bytes (122, 106, ..., 26, 10).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 10 bytes are written as a qword plus a word.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%rdi)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%rdi)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%rdi)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%rdi)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%rdi)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%rdi)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%rdi)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%rdi)
	movw	%cx, -2(%rdi)
	ret
523
/* Aligned tails for remainders of 16k+12 bytes (124, 108, ..., 28, 12).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 12 bytes are written as a qword plus a dword.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%rdi)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%rdi)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%rdi)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%rdi)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%rdi)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%rdi)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%rdi)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%rdi)
	movl	%ecx, -4(%rdi)
	ret
543
/* Aligned tails for remainders of 16k+14 bytes (126, 110, ..., 30, 14).
   rdi points one past the end of the fill with (rdi - remainder)
   16-byte aligned.  Each label stores one 16-byte chunk and falls
   through; the last 14 bytes are written as a qword, a dword and a
   word.  END closes the function opened by ENTRY (MEMSET).  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%rdi)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%rdi)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%rdi)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%rdi)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%rdi)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%rdi)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%rdi)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%rdi)
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

END (MEMSET)