blob: 833d5c433b9107e5e13e4037fd7e8e7a9838fcd3 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27/*
28 * FUNCTIONS
29 * mlib_ImageCopy - Direct copy from one image to another.
30 *
31 * SYNOPSIS
32 * mlib_status mlib_ImageCopy(mlib_image *dst,
33 * const mlib_image *src);
34 *
35 * ARGUMENT
36 * dst pointer to output or destination image
37 * src pointer to input or source image
38 *
39 * RESTRICTION
40 * src and dst must have the same size, type and number of channels.
41 * They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT,
42 * MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type.
43 *
44 * DESCRIPTION
45 * Direct copy from one image to another
46 */
47
48#include <stdlib.h>
49#include "mlib_image.h"
50#include "mlib_ImageCheck.h"
51#include "mlib_ImageCopy.h"
52
53/***************************************************************/
54#ifdef _MSC_VER
55#pragma optimize("", off) /* Fix bug 4195132 */
56#endif /* _MSC_VER */
57
58/***************************************************************/
/*
 * TYPE_64BIT is the 8-byte unit used by the bulk copy loops in this file.
 * When i386 is defined the copy is not performed through the mlib_d64
 * data type; a structure of two 32-bit integers is used instead.
 */
#if !defined(i386)

#define TYPE_64BIT mlib_d64

#else /* i386 */

typedef struct {
  mlib_s32 int0;
  mlib_s32 int1;
} two_int;

#define TYPE_64BIT two_int

#endif /* i386 */
72
73/***************************************************************/
74static void mlib_c_ImageCopy_u8(const mlib_image *src,
75 mlib_image *dst);
76static void mlib_c_ImageCopy_s16(const mlib_image *src,
77 mlib_image *dst);
78static void mlib_c_ImageCopy_s32(const mlib_image *src,
79 mlib_image *dst);
80static void mlib_c_ImageCopy_d64(const mlib_image *src,
81 mlib_image *dst);
82static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
83 TYPE_64BIT *dp,
84 mlib_s32 size);
85
86/***************************************************************/
87mlib_status mlib_ImageCopy(mlib_image *dst,
88 const mlib_image *src)
89{
90 mlib_s32 s_offset, d_offset;
91 mlib_s32 size, s_stride, d_stride;
92 mlib_s32 width; /* width in bytes of src and dst */
93 mlib_s32 height; /* height in lines of src and dst */
94 mlib_u8 *sa, *da;
95 mlib_s32 j;
96
97 MLIB_IMAGE_CHECK(src);
98 MLIB_IMAGE_CHECK(dst);
99 MLIB_IMAGE_TYPE_EQUAL(src, dst);
100 MLIB_IMAGE_CHAN_EQUAL(src, dst);
101 MLIB_IMAGE_SIZE_EQUAL(src, dst);
102
103 switch (mlib_ImageGetType(dst)) {
104 case MLIB_BIT:
105 width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); /* size in bits */
106 height = mlib_ImageGetHeight(src);
107 sa = (mlib_u8 *) mlib_ImageGetData(src);
108 da = (mlib_u8 *) mlib_ImageGetData(dst);
109
110 if (!mlib_ImageIsNotOneDvector(src) && !mlib_ImageIsNotOneDvector(dst)) {
111 size = height * (width >> 3);
112 if (!mlib_ImageIsNotAligned8(src) && !mlib_ImageIsNotAligned8(dst) && ((size & 7) == 0)) {
113
114 mlib_c_ImageCopy_a1((TYPE_64BIT *) sa, (TYPE_64BIT *) da, size >> 3);
115 }
116 else {
117
118 mlib_ImageCopy_na(sa, da, size);
119 }
120 }
121 else {
122 s_stride = mlib_ImageGetStride(src);
123 d_stride = mlib_ImageGetStride(dst);
124 s_offset = mlib_ImageGetBitOffset(src); /* in bits */
125 d_offset = mlib_ImageGetBitOffset(dst); /* in bits */
126 if (s_offset == d_offset) {
127 for (j = 0; j < height; j++) {
128 mlib_ImageCopy_bit_al(sa, da, width, s_offset);
129 sa += s_stride;
130 da += d_stride;
131 }
132 }
133 else {
134 for (j = 0; j < height; j++) {
135 mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset);
136 sa += s_stride;
137 da += d_stride;
138 }
139 }
140 }
141
142 break;
143 case MLIB_BYTE:
144 mlib_c_ImageCopy_u8(src, dst);
145 break;
146 case MLIB_SHORT:
147 case MLIB_USHORT:
148 mlib_c_ImageCopy_s16(src, dst);
149 break;
150 case MLIB_INT:
151 case MLIB_FLOAT:
152 mlib_c_ImageCopy_s32(src, dst);
153 break;
154 case MLIB_DOUBLE:
155 mlib_c_ImageCopy_d64(src, dst);
156 break;
157 default:
158 return MLIB_FAILURE; /* MLIB_BIT is not supported here */
159 }
160
161 return MLIB_SUCCESS;
162}
163
164/***************************************************************/
/*
 * PREPAREVARS(type) - declare the locals shared by the typed copy routines.
 *
 * Expects `src` and `dst` image pointers in scope and declares:
 *   psrc, pdst              data pointers viewed as `type *`
 *   src_stride, dst_stride  strides divided by sizeof(type)
 *   src_width               image width multiplied by the channel count
 *   src_height              number of rows
 *   chan, i, j              channel count and loop counters
 *
 * When both strides equal the row width the image is treated as a single
 * row of src_width * src_height elements.
 */
#define PREPAREVARS(type)                                            \
  type    *psrc       = (type *) mlib_ImageGetData(src);             \
  type    *pdst       = (type *) mlib_ImageGetData(dst);             \
  mlib_s32 chan       = mlib_ImageGetChannels(dst);                  \
  mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type);     \
  mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type);     \
  mlib_s32 src_height = mlib_ImageGetHeight(src);                    \
  mlib_s32 src_width  = mlib_ImageGetWidth(src) * chan;              \
  mlib_s32 i, j;                                                     \
                                                                     \
  if (src_stride == src_width && dst_stride == src_width) {          \
    src_width *= src_height;                                         \
    src_height = 1;                                                  \
  }
180
181/***************************************************************/
/*
 * STRIP(pd, ps, w, h, data_type) - element-wise copy of h rows of w
 * elements from ps to pd.  Uses i, j, src_stride and dst_stride from the
 * enclosing scope.  An odd leading element is copied on its own, the rest
 * of the row is copied two elements per iteration.
 */
#define STRIP(pd, ps, w, h, data_type) {                             \
  for (i = 0; i < (h); i++) {                                        \
    data_type *strip_dst = (pd) + i * dst_stride;                    \
    data_type *strip_src = (ps) + i * src_stride;                    \
                                                                     \
    j = (w) & 1;                                                     \
    if (j != 0) {                                                    \
      strip_dst[0] = strip_src[0];                                   \
    }                                                                \
    while (j < (w)) {                                                \
      data_type first  = strip_src[j];                               \
      data_type second = strip_src[j + 1];                           \
                                                                     \
      strip_dst[j]     = first;                                      \
      strip_dst[j + 1] = second;                                     \
      j += 2;                                                        \
    }                                                                \
  }                                                                  \
}
195
196/***************************************************************/
197/*
198 * Both bit offsets of source and distination are the same
199 */
200
201void mlib_ImageCopy_bit_al(const mlib_u8 *sa,
202 mlib_u8 *da,
203 mlib_s32 size,
204 mlib_s32 offset)
205{
206 mlib_s32 b_size, i, j;
207 TYPE_64BIT *sp, *dp;
208 mlib_u8 mask0 = 0xFF;
209 mlib_u8 src, mask;
210
211 if (size <= 0) return;
212
213 if (size <= (8 - offset)) {
214 mask = mask0 << (8 - size);
215 mask >>= offset;
216 src = da[0];
217 da[0] = (src & (~mask)) | (sa[0] & mask);
218 return;
219 }
220
221 mask = mask0 >> offset;
222 src = da[0];
223 da[0] = (src & (~mask)) | (sa[0] & mask);
224 da++;
225 sa++;
226 size = size - 8 + offset;
227 b_size = size >> 3; /* size in bytes */
228
229 for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
230 *da++ = *sa++;
231
232 if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
233 sp = (TYPE_64BIT *) sa;
234 dp = (TYPE_64BIT *) da;
235#ifdef __SUNPRO_C
236#pragma pipeloop(0)
237#endif /* __SUNPRO_C */
238 for (i = 0; j <= (b_size - 8); j += 8, i++) {
239 dp[i] = sp[i];
240 }
241
242 sa += i << 3;
243 da += i << 3;
244 }
245 else {
246#ifdef _NO_LONGLONG
247 if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
248 mlib_u32 *pws, *pwd;
249
250 pws = (mlib_u32 *) sa;
251 pwd = (mlib_u32 *) da;
252#ifdef __SUNPRO_C
253#pragma pipeloop(0)
254#endif /* __SUNPRO_C */
255 for (i = 0; j <= (b_size - 4); j += 4, i++) {
256 pwd[i] = pws[i];
257 }
258
259 sa += i << 2;
260 da += i << 2;
261 }
262 else {
263 mlib_u32 *pws, *pwd, src0, src1;
264 mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
265
266 pwd = (mlib_u32 *) da;
267 pws = (mlib_u32 *) (sa - lshift);
268 lshift <<= 3;
269 rshift = 32 - lshift;
270
271 src1 = pws[0];
272#ifdef __SUNPRO_C
273#pragma pipeloop(0)
274#endif /* __SUNPRO_C */
275 for (i = 0; j <= (b_size - 4); j += 4, i++) {
276 src0 = src1;
277 src1 = pws[i + 1];
278#ifdef _LITTLE_ENDIAN
279 pwd[i] = (src0 >> lshift) | (src1 << rshift);
280#else
281 pwd[i] = (src0 << lshift) | (src1 >> rshift);
282#endif /* _LITTLE_ENDIAN */
283 }
284
285 sa += i << 2;
286 da += i << 2;
287 }
288
289#else
290 mlib_u64 *pws, *pwd, src0, src1;
291 mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
292
293 pwd = (mlib_u64 *) da;
294 pws = (mlib_u64 *) (sa - lshift);
295 lshift <<= 3;
296 rshift = 64 - lshift;
297
298 src1 = pws[0];
299#ifdef __SUNPRO_C
300#pragma pipeloop(0)
301#endif /* __SUNPRO_C */
302 for (i = 0; j <= (b_size - 8); j += 8, i++) {
303 src0 = src1;
304 src1 = pws[i + 1];
305 pwd[i] = (src0 << lshift) | (src1 >> rshift);
306 }
307
308 sa += i << 3;
309 da += i << 3;
310#endif /* _NO_LONGLONG */
311 }
312
313 for (; j < b_size; j++)
314 *da++ = *sa++;
315
316 j = size & 7;
317
318 if (j > 0) {
319 mask = mask0 << (8 - j);
320 src = da[0];
321 da[0] = (src & (~mask)) | (sa[0] & mask);
322 }
323}
324
325/***************************************************************/
/*
 * Copy an image of mlib_u8 elements from src to dst.
 *
 *   src  source image
 *   dst  destination image
 *
 * Rows shorter than 16 elements are copied element-wise by STRIP.
 * Longer rows are copied in wide units, with byte loops for the head
 * and tail of each row.
 */
void mlib_c_ImageCopy_u8(const mlib_image *src,
                         mlib_image       *dst)
{
  PREPAREVARS(mlib_u8);
  if (src_width < 16) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u8);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;

    /* both rows have the same address modulo 8 */
    if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
      /* byte copy up to the next 8-byte boundary */
      for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
        pdst_row[j] = psrc_row[j];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* 8 bytes per iteration */
      for (; j <= (src_width - 8); j += 8) {
        TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));

        *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
      }
    }
    else {

#ifdef _NO_LONGLONG

      /* byte copy until the destination is 4-byte aligned */
      for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
        pdst_row[j] = psrc_row[j];
      }

      /* rows agree modulo 4: plain 32-bit copies */
      if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 4); j += 4) {
          *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
        }
      }
      else {
        mlib_u32 *ps, shl, shr, src0, src1;

        /* round the source pointer down to a 4-byte boundary; shl is the
           distance in bits from that boundary to the wanted byte */
        ps = (mlib_u32 *) (psrc_row + j);
        shl = (mlib_addr) ps & 3;
        ps = (mlib_u32 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 32 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* each output word is merged from two neighbouring aligned words */
        for (; j <= (src_width - 4); j += 4) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }

#else

      /* byte copy until the destination is 8-byte aligned */
      for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
        pdst_row[j] = psrc_row[j];
      }

      {
        mlib_s32 shl, shr;
        mlib_u64 *ps, src0, src1;

        /* round the source pointer down to an 8-byte boundary; shl is the
           distance in bits from that boundary to the wanted byte */
        ps = (mlib_u64 *) (psrc_row + j);
        /* shl and shr are in range [0, 64] */
        shl = (mlib_s32) ((mlib_addr) ps & 7);
        ps = (mlib_u64 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 64 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* each output word is merged from two neighbouring aligned words */
        for (; j <= (src_width - 8); j += 8) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* tail of the row, one byte at a time */
    for (; j < src_width; j++)
      pdst_row[j] = psrc_row[j];
  }
}
432
433/***************************************************************/
/*
 * Copy an image of 16-bit elements (handled as mlib_u16) from src to dst.
 *
 *   src  source image
 *   dst  destination image
 *
 * Rows shorter than 8 elements are copied element-wise by STRIP.
 * Longer rows are copied in wide units, with element loops for the head
 * and tail of each row.
 */
void mlib_c_ImageCopy_s16(const mlib_image *src,
                          mlib_image       *dst)
{
  PREPAREVARS(mlib_u16);
  if (src_width < 8) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u16);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;

    /* both rows have the same address modulo 8 */
    if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
      /* element copy up to the next 8-byte boundary (byte count >> 1) */
      for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
        pdst_row[j] = psrc_row[j];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* four elements (8 bytes) per iteration */
      for (; j <= (src_width - 4); j += 4) {
        TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));

        *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
      }
    }
    else {

#ifdef _NO_LONGLONG

      /* j becomes 1 when the destination row is not 4-byte aligned; one
         element is then copied so that pdst_row + j is aligned */
      if (j = (((mlib_addr) pdst_row & 2) != 0)) {
        pdst_row[0] = psrc_row[0];
      }

      /* rows agree modulo 4: plain 32-bit copies */
      if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 2); j += 2) {
          *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
        }
      }
      else {
        mlib_u32 *ps, src0, src1;

        /* start one element before the wanted source position; each
           output word is merged from two neighbouring 32-bit words */
        ps = (mlib_u32 *) (psrc_row + j - 1);
        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 2); j += 2) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
#else
          *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }

#else

      /* element copy until the destination is 8-byte aligned */
      for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
        pdst_row[j] = psrc_row[j];
      }

      {
        mlib_s32 shl, shr;
        mlib_u64 *ps, src0, src1;

        /* round the source pointer down to an 8-byte boundary; shl is the
           distance in bits from that boundary to the wanted element */
        ps = (mlib_u64 *) (psrc_row + j);
        shl = (mlib_s32) ((mlib_addr) ps & 7);
        ps = (mlib_u64 *) ((mlib_addr) ps - shl);
        shl <<= 3;
        shr = 64 - shl;

        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* each output word is merged from two neighbouring aligned words */
        for (; j <= (src_width - 4); j += 4) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
#else
          *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* tail of the row, one element at a time */
    for (; j < src_width; j++)
      pdst_row[j] = psrc_row[j];
  }
}
534
535/***************************************************************/
/*
 * Copy an image of 32-bit elements (handled as mlib_u32) from src to dst.
 *
 *   src  source image
 *   dst  destination image
 *
 * Rows shorter than 4 elements are copied element-wise by STRIP.
 * Longer rows are copied in 8-byte units where possible, with element
 * copies for the head and tail of each row.
 */
void mlib_c_ImageCopy_s32(const mlib_image *src,
                          mlib_image       *dst)
{
  PREPAREVARS(mlib_u32);
  if (src_width < 4) {
    STRIP(pdst, psrc, src_width, src_height, mlib_u32);
    return;
  }

  for (i = 0; i < src_height; i++) {
    mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;

    /* both rows have the same address modulo 8 */
    if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
      /* j becomes 1 when the row address has bit 2 set; one element is
         then copied ahead of the 8-byte loop */
      if (j = ((mlib_s32) ((mlib_addr) psrc_row & 4) >> 2)) {
        pdst_row[0] = psrc_row[0];
      }

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* two elements (8 bytes) per iteration */
      for (; j <= (src_width - 2); j += 2) {
        TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));

        *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
      }
    }
    else {

#ifdef _NO_LONGLONG

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      /* without a 64-bit integer type: one 32-bit element per iteration */
      for (j = 0; j <= (src_width - 1); j++) {
        *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
      }

#else

      {
        mlib_u64 *ps, src0, src1;

        /* j becomes 1 when the destination address has bit 2 set; one
           element is then copied ahead of the 8-byte loop */
        if (j = ((mlib_s32) ((mlib_addr) pdst_row & 4) >> 2))
          pdst_row[0] = psrc_row[0];
        /* start one element before the wanted source position; each
           output word is merged from two neighbouring 64-bit words */
        ps = (mlib_u64 *) (psrc_row + j - 1);
        src1 = ps[0];
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (; j <= (src_width - 2); j += 2) {
          src0 = src1;
          src1 = ps[1];
#ifdef _LITTLE_ENDIAN
          *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
#else
          *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
#endif /* _LITTLE_ENDIAN */
          ps++;
        }
      }
#endif /* _NO_LONGLONG */
    }

    /* tail of the row, one element at a time */
    for (; j < src_width; j++)
      pdst_row[j] = psrc_row[j];
  }
}
603
604/***************************************************************/
605void mlib_c_ImageCopy_d64(const mlib_image *src,
606 mlib_image *dst)
607{
608 PREPAREVARS(mlib_d64);
609 for (i = 0; i < src_height; i++) {
610 mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
611
612#ifdef __SUNPRO_C
613#pragma pipeloop(0)
614#endif /* __SUNPRO_C */
615 for (j = 0; j < src_width; j++)
616 *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
617 }
618}
619
620/***************************************************************/
621/*
622 * Both source and destination image data are 1 - d vectors and
623 * 8 - byte aligned. And size is in 8 - bytes.
624 */
625
626void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
627 TYPE_64BIT *dp,
628 mlib_s32 size)
629{
630 mlib_s32 i;
631
632#ifdef __SUNPRO_C
633#pragma pipeloop(0)
634#endif /* __SUNPRO_C */
635 for (i = 0; i < size; i++) {
636 *dp++ = *sp++;
637 }
638}
639
640/***************************************************************/
/*
 * Word type used by mlib_ImageCopy_na, with its width in bits (BSIZE)
 * and in bytes (SIZE): 32-bit when _NO_LONGLONG is defined, 64-bit
 * otherwise.
 */
#ifdef _NO_LONGLONG
#define TYPE   mlib_u32
#define BSIZE  32
#define SIZE   4
#else /* _NO_LONGLONG */
#define TYPE   mlib_u64
#define BSIZE  64
#define SIZE   8
#endif /* _NO_LONGLONG */
650
651/***************************************************************/
652void mlib_ImageCopy_na(const mlib_u8 *sp,
653 mlib_u8 *dp,
654 mlib_s32 n)
655{
656 mlib_s32 shr, shl;
657 TYPE *tmp, s0, s1;
658
659 if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
660
661#ifdef __SUNPRO_C
662#pragma pipeloop(0)
663#endif /* __SUNPRO_C */
664 for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
665 *dp++ = *sp++;
666
667#ifdef _NO_LONGLONG
668
669 if (((mlib_addr) sp & (SIZE - 1)) == 0) {
670 for (; n > SIZE; n -= SIZE) {
671 *(TYPE *) dp = *(TYPE *) sp;
672 dp += SIZE;
673 sp += SIZE;
674 }
675 }
676 else
677#endif /* _NO_LONGLONG */
678 {
679 tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
680 /* shl and shr do not exceed 64 here */
681 shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
682 shr = BSIZE - shl;
683 s0 = *tmp++;
684
685#ifdef __SUNPRO_C
686#pragma pipeloop(0)
687#endif /* __SUNPRO_C */
688 for (; n > SIZE; n -= SIZE) {
689 s1 = *tmp++;
690#ifdef _LITTLE_ENDIAN
691 *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
692#else
693 *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
694#endif /* _LITTLE_ENDIAN */
695 s0 = s1;
696 dp += SIZE;
697 sp += SIZE;
698 }
699 }
700 }
701 else {
702#ifdef __SUNPRO_C
703#pragma pipeloop(0)
704#endif /* __SUNPRO_C */
705 for (; (n > 0) && (mlib_addr) dp & 7; n--)
706 *dp++ = *sp++;
707
708#ifdef __SUNPRO_C
709#pragma pipeloop(0)
710#endif /* __SUNPRO_C */
711 for (; n > 8; n -= 8) {
712 *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
713 dp += 8;
714 sp += 8;
715 }
716 }
717
718#ifdef __SUNPRO_C
719#pragma pipeloop(0)
720#endif /* __SUNPRO_C */
721 for (; n > 0; n--)
722 *dp++ = *sp++;
723}
724
725/***************************************************************/
726#ifdef _MSC_VER
727#pragma optimize("", on)
728#endif /* _MSC_VER */
729
730/***************************************************************/