initial commit of lk (little kernel) project
diff --git a/app/stringtests/mymemcpy.S b/app/stringtests/mymemcpy.S
new file mode 100644
index 0000000..975500a
--- /dev/null
+++ b/app/stringtests/mymemcpy.S
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2008 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+#include <arch/arm/cores.h>
+
+.text
+.align 2
+
+ .global mymemcpy
+mymemcpy: // void *mymemcpy(void *dst, const void *src, size_t len): r0 = dst, r1 = src, r2 = len; returns dst
+ // check for zero length copy or the same pointer
+ cmp r2, #0
+ cmpne r1, r0
+ bxeq lr // nothing to do; r0 still holds dst
+
+ // save a few registers for use and the return code (input dst)
+ stmfd sp!, {r0, r4, r5, lr}
+
+ // check for forwards overlap (dst > src, distance < len)
+ subs r3, r0, r1 // r3 = dst - src
+ cmpgt r2, r3 // only evaluated when dst > src: compare len against the distance
+ bgt .L_forwardoverlap
+
+ // check for a short copy len.
+ // 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
+ // wordwise copy worth of work to be done.
+ cmp r2, #(16+4)
+ blt .L_bytewise
+
+ // see if they are similarly aligned on 4 byte boundaries
+ eor r3, r0, r1
+ tst r3, #3
+ bne .L_bytewise // dissimilarly aligned, nothing we can do (for now)
+
+ // check for 16 byte alignment on dst.
+ // this will also catch src being not 4 byte aligned, since it is similarly 4 byte
+ // aligned with dst at this point.
+ tst r0, #15
+ bne .L_not16bytealigned
+
+ // check to see if we have at least 32 bytes of data to copy.
+ // if not, just revert to wordwise copy
+ cmp r2, #32
+ blt .L_wordwise
+
+.L_bigcopy:
+ // copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
+ // and we need at least 32 bytes remaining to copy
+
+ // save r6-r7 for use in the big copy
+ stmfd sp!, {r6-r7}
+
+ sub r2, r2, #32 // subtract an extra 32 from the len so we can avoid an extra compare
+
+.L_bigcopy_loop:
+ ldmia r1!, {r4, r5, r6, r7}
+ stmia r0!, {r4, r5, r6, r7}
+ ldmia r1!, {r4, r5, r6, r7}
+ subs r2, r2, #32 // sets the flags tested by the bge below
+ stmia r0!, {r4, r5, r6, r7} // stmia leaves the flags alone
+ bge .L_bigcopy_loop
+
+ // restore r6-r7
+ ldmfd sp!, {r6-r7}
+
+ // see if we are done
+ adds r2, r2, #32 // undo the extra 32: r2 = bytes still to copy
+ beq .L_done
+
+ // less than 4 bytes left?
+ cmp r2, #4
+ blt .L_bytewise
+
+.L_wordwise:
+ // copy 4 bytes at a time.
+ // src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
+ subs r2, r2, #4 // bias len by 4 so the loop can branch directly on its own subs
+
+.L_wordwise_loop:
+ ldr r3, [r1], #4
+ subs r2, r2, #4
+ str r3, [r0], #4
+ bge .L_wordwise_loop
+
+ // correct the remaining len and test for completion
+ adds r2, r2, #4
+ beq .L_done
+
+.L_bytewise:
+ // simple bytewise copy
+ ldrb r3, [r1], #1
+ subs r2, r2, #1
+ strb r3, [r0], #1
+ bgt .L_bytewise
+
+.L_done:
+ // load dst for return and restore r4,r5
+#if ARM_ARCH_LEVEL >= 5
+ ldmfd sp!, {r0, r4, r5, pc}
+#else
+ ldmfd sp!, {r0, r4, r5, lr}
+ bx lr
+#endif
+
+.L_not16bytealigned:
+ // dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
+ // src is guaranteed to be similarly word aligned with dst.
+
+ // set the condition flags based on the alignment.
+ lsl r12, r0, #28 // low 4 bits of dst moved up to bits 31:28
+ rsb r12, r12, #0 // negate: bits 31:28 now hold the byte count needed to reach 16 byte alignment
+ msr CPSR_f, r12 // move into NZCV fields in CPSR (N = 8, Z = 4, C = 2, V = 1 bytes)
+
+ // move as many bytes as necessary to get the dst aligned
+ ldrvsb r3, [r1], #1 // V set: 1 byte
+ ldrcsh r4, [r1], #2 // C set: 2 bytes
+ ldreq r5, [r1], #4 // Z set: 4 bytes
+
+ strvsb r3, [r0], #1
+ strcsh r4, [r0], #2
+ streq r5, [r0], #4
+
+ ldmmiia r1!, {r3-r4} // N set: 8 bytes
+ stmmiia r0!, {r3-r4}
+
+ // fix the remaining len
+ sub r2, r2, r12, lsr #28 // subtract the number of alignment bytes just copied
+
+ // test to see what we should do now
+ cmp r2, #32
+ bge .L_bigcopy
+ b .L_wordwise
+
+ // src and dst overlap 'forwards': dst > src and dst < src + len
+.L_forwardoverlap:
+
+ // do a bytewise reverse copy for now
+ add r1, r1, r2 // r1 = one past the last src byte
+ add r0, r0, r2 // r0 = one past the last dst byte
+
+.L_bytewisereverse:
+ // simple bytewise reverse copy; pre-decrement so the first access is byte len-1, not the byte past the end
+ ldrb r3, [r1, #-1]!
+ subs r2, r2, #1
+ strb r3, [r0, #-1]!
+ bgt .L_bytewisereverse
+
+ b .L_done
+
diff --git a/app/stringtests/mymemset.S b/app/stringtests/mymemset.S
new file mode 100644
index 0000000..2d35d4f
--- /dev/null
+++ b/app/stringtests/mymemset.S
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2008 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <asm.h>
+#include <arch/arm/cores.h>
+
+.text
+.align 2
+
+/* void *mymemset(void *s, int c, size_t n): r0 = s, r1 = c, r2 = n; returns s */
+ .global mymemset
+mymemset:
+ // check for zero length
+ cmp r2, #0
+ bxeq lr // r0 still holds the original pointer
+
+ // save the original pointer
+ mov r12, r0
+
+ // short memsets aren't worth optimizing
+ cmp r2, #(32 + 16)
+ blt .L_bytewise // r1 is not replicated on this path; strb only stores its low byte
+
+ // fill a 32 bit register with the 8 bit value
+ and r1, r1, #0xff
+ orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16
+
+ // check for 16 byte alignment
+ tst r0, #15
+ bne .L_not16bytealigned
+
+.L_bigset:
+ // dump some registers to make space for our values
+ stmfd sp!, { r4-r5 }
+
+ // fill a bunch of registers with the set value
+ mov r3, r1
+ mov r4, r1
+ mov r5, r1
+
+ // prepare the count register so we can avoid an extra compare
+ sub r2, r2, #32
+
+ // 32 bytes at a time
+.L_bigset_loop:
+ stmia r0!, { r1, r3, r4, r5 }
+ subs r2, r2, #32 // sets the flags tested by the bge below
+ stmia r0!, { r1, r3, r4, r5 } // stmia leaves the flags alone
+ bge .L_bigset_loop
+
+ // restore our dumped registers
+ ldmfd sp!, { r4-r5 }
+
+ // see if we're done
+ adds r2, r2, #32 // undo the extra 32: r2 = bytes still to set
+ beq .L_done
+
+.L_bytewise:
+ // bytewise memset
+ subs r2, r2, #1
+ strb r1, [r0], #1
+ bgt .L_bytewise
+
+.L_done:
+ // restore the base pointer as return value
+ mov r0, r12
+ bx lr
+
+.L_not16bytealigned:
+ // dst is not 16 byte aligned, so we will set up to 15 bytes to get it aligned.
+
+ // set the condition flags based on the alignment.
+ lsl r3, r0, #28 // low 4 bits of dst moved up to bits 31:28
+ rsb r3, r3, #0 // negate: bits 31:28 now hold the byte count needed to reach 16 byte alignment
+ msr CPSR_f, r3 // move into NZCV fields in CPSR (N = 8, Z = 4, C = 2, V = 1 bytes)
+
+ // move as many bytes as necessary to get the dst aligned
+ strvsb r1, [r0], #1 // V set: 1 byte
+ strcsh r1, [r0], #2 // C set: 2 bytes
+ streq r1, [r0], #4 // Z set: 4 bytes
+ strmi r1, [r0], #4 // N set: first 4 of 8 bytes
+ strmi r1, [r0], #4 // N set: second 4 of 8 bytes
+
+ // fix the remaining len
+ sub r2, r2, r3, lsr #28 // subtract the number of alignment bytes just written
+
+ // do the large memset
+ b .L_bigset
+
diff --git a/app/stringtests/rules.mk b/app/stringtests/rules.mk
new file mode 100644
index 0000000..853a4fa
--- /dev/null
+++ b/app/stringtests/rules.mk
@@ -0,0 +1,6 @@
+# string test app: the C test driver plus the two assembly routines it exercises
+LOCAL_DIR := $(GET_LOCAL_DIR)
+OBJS += \
+ $(LOCAL_DIR)/string_tests.o \
+ $(LOCAL_DIR)/mymemcpy.o \
+ $(LOCAL_DIR)/mymemset.o
diff --git a/app/stringtests/string_tests.c b/app/stringtests/string_tests.c
new file mode 100644
index 0000000..6f15877
--- /dev/null
+++ b/app/stringtests/string_tests.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2008 Travis Geiselbrecht
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <debug.h>
+#include <string.h>
+#include <malloc.h>
+#include <platform.h>
+#include <kernel/thread.h>
+
+static uint8_t *src; /* src/dst: used by the benchmarks and as the libc reference pair when validating */
+static uint8_t *dst;
+
+static uint8_t *src2; /* src2/dst2: pair handed to mymemcpy/mymemset when validating */
+static uint8_t *dst2;
+
+#define BUFFER_SIZE (1024*1024) /* bytes processed per call in the benchmarks */
+#define ITERATIONS 16 /* calls per timed benchmark run */
+
+extern void *mymemcpy(void *dst, const void *src, size_t len); /* routine under test, from mymemcpy.S */
+extern void *mymemset(void *dst, int c, size_t len); /* routine under test, from mymemset.S */
+
+static void *null_memcpy(void *dst, const void *src, size_t len)
+{
+ return dst; /* copies nothing: bench_memcpy() times this as a call/loop overhead baseline */
+}
+
+/* Call memcpy_routine ITERATIONS times, copying BUFFER_SIZE bytes from src + srcalign to dst + dstalign; return the current_time() difference. */
+static time_t bench_memcpy_routine(void *memcpy_routine(void *, const void *, size_t), size_t srcalign, size_t dstalign)
+{
+    const time_t start = current_time();
+    int remaining = ITERATIONS;
+
+    while (remaining-- > 0) {
+        memcpy_routine(dst + dstalign, src + srcalign, BUFFER_SIZE);
+    }
+    return current_time() - start;
+}
+
+/*
+ * Time null_memcpy, libc memcpy and mymemcpy for src/dst offsets 0, 1, 2, 4, 8, 16 and 32
+ * and print the elapsed time and throughput of each combination.
+ */
+static void bench_memcpy(void)
+{
+    time_t null, libc, mine;
+    size_t srcalign, dstalign;
+
+    dprintf("memcpy speed test\n");
+    thread_sleep(200); // let the debug string clear the serial port
+
+    /* each offset steps 0 -> 1 and then doubles until it reaches 64 */
+    for (srcalign = 0; srcalign < 64; srcalign = srcalign ? srcalign << 1 : 1) {
+        for (dstalign = 0; dstalign < 64; dstalign = dstalign ? dstalign << 1 : 1) {
+            null = bench_memcpy_routine(&null_memcpy, srcalign, dstalign);
+            libc = bench_memcpy_routine(&memcpy, srcalign, dstalign);
+            mine = bench_memcpy_routine(&mymemcpy, srcalign, dstalign);
+
+            /* the offsets are size_t, hence %zu; time_t values are cast so they match %u */
+            dprintf("srcalign %zu, dstalign %zu\n", srcalign, dstalign);
+            dprintf(" null memcpy %u msecs\n", (unsigned int)null);
+
+            /* an elapsed time of 0 would divide by zero, so a rate of 0 is printed instead */
+            dprintf(" libc memcpy %u msecs, %llu bytes/sec\n", (unsigned int)libc,
+                    libc ? BUFFER_SIZE * ITERATIONS * 1000ULL / libc : 0ULL);
+            dprintf(" my memcpy %u msecs, %llu bytes/sec\n", (unsigned int)mine,
+                    mine ? BUFFER_SIZE * ITERATIONS * 1000ULL / mine : 0ULL);
+        }
+    }
+}
+
+/* Write len bytes at ptr: each byte is seed converted to char, and seed is multiplied by 0x1234567 after every byte. */
+static void fillbuf(void *ptr, size_t len, uint32_t seed)
+{
+    char *out = ptr;
+    char *const end = out + len;
+    for (; out != end; seed *= 0x1234567) {
+        *out++ = seed;
+    }
+}
+
+/* Check mymemcpy against libc memcpy for every src/dst offset in 0..63 and every size in 0..255; mismatches are reported via dprintf. */
+static void validate_memcpy(void)
+{
+    const size_t maxsize = 256;
+    const size_t window = maxsize * 2; /* bytes seeded and compared for every case */
+    size_t srcalign, dstalign, size;
+
+    dprintf("testing memcpy for correctness\n");
+
+    /*
+     * Both buffer pairs get identical seed data before each copy, and the whole
+     * window is compared afterwards, so a write outside the requested range
+     * (but inside the window) shows up as a mismatch as well.
+     */
+    for (srcalign = 0; srcalign < 64; srcalign++) {
+        for (dstalign = 0; dstalign < 64; dstalign++) {
+            for (size = 0; size < maxsize; size++) {
+                fillbuf(src, window, 567);
+                fillbuf(src2, window, 567);
+                fillbuf(dst, window, 123514);
+                fillbuf(dst2, window, 123514);
+
+                memcpy(dst + dstalign, src + srcalign, size);
+                mymemcpy(dst2 + dstalign, src2 + srcalign, size);
+
+                if (memcmp(dst, dst2, window) == 0) {
+                    continue;
+                }
+
+                dprintf("error! srcalign %zu, dstalign %zu, size %zu\n", srcalign, dstalign, size);
+            }
+        }
+    }
+}
+
+/* Call memset_routine(dst + dstalign, 0, BUFFER_SIZE) ITERATIONS times and return the current_time() difference. */
+static time_t bench_memset_routine(void *memset_routine(void *, int, size_t), size_t dstalign)
+{
+    const time_t began = current_time();
+    int pass;
+
+    for (pass = ITERATIONS; pass > 0; pass--) {
+        memset_routine(dst + dstalign, 0, BUFFER_SIZE);
+    }
+    return current_time() - began;
+}
+
+/* Time libc memset and mymemset for dst offsets 0..63; a rate of 0 is printed when the elapsed time is 0, to avoid dividing by zero. */
+static void bench_memset(void)
+{
+    time_t libc, mine;
+    size_t dstalign;
+
+    dprintf("memset speed test\n");
+    thread_sleep(200); // let the debug string clear the serial port
+
+    for (dstalign = 0; dstalign < 64; dstalign++) {
+        libc = bench_memset_routine(&memset, dstalign);
+        mine = bench_memset_routine(&mymemset, dstalign);
+
+        dprintf("dstalign %zu\n", dstalign); /* dstalign is a size_t, hence %zu */
+        dprintf(" libc memset %u msecs, %llu bytes/sec\n", (unsigned int)libc, libc ? BUFFER_SIZE * ITERATIONS * 1000ULL / libc : 0ULL);
+        dprintf(" my memset %u msecs, %llu bytes/sec\n", (unsigned int)mine, mine ? BUFFER_SIZE * ITERATIONS * 1000ULL / mine : 0ULL);
+    }
+}
+
+/* Check mymemset against libc memset for every dst offset in 0..63, size in 0..255 and fill value in 0..255; mismatches are reported via dprintf. */
+static void validate_memset(void)
+{
+    size_t dstalign, size;
+    int c;
+    const size_t maxsize = 256;
+
+    dprintf("testing memset for correctness\n");
+
+    for (dstalign = 0; dstalign < 64; dstalign++) {
+        dprintf("align %zu\n", dstalign); /* dstalign is a size_t, so %zu rather than %zd */
+        for (size = 0; size < maxsize; size++) {
+            for (c = 0; c < 256; c++) {
+                /* seed both buffers identically so any difference comes from the routines under test */
+                fillbuf(dst, maxsize * 2, 123514);
+                fillbuf(dst2, maxsize * 2, 123514);
+
+                memset(dst + dstalign, c, size);
+                mymemset(dst2 + dstalign, c, size);
+
+                if (memcmp(dst, dst2, maxsize * 2) != 0) {
+                    dprintf("error! align %zu, c %d, size %zu\n", dstalign, c, size);
+                }
+            }
+        }
+    }
+}
+
+#if defined(WITH_APP_CONSOLE)
+#include <app/console.h>
+
+/* Console handler for "<cmd> validate|bench memcpy|memset": allocates the four test buffers, runs the selected test, frees them, returns 0. */
+static int string_tests(int argc, cmd_args *argv)
+{
+    src = memalign(64, BUFFER_SIZE + 256);
+    dst = memalign(64, BUFFER_SIZE + 256);
+    src2 = memalign(64, BUFFER_SIZE + 256);
+    dst2 = memalign(64, BUFFER_SIZE + 256);
+    if (!src || !dst || !src2 || !dst2) { /* the tests write through these pointers without checking them */
+        dprintf("failed to allocate test buffers\n");
+        goto out;
+    }
+
+    dprintf("src %p, dst %p\n", src, dst);
+    dprintf("src2 %p, dst2 %p\n", src2, dst2);
+
+    if (argc < 3) {
+        dprintf("not enough arguments:\n");
+usage:
+        dprintf("%s validate <routine>\n", argv[0].str);
+        dprintf("%s bench <routine>\n", argv[0].str);
+        goto out;
+    }
+
+    if (!strcmp(argv[1].str, "validate") && !strcmp(argv[2].str, "memcpy")) {
+        validate_memcpy();
+    } else if (!strcmp(argv[1].str, "validate") && !strcmp(argv[2].str, "memset")) {
+        validate_memset();
+    } else if (!strcmp(argv[1].str, "bench") && !strcmp(argv[2].str, "memcpy")) {
+        bench_memcpy();
+    } else if (!strcmp(argv[1].str, "bench") && !strcmp(argv[2].str, "memset")) {
+        bench_memset();
+    } else {
+        goto usage; /* unknown mode or routine: show the usage text instead of silently doing nothing */
+    }
+
+out:
+    free(src); /* assumes free(NULL) is a no-op, as in standard C -- confirm for this heap */
+    free(dst);
+    free(src2);
+    free(dst2);
+    return 0;
+}
+
+STATIC_COMMAND_START
+{ "string", NULL, &string_tests }, /* presumably { command name, help text, handler } -- confirm against app/console.h */
+STATIC_COMMAND_END(stringtests);
+
+#endif
+