[Vm-dev] [commit][2744] add fast bitblt support files
commits at squeakvm.org
commits at squeakvm.org
Tue Jun 18 23:13:52 UTC 2013
Revision: 2744
Author: rowledge
Date: 2013-06-18 16:13:50 -0700 (Tue, 18 Jun 2013)
Log Message:
-----------
add fast bitblt support files
Modified Paths:
--------------
trunk/platforms/Cross/vm/sqMemoryAccess.h
Added Paths:
-----------
trunk/platforms/Cross/plugins/BitBltPlugin/
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.h
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmLinux.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmOther.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.h
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAlphaBlend.s
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAsm.hdr
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdBitLogical.s
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdPixPaint.s
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdSourceWord.s
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltDispatch.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltDispatch.h
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltGeneric.c
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltGeneric.h
trunk/platforms/Cross/plugins/BitBltPlugin/BitBltInternal.h
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.c
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.c (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.c 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2013 Raspberry Pi Foundation
+ * Copyright © 2013 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ */
+
+#include "BitBltArm.h"
+#include "BitBltArmSimd.h"
+
+arm_cpu_features_t armCpuFeatures;
+/*
+ * Record the result of detectCpuFeatures() in the global armCpuFeatures and,
+ * when the ARM_V6 bit is set in it, register the ARM SIMD fast paths by
+ * calling addArmSimdFastPaths(). If the bit is clear nothing is registered.
+ */
+void addArmFastPaths(void)
+{
+ armCpuFeatures = detectCpuFeatures();
+ if (armCpuFeatures & ARM_V6)
+ addArmSimdFastPaths();
+}
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.h
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.h (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArm.h 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2013 Raspberry Pi Foundation
+ * Copyright © 2013 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ */
+
+#ifndef BITBLTARM_H_
+#define BITBLTARM_H_
+/*
+ * Bit flags describing the ARM CPU capabilities detected at run time. Each
+ * enumerator is a distinct single bit, so several can be OR-ed together in
+ * one arm_cpu_features_t value; 0 means that nothing was detected.
+ */
+typedef enum {
+ ARM_V7 = (1 << 0), /* ARMv7 */
+ ARM_V6 = (1 << 1), /* ARMv6; the bit addArmFastPaths() tests before registering fast paths */
+ ARM_VFP = (1 << 2), /* VFP */
+ ARM_NEON = (1 << 3), /* NEON */
+ ARM_IWMMXT = (1 << 4) /* iWMMXt */
+} arm_cpu_features_t;
+
+extern arm_cpu_features_t armCpuFeatures;
+
+/* There's a separate implementation of this function for each OS; it returns
+ * the OR of the arm_cpu_features_t bits that apply, or 0 if none are known. */
+arm_cpu_features_t detectCpuFeatures(void);
+/* Stores the detectCpuFeatures() result in armCpuFeatures and registers the
+ * ARM SIMD fast paths when ARM_V6 is set. */
+void addArmFastPaths(void);
+
+#endif /* BITBLTARM_H_ */
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmLinux.c
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmLinux.c (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmLinux.c 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+* WRT the usage in the Squeak Smalltalk system -
+* This file provides a function to discover the cpu features supported at runtime; we assume
+* you understand that 'arm_cpu_features' is meaningful only on ARM cpu machines.
+* An equivalent file will be required for other ARM platforms; see BitBltArmOther.c in this directory
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+#include "BitBltArm.h"
+/*
+ * Linux implementation of detectCpuFeatures().
+ *
+ * Scans the ELF auxiliary vector exposed in /proc/self/auxv and converts the
+ * AT_HWCAP bits and the AT_PLATFORM string into arm_cpu_features_t flags.
+ * Returns 0 (no features) when the file cannot be opened or contains no
+ * matching entries.
+ *
+ * NOTE(review): only platform strings beginning "v7l" or "v6l" are
+ * recognised; any other value leaves ARM_V6/ARM_V7 clear, and
+ * addArmFastPaths() then skips the SIMD fast paths - confirm this is what is
+ * wanted for other platform names.
+ * NOTE(review): uint32_t is used although <stdint.h> is not included here;
+ * presumably one of the system headers above provides it - confirm.
+ */
+arm_cpu_features_t detectCpuFeatures(void)
+{
+ arm_cpu_features_t features = 0; /* flags found so far; stays 0 if nothing matches */
+ Elf32_auxv_t aux;
+ int fd;
+
+ fd = open ("/proc/self/auxv", O_RDONLY);
+ if (fd >= 0)
+ {
+ while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) /* any result other than one whole entry (EOF, error, short read) ends the scan */
+ {
+ if (aux.a_type == AT_HWCAP)
+ {
+ uint32_t hwcap = aux.a_un.a_val;
+
+ /* hardcode these values to avoid depending on specific
+ * versions of the hwcap header, e.g. HWCAP_NEON
+ */
+ if ((hwcap & 64) != 0)
+ features |= ARM_VFP;
+ if ((hwcap & 512) != 0)
+ features |= ARM_IWMMXT;
+ /* this flag is only present on kernel 2.6.29 (presumably: 2.6.29 and later - confirm) */
+ if ((hwcap & 4096) != 0)
+ features |= ARM_NEON;
+ }
+ else if (aux.a_type == AT_PLATFORM)
+ {
+ const char *plat = (const char*) aux.a_un.a_val; /* a_val is treated as the address of the platform string; presumably valid because this is the process's own auxv */
+
+ if (strncmp (plat, "v7l", 3) == 0)
+ features |= (ARM_V7 | ARM_V6); /* a v7 platform is flagged as v6-capable too */
+ else if (strncmp (plat, "v6l", 3) == 0)
+ features |= ARM_V6;
+ }
+ }
+ close (fd);
+ }
+
+ return features;
+}
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmOther.c
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmOther.c (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmOther.c 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2013 Raspberry Pi Foundation
+ * Copyright © 2013 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * WRT the usage in the Squeak Smalltalk system -
+* This file provides a function to discover the cpu features supported at runtime; we assume
+* you understand that 'arm_cpu_features' is meaningful only on ARM cpu machines.
+* Obviously, this is a null function and a suitable equivalent file will be required for actual ARM platforms;
+* see BitBltArmLinux.c in this directory as an example
+ */
+
+#include "BitBltArm.h"
+
+/* There is no OS-neutral way of determining which type of ARM this is, so
+ * this fallback implementation reports no features at all (0). With that
+ * result addArmFastPaths() never sees ARM_V6 and leaves the ARM SIMD fast
+ * paths unregistered. */
+
+arm_cpu_features_t detectCpuFeatures(void)
+{
+ return 0;
+}
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.c
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.c (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.c 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2013 Raspberry Pi Foundation
+ * Copyright © 2013 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "BitBltInternal.h"
+/* How a wrapper generated by FAST_PATH fills in the halftone and halftoneInfo
+ * arguments (this is the macro's halftone_type parameter). */
+enum {
+ HALFTONE_NONE, /* halftone and halftoneInfo are both passed as 0 */
+ HALFTONE_SCALAR, /* halftone is the single word halftoneBase[0]; halftoneInfo stays 0 */
+ HALFTONE_VECTOR /* halftone is the address just past the halftoneHeight-word table; halftoneInfo packs the starting row and -halftoneHeight */
+};
+
+//typedef void (*armSimdAsmFn)(uint32_t width, uint32_t height, uint32_t *dst, uint32_t dstStride, uint32_t *src, uint32_t srcStride, uint32_t halftone, uint32_t halftoneInfo, uint32_t *colourMap, uint32_t bitPtrs, ...);
+/*
+ * FAST_PATH(op, src_bpp, dst_bpp, qualifier, halftone_type)
+ *
+ * Declares the three assembler entry points
+ * armSimd<op><src_bpp>_<dst_bpp><qualifier>_wide / _narrow / _tiny and
+ * defines a static wrapper fastPath<op><src_bpp>_<dst_bpp><qualifier>() that
+ * unpacks an operation_t into the arguments those routines take, then calls
+ * exactly one of them for the whole operation.
+ *
+ * op operation name used in the generated identifiers
+ * src_bpp source bits per pixel; 0 means "no source": src is then
+ * passed as a null pointer and srcPitch is left unadjusted
+ * dst_bpp destination bits per pixel
+ * qualifier optional identifier suffix (may be empty)
+ * halftone_type HALFTONE_NONE, HALFTONE_SCALAR or HALFTONE_VECTOR
+ *
+ * Argument encoding done by the wrapper:
+ * - both pitches are divided by sizeof(uint32_t) and then reduced by the
+ * number of words read/written per line;
+ * - for depths below 8 bpp the starting pixel offset within the word goes
+ * into bitPtrs (source offset shifted left by 27, destination offset in
+ * the low bits); for 8 and 16 bpp it is OR-ed into bits 30-31 of the
+ * corresponding pitch instead.
+ *
+ * The wrapper's operation_t parameter is spelled "op" in the macro body, so
+ * on expansion it is renamed to the macro's op argument (for example
+ * "SourceWord"); the generated code is self-consistent, but the parameter is
+ * not literally called "op".
+ *
+ * NOTE(review): a pointer is cast to uint32_t for HALFTONE_VECTOR, which
+ * presumably assumes 32-bit pointers - confirm before building elsewhere.
+ * NOTE(review): the HALFTONE_VECTOR branch evaluates dstY % halftoneHeight;
+ * callers presumably guarantee halftoneHeight is non-zero - confirm.
+ */
+#define FAST_PATH(op, src_bpp, dst_bpp, qualifier, halftone_type) \
+extern void armSimd##op##src_bpp##_##dst_bpp##qualifier##_wide (uint32_t width, uint32_t height, uint32_t *dst, uint32_t dstStride, uint32_t *src, uint32_t srcStride, uint32_t halftone, uint32_t halftoneInfo, uint32_t *colourMap, uint32_t bitPtrs, ...); \
+extern void armSimd##op##src_bpp##_##dst_bpp##qualifier##_narrow(uint32_t width, uint32_t height, uint32_t *dst, uint32_t dstStride, uint32_t *src, uint32_t srcStride, uint32_t halftone, uint32_t halftoneInfo, uint32_t *colourMap, uint32_t bitPtrs, ...); \
+extern void armSimd##op##src_bpp##_##dst_bpp##qualifier##_tiny (uint32_t width, uint32_t height, uint32_t *dst, uint32_t dstStride, uint32_t *src, uint32_t srcStride, uint32_t halftone, uint32_t halftoneInfo, uint32_t *colourMap, uint32_t bitPtrs, ...); \
+static void fastPath##op##src_bpp##_##dst_bpp##qualifier(operation_t *op, uint32_t flags) \
+{ \
+ IGNORE(flags); \
+ /* Copy certain parts of the operation structure to locals to help compiler */ \
+ uint32_t *srcBits = op->src.bits; \
+ uint32_t srcPitch = op->src.pitch / sizeof (uint32_t); \
+ uint32_t srcX = op->src.x; \
+ uint32_t srcY = op->src.y; \
+ uint32_t *dstBits = op->dest.bits; \
+ uint32_t dstPitch = op->dest.pitch / sizeof (uint32_t); \
+ uint32_t dstX = op->dest.x; \
+ uint32_t dstY = op->dest.y; \
+ uint32_t width = op->width; \
+ uint32_t height = op->height; \
+ uint32_t *cmLookupTable = *op->cmLookupTable; \
+ uint32_t halftoneHeight = op->halftoneHeight; \
+ uint32_t *halftoneBase = (uint32_t *) *op->halftoneBase; \
+ /* Get pointers to initial words */ \
+ uint32_t *src = 0; \
+ if (src_bpp > 0) \
+ src = srcBits + srcPitch * srcY + srcX * src_bpp / 32; \
+ uint32_t *dst = dstBits + dstPitch * dstY + dstX * dst_bpp / 32; \
+ /* Get initial pixel offset within words, mangle into pitch if possible */ \
+ uint32_t bitPtrs = 0; \
+ uint32_t srcXpix = 0; \
+ if (src_bpp > 0) { \
+ srcXpix = srcX & (31 / (src_bpp == 0 ? 1 : src_bpp)); /* ?: to avoid compiler warning on GCC! */ \
+ if (src_bpp < 8) \
+ bitPtrs = srcXpix << 27; \
+ else if (src_bpp == 8 || src_bpp == 16) \
+ srcPitch |= srcXpix << 30; \
+ } \
+ uint32_t dstXpix = dstX & (31/dst_bpp); \
+ if (dst_bpp < 8) \
+ bitPtrs |= dstXpix; \
+ else if (dst_bpp == 8 || dst_bpp == 16) \
+ dstPitch |= dstXpix << 30; \
+ /* Adjust strides to remove number of words partially or wholly read/written */ \
+ if (src_bpp > 0) \
+ srcPitch -= (src_bpp * (srcXpix + width) + 31) / 32; \
+ dstPitch -= (dst_bpp * (dstXpix + width) + 31) / 32; \
+ /* Deal with halftoning */ \
+ uint32_t halftone = 0; \
+ uint32_t halftoneInfo = 0; \
+ if (halftone_type == HALFTONE_SCALAR) \
+ halftone = halftoneBase[0]; \
+ else if (halftone_type == HALFTONE_VECTOR) { \
+ halftone = (uint32_t) (halftoneBase + halftoneHeight); \
+ halftoneInfo = (((dstY % halftoneHeight) - halftoneHeight) << 17) | (-halftoneHeight & 0x7FFF); \
+ } \
+ /* Work out which width class this operation is. \
+ * Rather than re-evaluate this for each line, we want one choice \
+ * for the whole operation; this means we can't assume anything about \
+ * alignment to sizes larger than 4 bytes, because that's the only \
+ * guarantee we have about line stride. */ \
+ if (width > (128-32)/dst_bpp && (((dstXpix-1) ^ (dstXpix+width-(128-32)/dst_bpp)) &~ (31/dst_bpp))) \
+ armSimd##op##src_bpp##_##dst_bpp##qualifier##_wide(width, height, dst, dstPitch, src, srcPitch, halftone, halftoneInfo, cmLookupTable, bitPtrs); \
+ else if (dst_bpp > 8 || (((dstXpix-1) ^ (dstXpix+width)) &~ (31/dst_bpp))) \
+ armSimd##op##src_bpp##_##dst_bpp##qualifier##_narrow(width, height, dst, dstPitch, src, srcPitch, halftone, halftoneInfo, cmLookupTable, bitPtrs); \
+ else \
+ armSimd##op##src_bpp##_##dst_bpp##qualifier##_tiny(width, height, dst, dstPitch, src, srcPitch, halftone, halftoneInfo, cmLookupTable, bitPtrs); \
+}
+/* Wrapper instantiations. Arguments are (op, src_bpp, dst_bpp, qualifier,
+ * halftone_type); each wrapper generated here has one entry in fastPaths[]
+ * below. The SourceWord copies are grouped by the ratio of destination to
+ * source depth. */
+FAST_PATH(SourceWord,1,32,,HALFTONE_NONE) /* destination depth = 32 x source depth */
+
+FAST_PATH(SourceWord,1,16,,HALFTONE_NONE) /* destination depth = 16 x source depth */
+FAST_PATH(SourceWord,2,32,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,1,8,,HALFTONE_NONE) /* destination depth = 8 x source depth */
+FAST_PATH(SourceWord,2,16,,HALFTONE_NONE)
+FAST_PATH(SourceWord,4,32,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,1,4,,HALFTONE_NONE) /* destination depth = 4 x source depth */
+FAST_PATH(SourceWord,2,8,,HALFTONE_NONE)
+FAST_PATH(SourceWord,4,16,,HALFTONE_NONE)
+FAST_PATH(SourceWord,8,32,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,1,2,,HALFTONE_NONE) /* destination depth = 2 x source depth */
+FAST_PATH(SourceWord,2,4,,HALFTONE_NONE)
+FAST_PATH(SourceWord,4,8,,HALFTONE_NONE)
+FAST_PATH(SourceWord,8,16,,HALFTONE_NONE)
+FAST_PATH(SourceWord,16,32,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,1,1,,HALFTONE_NONE) /* same depth */
+FAST_PATH(SourceWord,2,2,,HALFTONE_NONE)
+FAST_PATH(SourceWord,4,4,,HALFTONE_NONE)
+FAST_PATH(SourceWord,8,8,,HALFTONE_NONE)
+FAST_PATH(SourceWord,16,16,,HALFTONE_NONE)
+FAST_PATH(SourceWord,32,32,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,2,1,,HALFTONE_NONE) /* destination depth = source depth / 2 */
+FAST_PATH(SourceWord,4,2,,HALFTONE_NONE)
+FAST_PATH(SourceWord,8,4,,HALFTONE_NONE)
+FAST_PATH(SourceWord,16,8,,HALFTONE_NONE)
+FAST_PATH(SourceWord,32,16,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,4,1,,HALFTONE_NONE) /* destination depth = source depth / 4 */
+FAST_PATH(SourceWord,8,2,,HALFTONE_NONE)
+FAST_PATH(SourceWord,16,4,,HALFTONE_NONE)
+FAST_PATH(SourceWord,32,8,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,8,1,,HALFTONE_NONE) /* destination depth = source depth / 8 */
+FAST_PATH(SourceWord,16,2,,HALFTONE_NONE)
+FAST_PATH(SourceWord,32,4,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,16,1,,HALFTONE_NONE) /* destination depth = source depth / 16 */
+FAST_PATH(SourceWord,32,2,,HALFTONE_NONE)
+
+FAST_PATH(SourceWord,32,1,,HALFTONE_NONE) /* destination depth = source depth / 32 */
+
+FAST_PATH(SourceWord,0,1,,HALFTONE_NONE) /* src_bpp 0: no source; one plain and one scalar-halftone variant per destination depth */
+FAST_PATH(SourceWord,0,1,_scalar,HALFTONE_SCALAR)
+FAST_PATH(SourceWord,0,2,,HALFTONE_NONE)
+FAST_PATH(SourceWord,0,2,_scalar,HALFTONE_SCALAR)
+FAST_PATH(SourceWord,0,4,,HALFTONE_NONE)
+FAST_PATH(SourceWord,0,4,_scalar,HALFTONE_SCALAR)
+FAST_PATH(SourceWord,0,8,,HALFTONE_NONE)
+FAST_PATH(SourceWord,0,8,_scalar,HALFTONE_SCALAR)
+FAST_PATH(SourceWord,0,16,,HALFTONE_NONE)
+FAST_PATH(SourceWord,0,16,_scalar,HALFTONE_SCALAR)
+FAST_PATH(SourceWord,0,32,,HALFTONE_NONE)
+FAST_PATH(SourceWord,0,32,_scalar,HALFTONE_SCALAR)
+
+FAST_PATH(PixPaint,1,1,,HALFTONE_NONE) /* PixPaint: same-depth only */
+FAST_PATH(PixPaint,2,2,,HALFTONE_NONE)
+FAST_PATH(PixPaint,4,4,,HALFTONE_NONE)
+FAST_PATH(PixPaint,8,8,,HALFTONE_NONE)
+FAST_PATH(PixPaint,16,16,,HALFTONE_NONE)
+FAST_PATH(PixPaint,32,32,,HALFTONE_NONE)
+
+FAST_PATH(AlphaBlend,32,32,,HALFTONE_NONE) /* AlphaBlend: 32bpp to 32bpp only */
+
+FAST_PATH(BitAnd,1,1,,HALFTONE_NONE) /* BitAnd: same-depth only */
+FAST_PATH(BitAnd,2,2,,HALFTONE_NONE)
+FAST_PATH(BitAnd,4,4,,HALFTONE_NONE)
+FAST_PATH(BitAnd,8,8,,HALFTONE_NONE)
+FAST_PATH(BitAnd,16,16,,HALFTONE_NONE)
+FAST_PATH(BitAnd,32,32,,HALFTONE_NONE)
+/*
+ * Table handed to addFastPaths() by addArmSimdFastPaths(). Each entry names a
+ * wrapper generated by FAST_PATH above, a CR_* value (CR_sourceWord,
+ * CR_pixPaint, CR_alphaBlend or CR_bitAnd) and the flags built by STD_FLAGS
+ * or STD_FLAGS_NO_SOURCE. Those macros and fast_path_t are not defined in
+ * this file (presumably they come from BitBltInternal.h); the first two
+ * STD_FLAGS arguments match the source and destination depths in the wrapper
+ * name.
+ *
+ * NOTE(review): the third STD_FLAGS argument is DIRECT for most
+ * depth-changing copies, but NO for 16->32 and 32->16 and 15BIT for 32bpp
+ * sources going to 8bpp or less - confirm against the STD_FLAGS definition
+ * that this is intentional.
+ */
+static fast_path_t fastPaths[] = {
+ { fastPathSourceWord1_32, CR_sourceWord, STD_FLAGS(1,32,DIRECT,NO) },
+
+ { fastPathSourceWord1_16, CR_sourceWord, STD_FLAGS(1,16,DIRECT,NO) },
+ { fastPathSourceWord2_32, CR_sourceWord, STD_FLAGS(2,32,DIRECT,NO) },
+
+ { fastPathSourceWord1_8, CR_sourceWord, STD_FLAGS(1,8,DIRECT,NO) },
+ { fastPathSourceWord2_16, CR_sourceWord, STD_FLAGS(2,16,DIRECT,NO) },
+ { fastPathSourceWord4_32, CR_sourceWord, STD_FLAGS(4,32,DIRECT,NO) },
+
+ { fastPathSourceWord1_4, CR_sourceWord, STD_FLAGS(1,4,DIRECT,NO) },
+ { fastPathSourceWord2_8, CR_sourceWord, STD_FLAGS(2,8,DIRECT,NO) },
+ { fastPathSourceWord4_16, CR_sourceWord, STD_FLAGS(4,16,DIRECT,NO) },
+ { fastPathSourceWord8_32, CR_sourceWord, STD_FLAGS(8,32,DIRECT,NO) },
+
+ { fastPathSourceWord1_2, CR_sourceWord, STD_FLAGS(1,2,DIRECT,NO) },
+ { fastPathSourceWord2_4, CR_sourceWord, STD_FLAGS(2,4,DIRECT,NO) },
+ { fastPathSourceWord4_8, CR_sourceWord, STD_FLAGS(4,8,DIRECT,NO) },
+ { fastPathSourceWord8_16, CR_sourceWord, STD_FLAGS(8,16,DIRECT,NO) },
+ { fastPathSourceWord16_32, CR_sourceWord, STD_FLAGS(16,32,NO,NO) },
+
+ { fastPathSourceWord1_1, CR_sourceWord, STD_FLAGS(1,1,NO,NO) },
+ { fastPathSourceWord2_2, CR_sourceWord, STD_FLAGS(2,2,NO,NO) },
+ { fastPathSourceWord4_4, CR_sourceWord, STD_FLAGS(4,4,NO,NO) },
+ { fastPathSourceWord8_8, CR_sourceWord, STD_FLAGS(8,8,NO,NO) },
+ { fastPathSourceWord16_16, CR_sourceWord, STD_FLAGS(16,16,NO,NO) },
+ { fastPathSourceWord32_32, CR_sourceWord, STD_FLAGS(32,32,NO,NO) },
+
+ { fastPathSourceWord2_1, CR_sourceWord, STD_FLAGS(2,1,DIRECT,NO) },
+ { fastPathSourceWord4_2, CR_sourceWord, STD_FLAGS(4,2,DIRECT,NO) },
+ { fastPathSourceWord8_4, CR_sourceWord, STD_FLAGS(8,4,DIRECT,NO) },
+ { fastPathSourceWord16_8, CR_sourceWord, STD_FLAGS(16,8,DIRECT,NO) },
+ { fastPathSourceWord32_16, CR_sourceWord, STD_FLAGS(32,16,NO,NO) },
+
+ { fastPathSourceWord4_1, CR_sourceWord, STD_FLAGS(4,1,DIRECT,NO) },
+ { fastPathSourceWord8_2, CR_sourceWord, STD_FLAGS(8,2,DIRECT,NO) },
+ { fastPathSourceWord16_4, CR_sourceWord, STD_FLAGS(16,4,DIRECT,NO) },
+ { fastPathSourceWord32_8, CR_sourceWord, STD_FLAGS(32,8,15BIT,NO) },
+
+ { fastPathSourceWord8_1, CR_sourceWord, STD_FLAGS(8,1,DIRECT,NO) },
+ { fastPathSourceWord16_2, CR_sourceWord, STD_FLAGS(16,2,DIRECT,NO) },
+ { fastPathSourceWord32_4, CR_sourceWord, STD_FLAGS(32,4,15BIT,NO) },
+
+ { fastPathSourceWord16_1, CR_sourceWord, STD_FLAGS(16,1,DIRECT,NO) },
+ { fastPathSourceWord32_2, CR_sourceWord, STD_FLAGS(32,2,15BIT,NO) },
+
+ { fastPathSourceWord32_1, CR_sourceWord, STD_FLAGS(32,1,15BIT,NO) },
+
+ { fastPathSourceWord0_1, CR_sourceWord, STD_FLAGS_NO_SOURCE(1,NO) },
+ { fastPathSourceWord0_1_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(1,SCALAR) },
+ { fastPathSourceWord0_2, CR_sourceWord, STD_FLAGS_NO_SOURCE(2,NO) },
+ { fastPathSourceWord0_2_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(2,SCALAR) },
+ { fastPathSourceWord0_4, CR_sourceWord, STD_FLAGS_NO_SOURCE(4,NO) },
+ { fastPathSourceWord0_4_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(4,SCALAR) },
+ { fastPathSourceWord0_8, CR_sourceWord, STD_FLAGS_NO_SOURCE(8,NO) },
+ { fastPathSourceWord0_8_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(8,SCALAR) },
+ { fastPathSourceWord0_16, CR_sourceWord, STD_FLAGS_NO_SOURCE(16,NO) },
+ { fastPathSourceWord0_16_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(16,SCALAR) },
+ { fastPathSourceWord0_32, CR_sourceWord, STD_FLAGS_NO_SOURCE(32,NO) },
+ { fastPathSourceWord0_32_scalar, CR_sourceWord, STD_FLAGS_NO_SOURCE(32,SCALAR) },
+
+ { fastPathPixPaint1_1, CR_pixPaint, STD_FLAGS(1,1,NO,NO) },
+ { fastPathPixPaint2_2, CR_pixPaint, STD_FLAGS(2,2,NO,NO) },
+ { fastPathPixPaint4_4, CR_pixPaint, STD_FLAGS(4,4,NO,NO) },
+ { fastPathPixPaint8_8, CR_pixPaint, STD_FLAGS(8,8,NO,NO) },
+ { fastPathPixPaint16_16, CR_pixPaint, STD_FLAGS(16,16,NO,NO) },
+ { fastPathPixPaint32_32, CR_pixPaint, STD_FLAGS(32,32,NO,NO) },
+
+ { fastPathAlphaBlend32_32, CR_alphaBlend, STD_FLAGS(32,32,NO,NO) },
+
+ { fastPathBitAnd1_1, CR_bitAnd, STD_FLAGS(1,1,NO,NO) },
+ { fastPathBitAnd2_2, CR_bitAnd, STD_FLAGS(2,2,NO,NO) },
+ { fastPathBitAnd4_4, CR_bitAnd, STD_FLAGS(4,4,NO,NO) },
+ { fastPathBitAnd8_8, CR_bitAnd, STD_FLAGS(8,8,NO,NO) },
+ { fastPathBitAnd16_16, CR_bitAnd, STD_FLAGS(16,16,NO,NO) },
+ { fastPathBitAnd32_32, CR_bitAnd, STD_FLAGS(32,32,NO,NO) },
+};
+/* Pass the whole fastPaths[] table, together with its element count, to
+ * addFastPaths(). */
+void addArmSimdFastPaths(void)
+{
+ addFastPaths(fastPaths, sizeof fastPaths / sizeof *fastPaths);
+}
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.h
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.h (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimd.h 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2013 Raspberry Pi Foundation
+ * Copyright © 2013 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ */
+
+#ifndef BITBLTARMSIMD_H_
+#define BITBLTARMSIMD_H_
+/* Registers the ARM SIMD fast-path table (BitBltArmSimd.c) via addFastPaths(). */
+void addArmSimdFastPaths(void);
+
+#endif /* BITBLTARMSIMD_H_ */
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAlphaBlend.s
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAlphaBlend.s (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAlphaBlend.s 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,141 @@
+;
+; Copyright © 2013 Raspberry Pi Foundation
+; Copyright © 2013 RISC OS Open Ltd
+;
+; Permission to use, copy, modify, distribute, and sell this software and its
+; documentation for any purpose is hereby granted without fee, provided that
+; the above copyright notice appear in all copies and that both that
+; copyright notice and this permission notice appear in supporting
+; documentation, and that the name of the copyright holders not be used in
+; advertising or publicity pertaining to distribution of the software without
+; specific, written prior permission. The copyright holders make no
+; representations about the suitability of this software for any purpose. It
+; is provided "as is" without express or implied warranty.
+;
+; THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+; SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+; FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+; SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+; OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+; SOFTWARE.
+;
+
+; Debug options: each switch is declared here, ahead of the GET of BitBltArmSimdAsm.hdr (which only declares the ones still undefined); uncomment a SETL line to turn that switch on
+ GBLL DebugData
+;DebugData SETL {TRUE}
+ GBLL DebugPld
+;DebugPld SETL {TRUE}
+ GBLL VerboseBuild
+;VerboseBuild SETL {TRUE}
+
+ GET BitBltArmSimdAsm.hdr
+
+ AREA |BitBltArmSimdAlphaBlend$$Code|, CODE, READONLY
+ ARM
+
+; ********************************************************************
+; Setup macro (presumably invoked through the init argument of GenerateFunctions): loads ht = 0 and ht_info = &00010001, the constants AlphaBlend32_32_1pixel feeds to USUB16 and SEL
+ MACRO
+ AlphaBlend32_32_init
+ MOV ht_info, #1
+ MOV ht, #0
+ ORR ht_info, ht_info, ht_info, LSL #16 ; &10001
+ MEND
+; Blend one 32bpp pixel $src over $dst using the source alpha in bits 31:24; the result is left in $src, $dst and $tmp0-$tmp2 are overwritten; relies on ht/ht_info as set by AlphaBlend32_32_init
+ MACRO
+ AlphaBlend32_32_1pixel $src, $dst, $tmp0, $tmp1, $tmp2, $known_not_transp
+ [ "$known_not_transp" = "" ; no caller guarantee: fetch the alpha and test for zero here; otherwise $tmp2 must already hold $src LSR #24
+ MOVS $tmp2, $src, LSR #24 ; s_a
+ BEQ %FT09 ; fully transparent - use dst
+ ]
+ TEQ $tmp2, #&FF
+ BEQ %FT10 ; fully opaque - use src
+ UXTB $tmp0, $src, ROR #8 ; s_ag
+ ORR $tmp0, $tmp0, #&FF0000 ; upper lane forced to &FF in place of the source alpha byte
+ UXTB16 $tmp1, $src ; s_rb
+ MUL $tmp0, $tmp0, $tmp2 ; both 16-bit lanes scaled by s_a
+ MUL $tmp1, $tmp1, $tmp2
+ RSB $tmp2, $tmp2, #&FF ; 255 - s_a
+ UXTB16 $src, $dst, ROR #8 ; d_ag
+ UXTB16 $dst, $dst ; d_rb
+ MLA $src, $src, $tmp2, $tmp0 ; ag
+ MLA $dst, $dst, $tmp2, $tmp1 ; rb
+ USUB16 $tmp0, $src, ht_info ; result unused: sets the GE flags of each lane that is >= 1
+ UXTAB16 $src, $src, $src, ROR #8 ; lane += lane >> 8
+ SEL $tmp1, ht_info, ht ; 1 in each lane flagged above, else 0
+ UXTAB16 $src, $tmp1, $src, ROR #8 ; lane = (lane >> 8) + that 0/1 - presumably the divide-by-255 step
+ USUB16 $tmp0, $dst, ht_info ; same sequence for the rb lanes
+ UXTAB16 $dst, $dst, $dst, ROR #8
+ SEL $tmp1, ht_info, ht
+ UXTAB16 $dst, $tmp1, $dst, ROR #8
+ ORR $src, $dst, $src, LSL #8 ; recombine
+ B %FT10 ; skip the transparent case
+09 MOV $src, $dst ; transparent: the result is the destination pixel
+10
+ MEND
+; One-pixel step: read a source word, advance dst, and blend/write back only when the source alpha is non-zero ($wk registers and Read1Word/Write1Word are defined outside this file's visible text)
+ MACRO
+ AlphaBlend32_32_32bits $src, $dst, $fixed_skew
+ Read1Word src, 0, carry, $fixed_skew, skew, scratch
+ ADD dst, dst, #1*4 ; dst advances first, so the pixel is addressed at a negative offset below
+ MOVS $wk7, $wk0, LSR #24 ; alpha kept in $wk7 for the blend macro
+ BEQ %FT01 ; all pixels fully transparent - don't touch destination
+ LDR $wk4, [dst, #-1*4]
+ AlphaBlend32_32_1pixel $wk0, $wk4, $wk5, $wk6, $wk7, known_not_transp
+ Write1Word dst, 0
+01
+ MEND
+; Two-pixel step: skip the destination entirely when both source alphas are zero, otherwise blend each pixel in turn
+ MACRO
+ AlphaBlend32_32_64bits $src, $fixed_skew
+ Read2Words src, 0, carry, $fixed_skew, skew, scratch
+ ADD dst, dst, #2*4
+ MOVS $wk7, $wk0, LSR #24
+ MOVEQS $wk7, $wk1, LSR #24 ; only examined while every earlier alpha was zero
+ BEQ %FT01 ; all pixels fully transparent - don't touch destination
+ LDR $wk4, [dst, #-2*4]
+ AlphaBlend32_32_1pixel $wk0, $wk4, $wk5, $wk6, $wk7 ; no known_not_transp: each pixel re-tests its own alpha
+ LDR $wk4, [dst, #-1*4]
+ AlphaBlend32_32_1pixel $wk1, $wk4, $wk5, $wk6, $wk7
+ Write2Words dst, 0
+01
+ MEND
+; Four-pixel step, first half: only reads the four source words; the blending is done by AlphaBlend32_32_128bits_tail ($intra_preloads is not used in this body)
+ MACRO
+ AlphaBlend32_32_128bits_head $src, $fixed_skew, $intra_preloads
+ Read4Words src, 0, carry, $fixed_skew, skew, scratch
+ MEND
+; Four-pixel step, second half: skip the destination when all four source alphas are zero, otherwise blend the four pixels in $wk0-$wk3 and write them back
+ MACRO
+ AlphaBlend32_32_128bits_tail $src
+ ADD dst, dst, #4*4
+ MOVS $wk7, $wk0, LSR #24
+ MOVEQS $wk7, $wk1, LSR #24 ; each later alpha is only examined while all earlier ones were zero
+ MOVEQS $wk7, $wk2, LSR #24
+ MOVEQS $wk7, $wk3, LSR #24
+ BEQ %FT01 ; all pixels fully transparent - don't touch destination
+ LDR $wk4, [dst, #-4*4]
+ AlphaBlend32_32_1pixel $wk0, $wk4, $wk5, $wk6, $wk7
+ LDR $wk4, [dst, #-3*4]
+ AlphaBlend32_32_1pixel $wk1, $wk4, $wk5, $wk6, $wk7
+ LDR $wk4, [dst, #-2*4]
+ AlphaBlend32_32_1pixel $wk2, $wk4, $wk5, $wk6, $wk7
+ LDR $wk4, [dst, #-1*4]
+ AlphaBlend32_32_1pixel $wk3, $wk4, $wk5, $wk6, $wk7
+ Write4Words dst, 0
+01
+ MEND
+; Instantiate the AlphaBlend 32bpp->32bpp routines from the macros above (GenerateFunctions itself is not visible here; presumably it lives in BitBltArmSimdAsm.hdr and emits the _wide/_narrow/_tiny entry points declared in BitBltArmSimd.c)
+;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
+; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
+
+AlphaBlend GenerateFunctions 32, 32,, \
+ FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 1, \
+ "stride_d,stride_s,map,bitptrs,skew,orig_w,scratch,carry", \
+ "x,stride_d,stride_s", bitptrs,, init ; leading_pixels_reg = wk3
+
+; ********************************************************************
+
+ END
Added: trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAsm.hdr
===================================================================
--- trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAsm.hdr (rev 0)
+++ trunk/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdAsm.hdr 2013-06-18 23:13:50 UTC (rev 2744)
@@ -0,0 +1,1939 @@
+;
+; Copyright © 2013 Raspberry Pi Foundation
+; Copyright © 2013 RISC OS Open Ltd
+;
+; Permission to use, copy, modify, distribute, and sell this software and its
+; documentation for any purpose is hereby granted without fee, provided that
+; the above copyright notice appear in all copies and that both that
+; copyright notice and this permission notice appear in supporting
+; documentation, and that the name of the copyright holders not be used in
+; advertising or publicity pertaining to distribution of the software without
+; specific, written prior permission. The copyright holders make no
+; representations about the suitability of this software for any purpose. It
+; is provided "as is" without express or implied warranty.
+;
+; THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+; SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+; FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+; SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+; OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+; SOFTWARE.
+;
+
+; Declare each debug/verbosity switch only if it has not been defined already, so a
+; definition made before this header is included is kept. A logical variable
+; declared with GBLL starts out as {FALSE}.
+; DebugData and DebugPld are the switches tested by the Print macro (as Debug<switch>).
+ [ :LNOT: :DEF: DebugData
+ GBLL DebugData
+ ]
+ [ :LNOT: :DEF: DebugPld
+ GBLL DebugPld
+ ]
+ [ :LNOT: :DEF: VerboseBuild
+ GBLL VerboseBuild
+ ]
+
+; Flag bitfield definitions
+; The FLAG_ names in each group are the alternative values of one field of the flags
+; word; values from different groups are combined with :OR:.
+; bits 1:0 - halftone
+FLAG_NO_HALFTONE * 0 :SHL: 0
+FLAG_SCALAR_HALFTONE * 1 :SHL: 0
+FLAG_VECTOR_HALFTONE * 2 :SHL: 0
+; bit 2 - colour map
+FLAG_NO_COLOUR_MAP * 0 :SHL: 2
+FLAG_COLOUR_MAP * 1 :SHL: 2
+
+; bit 3 - destination access
+FLAG_DST_WRITEONLY * 0 :SHL: 3
+FLAG_DST_READWRITE * 1 :SHL: 3
+; bits 5:4 - spilling of line variables (FLAG_SPILL_LINE_VARS sets both the WIDE and NON_WIDE bits)
+FLAG_SPILL_NO_LINE_VARS * 0 :SHL: 4
+FLAG_SPILL_LINE_VARS_WIDE * 1 :SHL: 4
+FLAG_SPILL_LINE_VARS_NON_WIDE * 2 :SHL: 4
+FLAG_SPILL_LINE_VARS * 3 :SHL: 4
+; bit 6 - skew expansion
+FLAG_EXPAND_SKEW * 0 :SHL: 6
+FLAG_NO_EXPAND_SKEW * 1 :SHL: 6
+; bit 7 - sub-word processing mode
+FLAG_PROCESS_SERIAL * 0 :SHL: 7 ; sub-word data is presented MS-aligned, and results are expected LS-aligned
+FLAG_PROCESS_PARALLEL * 1 :SHL: 7 ; sub-word data retains its original alignment throughout (only useful if src & dest depths same)
+; bit 8 - largest block macro
+FLAG_MAX_128BIT_MACRO * 0 :SHL: 8
+FLAG_MAX_256BIT_MACRO * 1 :SHL: 8 ; particularly tight loops can sometimes benefit from being unrolled to allow 2x 128-bit blocks to be staggered
+; bit 9 - destination preload
+FLAG_PRELOAD_DST * 0 :SHL: 9
+FLAG_NO_PRELOAD_DST * 1 :SHL: 9
+
+; Offsets into stack
+; Both are global arithmetic variables holding byte offsets (initially 9*4 and 0).
+; NOTE(review): 9*4 presumably accounts for nine words pushed on function entry below
+; the stacked arguments - confirm against the function prologue.
+ GBLA args_stack_offset
+args_stack_offset SETA 9*4
+ GBLA locals_stack_offset
+locals_stack_offset SETA 0
+
+; Top-level macro arguments are held in variables for convenience
+; (GBLA declares an arithmetic, GBLS a string and GBLL a logical global variable;
+; they are only declared here, values are assigned elsewhere)
+ GBLA src_bpp
+ GBLA dst_w_bpp
+ GBLA flags
+ GBLA prefetch_distance
+ GBLS leading_pixels_reg
+ GBLS preload_offset_reg
+ GBLS line_saved_regs
+ GBLS init
+ GBLS newline
+ GBLS reinitwk
+ GBLS cleanup
+; Derived values
+ GBLS prefix
+ GBLA dst_r_bpp
+ GBLA src_bpp_shift
+ GBLA dst_bpp_shift
+ GBLL sub_byte
+ GBLA num_line_saved_regs
+ GBLA pix_per_block
+
+; Work registers - variables so they can be reassigned between functions
+; (should always be assigned in increasing register number though)
+; Both sets are filled in by the AssignWk macro: wkN_num receives the register number
+; (or -1 for an unassigned slot) and wkN the register name string "r<number>"
+; (or "invalid_register_wkN" for an unassigned slot).
+ GBLA wk0_num
+ GBLA wk1_num
+ GBLA wk2_num
+ GBLA wk3_num
+ GBLA wk4_num
+ GBLA wk5_num
+ GBLA wk6_num
+ GBLA wk7_num
+ GBLA wk8_num
+ GBLA wk9_num
+ GBLA wk10_num
+; String versions of the same
+ GBLS wk0
+ GBLS wk1
+ GBLS wk2
+ GBLS wk3
+ GBLS wk4
+ GBLS wk5
+ GBLS wk6
+ GBLS wk7
+ GBLS wk8
+ GBLS wk9
+ GBLS wk10
+
+
+; printf is only needed by the Print macro, so it is imported only when at least one
+; of the debug switches is on
+ [ DebugData :LOR: DebugPld
+ IMPORT printf
+ ]
+; Assembly-time state for Print: {TRUE} when the next Print output starts a new line,
+; in which case Print prefixes it with the switch name
+ GBLL PrintAtStartOfLine
+PrintAtStartOfLine SETL {TRUE}
+; Debug print: Print<cond> switch, "format", reg0, reg1, reg2
+; Assembles to nothing unless Debug<switch> is {TRUE}. Otherwise emits a call to printf
+; with the format string and the values of up to three registers, executed only when
+; <cond> holds. r0-r12, r14, sp and the CPSR are put back to their entry values.
+; The preserved CPSR is kept in v1 across the call, so this relies on printf
+; preserving v1.
+ MACRO
+ Print$cond $switch, $fmt, $reg0, $reg1, $reg2
+ [ Debug$switch
+ [ "$cond" <> "" :LAND: "$cond" <> "AL"
+ ; Conditional form: skip the whole sequence when the opposite condition holds
+ LCLS opp
+opp SETS :REVERSE_CC: "$cond"
+ B$opp %FT82
+ ]
+ ; Build a register image on the stack in which word n holds rn: r0-r12 at words 0-12,
+ ; a placeholder (r12) at word 13 and r14 at word 14
+ PUSH {r12,r14}
+ PUSH {r0-r12}
+ ; Overwrite the word 13 placeholder with the value sp had before the two pushes
+ ADD ip, sp, #15*4
+ STR ip, [sp, #13*4]
+ MRS v1, CPSR
+ ; Fetch the requested registers' entry values from the image into the printf arguments
+ [ "$reg0" <> ""
+ LDR a2, [sp, #:RCONST:$reg0 * 4]
+ ]
+ [ "$reg1" <> ""
+ LDR a3, [sp, #:RCONST:$reg1 * 4]
+ ]
+ [ "$reg2" <> ""
+ LDR a4, [sp, #:RCONST:$reg2 * 4]
+ ]
+ ; a1 = address of the inline format string, lr = address to resume at after printf
+ ADR a1, %FT80
+ ADR lr, %FT81
+ B printf
+ ; Inline string data (= is the short form of DCB), jumped over via lr above
+80
+ [ PrintAtStartOfLine
+ = "$switch: "
+ ]
+ = "$fmt", 0
+ ; Remember for the next Print whether this format ended with a newline
+PrintAtStartOfLine SETL "$fmt" :RIGHT: 1 = "\n"
+ ALIGN
+81 MSR CPSR_cxsf, v1
+ POP {r0-r12}
+ ; Step over the word 13 (sp) slot rather than popping it
+ ADD sp, sp, #4
+ POP {r14}
+82
+ ]
+ MEND
+
+ [ :LNOT: :DEF: |objasm$version| :LAND: :LNOT: :DEF: |ads$version|
+ ; Assume asasm, which is lacking a number of key opcodes
+ ; Note there's a bug in asasm, the CC_ENCODING value shouldn't need shifting
+ ; Each macro below stands in for the instruction of the same name by emitting the
+ ; instruction word directly with DCI: condition code in bits 31:28, Rn in bits 19:16,
+ ; Rd in bits 15:12 and Rm in bits 3:0, ORed into the fixed opcode bits.
+
+ ; SEL<cond> Rd, Rn, Rm - fixed opcode bits &06800FB0
+ MACRO
+$label SEL$cond $Rd, $Rn, $Rm
+$label DCI :CC_ENCODING:"$cond":SHL:28 :OR: &06800FB0 :OR: (:RCONST:$Rn :SHL: 16) :OR: (:RCONST:$Rd :SHL: 12) :OR: (:RCONST:$Rm :SHL: 0)
+ MEND
+
+ ; UADD8<cond> Rd, Rn, Rm - fixed opcode bits &06500F90
+ MACRO
+$label UADD8$cond $Rd, $Rn, $Rm
+$label DCI :CC_ENCODING:"$cond":SHL:28 :OR: &06500F90 :OR: (:RCONST:$Rn :SHL: 16) :OR: (:RCONST:$Rd :SHL: 12) :OR: (:RCONST:$Rm :SHL: 0)
+ MEND
+
+ ; USUB8<cond> Rd, Rn, Rm - fixed opcode bits &06500FF0
+ MACRO
+$label USUB8$cond $Rd, $Rn, $Rm
+$label DCI :CC_ENCODING:"$cond":SHL:28 :OR: &06500FF0 :OR: (:RCONST:$Rn :SHL: 16) :OR: (:RCONST:$Rd :SHL: 12) :OR: (:RCONST:$Rm :SHL: 0)
+ MEND
+
+ ; USUB16<cond> Rd, Rn, Rm - fixed opcode bits &06500F70
+ MACRO
+$label USUB16$cond $Rd, $Rn, $Rm
+$label DCI :CC_ENCODING:"$cond":SHL:28 :OR: &06500F70 :OR: (:RCONST:$Rn :SHL: 16) :OR: (:RCONST:$Rd :SHL: 12) :OR: (:RCONST:$Rm :SHL: 0)
+ MEND
+
+ ; SETEND LE or SETEND BE (unconditional); any other operand is reported as an
+ ; assembly error
+ MACRO
+$label SETEND $endian
+ IF "$endian" = "LE"
+$label DCI &F1010000
+ ELIF "$endian" = "BE"
+$label DCI &F1010200
+ ELSE
+ ! 1, "Unrecognised SETEND endianness"
+ ENDIF
+ MEND
+
+ ]
+
+; Add a constant, using a minimal number of ARM instructions
+; One ADD is emitted per 8-bit chunk of the constant, starting at its lowest set bit
+; rounded down to an even bit position, so every chunk is an encodable ARM immediate.
+; The first ADD reads src, any further ones accumulate into dst.
+; A zero constant emits a single ADD #0 so that dst still receives src.
+; Doesn't handle cases where bit 31 of constant is set, but we're not expecting any of those
+ MACRO
+$lab AddL $dst, $src, $const
+ LCLA tmp
+tmp SETA $const
+ ; Widen every set bit so that it fills its aligned 2-bit pair
+tmp SETA tmp :OR: (((tmp :AND: &55555555) :SHL: 1) + ((tmp :AND: &AAAAAAAA) :SHR: 1))
+ LCLA lsb
+ ; Lowest set bit, now at an even position (0 when the constant is 0)
+lsb SETA tmp :AND::NOT: (tmp-1)
+ ; Smear the set bits downwards so tmp becomes a solid mask below the top set bit
+tmp SETA tmp :OR: (tmp :SHR: 2)
+tmp SETA tmp :OR: (tmp :SHR: 4)
+tmp SETA tmp :OR: (tmp :SHR: 8)
+tmp SETA tmp :OR: (tmp :SHR: 16)
+ LCLA msb
+ ; First bit position above the mask
+msb SETA (tmp+1) :AND::NOT: tmp
+ LCLS reg
+reg SETS "$src"
+$lab
+ [ lsb = 0
+ ; Zero constant: lsb * 256 would stay 0 and the loop below would never terminate
+ ADD $dst, $src, #0
+ |
+ ; The lsb <> 0 test also ends the loop if lsb * 256 wraps round to 0 after the
+ ; topmost chunk, instead of spinning on 0 < msb
+ WHILE lsb <> 0 :LAND: lsb < msb
+ ADD $dst, $reg, #($const) :AND: (lsb * &FF)
+lsb SETA lsb * 256
+reg SETS "$dst"
+ WEND
+ ]
+ MEND
+
+; Compute the integer (rounded-down) base-2 logarithm of a value at assembly time.
+; An input of 0 gives -1.
+ MACRO
+$out Log2 $in
+ IF $in = 0
+$out SETA -1
+ ELSE
+ LCLA tmp
+tmp SETA $in
+$out SETA 0
+ ; Count how many times the value can be halved before it drops to 1
+ WHILE tmp > 1
+$out SETA $out + 1
+tmp SETA tmp / 2
+ WEND
+ ENDIF
+ MEND
+
+; Set the output variable to the larger of two assembly-time numbers
+; (the second one when they compare equal, which is the same value)
+ MACRO
+$out Max $a, $b
+ IF $a > $b
+$out SETA $a
+ ELSE
+$out SETA $b
+ ENDIF
+ MEND
+
+; Decide whether an index is the last member of its aligned group, where the group
+; size is a power of two. A size below 2 always gives {TRUE}.
+ MACRO
+$result IsEndOfGroup $index, $size
+ LCLA index
+index SETA $index
+ LCLA size
+size SETA $size
+ IF size >= 2
+ ; index AND NOT (index + 1) isolates the run of trailing 1 bits of index; the group
+ ; ends here when that run includes the bit worth size / 2
+$result SETL (index :AND::NOT: (index + 1)) :AND: (size / 2) > 0
+ ELSE
+$result SETL {TRUE}
+ ENDIF
+ MEND
+
+; Render a number as its decimal digit string at assembly time (0 gives "0")
+ MACRO
+$str DecimalStr $num
+ LCLA n
+n SETA $num
+ [ n = 0
+$str SETS "0"
+ |
+$str SETS ""
+ ; Peel off the least significant digit each time round and prepend it
+ WHILE n <> 0
+$str SETS :CHR:(48 + n % 10) :CC: $str
+n SETA n / 10
+ WEND
+ ]
+ MEND
+
+; Convert a wk register index into the name of the physical register
+; The output string variable receives the current contents of the global string wkN,
+; where N is the given index.
+ MACRO
+$str LookupWk $index
+ LCLS wk
+ ; wk = the index as decimal text, e.g. "3"
+wk DecimalStr $index
+ ; wk = the name of the matching global, e.g. "wk3"
+wk SETS "wk$wk"
+ ; Substituting wk here names that global, so its contents are what gets copied
+$str SETS $wk
+ MEND
+
+; Assign the wk registers from a list of registers
+; The list is comma-separated. Entry N (counting from 0) sets the global wkN_num to the
+; register number of that entry and wkN to the string "r<number>". Every remaining
+; index up to and including 10 gets wkN_num = -1 and wkN = "invalid_register_wkN".
+ MACRO
+ AssignWk $list
+ LCLA wk_num
+ LCLS wk
+ LCLS tail
+ LCLS reg
+wk_num SETA 0
+ ; A comma is appended so that every entry, including the last, is comma-terminated
+tail SETS "$list,"
+ WHILE :LEN: tail > 0
+ ; wk = decimal text of the slot being assigned
+wk DecimalStr wk_num
+wk_num SETA wk_num + 1
+reg SETS ""
+ ; Move characters from the front of tail into reg up to the next comma
+ WHILE tail :LEFT: 1 <> ","
+reg SETS reg :CC: (tail :LEFT: 1)
+tail SETS tail :RIGHT: (:LEN:tail - 1)
+ WEND
+ ; Drop the comma itself
+tail SETS tail :RIGHT: (:LEN:tail - 1)
+ ; Record the register number, then rebuild the name in canonical "r<number>" form
+wk$wk._num SETA :RCONST: $reg
+wk$wk DecimalStr wk$wk._num
+wk$wk SETS "r" :CC: wk$wk
+ WEND
+ ; Ensure the remaining ones aren't used
+ WHILE wk_num <= 10
+wk DecimalStr wk_num
+wk_num SETA wk_num + 1
+wk$wk._num SETA -1
+wk$wk SETS "invalid_register_wk$wk"
+ WEND
+ MEND
+
+; See if a given register name is in a comma-separated list of registers
+; Sets the output logical variable to {TRUE} if some entry of the list is textually
+; equal to the given name, otherwise to {FALSE}.
+ MACRO
+$out RegIsInList $reg, $list
+ LCLS tail
+ ; A comma is appended so that every entry, including the last, is comma-terminated
+tail SETS "$list,"
+ WHILE :LEN: tail > 0
+ ; tail always starts at the beginning of an entry here; comparing the name plus its
+ ; comma against the front of tail therefore only matches a whole entry. The length
+ ; test comes first so :LEFT: is never asked for more characters than tail holds.
+ [ :LEN: "$reg," <= :LEN: tail
+ [ "$reg," = tail :LEFT: :LEN: "$reg,"
+$out SETL {TRUE}
+ MEXIT
+ ]
+ ]
+ ; No match: discard this entry up to the comma, then the comma itself
+ WHILE tail :LEFT: 1 <> ","
+tail SETS tail :RIGHT: (:LEN:tail - 1)
+ WEND
+tail SETS tail :RIGHT: (:LEN:tail - 1)
+ WEND
+$out SETL {FALSE}
+ MEND
+
+; Count the entries of a comma-separated register list at assembly time.
+; The result is one more than the number of commas, so an empty list gives 1.
+ MACRO
+$out CountRegsInList $list
+ LCLS tail
+tail SETS "$list"
+$out SETA 1
+ ; Consume the list one character at a time, counting the commas
+ WHILE :LEN: tail <> 0
+ IF tail :LEFT: 1 = ","
+$out SETA $out + 1
+ ENDIF
+tail SETS tail :RIGHT: (:LEN: tail - 1)
+ WEND
+ MEND
+
+; Data read macros
+
+; Emit the load sequence for the first sub-word read of source data.
+; Assembles to nothing (apart from the optional label) unless 0 < src_bpp < 32.
+;   base       - register holding the source address; every load post-increments it by 4
+;   data       - wk register index; the result register is found with LookupWk
+;   carry      - register that receives the extra ("right") word in the skewed variants
+;   pixels     - register, or the literal "#0"; only pixels AND (32/src_bpp - 1) is used
+;   fixed_skew - assembly-time skew in bits, used unless FLAG_NO_EXPAND_SKEW is set
+;   skew       - register holding the skew in bits, used when FLAG_NO_EXPAND_SKEW is set
+;   tmp        - scratch register
+; Unless FLAG_PROCESS_PARALLEL is set, the loaded value is finally rotated right by
+; (pixels AND (32/src_bpp - 1)) << src_bpp_shift bits.
+; NOTE(review): when pixels is the literal "#0", the FLAG_NO_EXPAND_SKEW variant and the
+; non-zero fixed_skew variant reach "CMP tmp, #0" without having written tmp first -
+; confirm that callers leave a suitable value there.
+ MACRO
+$lab ReadFirstSubWord $base, $data, $carry, $pixels, $fixed_skew, $skew, $tmp
+$lab
+ [ src_bpp > 0 :LAND: src_bpp < 32
+ LCLS reg0
+reg0 LookupWk $data
+ ; Variant 1: the skew is only known at run time, in the skew register
+ IF (flags :AND: FLAG_NO_EXPAND_SKEW) > 0
+ [ "$pixels" <> "#0"
+ ; Load a "left" word only if the sub-word pixel count exceeds the skew in pixels
+ AND $tmp, $pixels, #32/src_bpp - 1
+ CMP $tmp, $skew, LSR #src_bpp_shift
+ PrintHI Data, "ReadFirstSubWord: left@%p", $base
+ LDRHI $reg0, [$base], #4
+ PrintHI Data, " %08X\n", $reg0
+ ]
+ ; Load the "right" word into carry only when the skew is non-zero
+ CMP $skew, #0
+ PrintHI Data, "ReadFirstSubWord: right@%p", $base
+ LDRHI $carry, [$base], #4
+ PrintHI Data, " %08X\n", $carry
+ ; Nothing to combine when tmp is zero
+ CMP $tmp, #0
+ BEQ %FT01
+ ; result = (left << skew) | (carry >> (32 - skew))
+ RSB $tmp, $skew, #32
+ MOV $reg0, $reg0, LSL $skew
+ ORR $reg0, $reg0, $carry, LSR $tmp
+ Print Data, "ReadFirstSubWord: skew %u -> %08X\n", $skew, $reg0
+ [ flags :AND: FLAG_PROCESS_PARALLEL = 0 :LAND: "$pixels" <> "#0"
+ ; tmp was reused above, so the sub-word pixel count is recomputed before the rotate
+ AND $tmp, $pixels, #32/src_bpp - 1
+ MOV $tmp, $tmp, LSL #src_bpp_shift
+ MOV $reg0, $reg0, ROR $tmp
+ ]
+01
+ ; Variant 2: no skew - at most one word is loaded, and only if the count is non-zero
+ ELIF $fixed_skew == 0
+ [ "$pixels" <> "#0"
+ ANDS $tmp, $pixels, #32/src_bpp - 1
+ [ flags :AND: FLAG_PROCESS_PARALLEL = 0
+ BEQ %FT01
+ Print Data, "ReadFirstSubWord: left@%p", $base
+ LDR $reg0, [$base], #4
+ Print Data, " %08X\n", $reg0
+ MOV $tmp, $tmp, LSL #src_bpp_shift
+ MOV $reg0, $reg0, ROR $tmp
+01
+ |
+ PrintNE Data, "ReadFirstSubWord: left@%p", $base
+ LDRNE $reg0, [$base], #4
+ PrintNE Data, " %08X\n", $reg0
+ ]
+ ]
+ ; Variant 3: non-zero skew known at assembly time
+ ELSE
+ [ "$pixels" <> "#0"
+ ; Load a "left" word only if the sub-word pixel count exceeds the skew in pixels
+ AND $tmp, $pixels, #32/src_bpp - 1
+ CMP $tmp, #$fixed_skew/src_bpp
+ PrintHI Data, "ReadFirstSubWord: left@%p", $base
+ LDRHI $reg0, [$base], #4
+ PrintHI Data, " %08X\n", $reg0
+ ]
+ ; The "right" word is always loaded into carry in this variant
+ Print Data, "ReadFirstSubWord: right@%p", $base
+ LDR $carry, [$base], #4
+ Print Data, " %08X\n", $carry
+ ; Nothing to combine when tmp is zero
+ CMP $tmp, #0
+ BEQ %FT01
+ ; result = (left << fixed_skew) | (carry >> (32 - fixed_skew))
+ MOV $reg0, $reg0, LSL #$fixed_skew
+ ORR $reg0, $reg0, $carry, LSR #32-$fixed_skew
+ Print Data, "ReadFirstSubWord: skew $fixed_skew -> %08X\n", $reg0
+ [ flags :AND: FLAG_PROCESS_PARALLEL = 0
+ MOV $tmp, $tmp, LSL #src_bpp_shift
+ MOV $reg0, $reg0, ROR $tmp
+ ]
+01
+ ENDIF
+ ]
+ MEND
+
+ MACRO
+$lab ReadLastSubWord $base, $data, $carry, $pixels, $fixed_skew, $skew, $tmp
+$lab
+ [ src_bpp > 0 :LAND: src_bpp < 32
+ LCLS reg0
+reg0 LookupWk $data
+ IF (flags :AND: FLAG_NO_EXPAND_SKEW) > 0
+ CMP $skew, #0
+ BHI %FT01
+ TST $pixels, #32/src_bpp - 1
+ PrintNE Data, "ReadLastSubWord: next@%p", $base
+ LDRNE $reg0, [$base], #4
+ PrintNE Data, " %08X\n", $reg0
+ B %FT02
+01
+ Print Data, "ReadLastSubWord: left %08X\n", $carry
+ MOV $reg0, $carry, LSL $skew
+ AND $tmp, $pixels, #32/src_bpp - 1
+ RSB $tmp, $tmp, #32/src_bpp
+ CMP $tmp, $skew, LSR #src_bpp_shift
+ BHS %FT02
+ Print Data, "ReadLastSubWord: right@%p", $base
+ LDR $carry, [$base], #4
@@ Diff output truncated at 50000 characters. @@
More information about the Vm-dev
mailing list