[Vm-dev] [commit][2924] merge in ARM fast blt code, part II
commits at squeakvm.org
commits at squeakvm.org
Tue May 20 20:05:16 UTC 2014
Revision: 2924
Author: rowledge
Date: 2014-05-20 13:05:16 -0700 (Tue, 20 May 2014)
Log Message:
-----------
merge in ARM fast blt code, part II
Added Paths:
-----------
branches/Cog/platforms/unix/vm-display-X11/sqUnixX11Arm.S
Added: branches/Cog/platforms/unix/vm-display-X11/sqUnixX11Arm.S
===================================================================
--- branches/Cog/platforms/unix/vm-display-X11/sqUnixX11Arm.S (rev 0)
+++ branches/Cog/platforms/unix/vm-display-X11/sqUnixX11Arm.S 2014-05-20 20:05:16 UTC (rev 2924)
@@ -0,0 +1,246 @@
+;
+; Copyright © 2013 Raspberry Pi Foundation
+; Copyright © 2013 RISC OS Open Ltd
+;
+; Permission to use, copy, modify, distribute, and sell this software and its
+; documentation for any purpose is hereby granted without fee, provided that
+; the above copyright notice appear in all copies and that both that
+; copyright notice and this permission notice appear in supporting
+; documentation, and that the name of the copyright holders not be used in
+; advertising or publicity pertaining to distribution of the software without
+; specific, written prior permission. The copyright holders make no
+; representations about the suitability of this software for any purpose. It
+; is provided "as is" without express or implied warranty.
+;
+; THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+; SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+; FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+; SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+; OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+; SOFTWARE.
+;
+
+; Debug options: each switch is declared with GBLL; remove the ';' from the matching SETL line to turn it on
+ GBLL DebugData ; global logical switch, left at its default unless the SETL below is enabled
+;DebugData SETL {TRUE}
+ GBLL DebugPld ; global logical switch, left at its default unless the SETL below is enabled
+;DebugPld SETL {TRUE}
+ GBLL VerboseBuild ; global logical switch, left at its default unless the SETL below is enabled
+;VerboseBuild SETL {TRUE}
+
+ GET BitBltArmSimdAsm.hdr ; textual include; presumably supplies GenerateFunctions, the ReadNWords/WriteNWords macros, PreloadMiddle and the register names used below - confirm
+
+ AREA |sqUnixX11Arm$$Code|, CODE, READONLY ; everything below is assembled into this read-only code area
+ ARM ; assemble as ARM (not Thumb) instructions
+
+; ********************************************************************
+
+ MACRO ; Reduce one x888 pixel (src operand) to a 3:3:2 rrrgggbb index and load the colour-map word for it into the dst operand; the src operand is clobbered
+ Convert_x888_8_LEPacking_1pixel $src, $dst
+ AND $dst, ht, $src, LSR #6 ; 00000000000000rrr0000000000000bb (relies on ht = &38003, as loaded by the _init macro)
+ AND $src, $src, #&E000 ; 0000000000000000ggg0000000000000
+ ORR $dst, $dst, $dst, LSR #10 ; 00000000000000rrr0000000rrr000bb
+ ORR $dst, $dst, $src, LSR #11 ; 00000000000000rrr0000000rrrgggbb
+ AND $dst, $dst, #&FF ; 000000000000000000000000rrrgggbb
+ LDR $dst, [map, $dst, LSL #2] ; index is scaled by 4, i.e. the table at map holds one word per entry
+ MEND
+
+ MACRO ; Convert four x888 pixels and pack the four looked-up values into the dst operand, first pixel in the lowest byte; all four source operands are clobbered
+ Convert_x888_8_LEPacking_4pixels $src0, $src1, $src2, $src3, $dst
+ Convert_x888_8_LEPacking_1pixel $src0, $dst ; pixel 0 -> dst operand
+ Convert_x888_8_LEPacking_1pixel $src1, $src0 ; pixel 1 -> first source operand (already consumed, reused as a temporary)
+ Convert_x888_8_LEPacking_1pixel $src2, $src1 ; pixel 2 -> second source operand
+ Convert_x888_8_LEPacking_1pixel $src3, $src2 ; pixel 3 -> third source operand
+ ORR $dst, $dst, $src0, LSL #8 ; pixel 1 into bits 8-15 (assumes every map entry fits in 8 bits - TODO confirm)
+ ORR $dst, $dst, $src1, LSL #16 ; pixel 2 into bits 16-23
+ ORR $dst, $dst, $src2, LSL #24 ; pixel 3 into bits 24-31
+ MEND
+
+ MACRO ; Setup code for the x888 -> 8bpp functions: loads the 3:3:2 extraction mask into ht
+ Convert_x888_8_LEPacking32_8_init
+ LDR ht, =&38003 ; mask with bits 17-15 and 1-0 set; not encodable as an ARM immediate, so it comes from a literal pool
+ B %FT00 ; branch forward over the literal pool that follows
+ LTORG ; emit the literal pool here so the LDR above can reach it
+00 ; execution continues here after the pool
+ MEND
+
+ MACRO ; Handler for a single 8-bit destination pixel; deliberately expands to no instructions
+ Convert_x888_8_LEPacking32_8_8bits $src, $dst, $fixed_skew
+ ; This code should never be executed. It's for handling stray
+ ; pixels at the start or end of the row, but for now the
+ ; assembler framework only supports packing these big-endian
+ ; into words, which isn't what we want.
+ MEND
+
+ MACRO ; Handler for 16 bits (two 8-bit pixels) of destination; deliberately expands to no instructions
+ Convert_x888_8_LEPacking32_8_16bits $src, $dst, $fixed_skew
+ ; This code should never be executed. It's for handling stray
+ ; pixels at the start or end of the row, but for now the
+ ; assembler framework only supports packing these big-endian
+ ; into words, which isn't what we want.
+ MEND
+
+ MACRO ; Produce one destination word (four 8-bit pixels) from four x888 source words
+ Convert_x888_8_LEPacking32_8_32bits $src, $dst, $fixed_skew
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches 4 source words into wk4-wk7 (helper is defined outside this file - confirm)
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk0 ; packed result lands in wk0; wk4-wk7 are clobbered
+ Write1Word dst, 0 ; presumably stores wk0 to the destination (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; Produce two destination words (eight 8-bit pixels) from eight x888 source words
+ Convert_x888_8_LEPacking32_8_64bits $src, $fixed_skew
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 0-3 into wk4-wk7 (helper defined elsewhere - confirm)
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk0 ; pixels 0-3 packed into wk0
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 4-7 into wk4-wk7
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk1 ; pixels 4-7 packed into wk1
+ Write2Words dst, 0 ; presumably stores wk0 and wk1 (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; First half of the 16-pixel (four destination words) step: converts 12 pixels into wk0-wk2 and fetches the last 4 for the _tail macro
+ Convert_x888_8_LEPacking32_8_128bits_head $src, $fixed_skew, $intra_preloads
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 0-3 into wk4-wk7 (helper defined elsewhere - confirm)
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk0 ; pixels 0-3 packed into wk0
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 4-7
+ [ "$intra_preloads" <> "" ; only when the intra_preloads argument is non-empty
+ PreloadMiddle ; framework macro defined elsewhere; presumably issues a preload part-way through the block - confirm
+ ]
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk1 ; pixels 4-7 packed into wk1
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 8-11
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk2 ; pixels 8-11 packed into wk2
+ Read4Words src, 4, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 12-15; they are converted by the _tail macro
+ MEND
+
+ MACRO ; Second half of the 16-pixel step: converts the four pixels left in wk4-wk7 by the _head macro and writes out all four words
+ Convert_x888_8_LEPacking32_8_128bits_tail $src
+ Convert_x888_8_LEPacking_4pixels $wk4, $wk5, $wk6, $wk7, $wk3 ; pixels 12-15 packed into wk3
+ Write4Words dst, 0 ; presumably stores wk0-wk3 (helper defined elsewhere - confirm)
+ MEND
+
+;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
+; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
+
+Convert_x888_8_LEPacking GenerateFunctions 32, 8,, \
+ FLAG_COLOUR_MAP :OR: FLAG_DST_WRITEONLY :OR: FLAG_SPILL_LINE_VARS, 3, \
+ "y,stride_d,stride_s,ht_info,bitptrs,skew,orig_w,carry", \
+ "x,y,stride_d,stride_s", ht_info,, init ; 32bpp source, 8bpp destination, prefetch distance 3; leading_pixels_reg = ht_info, the 4th work register (wk3); preload_offset_reg left empty; init macro supplied, newline/cleanup omitted
+
+; ********************************************************************
+
+ MACRO ; Convert two x888 pixels to 5:6:5 and pack them into the dst operand: first source pixel in the low halfword, second in the high halfword; both source operands are clobbered
+ Convert_x888_0565_LEPacking_2pixels $src0, $src1, $dst
+ AND $dst, ht, $src1, LSR #3 ; 00000000000rrrrr00000000000bbbbb (relies on ht = &001F001F, as loaded by the _init macro)
+ AND $src1, $src1, #&FC00 ; 0000000000000000gggggg0000000000
+ ORR $dst, $dst, $dst, LSR #5 ; 00000000000rrrrrrrrrr000000bbbbb
+ ORR $src1, $dst, $src1, LSR #5 ; 00000000000rrrrrrrrrrggggggbbbbb
+ AND $dst, ht, $src0, LSR #3 ; 00000000000RRRRR00000000000BBBBB
+ AND $src0, $src0, #&FC00 ; 0000000000000000GGGGGG0000000000
+ ORR $dst, $dst, $dst, LSR #5 ; 00000000000RRRRRRRRRR000000BBBBB
+ ORR $src0, $dst, $src0, LSR #5 ; 00000000000RRRRRRRRRRGGGGGGBBBBB
+ BIC $src0, $src0, #&1F0000 ; 0000000000000000RRRRRGGGGGGBBBBB (the stray red copy in bits 16-20 must go before the merge)
+ ORR $dst, $src0, $src1, LSL #16 ; rrrrrggggggbbbbbRRRRRGGGGGGBBBBB (the second pixel's stray red copy is shifted out by the LSL #16)
+ MEND
+
+ MACRO ; Setup code for the x888 -> 0565 functions: loads the 5-bit red/blue extraction mask into ht
+ Convert_x888_0565_LEPacking32_16_init
+ LDR ht, =&001F001F ; mask with bits 20-16 and 4-0 set; fetched from the literal pool below
+ B %FT00 ; branch forward over the literal pool that follows
+ LTORG ; emit the literal pool here so the LDR above can reach it
+00 ; execution continues here after the pool
+ MEND
+
+ MACRO ; Handler for a single 16-bit destination pixel; deliberately expands to no instructions
+ Convert_x888_0565_LEPacking32_16_16bits $src, $dst, $fixed_skew
+ ; This code should never be executed. It's for handling stray
+ ; pixels at the start or end of the row, but for now the
+ ; assembler framework only supports packing these big-endian
+ ; into words, which isn't what we want.
+ MEND
+
+ MACRO ; Produce one destination word (two 16-bit pixels) from two x888 source words
+ Convert_x888_0565_LEPacking32_16_32bits $src, $dst, $fixed_skew
+ Read2Words src, 3, carry, $fixed_skew, skew, unused ; presumably fetches 2 source words into wk3 and wk4 (helper is defined outside this file - confirm)
+ Convert_x888_0565_LEPacking_2pixels $wk3, $wk4, $wk0 ; packed pair lands in wk0; wk3 and wk4 are clobbered
+ Write1Word dst, 0 ; presumably stores wk0 to the destination (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; Produce two destination words (four 16-bit pixels) from four x888 source words
+ Convert_x888_0565_LEPacking32_16_64bits $src, $fixed_skew
+ Read4Words src, 3, carry, $fixed_skew, skew, unused ; presumably fetches 4 source words into wk3-wk6 (helper defined elsewhere - confirm)
+ Convert_x888_0565_LEPacking_2pixels $wk3, $wk4, $wk0 ; pixels 0-1 packed into wk0
+ Convert_x888_0565_LEPacking_2pixels $wk5, $wk6, $wk1 ; pixels 2-3 packed into wk1
+ Write2Words dst, 0 ; presumably stores wk0 and wk1 (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; First half of the 8-pixel (four destination words) step: converts 4 pixels into wk0-wk1 and fetches the next 4 for the _tail macro; the intra_preloads argument is not used here
+ Convert_x888_0565_LEPacking32_16_128bits_head $src, $fixed_skew, $intra_preloads
+ Read4Words src, 3, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 0-3 into wk3-wk6 (helper defined elsewhere - confirm)
+ Convert_x888_0565_LEPacking_2pixels $wk3, $wk4, $wk0 ; pixels 0-1 packed into wk0
+ Convert_x888_0565_LEPacking_2pixels $wk5, $wk6, $wk1 ; pixels 2-3 packed into wk1
+ Read4Words src, 3, carry, $fixed_skew, skew, unused ; presumably fetches source pixels 4-7 into wk3-wk6; they are converted by the _tail macro
+ MEND
+
+ MACRO ; Second half of the 8-pixel step: converts the four pixels left in wk3-wk6 by the _head macro and writes out all four words
+ Convert_x888_0565_LEPacking32_16_128bits_tail $src
+ Convert_x888_0565_LEPacking_2pixels $wk3, $wk4, $wk2 ; pixels 4-5 packed into wk2 (wk3's pixel has been consumed after this line)
+ Convert_x888_0565_LEPacking_2pixels $wk5, $wk6, $wk3 ; pixels 6-7 packed into wk3, which is free to be reused as the output
+ Write4Words dst, 0 ; presumably stores wk0-wk3 (helper defined elsewhere - confirm)
+ MEND
+
+;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
+; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
+
+Convert_x888_0565_LEPacking GenerateFunctions 32, 16,, \
+ FLAG_DST_WRITEONLY :OR: FLAG_SPILL_LINE_VARS, 3, \
+ "stride_s,ht_info,map,bitptrs,skew,orig_w,carry", \
+ "x,stride_s", map, scratch, init ; 32bpp source, 16bpp destination, prefetch distance 3; leading_pixels_reg = map, the 3rd work register (wk2); preload_offset_reg = scratch; init macro supplied, newline/cleanup omitted
+
+; ********************************************************************
+
+ MACRO ; Convert one x888 pixel to the opposite channel order by loading it byte-reversed and shifting the unused byte out
+ Convert_x888_x888BGR_LEPacking32_32_32bits $src, $dst, $fixed_skew
+ SETEND BE ; make the following data load big-endian, i.e. byte-reversed relative to little-endian
+ Read1Word src, 0, carry, $fixed_skew, skew, unused ; presumably fetches 1 source word into wk0 (helper is defined outside this file - confirm)
+ SETEND LE ; restore little-endian data accesses before the store
+ MOV $wk0, $wk0, LSR #8 ; discard the low byte after the reversal (the former top byte), leaving the three colour bytes in swapped order
+ Write1Word dst, 0 ; presumably stores wk0 to the destination (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; Convert two x888 pixels to the opposite channel order using byte-reversed loads
+ Convert_x888_x888BGR_LEPacking32_32_64bits $src, $fixed_skew
+ SETEND BE ; make the following data loads big-endian, i.e. byte-reversed relative to little-endian
+ Read2Words src, 0, carry, $fixed_skew, skew, unused ; presumably fetches 2 source words into wk0 and wk1 (helper defined elsewhere - confirm)
+ SETEND LE ; restore little-endian data accesses before the stores
+ MOV $wk0, $wk0, LSR #8 ; pixel 0: shift out the byte that was the top byte before reversal
+ MOV $wk1, $wk1, LSR #8 ; pixel 1: likewise
+ Write2Words dst, 0 ; presumably stores wk0 and wk1 (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; First half of the 4-pixel step: fetches four source words byte-reversed; the intra_preloads argument is not used here
+ Convert_x888_x888BGR_LEPacking32_32_128bits_head $src, $fixed_skew, $intra_preloads
+ SETEND BE ; NOTE(review): big-endian mode stays active until the _tail macro executes SETEND LE - confirm the framework does no other data loads/stores in between
+ Read4Words src, 0, carry, $fixed_skew, skew, unused ; presumably fetches 4 source words into wk0-wk3 (helper defined elsewhere - confirm)
+ MEND
+
+ MACRO ; Second half of the 4-pixel step: leaves big-endian mode, shifts each byte-reversed pixel into place and writes out all four words
+ Convert_x888_x888BGR_LEPacking32_32_128bits_tail $src
+ SETEND LE ; undo the SETEND BE issued by the _head macro before any store happens
+ MOV $wk0, $wk0, LSR #8 ; pixel 0: shift out the byte that was the top byte before reversal
+ MOV $wk1, $wk1, LSR #8 ; pixel 1: likewise
+ MOV $wk2, $wk2, LSR #8 ; pixel 2: likewise
+ MOV $wk3, $wk3, LSR #8 ; pixel 3: likewise
+ Write4Words dst, 0 ; presumably stores wk0-wk3 (helper defined elsewhere - confirm)
+ MEND
+
+;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
+; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
+
+Convert_x888_x888BGR_LEPacking GenerateFunctions 32, 32,, \
+ FLAG_DST_WRITEONLY, 2, \
+ "ht,ht_info,map,bitptrs", \
+ "", skew, scratch ; 32bpp source and destination, prefetch distance 2; no line-saved registers; leading_pixels_reg = skew, preload_offset_reg = scratch; no init/newline/cleanup macros
+
+; ********************************************************************
+
+ END
More information about the Vm-dev
mailing list