[Vm-dev] VM Maker: VMMaker-tpr.321.mcz

commits at source.squeak.org commits at source.squeak.org
Tue Jun 18 20:59:49 UTC 2013


tim Rowledge uploaded a new version of VMMaker to project VM Maker:
http://source.squeak.org/VMMaker/VMMaker-tpr.321.mcz

==================== Summary ====================

Name: VMMaker-tpr.321
Author: tpr
Time: 18 June 2013, 1:58:30.315 pm
UUID: f4c1a8a7-e897-437a-8622-645cc63e4382
Ancestors: VMMaker-dtl.320

Add the VMMaker parts to support a new faster BitBLT plugin.
In copyBIts we can now branch off to a specialised function that will make use of more tightly code routines for many important BLT rules and cases. For ARM cpu platforms there are also very specialised assembler routines for some cases - but many are in C and suitable for any platform.

=============== Diff against VMMaker-dtl.320 ===============

Item was changed:
  ----- Method: BitBltSimulation class>>declareCVarsIn: (in category 'translation') -----
  declareCVarsIn: aCCodeGenerator
+ 
+ 	"add option of  fast path BitBLT code header"
+ 	aCCodeGenerator
+ 		addHeaderFile:'#ifdef ENABLE_FAST_BLT
+ #include "BitBltDispatch.h"
+ #else
+ // to handle the unavoidable decl in the spec of copyBitsFallback();
+ #define operation_t void
+ #endif'.
+ 		
  	aCCodeGenerator var: 'opTable'
  		declareC: 'void *opTable[' , OpTableSize printString , ']'.
  	aCCodeGenerator var: 'maskTable'
  		declareC:'int maskTable[33] = {
  0, 1, 3, 0, 15, 31, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 65535,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1
  }'.
  	aCCodeGenerator var: 'ditherMatrix4x4'
  		declareC:'const int ditherMatrix4x4[16] = {
  0,	8,	2,	10,
  12,	4,	14,	6,
  3,	11,	1,	9,
  15,	7,	13,	5
  }'.
  	aCCodeGenerator var: 'ditherThresholds16'
  		declareC:'const int ditherThresholds16[8] = { 0, 2, 4, 6, 8, 12, 14, 16 }'.
  	aCCodeGenerator var: 'ditherValues16'
  		declareC:'const int ditherValues16[32] = {
  0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
  }'.
  
  	aCCodeGenerator var: 'warpBitShiftTable'
  		declareC:'int warpBitShiftTable[32]'.
  
  	aCCodeGenerator var:'cmShiftTable' 
  		type:'int *'.
  	aCCodeGenerator var:'cmMaskTable' 
  		type:'unsigned int *'.
  	aCCodeGenerator var:'cmLookupTable' 
  		type:'unsigned int *'.
  
  	aCCodeGenerator var: 'dither8Lookup'
  		declareC:' unsigned char dither8Lookup[4096]'.
  
  	aCCodeGenerator var:'ungammaLookupTable' 
  		type: 'unsigned char *'.
  	aCCodeGenerator var:'gammaLookupTable' 
  		type: 'unsigned char *'.
  
  	aCCodeGenerator var: 'querySurfaceFn' type: 'void *'.
  	aCCodeGenerator var: 'lockSurfaceFn' type: 'void *'.
  	aCCodeGenerator var: 'unlockSurfaceFn' type: 'void *'!

Item was changed:
  ----- Method: BitBltSimulation class>>requiredMethodNames (in category 'translation') -----
  requiredMethodNames
+ 	^(self opTable, #(copyBits:Fallback:)) asSet!
- 	^self opTable asSet!

Item was changed:
  ----- Method: BitBltSimulation>>copyBits (in category 'setup') -----
  copyBits
  	"This function is exported for the Balloon engine"
  	<export: true>
  	<inline: false>
  	self clipRange.
  	(bbW <= 0 or: [bbH <= 0]) ifTrue:
  		["zero width or height; noop"
  		affectedL := affectedR := affectedT := affectedB := 0.
  		^ nil].
  	"Lock the surfaces"
  	self lockSurfaces ifFalse:[^interpreterProxy primitiveFail].
+ 	self isDefined: 'ENABLE_FAST_BLT'
+ 		inSmalltalk: [false "there is no current fast path specialisation code in-image"]
+ 		comment: '// you really, really mustn''t call this unless you have the rest of the code to link to'
+ 		ifTrue:[self copyBitsFastPathSpecialised]
+ 		ifFalse: [self copyBitsLockedAndClipped].
- 	self copyBitsLockedAndClipped.
  	self unlockSurfaces.!

Item was added:
+ ----- Method: BitBltSimulation>>copyBits:Fallback: (in category 'setup') -----
+ copyBits: op Fallback: flags
+ 	"Recover from the fast path specialised code saying Help-I-cant-cope"
+ 	|done |
+ 	<static: false>
+ 	<returnTypeC: 'void'>
+ 	<inline: false>
+ 	<var: #op type: 'operation_t *'>
+ 	<var: #flags type:'unsigned int'>
+ 	self isDefined: #'ENABLE_FAST_BLT'
+ 	inSmalltalk: [false]
+ 	comment: 'only for fast blt platform specific code'
+ 	ifTrue:[
+ 			"recover values from the operation struct used by the fast ARM code"
+ 			self cCode:'
+ 	combinationRule = op->combinationRule;
+ 	noSource = op->noSource;
+ 	sourceBits = (sqInt) op->src.bits;
+ 	sourcePitch = op->src.pitch;
+ 	sourceDepth = op->src.depth;
+ 	sourceMSB = op->src.msb;
+ 	sx = op->src.x;
+ 	sy = op->src.y;
+ 	destBits = (sqInt) op->dest.bits;
+ 	destPitch = op->dest.pitch;
+ 	destDepth = op->dest.depth;
+ 	destMSB = op->dest.msb;
+ 	dx = op->dest.x;
+ 	dy = op->dest.y;
+ 	bbW = op->width;
+ 	bbH = op->height;
+ 	cmFlags = op->cmFlags;
+ 	cmShiftTable = (void *) op->cmShiftTable;
+ 	cmMaskTable = (void *) op->cmMaskTable;
+ 	cmMask = op->cmMask;
+ 	cmLookupTable = (void *) op->cmLookupTable;
+ 	noHalftone = op->noHalftone;
+ 	halftoneHeight = op->halftoneHeight;
+ 	halftoneBase = (sqInt) op->halftoneBase;
+ 	if (combinationRule == 30 || combinationRule == 31) {
+ 		sourceAlpha = op->opt.sourceAlpha;
+ 	}
+ 	if (combinationRule == 41) {
+ 		componentAlphaModeColor = op->opt.componentAlpha.componentAlphaModeColor;
+ 		componentAlphaModeAlpha = op->opt.componentAlpha.componentAlphaModeAlpha;
+ 		gammaLookupTable = (void *) op->opt.componentAlpha.gammaLookupTable;
+ 		ungammaLookupTable = (void *) op->opt.componentAlpha.ungammaLookupTable;
+ 	}'.
+ 	
+ 				destPPW := 32 / destDepth.
+ 				cmBitsPerColor := 0.
+ 				cmMask = 16r1FF ifTrue: [cmBitsPerColor := 3].
+ 				cmMask = 16rFFF ifTrue: [cmBitsPerColor := 4].
+ 				cmMask = 16r3FFF ifTrue: [cmBitsPerColor := 5].
+ 	
+ 				"Try a shortcut for stuff that should be run as quickly as possible"
+ 				done := self tryCopyingBitsQuickly.
+ 				done ifTrue:[^nil].
+ 
+ 				bitCount := 0.
+ 				"Choose and perform the actual copy loop."
+ 				self performCopyLoop]
+ 
+ 	
+ 
+ 
+ !

Item was added:
+ ----- Method: BitBltSimulation>>copyBitsFastPathSpecialised (in category 'setup') -----
+ copyBitsFastPathSpecialised
+ 	"Perform the actual copyBits operation using the fast path specialised code; fail some cases by falling back to normal code.
+ 	Assume: Surfaces have been locked and clipping was performed."
+ 	<inline: false>
+ 
+ 	self
+ 		isDefined: #'ENABLE_FAST_BLT'
+ 		inSmalltalk: [false]
+ 		comment: 'only for ARM'
+ 		ifTrue:[
+ 	"set the affected area to 0 first"
+ 	affectedL := affectedR := affectedT := affectedB := 0.
+ 	
+ 	self copyBitsRule41Test.	
+ 	(interpreterProxy failed not)
+ 		ifFalse: [^ interpreterProxy primitiveFail].
+ 
+  	"we skip the tryCopyingBitsQuickly and leave that to falback code"
+ 	 
+ 	(combinationRule = 30) | (combinationRule = 31) ifTrue:
+ 		["Check and fetch source alpha parameter for alpha blend"
+ 		interpreterProxy methodArgumentCount = 1
+ 			ifTrue: [sourceAlpha := interpreterProxy stackIntegerValue: 0.
+ 					(interpreterProxy failed not and: [(sourceAlpha >= 0) & (sourceAlpha <= 255)])
+ 						ifFalse: [^ interpreterProxy primitiveFail]]
+ 			ifFalse: [^ interpreterProxy primitiveFail]].
+ 
+ 	"we don't worry about bitCount"
+ 	"bitCount := 0."
+ 
+ 	"We don't  do - Choose and perform the actual copy loop."
+ 	"self performCopyLoop."
+ 
+ 	"this is done inversely to plain copyBitsLockedAndClipped"
+ 	(combinationRule ~= 22) & (combinationRule ~= 32) ifTrue:
+ 		["zero width and height; return the count"
+ 		affectedL := dx.
+ 		affectedR := dx + bbW.
+ 		affectedT := dy.
+ 		affectedB := dy + bbH].
+ 	
+ 	"Now we fill the 'operation' structure and pass it to the sneaky ARM code"
+ 	self cCode:'
+ 	// fill the operation structure
+ 	operation_t op;
+ 	op.combinationRule = combinationRule;
+ 	op.noSource = noSource;
+ 	op.src.bits = (void *) sourceBits;
+ 	op.src.pitch = sourcePitch;
+ 	op.src.depth = sourceDepth;
+ 	op.src.msb = sourceMSB;
+ 	op.src.x = sx;
+ 	op.src.y = sy;
+ 	op.dest.bits = (void *) destBits;
+ 	op.dest.pitch = destPitch;
+ 	op.dest.depth = destDepth;
+ 	op.dest.msb = destMSB;
+ 	op.dest.x = dx;
+ 	op.dest.y = dy;
+ 	op.width = bbW;
+ 	op.height = bbH;
+ 	op.cmFlags = cmFlags;
+ 	op.cmShiftTable = (void *) cmShiftTable;
+ 	op.cmMaskTable = (void *) cmMaskTable;
+ 	op.cmMask = cmMask;
+ 	op.cmLookupTable = (void *) cmLookupTable;
+ 	op.noHalftone = noHalftone;
+ 	op.halftoneHeight = halftoneHeight;
+ 	op.halftoneBase = (void *) halftoneBase;
+ 	if (combinationRule == 30 || combinationRule == 31) {
+ 		op.opt.sourceAlpha = sourceAlpha;
+ 	}
+ 	if (combinationRule == 41) {
+ 		op.opt.componentAlpha.componentAlphaModeColor = componentAlphaModeColor;
+ 		op.opt.componentAlpha.componentAlphaModeAlpha = componentAlphaModeAlpha;
+ 		op.opt.componentAlpha.gammaLookupTable = (void *) gammaLookupTable;
+ 		op.opt.componentAlpha.ungammaLookupTable = (void *) ungammaLookupTable;
+ 	}
+ 	// call the sneaky code
+ 	copyBitsDispatch(&op)'
+ 	]!

Item was changed:
  ----- Method: BitBltSimulation>>copyBitsLockedAndClipped (in category 'setup') -----
  copyBitsLockedAndClipped
  	"Perform the actual copyBits operation.
  	Assume: Surfaces have been locked and clipping was performed."
+ 	| done |
+ 	<inline: false>
- 	| done gammaLookupTableOop ungammaLookupTableOop |
- 	<inline: true>
- 	"Try a shortcut for stuff that should be run as quickly as possible"
  	
+ 	self copyBitsRule41Test.	
+ 	(interpreterProxy failed not)
+ 		ifFalse: [^ interpreterProxy primitiveFail].
+ 
+  	"Try a shortcut for stuff that should be run as quickly as possible"
+ 	done := self tryCopyingBitsQuickly.
- 	combinationRule = 41
- 		ifTrue:["fetch the forecolor into componentAlphaModeColor."
- 			componentAlphaModeAlpha := 255.
- 			componentAlphaModeColor := 16777215.
- 			gammaLookupTable := nil.
- 			ungammaLookupTable := nil.
- 			interpreterProxy methodArgumentCount >= 2
- 				ifTrue:[
- 					componentAlphaModeAlpha := interpreterProxy stackIntegerValue: (interpreterProxy methodArgumentCount - 2).
- 					(interpreterProxy failed not)
- 						ifFalse: [^ interpreterProxy primitiveFail].
- 					componentAlphaModeColor := interpreterProxy stackIntegerValue: (interpreterProxy methodArgumentCount - 1).
- 					(interpreterProxy failed not)
- 						ifFalse: [^ interpreterProxy primitiveFail].
- 					interpreterProxy methodArgumentCount = 4
- 						ifTrue:[
- 							gammaLookupTableOop := interpreterProxy stackObjectValue: 1.
- 							(interpreterProxy isBytes: gammaLookupTableOop) 
- 								ifTrue:[gammaLookupTable := interpreterProxy firstIndexableField: gammaLookupTableOop.].
- 							ungammaLookupTableOop := interpreterProxy stackObjectValue: 0.
- 							(interpreterProxy isBytes: ungammaLookupTableOop) 
- 								ifTrue:[ungammaLookupTable := interpreterProxy firstIndexableField: ungammaLookupTableOop]]]
- 				ifFalse:[
- 					interpreterProxy methodArgumentCount = 1
- 						ifTrue: [
- 							componentAlphaModeColor := interpreterProxy stackIntegerValue: 0.
- 							(interpreterProxy failed not)
- 								ifFalse: [^ interpreterProxy primitiveFail]]
- 						ifFalse:[^ interpreterProxy primitiveFail]]].	
- 	
-  	done := self tryCopyingBitsQuickly.
  	done ifTrue:[^nil].
  
  	(combinationRule = 30) | (combinationRule = 31) ifTrue:
  		["Check and fetch source alpha parameter for alpha blend"
  		interpreterProxy methodArgumentCount = 1
  			ifTrue: [sourceAlpha := interpreterProxy stackIntegerValue: 0.
  					(interpreterProxy failed not and: [(sourceAlpha >= 0) & (sourceAlpha <= 255)])
  						ifFalse: [^ interpreterProxy primitiveFail]]
  			ifFalse: [^ interpreterProxy primitiveFail]].
  
  	bitCount := 0.
  	"Choose and perform the actual copy loop."
  	self performCopyLoop.
  
  	(combinationRule = 22) | (combinationRule = 32) ifTrue:
  		["zero width and height; return the count"
  		affectedL := affectedR := affectedT := affectedB := 0]. 
  	hDir > 0
  		ifTrue: [affectedL := dx.
  				affectedR := dx + bbW]
  		ifFalse: [affectedL := dx - bbW + 1.
  				affectedR := dx + 1].
  	vDir > 0
  		ifTrue: [affectedT := dy.
  				affectedB := dy + bbH]
  		ifFalse: [affectedT := dy - bbH + 1.
  				affectedB := dy + 1]!

Item was added:
+ ----- Method: BitBltSimulation>>copyBitsRule41Test (in category 'setup') -----
+ copyBitsRule41Test
+ 	"Test possible use of rule 41, rgbComponentAlpha:with: Nothing to return, just set up some variables"
+ 	| gammaLookupTableOop ungammaLookupTableOop |
+ 	<inline: false>
+ 	
+ 	combinationRule = 41
+ 		ifTrue:["fetch the forecolor into componentAlphaModeColor."
+ 			componentAlphaModeAlpha := 255.
+ 			componentAlphaModeColor := 16777215.
+ 			gammaLookupTable := nil.
+ 			ungammaLookupTable := nil.
+ 			interpreterProxy methodArgumentCount >= 2
+ 				ifTrue:[
+ 					componentAlphaModeAlpha := interpreterProxy stackIntegerValue: (interpreterProxy methodArgumentCount - 2).
+ 					(interpreterProxy failed not)
+ 						ifFalse: [^ interpreterProxy primitiveFail].
+ 					componentAlphaModeColor := interpreterProxy stackIntegerValue: (interpreterProxy methodArgumentCount - 1).
+ 					(interpreterProxy failed not)
+ 						ifFalse: [^ interpreterProxy primitiveFail].
+ 					interpreterProxy methodArgumentCount = 4
+ 						ifTrue:[
+ 							gammaLookupTableOop := interpreterProxy stackObjectValue: 1.
+ 							(interpreterProxy isBytes: gammaLookupTableOop) 
+ 								ifTrue:[gammaLookupTable := interpreterProxy firstIndexableField: gammaLookupTableOop.].
+ 							ungammaLookupTableOop := interpreterProxy stackObjectValue: 0.
+ 							(interpreterProxy isBytes: ungammaLookupTableOop) 
+ 								ifTrue:[ungammaLookupTable := interpreterProxy firstIndexableField: ungammaLookupTableOop]]]
+ 				ifFalse:[
+ 					interpreterProxy methodArgumentCount = 1
+ 						ifTrue: [
+ 							componentAlphaModeColor := interpreterProxy stackIntegerValue: 0.
+ 							(interpreterProxy failed not)
+ 								ifFalse: [^ interpreterProxy primitiveFail]]
+ 						ifFalse:[^ interpreterProxy primitiveFail]]].	
+ 
+ 
+ !

Item was changed:
  ----- Method: BitBltSimulation>>initialiseModule (in category 'initialize-release') -----
  initialiseModule
  	<export: true>
  	self initBBOpTable.
  	self initDither8Lookup.
+ 	self 
+ 	 	isDefined: #'ENABLE_FAST_BLT'
+ 		inSmalltalk: [false]
+ 		comment: 'init the fastpath lists'
+ 		ifTrue:[self initialiseCopyBits].
  	^true!

Item was changed:
  ----- Method: CCodeGenerator>>emitCHeaderForPrimitivesOn: (in category 'C code generator') -----
  emitCHeaderForPrimitivesOn: aStream
  	"Write a C file header for compiled primitives onto the given stream."
  
  	aStream
  		nextPutAll: '/* Automatically generated from Squeak (';
  		nextPutAll: VMMaker versionString;
  		nextPutAll: ') on '.
  	Time dateAndTimeNow do: [:e | aStream nextPutAll: e asString; nextPut: Character space].
  	aStream
  		nextPutAll: '*/';
  		cr; cr.
  	self fileHeaderVersionStampForSourceClass: vmClass.
  	aStream cr; cr;
  		nextPutAll: '#include "sq.h"'; cr; cr.
  
  	"Additional header files"
+ 		self emitHeaderFilesOn: aStream.
- 	headerFiles do:[:hdr|
- 		aStream nextPutAll:'#include '; nextPutAll: hdr; cr].
  
  	aStream nextPutAll: '
  #include "sqMemoryAccess.h"
  
  /*** Imported Functions/Variables ***/
  extern sqInt stackValue(sqInt);
  extern sqInt stackIntegerValue(sqInt);
  extern sqInt successFlag;
  
  /* allows accessing Strings in both C and Smalltalk */
  #define asciiValue(c) c
  '.
  	aStream cr.!

Item was changed:
  ----- Method: CCodeGenerator>>emitCHeaderOn: (in category 'C code generator') -----
  emitCHeaderOn: aStream
  	"Write a C file header onto the given stream."
  
  	aStream nextPutAll: '/* '.
  	aStream nextPutAll: VMMaker headerNotice.
  	aStream nextPutAll: ' */'; cr;
  		nextPutAll: (self fileHeaderVersionStampForSourceClass: vmClass);
  		cr; cr.
  	self emitGlobalStructFlagOn: aStream.
  	aStream nextPutAll: '#include "sq.h"'; cr.
  
  	"Additional header files"
+ 	self emitHeaderFilesOn: aStream.
- 	headerFiles do:[:hdr|
- 		aStream nextPutAll:'#include '; nextPutAll: hdr; cr].
  
  	"Default definitions for optional functions, provided for backward compatibility"
  	self emitDefaultMacrosOn: aStream.
  
  	aStream nextPutAll: '
  #include "sqMemoryAccess.h"
  
  sqInt printCallStack(void);
  void defaultErrorProc(char *s) {
  	/* Print an error message and exit. */
  	static sqInt printingStack = false;
  
  	printf("\n%s\n\n", s);
  	if (!!printingStack) {
  		/* flag prevents recursive error when trying to print a broken stack */
  		printingStack = true;
  		printCallStack();
  	}
  	exit(-1);
  }
  '.
  	aStream cr.!

Item was added:
+ ----- Method: CCodeGenerator>>emitHeaderFilesOn: (in category 'C code generator') -----
+ emitHeaderFilesOn: aStream
+ 	"Write a header files onto the given stream. A special hack allows use of 
+ 	#ifdef THING
+ 	#include <blah.h>
+ 	#endif
+ 	constructs"
+ 
+ 	"Additional header files"
+ 	headerFiles do:[:hdr|
+ 		hdr first ~= $# ifTrue:
+ 			[aStream nextPutAll: '#include '].
+ 		aStream nextPutAll: hdr; cr].
+ !

Item was changed:
  ----- Method: VMPluginCodeGenerator>>emitCHeaderOn: (in category 'C code generator') -----
  emitCHeaderOn: aStream
  	"Write a C file header onto the given stream."
  
  	aStream nextPutAll: '/* '.
  	aStream nextPutAll: VMMaker headerNotice.
  	aStream nextPutAll: ' */';cr.
  	aStream nextPutAll: (self fileHeaderVersionStampForSourceClass: vmClass).
  	aStream cr; cr.
  
  	aStream nextPutAll:'
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <time.h>
  
  /* Default EXPORT macro that does nothing (see comment in sq.h): */
  #define EXPORT(returnType) returnType
  
  /* Do not include the entire sq.h file but just those parts needed. */
  /*  The virtual machine proxy definition */
  #include "sqVirtualMachine.h"
  /* Configuration options */
  #include "sqConfig.h"
  /* Platform specific definitions */
  #include "sqPlatformSpecific.h"
  
  #define true 1
  #define false 0
  #define null 0  /* using ''null'' because nil is predefined in Think C */
  #ifdef SQUEAK_BUILTIN_PLUGIN
  #undef EXPORT
  // was #undef EXPORT(returnType) but screws NorCroft cc
  #define EXPORT(returnType) static returnType
  #endif
  '.
  
  	"Additional header files"
+ 	self emitHeaderFilesOn: aStream.
- 	headerFiles do:[:hdr|
- 		aStream nextPutAll:'#include '; nextPutAll: hdr; cr].
  
- 
  	aStream nextPutAll: '
  #include "sqMemoryAccess.h"
  
  '.
  	aStream cr.!



More information about the Vm-dev mailing list