[Vm-dev] VM Maker: VMMaker.oscog-eem.2481.mcz

Mon Nov 5 18:33:02 UTC 2018

Eliot Miranda uploaded a new version of VMMaker to project VM Maker:
http://source.squeak.org/VMMaker/VMMaker.oscog-eem.2481.mcz

==================== Summary ====================

Name: VMMaker.oscog-eem.2481
Author: eem
Time: 5 November 2018, 10:32:17.399439 am
UUID: 43e5789a-92ab-47cd-9c5b-ffe711bbfa4e
Ancestors: VMMaker.oscog-eem.2480

ThreadedFFIPlugin:
Fix a bug passing floats on the stack on X64.  The original code passed stacked floats as doubles.
Mark ffiPush*Float:in: as <inline: #always> since they are inlined and this eliminates unused functions.
Add some commentary to stack alignment methods to aid locating associated defines.

=============== Diff against VMMaker.oscog-eem.2480 ===============

Item was changed:
  ----- Method: ThreadedARMFFIPlugin>>ffiPushDoubleFloat:in: (in category 'marshalling') -----
  ffiPushDoubleFloat: value in: calloutState
  	<var: #value type: #double>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>

  	calloutState floatRegisterIndex < (NumFloatRegArgs - 1)
  		ifTrue:
  			[(calloutState floatRegisterIndex bitAnd: 1) = 1
  				ifTrue: 
  					[calloutState backfillFloatRegisterIndex: calloutState floatRegisterIndex.
  					 calloutState floatRegisterIndex: (calloutState floatRegisterIndex + 1)].
  		  	 (self cCoerceSimple: (self addressOf: (calloutState floatRegisters at: calloutState floatRegisterIndex)) 
  				 	to: 'double*')
  						at: 0
  						put: value.
  			 calloutState floatRegisterIndex: calloutState floatRegisterIndex + 2]
  		ifFalse:
  			[calloutState currentArg + 8 > calloutState limit ifTrue:
  				[^FFIErrorCallFrameTooBig].
  			 calloutState floatRegisterIndex: NumFloatRegArgs.
  			 interpreterProxy storeFloatAtPointer: calloutState currentArg from: value.
  			 calloutState currentArg: calloutState currentArg + 8].
  	^0!

Item was changed:
  ----- Method: ThreadedARMFFIPlugin>>ffiPushSingleFloat:in: (in category 'marshalling') -----
  ffiPushSingleFloat: value in: calloutState
  	<var: #value type: #float>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>
  	calloutState floatRegisterIndex < NumFloatRegArgs
  		ifTrue: 
  			[calloutState backfillFloatRegisterIndex > 0
  				ifTrue: 
  					[calloutState floatRegisters at: calloutState backfillFloatRegisterIndex  put: value.
  					 calloutState backfillFloatRegisterIndex: 0]
  				ifFalse: 
  					[calloutState floatRegisters at: calloutState floatRegisterIndex  put: value.
  					 calloutState floatRegisterIndex: calloutState floatRegisterIndex + 1]]
  		ifFalse:
  			[calloutState currentArg + 4 > calloutState limit ifTrue:
  				[^FFIErrorCallFrameTooBig].
  			 interpreterProxy storeSingleFloatAtPointer: calloutState currentArg from: value.
  			 calloutState currentArg: calloutState currentArg + 4].
  	^0!

Item was changed:
  ----- Method: ThreadedFFIPlugin class>>preambleCCode (in category 'translation') -----
  preambleCCode
+ 	"For a source of builtin defines grep for builtin_define in a gcc release config directory.
+ 	 See platforms/Cross/vm/sqCogStackAlignment.h for per-platform definitions for
+ 	 STACK_ALIGN_BYTES MUST_ALIGN_STACK et al."
- 	"For a source of builtin defines grep for builtin_define in a gcc release config directory."
  	^'
  #include "sqAssert.h" /* for assert */
  #define ThreadedFFIPlugin 1 /* to filter-out unwanted declarations from sqFFI.h */
  #include "sqFFI.h" /* for logging and surface functions */
  #include "sqCogStackAlignment.h" /* for STACK_ALIGN_BYTES and getsp() */

  #ifdef _MSC_VER
  # define alloca _alloca
  #endif
  #if defined(__GNUC__) && (defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__))
  # define setsp(sp) asm volatile ("movl %0,%%esp" : : "m"(sp))
  # elif defined(__GNUC__) && (defined(__amd64__) || defined(__x86_64__) ||  defined(__amd64) || defined(__x86_64))
  # define setsp(sp) asm volatile ("movq %0,%%rsp" : : "m"(sp))
  # elif defined(__GNUC__) && (defined(__arm__))
  # define setsp(sp) asm volatile ("ldr %%sp, %0" : : "m"(sp))
  #endif
  #if !!defined(getsp)
  # define getsp() 0
  #endif 
  #if !!defined(setsp)
  # define setsp(ignored) 0
  #endif 

  #if !!defined(STACK_ALIGN_BYTES)
  #  define STACK_ALIGN_BYTES 0
  #endif /* !!defined(STACK_ALIGN_BYTES) */

  /* For ABI that require stack alignment greater than natural word size */
  #define MUST_ALIGN_STACK (STACK_ALIGN_BYTES > sizeof(void*))

  #if defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__)
  /* Both Mac OS X x86 and Win32 x86 return structs of a power of two in size
   * less than or equal to eight bytes in length in registers. Linux never does so.
   */
  # if __linux__
  #	define WIN32_X86_STRUCT_RETURN 0
  # else
  #	define WIN32_X86_STRUCT_RETURN 1
  # endif
  # if _WIN32
  #	define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 1
  # endif
  # elif defined(__amd64__) || defined(__x86_64__) ||  defined(__amd64) || defined(__x86_64)
  # if _WIN32 | _WIN64
  #	define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 1
  # endif
  #endif /* defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__) */

  #if !!defined(ALLOCA_LIES_SO_SETSP_BEFORE_CALL)
  # if defined(__MINGW32__) && !!defined(__clang__) && (__GNUC__ >= 3) && (defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__))
      /*
       * cygwin -mno-cygwin (MinGW) gcc 3.4.x''s alloca is a library routine that answers
       * %esp + xx, so the outgoing stack is offset by one or more word if uncorrected.
       * Grab the actual stack pointer to correct.
       */
  #	define ALLOCA_LIES_SO_SETSP_BEFORE_CALL 1
  # else
  #	define ALLOCA_LIES_SO_SETSP_BEFORE_CALL 0
  # endif
  #endif /* !!defined(ALLOCA_LIES_SO_SETSP_BEFORE_CALL) */

  #if !!defined(PLATFORM_API_USES_CALLEE_POPS_CONVENTION)
  # define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 0
  #endif

  /* This alignment stuff is a hack for integerAt:put:size:signed:/primitiveFFIIntegerAt[Put].
+  * The assumption right now is that all processors support unaligned access.  That only
+  * holds true for x86, x86-64 & ARMv6 & later.  But this keeps us going until we can address
+  * it properly.
-  * The assumption right now is that all processors suppoprt unaligned access.  That only
-  * holds true for x86, x86-64 & ARMv6 & later.  But this keeps us going until we can addresws it properly.
   */
  #define unalignedShortAt(a) shortAt(a)
  #define unalignedShortAtput(a,v) shortAtput(a,v)
  #define unalignedLong32At(a) long32At(a)
  #define unalignedLong32Atput(a,v) long32Atput(a,v)
  #define unalignedLong64At(a) long64At(a)
  #define unalignedLong64Atput(a,v) long64Atput(a,v)

  /* The dispatchOn:in:with:with: generates an unwanted call on error.  Just squash it. */
  #define error(foo) 0
  #ifndef SQUEAK_BUILTIN_PLUGIN
  /* but print assert failures. */
  void
  warning(char *s) { /* Print an error message but don''t exit. */
  	printf("\n%s\n", s);
  }
  #endif

  /* sanitize */
  #ifdef SQUEAK_BUILTIN_PLUGIN
  # define EXTERN 
  #else
  # define EXTERN extern
  #endif
  '!

Item was changed:
  ----- Method: ThreadedFFIPlugin>>cStackAlignment (in category 'marshalling') -----
  cStackAlignment
+ 	"Many ABIs mandate a particular stack alignment greater than the natural word
+ 	 size. If so, this macro will answer that alignment.  If not, this macro will answer 0.
+ 	 See platforms/Cross/vm/sqCogStackAlignment.h and class-side preambleCCode."
- 	"Many ABIs mandate a particular stack alignment greater than the natural word size.
- 	 If so, this macro will answer that alignment.  If not, this macro will answer 0.  See
- 	 class-side preambleCCode."
  	<cmacro: '() STACK_ALIGN_BYTES'>
  	^0!

Item was changed:
  ----- Method: ThreadedFFIPlugin>>mustAlignStack (in category 'marshalling') -----
  mustAlignStack
  	"Many ABIs mandate a particular stack alignment greater than the natural word size.
+ 	 If so, this macro will answer true.  See platforms/Cross/vm/sqCogStackAlignment.h
+ 	 and class-side preambleCCode."
- 	 If so, this macro will answer true.  See class-side preambleCCode."
  	<cmacro: '() MUST_ALIGN_STACK'>
  	^false!

Item was changed:
  ----- Method: ThreadedIA32FFIPlugin>>ffiPushDoubleFloat:in: (in category 'marshalling') -----
  ffiPushDoubleFloat: value in: calloutState
  	<var: #value type: #double>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>
  	calloutState currentArg + 8 > calloutState limit ifTrue:
  		[^FFIErrorCallFrameTooBig].
  	interpreterProxy storeFloatAtPointer: calloutState currentArg from: value.
  	calloutState currentArg: calloutState currentArg + 8.
  	^0!

Item was changed:
  ----- Method: ThreadedIA32FFIPlugin>>ffiPushSingleFloat:in: (in category 'marshalling') -----
  ffiPushSingleFloat: value in: calloutState
  	<var: #value type: #float>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>
  	calloutState currentArg + 4 > calloutState limit ifTrue:
  		[^FFIErrorCallFrameTooBig].
  	interpreterProxy storeSingleFloatAtPointer: calloutState currentArg from: value.
  	calloutState currentArg: calloutState currentArg + 4.
  	^0!

Item was changed:
  ----- Method: ThreadedX64FFIPlugin>>ffiPushDoubleFloat:in: (in category 'marshalling') -----
  ffiPushDoubleFloat: value in: calloutState
  	<var: #value type: #double>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>

  	calloutState floatRegisterIndex < NumFloatRegArgs
  		ifTrue:
  			[calloutState floatRegisters at: calloutState floatRegisterIndex put: value.
  			 calloutState incrementFloatRegisterIndex]
  		ifFalse:
  			[calloutState currentArg + WordSize > calloutState limit ifTrue:
  				[^FFIErrorCallFrameTooBig].
  			 interpreterProxy storeFloatAtPointer: calloutState currentArg from: value.
  			 calloutState currentArg: calloutState currentArg + WordSize].
  	^0!

Item was changed:
  ----- Method: ThreadedX64FFIPlugin>>ffiPushSingleFloat:in: (in category 'marshalling') -----
  ffiPushSingleFloat: value in: calloutState
  	<var: #value type: #float>
  	<var: #calloutState type: #'CalloutState *'>
+ 	<inline: #always>
- 	<inline: true>

  	calloutState floatRegisterIndex < NumFloatRegArgs
  		ifTrue:
  			[(self cCoerce: calloutState floatRegisters + calloutState floatRegisterIndex to: #'float *') at: 0 put: value.
  			 calloutState incrementFloatRegisterIndex]
  		ifFalse:
  			[calloutState currentArg + WordSize > calloutState limit ifTrue:
  				[^FFIErrorCallFrameTooBig].
+ 			 "Float passed in least significant word, but good to zero the high bits for clarity."
+ 			 interpreterProxy
+ 				long64AtPointer: calloutState currentArg put: 0;
+ 				storeSingleFloatAtPointer: calloutState currentArg from: value.
- 			 interpreterProxy storeFloatAtPointer: calloutState currentArg from: value.
  			 calloutState currentArg: calloutState currentArg + WordSize].
  	^0!