[Vm-dev] VM Maker: VMMaker.oscog-eem.3045.mcz

commits at source.squeak.org commits at source.squeak.org
Sat Aug 21 07:22:38 UTC 2021


Eliot Miranda uploaded a new version of VMMaker to project VM Maker:
http://source.squeak.org/VMMaker/VMMaker.oscog-eem.3045.mcz

==================== Summary ====================

Name: VMMaker.oscog-eem.3045
Author: eem
Time: 21 August 2021, 12:22:27.421152 am
UUID: a6da6772-a12a-4ebc-a2ce-bae4ff4dc663
Ancestors: VMMaker.oscog-eem.3044

Much faster per-method profiling of cog methods by attributing samples to entities in moveProfileToMethods, not when sampling processor pc.

=============== Diff against VMMaker.oscog-eem.3044 ===============

Item was changed:
  ----- Method: CogVMSimulator>>hasFastCLinkage: (in category 'plugin primitive support') -----
  hasFastCLinkage: methodObj
  	"Simulation only!!!! the attempt to look up is entirely inappropriate for the real VM!!!!"
  	| header primIdx literal metadata |
  	objectMemory hasSpurMemoryManagerAPI ifFalse:
  		[^false].
  	header := objectMemory methodHeaderOf: methodObj.
  	primIdx := self primitiveIndexOfMethod: methodObj header: header.
  	(primIdx = PrimNumberExternalCall
  	 and: [(objectMemory literalCountOfMethodHeader: header) > 0
  	 and: [(objectMemory isArray: (literal := self literal: 0 ofMethod: methodObj))
  	 and: [(objectMemory numSlotsOf: literal) = 4]]]) ifFalse:
+ 		[^primIdx = PrimNumberObjectAtPut].
- 		[^false].
  	(objectMemory fetchPointer: ExternalCallLiteralTargetFunctionIndex ofObject: literal) ~= (objectMemory integerObjectOf: 0) ifTrue:
  		[^FastCPrimitiveFlag anyMask: (objectMemory integerValueOf: (objectMemory fetchPointer: ExternalCallLiteralFlagsIndex ofObject: literal))].
  	^(self ioLoadFunction: (self stringOf: (objectMemory fetchPointer: ExternalCallLiteralFunctionNameIndex ofObject: literal))
  		From: (self stringOf: (objectMemory fetchPointer: ExternalCallLiteralModuleNameIndex ofObject: literal))
  		MetadataInto: (self addressOf: metadata put: [:v| metadata := v])) ~= 0
  	 and: [FastCPrimitiveFlag anyMask: metadata]!

Item was changed:
  CogClass subclass: #Cogit
(excessive size, no diff calculated)

Item was changed:
  ----- Method: Cogit class>>declareCVarsIn: (in category 'translation') -----
  declareCVarsIn: aCCodeGenerator
  	| backEnd |
  	backEnd := CogCompilerClass basicNew.
  	#(	'coInterpreter' 'objectMemory' 'methodZone' 'objectRepresentation'
  		'cogBlockMethodSurrogateClass' 'cogMethodSurrogateClass' 'nsSendCacheSurrogateClass'
  		'threadManager' 'processor' 'lastNInstructions' 'simulatedAddresses' 'ioHighResClock'
  		'simulatedTrampolines' 'simulatedVariableGetters' 'simulatedVariableSetters'
  		'processorFrameValid' 'printRegisters' 'printInstructions' 'clickConfirm' 'singleStep'
  		'codeZoneIsExecutableNotWritable' 'debugAPISelector' 'shortCutTrampolineBlocks'
+ 		'perMethodProfile' 'instructionProfile') do:
- 		'perMethodProfile' 'currentProfileMethod') do:
  			[:simulationVariableUnusedByRealVM|
  			aCCodeGenerator removeVariable: simulationVariableUnusedByRealVM].
  	NewspeakVM ifFalse:
  		[#(	'selfSendTrampolines' 'dynamicSuperSendTrampolines'
  			'implicitReceiverSendTrampolines' 'outerSendTrampolines'
  			'ceEnclosingObjectTrampoline' 'numIRCs' 'indexOfIRC' 'theIRCs') do:
  				[:variableNotNeededInNormalVM|
  				aCCodeGenerator removeVariable: variableNotNeededInNormalVM]].
  	aCCodeGenerator removeConstant: #COGMTVM. "this should be defined at compile time"
  	"N.B. We *do not* include sq.h; it pulls in conflicting definitions now that sqVirtualMachine.h
  	 declares cointerp's functions, and declares some of them inaccurately for histrical reasons.
  	 We pull in CoInterpreter's api via cointerp.h which is accurate."
  	aCCodeGenerator
  		addHeaderFile:'"sqConfig.h"'; "config.h must be first on linux"
  		addHeaderFile:'<stddef.h>'; "for e.g. offsetof"
  		addHeaderFile:'<stdio.h>';
  		addHeaderFile:'<stdlib.h>';
  		addHeaderFile:'<string.h>';
  		addHeaderFile:'"sqPlatformSpecific.h"'; "e.g. solaris overrides things for sqCogStackAlignment.h"
  		addHeaderFile:'"sqMemoryAccess.h"';
  		addHeaderFile:'"sqCogStackAlignment.h"';
  		addHeaderFile:'"dispdbg.h"'; "must precede cointerp.h & cogit.h otherwise NoDbgRegParms gets screwed up"
  		addHeaderFile:'"cogmethod.h"'.
  	NewspeakVM ifTrue:
  		[aCCodeGenerator addHeaderFile:'"nssendcache.h"'].
  	aCCodeGenerator
  		addHeaderFile:'#if COGMTVM';
  		addHeaderFile:'"cointerpmt.h"';
  		addHeaderFile:'#else';
  		addHeaderFile:'"cointerp.h"';
  		addHeaderFile:'#endif';
  		addHeaderFile:'"cogit.h"'.
  	aCCodeGenerator
  		var: #ceGetFP
  			declareC: 'usqIntptr_t (*ceGetFP)(void)';
  		var: #ceGetSP
  			declareC: 'usqIntptr_t (*ceGetSP)(void)';
  		var: #ceCaptureCStackPointers
  			declareC: 'void (*ceCaptureCStackPointers)(void)';
  		var: #ceInvokeInterpret
  			declareC: 'void (*ceInvokeInterpret)(void)';
  		var: #ceEnterCogCodePopReceiverReg
  			declareC: 'void (*ceEnterCogCodePopReceiverReg)(void)';
  		var: #realCEEnterCogCodePopReceiverReg
  			declareC: 'void (*realCEEnterCogCodePopReceiverReg)(void)';
  		var: #ceCallCogCodePopReceiverReg
  			declareC: 'void (*ceCallCogCodePopReceiverReg)(void)';
  		var: #realCECallCogCodePopReceiverReg
  			declareC: 'void (*realCECallCogCodePopReceiverReg)(void)';
  		var: #ceCallCogCodePopReceiverAndClassRegs
  			declareC: 'void (*ceCallCogCodePopReceiverAndClassRegs)(void)';
  		var: #realCECallCogCodePopReceiverAndClassRegs
  			declareC: 'void (*realCECallCogCodePopReceiverAndClassRegs)(void)';
  		var: #postCompileHook
  			declareC: 'void (*postCompileHook)(CogMethod *)';
  		var: #openPICList declareC: 'CogMethod *openPICList = 0';
  		var: #maxMethodBefore type: #'CogBlockMethod *';
  		var: 'enumeratingCogMethod' type: #'CogMethod *'.
  	
  	aCCodeGenerator
  		var: #ceTryLockVMOwner
  		declareC: '#if COGMTVM\usqIntptr_t (*ceTryLockVMOwner)(usqIntptr_t)\#endif'.
  
  	backEnd numICacheFlushOpcodes > 0 ifTrue:
  		[aCCodeGenerator
  			var: #ceFlushICache
  				declareC: 'static void (*ceFlushICache)(usqIntptr_t from, usqIntptr_t to)'].
  	aCCodeGenerator
  		var: #ceFlushDCache
  			declareC: '#if DUAL_MAPPED_CODE_ZONE\static void (*ceFlushDCache)(usqIntptr_t from, usqIntptr_t to)\#endif';
  		var: #codeToDataDelta
  			declareC: '#if DUAL_MAPPED_CODE_ZONE\static sqInt codeToDataDelta\#else\# define codeToDataDelta 0\#endif'.
  
  	aCCodeGenerator
  		declareVar: 'aMethodLabel' type: #'AbstractInstruction'; "Has to come lexicographically before backEnd & methodLabel"
  		var: #backEnd declareC: 'AbstractInstruction * const backEnd = &aMethodLabel';
  		var: #methodLabel declareC: 'AbstractInstruction * const methodLabel = &aMethodLabel'.
  	self declareC: #(abstractOpcodes stackCheckLabel
  					blockEntryLabel blockEntryNoContextSwitch
  					stackOverflowCall sendMiss
  					entry noCheckEntry selfSendEntry dynSuperEntry
  					fullBlockNoContextSwitchEntry fullBlockEntry
  					picMNUAbort picInterpretAbort  endCPICCase0 endCPICCase1 cPICEndOfCodeLabel)
  			as: #'AbstractInstruction *'
  				in: aCCodeGenerator.
  	aCCodeGenerator
  		declareVar: #cPICPrototype type: #'CogMethod *';
  		declareVar: #blockStarts type: #'BlockStart *';
  		declareVar: #fixups type: #'BytecodeFixup *';
  		declareVar: #methodZoneBase type: #usqInt.
  	aCCodeGenerator
  		var: #ordinarySendTrampolines
  			declareC: 'sqInt ordinarySendTrampolines[NumSendTrampolines]';
  		var: #superSendTrampolines
  			declareC: 'sqInt superSendTrampolines[NumSendTrampolines]'.
  	BytecodeSetHasDirectedSuperSend ifTrue:
  		[aCCodeGenerator
  			var: #directedSuperSendTrampolines
  				declareC: 'sqInt directedSuperSendTrampolines[NumSendTrampolines]';
  			var: #directedSuperBindingSendTrampolines
  				declareC: 'sqInt directedSuperBindingSendTrampolines[NumSendTrampolines]'].
  	NewspeakVM ifTrue:
  		[aCCodeGenerator
  			var: #selfSendTrampolines
  				declareC: 'sqInt selfSendTrampolines[NumSendTrampolines]';
  			var: #dynamicSuperSendTrampolines
  				declareC: 'sqInt dynamicSuperSendTrampolines[NumSendTrampolines]';
  			var: #implicitReceiverSendTrampolines
  				declareC: 'sqInt implicitReceiverSendTrampolines[NumSendTrampolines]';
  			var: #outerSendTrampolines
  				declareC: 'sqInt outerSendTrampolines[NumSendTrampolines]'].
  	aCCodeGenerator
  		addConstantForBinding: self bindingForNumTrampolines;
  		var: #trampolineAddresses
  			declareC: 'static char *trampolineAddresses[NumTrampolines*2]';
  		var: #objectReferencesInRuntime
  			declareC: 'static usqInt objectReferencesInRuntime[NumObjRefsInRuntime+1]';
  		var: #labelCounter
  			type: #int;
  		var: #traceFlags
  			declareC: 'int traceFlags = 8 /* prim trace log on by default */';
  		var: #cStackAlignment
  			declareC: 'const int cStackAlignment = STACK_ALIGN_BYTES'.
  	aCCodeGenerator
  		declareVar: #minValidCallAddress type: #'usqIntptr_t'.
  	aCCodeGenerator vmClass generatorTable ifNotNil:
  		[:bytecodeGenTable|
  		aCCodeGenerator
  			var: #generatorTable
  				declareC: 'static BytecodeDescriptor generatorTable[', bytecodeGenTable size printString, ']',
  							(self tableInitializerFor: bytecodeGenTable
  								in: aCCodeGenerator)].
  	"In C the abstract opcode names clash with the Smalltalk generator syntactic sugar.
  	 Most of the syntactic sugar is inlined, but alas some remains.  Rename the syntactic
  	 sugar to avoid the clash."
  	(self organization listAtCategoryNamed: #'abstract instructions') do:
  		[:s|
  		aCCodeGenerator addSelectorTranslation: s to: 'g', (aCCodeGenerator cFunctionNameFor: s)].
  	aCCodeGenerator addSelectorTranslation: #halt: to: 'haltmsg'.
  	self declareFlagVarsAsByteIn: aCCodeGenerator!

Item was removed:
- ----- Method: Cogit>>collectProfile (in category 'analysis') -----
- collectProfile
- 	<doNotGenerate>
- 	perMethodProfile ifNotNil:
- 		[:pmp| | pc thang |
- 		(pc := processor pc) < methodZoneBase
- 			ifTrue: [thang := self codeEntryFor: pc]
- 			ifFalse:
- 				[(currentProfileMethod notNil
- 				 and: [currentProfileMethod containsAddress: pc]) ifFalse:
- 					["inline cogMethodContaining: to avoid useful assert there-in..."
- 					 | cogMethod prevMethod |
- 					 cogMethod := self cogMethodSurrogateAt: methodZoneBase.
- 					 [cogMethod asUnsignedInteger < pc] whileTrue:
- 						[prevMethod := cogMethod.
- 						 cogMethod := methodZone methodAfter: cogMethod].
- 					 currentProfileMethod := prevMethod].
- 				thang := currentProfileMethod address].
- 		pmp at: thang put: (pmp at: thang ifAbsent: 0) + 1]!

Item was changed:
  ----- Method: Cogit>>enablePerMethodProfiling (in category 'analysis') -----
  enablePerMethodProfiling
  	<doNotGenerate>
  	perMethodProfile := Dictionary new.
+ 	instructionProfile := DoubleWordArray new: methodZone zoneEnd.
- 	currentProfileMethod := nil.
  	singleStep := true!

Item was changed:
  ----- Method: Cogit>>handleCallOrJumpSimulationTrap: (in category 'simulation only') -----
  handleCallOrJumpSimulationTrap: aProcessorSimulationTrap
  	<doNotGenerate>
  	| evaluable function memory result savedFramePointer savedStackPointer savedArgumentCount retpc invalidStackPointersExpected index |
  	"Execution of a single instruction must be within the processorLock critical section to ensure
  	 simulation traps are executed atomically.  However, at this point control is leaving machine
  	 code and entering the run-time and hence the lock must be released."
  	processorLock primitiveExitCriticalSection.
  	"This is a hack fix before we revise the simulators.  When a jump call is made, the next
  	 pc is effectively the return address on the stack, not the instruction following the jump."
  	aProcessorSimulationTrap type == #jump ifTrue:
  		[processor hackFixNextPCOfJumpFor: aProcessorSimulationTrap using: objectMemory].
  
  	evaluable := simulatedTrampolines
  					at: aProcessorSimulationTrap address
  					ifAbsent: [self errorProcessingSimulationTrap: aProcessorSimulationTrap
  								in: simulatedTrampolines].
  	function := evaluable isBlock
  					ifTrue: ['aBlock; probably some plugin primitive']
  					ifFalse:
  						[evaluable receiver == backEnd ifTrue: "this is for invoking ARMv5 floating-point intrinsics, and for the short-cut tracing trampolines"
  							[^self handleABICallOrJumpSimulationTrap: aProcessorSimulationTrap evaluable: evaluable].
  						 evaluable selector].
  	memory := coInterpreter memory.
  	function == #interpret ifTrue: "i.e. we're here via ceInvokeInterpret/ceReturnToInterpreterTrampoline and should discard all state back to enterSmalltalkExecutiveImplementation"
  		[self recordInstruction: {'(simulated jump call of '. aProcessorSimulationTrap address. '/'. function. ')'}.
  		 "self halt: evaluable selector."
  	   	 clickConfirm ifTrue:
  		 	[(self confirm: 'skip jump to interpret?') ifFalse:
  				[clickConfirm := false. self halt]].
  		 processor simulateJumpCallOf: aProcessorSimulationTrap address memory: memory.
  		 coInterpreter reenterInterpreter.
  		 "NOTREACHED"
  		 self halt].
  	(function == #ceBaseFrameReturn:
+ 	or: [function == #ceTakeProfileSample:
+ 	or: [function == #primitiveObjectAtPut]])
- 	or: [function == #ceTakeProfileSample:])
  		ifTrue: [invalidStackPointersExpected := true]
  		ifFalse:
  			[invalidStackPointersExpected := false.
  			 evaluable isBlock
  				ifTrue: "external primitives..."
  					["The only acceptable exception to the rule are fast C primitive calls..."
  					 (methodZone cogMethodContaining: (self mostLikelyPrimInvocationPC: processor pc or: (processor leafRetpcIn: memory)))
  						ifNil: [self assertf: 'call to block evaluable from non-external method']
  						ifNotNil: [:cogMethod|
  								self assert: (self cogMethodHasExternalPrim: cogMethod).
  								(coInterpreter hasFastCLinkage: cogMethod methodObject)
  									ifTrue: [invalidStackPointersExpected := true. coInterpreter nilLocalFP]
  									ifFalse: [coInterpreter assertValidExternalStackPointers]]]
  				ifFalse:
  					[coInterpreter assertValidExternalStackPointers]].
  	processor
  		simulateCallOf: aProcessorSimulationTrap address
  		nextpc: aProcessorSimulationTrap nextpc
  		memory: memory.
  	retpc := processor retpcIn: memory.
  	self recordInstruction: {'(simulated call of '. aProcessorSimulationTrap address. '/'. function. ')'}.
  	savedFramePointer := coInterpreter framePointer.
  	savedStackPointer := coInterpreter stackPointer.
  	savedArgumentCount := coInterpreter argumentCount.
  	result := ["self halt: evaluable selector."
  		   	   clickConfirm ifTrue:
  			 	[(self confirm: 'skip run-time call?') ifFalse:
  					[clickConfirm := false. self halt]].
  			   evaluable valueWithArguments: (processor
  												postCallArgumentsNumArgs: evaluable numArgs
  												in: memory)]
  				on: ReenterMachineCode
  				do: [:ex| ex return: #continueNoReturn].
  			
  	invalidStackPointersExpected ifFalse:
  		[coInterpreter assertValidExternalStackPointers].
  	"Verify the stack layout assumption compileInterpreterPrimitive: makes, provided we've
  	 not called something that has built a frame, such as closure value or evaluate method, or
  	 switched frames, such as primitiveSignal, primitiveWait, primitiveResume, primitiveSuspend et al."
  	(function beginsWith: 'primitive') ifTrue:
  		[coInterpreter primFailCode = 0
  			ifTrue: [(CogVMSimulator stackAlteringPrimitives includes: function) ifFalse:
  						["This is a rare case (e.g. in Scorch where a married context's sender is set to nil on trapTrpped and hence the stack layout is altered."
  						 (function == #primitiveSlotAtPut and: [objectMemory isContext: (coInterpreter frameReceiver: coInterpreter framePointer)]) ifFalse:
  							[self assert: savedFramePointer = coInterpreter framePointer.
  							 self assert: savedStackPointer + (savedArgumentCount * objectMemory wordSize)
  									= coInterpreter stackPointer]]]
  			ifFalse:
  				[self assert: savedFramePointer = coInterpreter framePointer.
  				 self assert: savedStackPointer = coInterpreter stackPointer]].
  	result ~~ #continueNoReturn ifTrue:
  		[self recordInstruction: {'(simulated return to '. processor retpcIn: memory. ')'}.
  		 processor simulateReturnIn: memory.
  		 self assert: processor pc = retpc.
  		 processor smashCallerSavedRegistersWithValuesFrom: 16r80000000 by: objectMemory wordSize].
  	self assert: (result isInteger "an oop result"
  			or: [result == coInterpreter
  			or: [result == objectMemory
  			or: [(index := #(nil true false continueNoReturn) indexOf: result) > 0
  				and: [result := #(0 1 0 16rF00BA4) at: index. true]]]]).
  	processor cResultRegister: (result
  								ifNil: [0]
  								ifNotNil: [result isInteger
  											ifTrue: [result]
  											ifFalse: [16rF00BA222]])!

Item was changed:
  ----- Method: Cogit>>moveProfileToMethods (in category 'analysis') -----
  moveProfileToMethods
  	"Simulation only counting of instructions per method/pic/trampoline..."
  	<cmacro: '() 0'>
  	"Whenever a change in the code zone is about to occur (e.g. compact code zone, free a method)
  	 move all counts to the corresponding Smalltalk objects."
  	perMethodProfile ifNotNil:
+ 		[:pmp|
+ 		0 to: trampolineTableIndex - 3 by: 2 do:
+ 			[:i| | trampoline total |
+ 			total := 0.
+ 			trampoline := trampolineAddresses at: i + 1.
+ 			trampoline to: (trampolineAddresses at: i + 3) - 1 do:
+ 				[:pc| total := total + (instructionProfile at: pc)].
+ 			total > 0 ifTrue:
+ 				[perMethodProfile at: trampoline put: (perMethodProfile at: trampoline ifAbsent: 0) + total]].
+ 		methodZone methodsDo:
+ 			[:cogMethod| | total |
+ 			total := 0.
+ 			cogMethod + 1 to: cogMethod address + cogMethod blockSize do:
+ 				[:pc| total := total + (instructionProfile at: pc)].
+ 			total > 0 ifTrue:
+ 				[(cogMethod cmType = CMMethod
+ 					ifTrue: [cogMethod methodObject]
+ 					ifFalse:
+ 						[(cogMethod cmType = CMClosedPIC
+ 						  or: [cogMethod cmType = CMOpenPIC]) ifTrue:
+ 							[cogMethod selector]]) ifNotNil:
+ 					[:thang|
+ 					perMethodProfile at: thang put: (perMethodProfile at: thang ifAbsent: 0) + total]]].
+ 		instructionProfile atAllPut: 0]!
- 		[:pmp| | update |
- 		 update := Dictionary new.
- 		 pmp keysAndValuesDo:
- 			[:methodOopOrCodeThang :count|
- 			(methodOopOrCodeThang between: methodZoneBase and: methodZone limitZony) ifTrue:
- 			 	[update at: methodOopOrCodeThang put: count]].
- 		 update keysAndValuesDo:
- 			[:cogMethodAddress :count| | cogMethod |
- 			pmp removeKey: cogMethodAddress.
- 			cogMethod := self cogMethodSurrogateAt: cogMethodAddress.
- 			cogMethod cmType = CMMethod ifTrue:
- 				[pmp at: cogMethod methodObject put: (pmp at: cogMethod methodObject ifAbsent: 0) + count].
- 			(cogMethod cmType = CMClosedPIC
- 			or: [cogMethod cmType = CMOpenPIC]) ifTrue:
- 				[pmp at: cogMethod selector put: (pmp at: cogMethod selector ifAbsent: 0) + count]]]!

Item was changed:
  ----- Method: Cogit>>recordProcessing (in category 'simulation only') -----
  recordProcessing
+ 	| pc inst |
+ 	instructionProfile ifNotNil:
+ 		[:ip| ip at: (pc := processor pc) put: (ip at: pc) + 1].
- 	| inst |
- 	self collectProfile.
  	self recordRegisters.
  	inst := self recordLastInstruction.
  	"Set RRRName ito the selector that accesses ReceiverResultReg (RRR) to alter instruction printing to add the value of RRR as a suffix
  		(RRRName := #rdx)
  		(RRRName := #edx)
  		(RRRName := nil)"
  	printRegisters ifTrue:
  		[RRRName ifNil: [processor printRegistersOn: coInterpreter transcript].
  		 printInstructions ifFalse:
  			[coInterpreter transcript cr]].
  	printInstructions ifTrue:
  		[printRegisters ifTrue:
  			[coInterpreter transcript cr].
  		 coInterpreter transcript nextPutAll: inst.
  		 RRRName ifNotNil:
  			[coInterpreter transcript space; nextPutAll: RRRName; space.
  			 (processor perform: RRRName) printOn: coInterpreter transcript base: 16 length: 8 padded: false].
  		 coInterpreter transcript cr; flush]!



More information about the Vm-dev mailing list