[Vm-dev] [commit] r2338 - OSCogVM StackToRegisterMappingCogit as per VMMaker-oscog.41.

commits at squeakvm.org commits at squeakvm.org
Sat Jan 1 20:26:17 UTC 2011


Author: eliot
Date: 2011-01-01 12:26:17 -0800 (Sat, 01 Jan 2011)
New Revision: 2338

Modified:
   branches/Cog/src/vm/cogit.c
   branches/Cog/src/vm/cogit.h
Log:
OSCogVM StackToRegisterMappingCogit as per VMMaker-oscog.41.
In the SRMCogit, mark the conditional jump following #== as dead
code and elide it.


Modified: branches/Cog/src/vm/cogit.c
===================================================================
--- branches/Cog/src/vm/cogit.c	2011-01-01 20:18:49 UTC (rev 2337)
+++ branches/Cog/src/vm/cogit.c	2011-01-01 20:26:17 UTC (rev 2338)
@@ -1,9 +1,9 @@
 /* Automatically generated by
 	CCodeGenerator VMMaker-oscog.41 uuid: 096b8a29-e7e8-4cbf-b29c-0f096abbdd5c
    from
-	SimpleStackBasedCogit VMMaker-oscog.41 uuid: 096b8a29-e7e8-4cbf-b29c-0f096abbdd5c
+	StackToRegisterMappingCogit VMMaker-oscog.41 uuid: 096b8a29-e7e8-4cbf-b29c-0f096abbdd5c
  */
-static char __buildInfo[] = "SimpleStackBasedCogit VMMaker-oscog.41 uuid: 096b8a29-e7e8-4cbf-b29c-0f096abbdd5c " __DATE__ ;
+static char __buildInfo[] = "StackToRegisterMappingCogit VMMaker-oscog.41 uuid: 096b8a29-e7e8-4cbf-b29c-0f096abbdd5c " __DATE__ ;
 char *__cogitBuildInfo = __buildInfo;
 
 
@@ -83,14 +83,6 @@
 
 
 typedef struct {
-	AbstractInstruction *targetInstruction;
-	sqInt	instructionIndex;
- } BytecodeFixup;
-
-#define CogBytecodeFixup BytecodeFixup
-
-
-typedef struct {
 	sqInt	annotation;
 	AbstractInstruction *instruction;
  } InstructionAnnotation;
@@ -107,7 +99,35 @@
 #define CogPrimitiveDescriptor PrimitiveDescriptor
 
 
+typedef struct {
+	AbstractInstruction *targetInstruction;
+	sqInt	instructionIndex;
+	sqInt	simStackPtr;
+	sqInt	simSpillBase;
+	sqInt	mergeBase;
+	sqInt	optStatus;
+ } BytecodeFixup;
 
+#define CogSSBytecodeFixup BytecodeFixup
+
+
+typedef struct {
+	char	type;
+	char	spilled;
+	sqInt	registerr;
+	sqInt	offset;
+	sqInt	constant;
+	sqInt	bcptr;
+ } CogSimStackEntry;
+
+
+typedef struct {
+	sqInt	isReceiverResultRegLive;
+	CogSimStackEntry *ssEntry;
+ } CogSSOptStatus;
+
+
+
 /*** Constants ***/
 #define AddCqR 82
 #define AddCwR 89
@@ -123,6 +143,7 @@
 #define ArithmeticShiftRightCqR 68
 #define ArithmeticShiftRightRR 69
 #define BaseHeaderSize 4
+#define BytesPerOop 4
 #define BytesPerWord 4
 #define Call 8
 #define CDQ 102
@@ -143,6 +164,7 @@
 #define CmpCwR 88
 #define CmpRdRd 95
 #define CmpRR 74
+#define ConstZero 1
 #define ConvertRRd 101
 #define CPUID 105
 #define Debug DEBUGVM
@@ -176,6 +198,8 @@
 #define FoxMFReceiver -12
 #define FoxThisContext -8
 #define FPReg -1
+#define GPRegMax -3
+#define GPRegMin -8
 #define HasBytecodePC 5
 #define HashBitsOffset 17
 #define HashMaskUnshifted 0xFFF
@@ -268,7 +292,7 @@
 #define NegateR 67
 #define Nop 7
 #define NumSendTrampolines 4
-#define NumTrampolines 38
+#define NumTrampolines 50
 #define OrCqR 85
 #define OrRR 78
 #define PopR 62
@@ -294,6 +318,10 @@
 #define SizeMask 0xFC
 #define SPReg -2
 #define SqrtRd 100
+#define SSBaseOffset 1
+#define SSConstant 2
+#define SSRegister 3
+#define SSSpill 4
 #define StackPointerIndex 2
 #define SubCqR 83
 #define SubCwR 90
@@ -348,6 +376,7 @@
 static AbstractInstruction * annotateobjRef(AbstractInstruction *abstractInstruction, sqInt anOop);
 static AbstractInstruction * annotatewith(AbstractInstruction *abstractInstruction, sqInt annotationFlag);
 static void assertSaneJumpTarget(void *jumpTarget);
+static sqInt availableRegisterOrNil(void);
 static sqInt blockCodeSize(unsigned char byteZero, unsigned char byteOne, unsigned char byteTwo, unsigned char byteThree);
 static sqInt blockDispatchTargetsForperformarg(CogMethod *cogMethod, usqInt (*binaryFunction)(sqInt mcpc, sqInt arg), sqInt arg);
 sqInt bytecodePCForstartBcpcin(sqInt mcpc, sqInt startbcpc, CogBlockMethod *cogMethod);
@@ -500,8 +529,13 @@
 static sqInt doubleExtendedDoAnythingBytecode(void);
 static sqInt duplicateTopBytecode(void);
 static BytecodeFixup * ensureFixupAt(sqInt targetIndex);
+static BytecodeFixup * ensureNonMergeFixupAt(sqInt targetIndex);
+static void ensureReceiverResultRegContainsSelf(void);
+static void ensureSpilledAtfrom(CogSimStackEntry * self_in_ensureSpilledAtfrom, sqInt baseOffset, sqInt baseRegister);
 void enterCogCodePopReceiver(void);
 void enterCogCodePopReceiverAndClassRegs(void);
+void enterCogCodePopReceiverArg0Regs(void);
+void enterCogCodePopReceiverArg1Arg0Regs(void);
 static sqInt extendedPushBytecode(void);
 static sqInt extendedStoreAndPopBytecode(void);
 static sqInt extendedStoreBytecode(void);
@@ -530,8 +564,10 @@
 static sqInt genDoubleArithmeticpreOpCheck(sqInt arithmeticOperator, AbstractInstruction *(*preOpCheckOrNil)(int rcvrReg, int argReg));
 static sqInt genDoubleComparisoninvert(AbstractInstruction *(*jumpOpcodeGenerator)(void *), sqInt invertComparison);
 static AbstractInstruction * genDoubleFailIfZeroArgRcvrarg(sqInt rcvrReg, sqInt argReg);
+static void (*genEnilopmartForandandcalled(sqInt regArg1, sqInt regArg2, sqInt regArg3, char *trampolineName))(void) ;
 static void (*genEnilopmartForandcalled(sqInt regArg1, sqInt regArg2, char *trampolineName))(void) ;
 static void (*genEnilopmartForcalled(sqInt regArg, char *trampolineName))(void) ;
+static void (*genEnterPICEnilopmartNumArgs(sqInt numArgs))(void) ;
 static sqInt genExtendedSendBytecode(void);
 static sqInt genExtendedSuperBytecode(void);
 static sqInt genExternalizePointersForPrimitiveCall(void);
@@ -589,13 +625,16 @@
 static sqInt genLongJumpIfTrue(void);
 static sqInt genLongUnconditionalBackwardJump(void);
 static sqInt genLongUnconditionalForwardJump(void);
-static sqInt genMethodAbortTrampoline(void);
+static sqInt genMarshalledSendSupernumArgs(sqInt selector, sqInt numArgs);
+static sqInt genMarshalledSendnumArgs(sqInt selector, sqInt numArgs);
+static sqInt genMethodAbortTrampolineFor(sqInt numArgs);
 static void genMulRR(AbstractInstruction * self_in_genMulRR, sqInt regSource, sqInt regDest);
 static sqInt genMustBeBooleanTrampolineForcalled(sqInt boolean, char *trampolineName);
 static sqInt genNonLocalReturnTrampoline(void);
 static sqInt genPassConstasArgument(AbstractInstruction * self_in_genPassConstasArgument, sqInt constant, sqInt zeroRelativeArgIndex);
 static sqInt genPassRegasArgument(AbstractInstruction * self_in_genPassRegasArgument, sqInt abstractRegister, sqInt zeroRelativeArgIndex);
-static sqInt genPICAbortTrampoline(void);
+static sqInt genPICAbortTrampolineFor(sqInt numArgs);
+static sqInt genPICMissTrampolineFor(sqInt numArgs);
 static sqInt genPopStackBytecode(void);
 static sqInt genPrimitiveAdd(void);
 static sqInt genPrimitiveAsFloat(void);
@@ -650,6 +689,9 @@
 static sqInt genPushReceiverBytecode(void);
 static sqInt genPushReceiverVariableBytecode(void);
 static sqInt genPushReceiverVariable(sqInt index);
+static void genPushRegisterArgs(void);
+static void genPushRegisterArgsForAbortMissNumArgs(sqInt numArgs);
+static void genPushRegisterArgsForNumArgs(sqInt numArgs);
 static sqInt genPushRemoteTempLongBytecode(void);
 static sqInt genPushTemporaryVariableBytecode(void);
 static sqInt genPushTemporaryVariable(sqInt index);
@@ -675,19 +717,24 @@
 static sqInt genSendLiteralSelector1ArgBytecode(void);
 static sqInt genSendLiteralSelector2ArgsBytecode(void);
 static sqInt genSendSupernumArgs(sqInt selector, sqInt numArgs);
+static sqInt genSendTrampolineFornumArgscalledargargargarg(void *aRoutine, sqInt numArgs, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3);
 static sqInt genSendnumArgs(sqInt selector, sqInt numArgs);
 static sqInt genSetSmallIntegerTagsIn(sqInt scratchReg);
 static sqInt genShiftAwaySmallIntegerTagsInScratchReg(sqInt scratchReg);
 static sqInt genShortJumpIfFalse(void);
 static sqInt genShortUnconditionalJump(void);
 static sqInt genSmallIntegerComparison(sqInt jumpOpcode);
+static sqInt genSpecialSelectorArithmetic(void);
 static sqInt genSpecialSelectorClass(void);
+static sqInt genSpecialSelectorComparison(void);
 static sqInt genSpecialSelectorEqualsEquals(void);
 static sqInt genSpecialSelectorSend(void);
+static sqInt genSSPushSlotreg(sqInt index, sqInt baseReg);
 static sqInt genStoreAndPopReceiverVariableBytecode(void);
 static sqInt genStoreAndPopRemoteTempLongBytecode(void);
 static sqInt genStoreAndPopTemporaryVariableBytecode(void);
 static sqInt genStoreCheckTrampoline(void);
+static sqInt genStoreImmediateInSourceRegslotIndexdestReg(sqInt sourceReg, sqInt index, sqInt destReg);
 static sqInt genStorePopLiteralVariable(sqInt popBoolean, sqInt litVarIndex);
 static sqInt genStorePopMaybeContextReceiverVariable(sqInt popBoolean, sqInt slotIndex);
 static sqInt genStorePopReceiverVariable(sqInt popBoolean, sqInt slotIndex);
@@ -699,8 +746,6 @@
 static AbstractInstruction * genSubstituteReturnAddress(AbstractInstruction * self_in_genSubstituteReturnAddress, sqInt retpc);
 static sqInt genTrampolineForcalled(void *aRoutine, char *aString);
 static sqInt genTrampolineForcalledarg(void *aRoutine, char *aString, sqInt regOrConst0);
-static sqInt genTrampolineForcalledargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1);
-static sqInt genTrampolineForcalledargargargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3);
 static sqInt genTrampolineForcalledargargargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt resultReg);
 static sqInt genTrampolineForcalledargargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt resultReg);
 static sqInt genTrampolineForcalledargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt resultReg);
@@ -726,11 +771,14 @@
 static BytecodeFixup * initializeFixupAt(sqInt targetIndex);
 static sqInt initialMethodUsageCount(void);
 static sqInt initialOpenPICUsageCount(void);
+static void initSimStackForFramefulMethod(sqInt startpc);
+static void initSimStackForFramelessMethod(sqInt startpc);
 static sqInt inlineCacheTagAt(AbstractInstruction * self_in_inlineCacheTagAt, sqInt callSiteReturnAddress);
 static sqInt inlineCacheTagForInstance(sqInt oop);
 static sqInt inlineCacheTagIsYoung(sqInt cacheTag);
 static sqInt instructionSizeAt(AbstractInstruction * self_in_instructionSizeAt, sqInt pc);
 sqInt interpretOffset(void);
+static sqInt inverseBranchFor(sqInt opcode);
 static sqInt isAFixup(AbstractInstruction * self_in_isAFixup, void *fixupOrAddress);
 static sqInt isAnInstruction(AbstractInstruction * self_in_isAnInstruction, void *addressOrInstruction);
 static sqInt isBigEndian(AbstractInstruction * self_in_isBigEndian);
@@ -741,6 +789,7 @@
 static sqInt isPCDependent(AbstractInstruction * self_in_isPCDependent);
 static sqInt isQuick(AbstractInstruction * self_in_isQuick, unsigned long operand);
 sqInt isSendReturnPC(sqInt retpc);
+static sqInt isSmallIntegerTagNonZero(void);
 static AbstractInstruction * gJumpAboveOrEqual(void *jumpTarget);
 static AbstractInstruction * gJumpAbove(void *jumpTarget);
 static AbstractInstruction * gJumpBelow(void *jumpTarget);
@@ -756,6 +805,7 @@
 static AbstractInstruction * gJumpLong(void *jumpTarget);
 static AbstractInstruction * gJumpNegative(void *jumpTarget);
 static AbstractInstruction * gJumpNonZero(void *jumpTarget);
+static AbstractInstruction * gJumpNoOverflow(void *jumpTarget);
 static AbstractInstruction * gJumpOverflow(void *jumpTarget);
 static AbstractInstruction * JumpRT(sqInt callTarget);
 static AbstractInstruction * gJumpR(sqInt reg);
@@ -777,6 +827,7 @@
 static sqInt leafCallStackPointerDelta(AbstractInstruction * self_in_leafCallStackPointerDelta);
 void linkSendAtintocheckedreceiver(sqInt callSiteReturnAddress, CogMethod *sendingMethod, CogMethod *targetMethod, sqInt checked, sqInt receiver);
 static sqInt literalBeforeFollowingAddress(AbstractInstruction * self_in_literalBeforeFollowingAddress, sqInt followingAddress);
+static sqInt liveRegisters(void);
 static sqInt loadLiteralByteSize(AbstractInstruction * self_in_loadLiteralByteSize);
 static sqInt longBranchDistance(unsigned char byteZero, unsigned char byteOne);
 static sqInt longForwardBranchDistance(unsigned char byteZero, unsigned char byteOne);
@@ -816,11 +867,14 @@
 void markMethodAndReferents(CogBlockMethod *aCogMethod);
 static void markYoungObjectsIn(CogMethod *cogMethod);
 static sqInt markYoungObjectspcmethod(sqInt annotation, char *mcpc, sqInt cogMethod);
+static void marshallSendArguments(sqInt numArgs);
 usqInt maxCogMethodAddress(void);
 static sqInt maybeFreeCogMethodDoesntLookKosher(CogMethod *cogMethod);
 static void maybeGenerateCheckFeatures(void);
 static void maybeGenerateICacheFlush(void);
 sqInt mcPCForstartBcpcin(sqInt bcpc, sqInt startbcpc, CogBlockMethod *cogMethod);
+static void mergeAtfrom(CogSimStackEntry * self_in_mergeAtfrom, sqInt baseOffset, sqInt baseRegister);
+static void mergeafterReturn(BytecodeFixup *fixup, sqInt mergeFollowsReturn);
 static sqInt methodAbortTrampolineFor(sqInt numArgs);
 static CogMethod * methodAfter(CogMethod *cogMethod);
 CogMethod * methodFor(void *address);
@@ -828,6 +882,7 @@
 sqInt mnuOffset(void);
 static sqInt modRMRO(AbstractInstruction * self_in_modRMRO, sqInt mod, sqInt regMode, sqInt regOpcode);
 static AbstractInstruction * gNegateR(sqInt reg);
+static AbstractInstruction * gNop(void);
 static sqInt nextBytecodePCForatbyte0in(BytecodeDescriptor *descriptor, sqInt pc, sqInt opcodeByte, sqInt aMethodObj);
 static sqInt nextBytecodePCInMapAfterininBlockupTo(sqInt startbcpc, sqInt methodObject, sqInt isInBlock, sqInt endpc);
 static sqInt noCogMethodsMaximallyMarked(void);
@@ -852,6 +907,7 @@
 sqInt pcisWithinMethod(char *address, CogMethod *cogMethod);
 static sqInt picAbortTrampolineFor(sqInt numArgs);
 static void planCompaction(void);
+static void popToReg(CogSimStackEntry * self_in_popToReg, sqInt reg);
 static PrimitiveDescriptor * primitiveGeneratorOrNil(void);
 void printCogMethodFor(void *address);
 void printCogMethods(void);
@@ -866,7 +922,10 @@
 void recordCallOffsetInof(CogMethod *cogMethod, void *callLabelArg);
 static void recordGeneratedRunTimeaddress(char *aString, sqInt address);
 sqInt recordPrimTraceFunc(void);
+static sqInt registerMask(CogSimStackEntry * self_in_registerMask);
+static sqInt registerMaskFor(sqInt reg);
 static sqInt registerMaskForandand(sqInt reg1, sqInt reg2, sqInt reg3);
+static sqInt registerOrNil(CogSimStackEntry * self_in_registerOrNil);
 static void relocateAndPruneYoungReferrers(void);
 static void relocateCallBeforeReturnPCby(AbstractInstruction * self_in_relocateCallBeforeReturnPCby, sqInt retpc, sqInt delta);
 static void relocateCallsAndSelfReferencesInMethod(CogMethod *cogMethod);
@@ -906,10 +965,31 @@
 static sqInt sizePCDependentInstructionAt(AbstractInstruction * self_in_sizePCDependentInstructionAt, sqInt eventualAbsoluteAddress);
 static sqInt slotOffsetOfInstVarIndex(sqInt index);
 static sqInt spanForatbyte0in(BytecodeDescriptor *descriptor, sqInt pc, sqInt opcodeByte, sqInt aMethodObj);
+static void ssAllocateCallReg(sqInt requiredReg1);
+static void ssAllocateCallRegand(sqInt requiredReg1, sqInt requiredReg2);
+static sqInt ssAllocatePreferredReg(sqInt preferredReg);
+static void ssAllocateRequiredRegMaskupThrough(sqInt requiredRegsMask, sqInt stackPtr);
+static void ssAllocateRequiredReg(sqInt requiredReg);
+static void ssAllocateRequiredRegand(sqInt requiredReg1, sqInt requiredReg2);
+static void ssAllocateRequiredRegupThrough(sqInt requiredReg, sqInt stackPtr);
+static void ssFlushTo(sqInt index);
+static void ssFlushUpThroughReceiverVariable(sqInt slotIndex);
+static void ssFlushUpThroughTemporaryVariable(sqInt tempIndex);
+static void ssPop(sqInt n);
+static sqInt ssPushBaseoffset(sqInt reg, sqInt offset);
+static sqInt ssPushConstant(sqInt literal);
+static sqInt ssPushDesc(CogSimStackEntry simStackEntry);
+static sqInt ssPushRegister(sqInt reg);
+static void ssPush(sqInt n);
+static sqInt ssStorePoptoPreferredReg(sqInt popBoolean, sqInt preferredReg);
+static CogSimStackEntry * ssTop(void);
+static CogSimStackEntry ssTopDescriptor(void);
+static CogSimStackEntry * ssValue(sqInt n);
 static sqInt stackBytesForNumArgs(AbstractInstruction * self_in_stackBytesForNumArgs, sqInt numArgs);
 sqInt stackPageHeadroomBytes(void);
 static sqInt stackPageInterruptHeadroomBytes(AbstractInstruction * self_in_stackPageInterruptHeadroomBytes);
 static void storeLiteralbeforeFollowingAddress(AbstractInstruction * self_in_storeLiteralbeforeFollowingAddress, sqInt literal, sqInt followingAddress);
+static void storeToReg(CogSimStackEntry * self_in_storeToReg, sqInt reg);
 static sqInt sib(AbstractInstruction * self_in_sib, sqInt scale, sqInt indexReg, sqInt baseReg);
 sqInt traceLinkedSendOffset(void);
 static char * trampolineNamenumArgs(char *routinePrefix, sqInt numArgs);
@@ -956,6 +1036,7 @@
 static sqInt bytecodePointer;
 void * CFramePointer;
 void * CStackPointer;
+static sqInt callerSavedRegMask;
 sqInt ceBaseFrameReturnTrampoline;
 sqInt ceCannotResumeTrampoline;
 void (*ceCaptureCStackPointers)(void);
@@ -964,7 +1045,12 @@
 static sqInt ceClosureCopyTrampoline;
 static sqInt ceCPICMissTrampoline;
 static sqInt ceCreateNewArrayTrampoline;
+void (*ceEnter0ArgsPIC)(void);
+void (*ceEnter1ArgsPIC)(void);
+void (*ceEnter2ArgsPIC)(void);
 void (*ceEnterCogCodePopReceiverAndClassRegs)(void);
+void (*ceEnterCogCodePopReceiverArg0Regs)(void);
+void (*ceEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*ceEnterCogCodePopReceiverReg)(void);
 static sqInt ceFetchContextInstVarTrampoline;
 static void (*ceFlushICache)(unsigned long from, unsigned long to);
@@ -997,6 +1083,8 @@
 static sqInt cPICCaseSize;
 static sqInt cPICEndSize;
 static const int cStackAlignment = STACK_ALIGN_BYTES;
+static sqInt deadCode;
+static sqInt debugFixupBreaks;
 unsigned long debugPrimCallStackOffset;
 static AbstractInstruction * endCPICCase0;
 static AbstractInstruction * endCPICCase1;
@@ -1011,22 +1099,22 @@
 static sqInt firstSend;
 static BytecodeFixup * fixups;
 static BytecodeDescriptor generatorTable[256] = {
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -1123,7 +1211,7 @@
 	{ genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ genPushReceiverBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+	{ genPushReceiverBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushConstantTrueBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushConstantFalseBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genPushConstantNilBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -1187,28 +1275,28 @@
 	{ genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
 	{ genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
 	{ genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
+	{ genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 75, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 76, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 23, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 25, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 26, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 24, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 15, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 77, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+	{ genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 78, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
-	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
 	{ genSpecialSelectorEqualsEquals, (sqInt (*)(unsigned char,...))0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genSpecialSelectorClass, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 	{ genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
@@ -1277,10 +1365,12 @@
 static sqInt lastSend;
 static usqInt limitAddress;
 static CogBlockMethod * maxMethodBefore;
+static sqInt methodAbortTrampolines[4];
 static sqInt methodBytesFreedSinceLastCompaction;
 static AbstractInstruction *methodLabel = &aMethodLabel;
 static sqInt methodObj;
 static sqInt methodOrBlockNumArgs;
+static sqInt methodOrBlockNumTemps;
 static sqInt methodZoneBase;
 static sqInt missOffset;
 static AbstractInstruction * mnuCall;
@@ -1292,6 +1382,9 @@
 static sqInt opcodeIndex;
 static CogMethod *openPICList = 0;
 static sqInt openPICSize;
+static CogSSOptStatus optStatus;
+static sqInt picAbortTrampolines[4];
+static sqInt picMissTrampolines[4];
 static void (*postCompileHook)(CogMethod *, void *);
 static AbstractInstruction * primInvokeLabel;
 static PrimitiveDescriptor primitiveGeneratorTable[MaxCompiledPrimitiveIndex+1] = {
@@ -1521,9 +1614,16 @@
 };
 static sqInt primitiveIndex;
 void (*realCEEnterCogCodePopReceiverAndClassRegs)(void);
+void (*realCEEnterCogCodePopReceiverArg0Regs)(void);
+void (*realCEEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*realCEEnterCogCodePopReceiverReg)(void);
+static sqInt regArgsHaveBeenPushed;
 static AbstractInstruction * sendMissCall;
 static sqInt sendTrampolines[NumSendTrampolines];
+static CogSimStackEntry simSelf;
+static sqInt simSpillBase;
+static CogSimStackEntry simStack[256];
+static sqInt simStackPtr;
 static AbstractInstruction * stackCheckLabel;
 static AbstractInstruction * stackOverflowCall;
 static sqInt superSendTrampolines[NumSendTrampolines];
@@ -1576,7 +1676,7 @@
 #define noCheckEntryOffset() cmNoCheckEntryOffset
 #define noContextSwitchBlockEntryOffset() blockNoContextSwitchOffset
 #define notYetImplemented() warning("not yet implemented")
-#define numRegArgs() 0
+#define numRegArgs() 1
 #define printNum(n) printf("%ld", (long) n)
 #define printOnTrace() (traceLinkedSends & 8)
 #define print(aString) printf(aString)
@@ -1586,7 +1686,12 @@
 #define reportError(n) warning("compilation error")
 #define setCFramePointer(theFP) (CFramePointer = (void *)(theFP))
 #define setCStackPointer(theSP) (CStackPointer = (void *)(theSP))
+#define simStackAt(index) (simStack + (index))
+#define traceDescriptor(ign) 0
+#define traceFixup(ign) 0
 #define traceMapbyteatfor(ig,no,re,d) 0
+#define traceMerge(ign) 0
+#define traceSimStack() 0
 #define tryLockVMOwner() (ceTryLockVMOwner() != 0)
 #define typeEtAlWord(cm) (((long *)(cm))[1])
 #define unlockVMOwner() ceUnlockVMOwner()
@@ -1932,6 +2037,30 @@
 }
 
 static sqInt
+availableRegisterOrNil(void)
+{
+    sqInt liveRegs;
+
+	liveRegs = liveRegisters();
+	if (!(liveRegs & (registerMaskFor(Arg1Reg)))) {
+		return Arg1Reg;
+	}
+	if (!(liveRegs & (registerMaskFor(Arg0Reg)))) {
+		return Arg0Reg;
+	}
+	if (!(liveRegs & (registerMaskFor(SendNumArgsReg)))) {
+		return SendNumArgsReg;
+	}
+	if (!(liveRegs & (registerMaskFor(ClassReg)))) {
+		return ClassReg;
+	}
+	if (!(liveRegs & (registerMaskFor(ReceiverResultReg)))) {
+		return ReceiverResultReg;
+	}
+	return null;
+}
+
+static sqInt
 blockCodeSize(unsigned char byteZero, unsigned char byteOne, unsigned char byteTwo, unsigned char byteThree)
 {
 	return (byteTwo * 256) + byteThree;
@@ -2997,13 +3126,33 @@
 static sqInt
 compileAbstractInstructionsFromthrough(sqInt start, sqInt end)
 {
+    sqInt debugBytecodePointers;
     BytecodeDescriptor *descriptor;
     BytecodeFixup *fixup;
     sqInt nextOpcodeIndex;
     sqInt result;
 
+	traceSimStack();
 	bytecodePointer = start;
+	descriptor = null;
+	deadCode = 0;
 	do {
+		;
+		fixup = fixupAt(bytecodePointer - initialPC);
+		if ((((usqInt)((fixup->targetInstruction)))) > 0) {
+			deadCode = 0;
+			if ((((usqInt)((fixup->targetInstruction)))) >= 2) {
+				mergeafterReturn(fixup, (descriptor != null)
+				 && ((descriptor->isReturn)));
+			}
+		}
+		else {
+			if ((descriptor != null)
+			 && ((descriptor->isReturn))) {
+				deadCode = 1;
+			}
+		}
+		;
 		byte0 = fetchByteofObject(bytecodePointer, methodObj);
 		descriptor = generatorAt(byte0);
 		if (((descriptor->numBytes)) > 1) {
@@ -3019,9 +3168,16 @@
 			}
 		}
 		nextOpcodeIndex = opcodeIndex;
-		result = ((descriptor->generator))();
-		fixup = fixupAt(bytecodePointer - initialPC);
-		if (((fixup->targetInstruction)) != 0) {
+		result = (deadCode
+			? (((descriptor->isMapped))
+ || (inBlock
+ && ((descriptor->isMappedInBlock)))
+	? annotateBytecode(gNop())
+	: 0),0
+			: ((descriptor->generator))());
+		traceDescriptor(descriptor);
+		traceSimStack();
+		if ((((((usqInt)((fixup->targetInstruction)))) >= 1) && ((((usqInt)((fixup->targetInstruction)))) <= 2))) {
 			if (opcodeIndex == nextOpcodeIndex) {
 				gLabel();
 			}
@@ -3118,12 +3274,18 @@
 	sp->	Nth temp
 	Avoid use of SendNumArgsReg which is the flag determining whether
 	context switch is allowed on stack-overflow. */
+/*	Build a frame for a block activation. See CoInterpreter
+	class>>initializeFrameIndices. Override to push the register receiver and
+	register arguments, if any, and to correctly
+	initialize the explicitly nilled/pushed temp entries (they are /not/ of
+	type constant nil). */
 
 static void
 compileBlockFrameBuild(BlockStart *blockStart)
 {
     AbstractInstruction * cascade0;
     sqInt i;
+    sqInt ign;
 
 	annotateBytecode(gLabel());
 	gPushR(FPReg);
@@ -3144,6 +3306,20 @@
 	gCmpRR(TempReg, SPReg);
 	gJumpBelow(stackOverflowCall);
 	(blockStart->stackCheckLabel = annotateBytecode(gLabel()));
+	methodOrBlockNumTemps = (((blockStart->numArgs)) + ((blockStart->numCopied))) + ((blockStart->numInitialNils));
+	initSimStackForFramefulMethod((blockStart->startpc));
+	if (((blockStart->numInitialNils)) > 0) {
+		if (((blockStart->numInitialNils)) > 1) {
+			annotateobjRef(gMoveCwR(nilObject(), TempReg), nilObject());
+			for (ign = 1; ign <= ((blockStart->numInitialNils)); ign += 1) {
+				gPushR(TempReg);
+			}
+		}
+		else {
+			annotateobjRef(gPushCw(nilObject()), nilObject());
+		}
+		methodOrBlockNumTemps = ((blockStart->numArgs)) + ((blockStart->numCopied));
+	}
 }
 
 
@@ -3151,10 +3327,14 @@
 	which is what is initially in ReceiverResultReg. We must annotate the
 	first instruction so that findMethodForStartBcpc:inHomeMethod: can
 	function. We need two annotations because the first is a fiducial. */
+/*	Make sure ReceiverResultReg holds the receiver, loaded from
+	the closure, which is what is initially in ReceiverResultReg */
 
 static void
 compileBlockFramelessEntry(BlockStart *blockStart)
 {
+	methodOrBlockNumTemps = ((blockStart->numArgs)) + ((blockStart->numCopied));
+	initSimStackForFramelessMethod((blockStart->startpc));
 	annotateBytecode((blockStart->entryLabel));
 	annotateBytecode((blockStart->entryLabel));
 	genLoadSlotsourceRegdestReg(ClosureOuterContextIndex, ReceiverResultReg, TempReg);
@@ -3195,11 +3375,14 @@
 static CogMethod *
 compileCogMethod(sqInt selector)
 {
+    sqInt debugStackPointers;
     sqInt extra;
     sqInt numBlocks;
     sqInt numBytecodes;
     sqInt result;
 
+	methodOrBlockNumTemps = tempCountOf(methodObj);
+	;
 	hasYoungReferent = (isYoung(methodObj))
 	 || (isYoung(selector));
 	methodOrBlockNumArgs = argumentCountOf(methodObj);
@@ -3320,6 +3503,9 @@
 	Ensure SendNumArgsReg is set early on (incidentally to nilObj) because
 	it is the flag determining whether context switch is allowed on
 	stack-overflow.  */
+/*	Build a frame for a CogMethod activation. See CoInterpreter
+	class>>initializeFrameIndices. Override to push the register receiver and
+	register arguments, if any. */
 
 static void
 compileFrameBuild(void)
@@ -3328,8 +3514,13 @@
     AbstractInstruction *jumpSkip;
 
 	if (!(needsFrame)) {
+		initSimStackForFramelessMethod(initialPC);
 		return;
 	}
+	genPushRegisterArgs();
+	if (!(needsFrame)) {
+		return;
+	}
 	gPushR(FPReg);
 	gMoveRR(SPReg, FPReg);
 	addDependent(methodLabel, annotateMethodRef(gPushCw(((sqInt)methodLabel))));
@@ -3356,6 +3547,7 @@
 		jmpTarget(jumpSkip, stackCheckLabel = gLabel());
 	}
 	annotateBytecode(stackCheckLabel);
+	initSimStackForFramefulMethod(initialPC);
 }
 
 
@@ -3512,12 +3704,14 @@
 
 
 /*	Compile the abstract instructions for the entire method. */
+/*	Compile the abstract instructions for a method. */
 
 static sqInt
 compileMethod(void)
 {
     sqInt result;
 
+	regArgsHaveBeenPushed = 0;
 	compileProlog();
 	compileEntry();
 	if (((result = compilePrimitive())) < 0) {
@@ -3559,7 +3753,7 @@
 	while (compiledBlocksCount < blockCount) {
 		blockStart = blockStartAt(compiledBlocksCount);
 		compileBlockEntry(blockStart);
-		if (((result = compileAbstractInstructionsFromthrough((blockStart->startpc), (((blockStart->startpc)) + ((blockStart->span))) - 1))) < 0) {
+		if (((result = compileAbstractInstructionsFromthrough(((blockStart->startpc)) + ((blockStart->numInitialNils)), (((blockStart->startpc)) + ((blockStart->span))) - 1))) < 0) {
 			return result;
 		}
 		compiledBlocksCount += 1;
@@ -3592,7 +3786,7 @@
 
 /*	Compile the code for an open PIC. Perform a probe of the first-level
 	method lookup cache followed by a call of ceSendFromOpenPIC: if the probe
-	fails.  */
+	fails. Override to push the register args when calling ceSendFromOpenPIC: */
 
 static void
 compileOpenPICnumArgs(sqInt selector, sqInt numArgs)
@@ -3654,6 +3848,7 @@
 	gCmpRR(SendNumArgsReg, TempReg);
 	gJumpZero(itsAHit);
 	jmpTarget(jumpSelectorMiss, gLabel());
+	genPushRegisterArgsForNumArgs(numArgs);
 	genSaveStackPointers();
 	genLoadCStackPointers();
 	addDependent(methodLabel, annotateMethodRef(gMoveCwR(((sqInt)methodLabel), SendNumArgsReg)));
@@ -5951,7 +6146,7 @@
 static sqInt
 cPICMissTrampolineFor(sqInt numArgs)
 {
-	return ceCPICMissTrampoline;
+	return picMissTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))];	/* table lookup replaces the single ceCPICMissTrampoline: the index is numArgs capped at numRegArgs() + 1 */
 }
 
 static sqInt
@@ -6343,9 +6538,10 @@
 static sqInt
 duplicateTopBytecode(void)
 {
-	gMoveMwrR(0, SPReg, TempReg);
-	gPushR(TempReg);
-	return 0;
+    CogSimStackEntry desc;	/* held by value, not by pointer */
+
+	desc = ssTopDescriptor();	/* presumably the entry currently on top of the simulation stack - confirm against ssTopDescriptor */
+	return ssPushDesc(desc);	/* answers whatever ssPushDesc answers; no load/push through TempReg is emitted directly here any more */
 }
 
 
@@ -6360,13 +6556,93 @@
     BytecodeFixup *fixup;
 
 	fixup = fixupAt(targetIndex);
+	traceFixup(fixup);
+	;
+	if ((((usqInt)((fixup->targetInstruction)))) <= 1) {
+		(fixup->targetInstruction = ((AbstractInstruction *) 2));
+		(fixup->simStackPtr = simStackPtr);
+	}
+	else {
+		if (((fixup->simStackPtr)) <= -2) {
+			(fixup->simStackPtr = simStackPtr);
+		}
+		else {
+			assert(((fixup->simStackPtr)) == simStackPtr);
+		}
+	}
+	return fixup;
+}
+
+
+/*	Make sure there's a flagged fixup at the targetIndex (pc relative to first
+	pc) in fixups, and answer it. The flag stored is 1, written only while the
+	target is still 0, so any target already set is left untouched. Initially a
+	fixup's target is just a flag. Later on it is replaced with a proper instruction. */
+
+static BytecodeFixup *
+ensureNonMergeFixupAt(sqInt targetIndex)
+{
+    BytecodeFixup *fixup;
+
+	fixup = fixupAt(targetIndex);
 	if (((fixup->targetInstruction)) == 0) {
 		(fixup->targetInstruction = ((AbstractInstruction *) 1));
 	}
+	;
 	return fixup;
 }
 
+static void
+ensureReceiverResultRegContainsSelf(void)
+{	/* Bring optStatus and ReceiverResultReg into the "register holds simSelf" state (frameful code), or merely check that state (frameless code). */
+	if (needsFrame) {
+		if (!(((optStatus.isReceiverResultRegLive))
+			 && (((optStatus.ssEntry)) == ((&simSelf))))) {	/* skipped when optStatus already records ReceiverResultReg as live and tied to simSelf */
+			ssAllocateRequiredReg(ReceiverResultReg);
+			storeToReg((&simSelf), ReceiverResultReg);
+		}
+		(optStatus.isReceiverResultRegLive = 1);	/* recorded unconditionally on the frameful path */
+		(optStatus.ssEntry = (&simSelf));
+	}
+	else {	/* frameless path: nothing is generated, the state is only asserted */
+		assert((((simSelf.type)) == SSRegister)
+		 && (((simSelf.registerr)) == ReceiverResultReg));
+		assert(((optStatus.isReceiverResultRegLive))
+		 && (((optStatus.ssEntry)) == ((&simSelf))));
+	}
+}
 
+static void
+ensureSpilledAtfrom(CogSimStackEntry * self_in_ensureSpilledAtfrom, sqInt baseOffset, sqInt baseRegister)
+{	/* Emit a push for this simulation-stack entry unless it is already an SSSpill entry flagged spilled; on exit the entry's spilled flag is set. */
+	if ((self_in_ensureSpilledAtfrom->spilled)) {
+		if (((self_in_ensureSpilledAtfrom->type)) == SSSpill) {
+			assert((((self_in_ensureSpilledAtfrom->offset)) == baseOffset)
+			 && (((self_in_ensureSpilledAtfrom->registerr)) == baseRegister));
+			return;	/* already spilled at the expected offset/register: nothing to emit */
+		}
+	}
+	assert(((self_in_ensureSpilledAtfrom->type)) != SSSpill);
+	if (((self_in_ensureSpilledAtfrom->type)) == SSConstant) {
+		annotateobjRef(gPushCw((self_in_ensureSpilledAtfrom->constant)), (self_in_ensureSpilledAtfrom->constant));	/* constants are pushed with an object-reference annotation and keep type SSConstant */
+	}
+	else {
+		if (((self_in_ensureSpilledAtfrom->type)) == SSBaseOffset) {
+			gMoveMwrR((self_in_ensureSpilledAtfrom->offset), (self_in_ensureSpilledAtfrom->registerr), TempReg);	/* base+offset entries are loaded into TempReg first */
+			gPushR(TempReg);
+		}
+		else {
+			assert(((self_in_ensureSpilledAtfrom->type)) == SSRegister);
+			gPushR((self_in_ensureSpilledAtfrom->registerr));
+		}
+		(self_in_ensureSpilledAtfrom->type) = SSSpill;	/* non-constant entries are retyped and now record baseOffset/baseRegister */
+		(self_in_ensureSpilledAtfrom->offset) = baseOffset;
+		(self_in_ensureSpilledAtfrom->registerr) = baseRegister;
+	}
+	(self_in_ensureSpilledAtfrom->spilled) = 1;	/* set on both the constant and the non-constant path */
+}
+
+
 /*	This is a static version of ceEnterCogCodePopReceiverReg
 	for break-pointing when debugging in C. */
 /*	(and this exists only to reference Debug) */
@@ -6394,6 +6670,34 @@
 	realCEEnterCogCodePopReceiverAndClassRegs();
 }
 
+
+/*	This is a static version of ceEnterCogCodePopReceiverArg0Regs
+	for break-pointing when debugging in C. */
+/*	(and this exists only to reference Debug) */
+
+void
+enterCogCodePopReceiverArg0Regs(void)
+{	/* C-level wrapper: in Debug builds generateEnilopmarts installs this function as ceEnterCogCodePopReceiverArg0Regs */
+	if (!(Debug)) {	/* the test exists only so that Debug is referenced */
+		error("what??");
+	}
+	realCEEnterCogCodePopReceiverArg0Regs();	/* the generated enilopmart itself */
+}
+
+
+/*	This is a static version of ceEnterCogCodePopReceiverArg1Arg0Regs
+	for break-pointing when debugging in C. */
+/*	(and this exists only to reference Debug) */
+
+void
+enterCogCodePopReceiverArg1Arg0Regs(void)
+{	/* C-level wrapper: in Debug builds generateEnilopmarts installs this function as ceEnterCogCodePopReceiverArg1Arg0Regs */
+	if (!(Debug)) {	/* the test exists only so that Debug is referenced */
+		error("what??");
+	}
+	realCEEnterCogCodePopReceiverArg1Arg0Regs();	/* the generated enilopmart itself */
+}
+
 static sqInt
 extendedPushBytecode(void)
 {
@@ -6935,29 +7239,27 @@
 }
 
 
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
+/*	Receiver and arg in registers.
+	Stack looks like
 	return address */
 
 static sqInt
 genDoubleArithmeticpreOpCheck(sqInt arithmeticOperator, AbstractInstruction *(*preOpCheckOrNil)(int rcvrReg, int argReg))
 {
     AbstractInstruction *doOp;
-    AbstractInstruction *fail;
     AbstractInstruction *jumpFailAlloc;
     AbstractInstruction *jumpFailCheck;
     AbstractInstruction *jumpFailClass;
     AbstractInstruction *jumpSmallInt;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
+	gMoveRR(Arg0Reg, TempReg);
 	genGetDoubleValueOfinto(ReceiverResultReg, DPFPReg0);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, ClassReg);
 	jumpSmallInt = genJumpSmallIntegerInScratchReg(TempReg);
-	genGetCompactClassIndexNonIntOfinto(ClassReg, SendNumArgsReg);
+	genGetCompactClassIndexNonIntOfinto(Arg0Reg, SendNumArgsReg);
 	gCmpCqR(classFloatCompactIndex(), SendNumArgsReg);
 	jumpFailClass = gJumpNonZero(0);
-	genGetDoubleValueOfinto(ClassReg, DPFPReg1);
+	genGetDoubleValueOfinto(Arg0Reg, DPFPReg1);
 	doOp = gLabel();
 	if (preOpCheckOrNil == null) {
 		null;
@@ -6968,29 +7270,30 @@
 	genoperandoperand(arithmeticOperator, DPFPReg1, DPFPReg0);
 	jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
 	gMoveRR(SendNumArgsReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
+	assert(methodOrBlockNumArgs <= (numRegArgs()));
+	jmpTarget(jumpFailClass, gLabel());
+	if (preOpCheckOrNil == null) {
+		null;
+	}
+	else {
+		jmpTarget(jumpFailCheck, getJmpTarget(jumpFailClass));
+	}
+	genPushRegisterArgsForNumArgs(methodOrBlockNumArgs);
+	jumpFailClass = gJump(0);
 	jmpTarget(jumpSmallInt, gLabel());
 	genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
 	gConvertRRd(ClassReg, DPFPReg1);
 	gJump(doOp);
 	jmpTarget(jumpFailAlloc, gLabel());
 	compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
-	fail = gLabel();
 	jmpTarget(jumpFailClass, gLabel());
-	if (preOpCheckOrNil == null) {
-		null;
-	}
-	else {
-		jmpTarget(jumpFailCheck, fail);
-	}
 	return 0;
 }
 
 
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
+/*	Receiver and arg in registers.
+	Stack looks like
 	return address */
 
 static sqInt
@@ -7001,14 +7304,13 @@
     AbstractInstruction *jumpFail;
     AbstractInstruction *jumpSmallInt;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
+	gMoveRR(Arg0Reg, TempReg);
 	genGetDoubleValueOfinto(ReceiverResultReg, DPFPReg0);
-	gMoveRR(TempReg, ClassReg);
 	jumpSmallInt = genJumpSmallIntegerInScratchReg(TempReg);
-	genGetCompactClassIndexNonIntOfinto(ClassReg, SendNumArgsReg);
+	genGetCompactClassIndexNonIntOfinto(Arg0Reg, SendNumArgsReg);
 	gCmpCqR(classFloatCompactIndex(), SendNumArgsReg);
 	jumpFail = gJumpNonZero(0);
-	genGetDoubleValueOfinto(ClassReg, DPFPReg1);
+	genGetDoubleValueOfinto(Arg0Reg, DPFPReg1);
 	if (invertComparison) {
 
 		/* May need to invert for NaNs */
@@ -7023,13 +7325,12 @@
 
 	jumpCond = jumpOpcodeGenerator(0);
 	annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject());
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpCond, annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject()));
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpSmallInt, gLabel());
-	genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
-	gConvertRRd(ClassReg, DPFPReg1);
+	genConvertSmallIntegerToIntegerInScratchReg(Arg0Reg);
+	gConvertRRd(Arg0Reg, DPFPReg1);
 	gJump(compare);
 	jmpTarget(jumpFail, gLabel());
 	return 0;
@@ -7052,6 +7353,38 @@
 	then executes a return instruction to pop off the entry-point and jump to
 	it.  */
 
+static void (*genEnilopmartForandandcalled(sqInt regArg1, sqInt regArg2, sqInt regArg3, char *trampolineName))(void)
+
+{	/* Three-register enilopmart generator; answers the start address of the generated code as a void (*)(void). */
+    sqInt endAddress;
+    sqInt enilopmart;
+    sqInt size;
+
+	opcodeIndex = 0;	/* start a fresh abstract-instruction sequence */
+	genLoadStackPointers();
+	gPopR(regArg3);	/* pops run in reverse parameter order: regArg3 first, regArg1 last */
+	gPopR(regArg2);
+	gPopR(regArg1);
+	gRetN(0);	/* the return pops the entry-point and jumps to it */
+	computeMaximumSizes();
+	size = generateInstructionsAt(methodZoneBase);
+	endAddress = outputInstructionsAt(methodZoneBase);
+	assert((methodZoneBase + size) == endAddress);
+	enilopmart = methodZoneBase;	/* remember where the routine starts before methodZoneBase is advanced */
+	methodZoneBase = alignUptoRoutineBoundary(endAddress);
+	nopsFromto(backEnd, endAddress, methodZoneBase - 1);	/* covers the gap between the routine's end and the new methodZoneBase */
+	recordGeneratedRunTimeaddress(trampolineName, enilopmart);
+	return ((void (*)(void)) enilopmart);
+}
+
+
+/*	An enilopmart (the reverse of a trampoline) is a piece of code that makes
+	the system-call-like transition from the C runtime into generated machine
+	code. The desired arguments and entry-point are pushed on a stackPage's
+	stack. The enilopmart pops off the values to be loaded into registers and
+	then executes a return instruction to pop off the entry-point and jump to
+	it.  */
+
 static void (*genEnilopmartForandcalled(sqInt regArg1, sqInt regArg2, char *trampolineName))(void)
 
 {
@@ -7106,6 +7439,44 @@
 }
 
 
+/*	Generate special versions of the ceEnterCogCodePopReceiverAndClassRegs
+	enilopmart that also pop register args from the stack to undo the pushing
+	of register args in the abort/miss trampolines. */
+
+static void (*genEnterPICEnilopmartNumArgs(sqInt numArgs))(void)
+
+{	/* Answers the start address of the generated routine, which is recorded under the name trampolineNamenumArgs("ceEnterPIC", numArgs). */
+    sqInt endAddress;
+    sqInt enilopmart;
+    sqInt size;
+
+	opcodeIndex = 0;
+	genLoadStackPointers();
+	gPopR(ClassReg);
+	gPopR(TempReg);	/* the value the routine finally jumps through */
+	gPopR(SendNumArgsReg);	/* popped here and pushed back after the receiver; presumably a return address - TODO confirm */
+	if (numArgs > 0) {	/* register args are popped only for numArgs 1 and above */
+		if (numArgs > 1) {
+			gPopR(Arg1Reg);
+			assert((numRegArgs()) == 2);
+		}
+		gPopR(Arg0Reg);
+	}
+	gPopR(ReceiverResultReg);
+	gPushR(SendNumArgsReg);
+	gJumpR(TempReg);	/* leaves by a register jump, unlike the gRetN(0) used by genEnilopmartForandandcalled */
+	computeMaximumSizes();
+	size = generateInstructionsAt(methodZoneBase);
+	endAddress = outputInstructionsAt(methodZoneBase);
+	assert((methodZoneBase + size) == endAddress);
+	enilopmart = methodZoneBase;	/* remember where the routine starts before methodZoneBase is advanced */
+	methodZoneBase = alignUptoRoutineBoundary(endAddress);
+	nopsFromto(backEnd, endAddress, methodZoneBase - 1);
+	recordGeneratedRunTimeaddress(trampolineNamenumArgs("ceEnterPIC", numArgs), enilopmart);
+	return ((void (*)(void)) enilopmart);
+}
+
+
 /*	Can use any of the first 32 literals for the selector and pass up to 7
 	arguments. 
  */
@@ -7122,9 +7493,13 @@
 	return genSendSupernumArgs(literalofMethod(byte1 & 31, methodObj), ((usqInt) byte1) >> 5);
 }
 
+
+/*	Override to push the register receiver and register arguments, if any. */
+
 static sqInt
 genExternalizePointersForPrimitiveCall(void)
 {
+	genPushRegisterArgs();
 	gMoveMwrR(0, SPReg, ClassReg);
 	gMoveRAw(FPReg, framePointerAddress());
 	gLoadEffectiveAddressMwrR(BytesPerWord, SPReg, TempReg);
@@ -7308,6 +7683,9 @@
 /*	Enilopmarts transfer control from C into machine code (backwards
 	trampolines). 
  */
+/*	Enilopmarts transfer control from C into machine code (backwards
+	trampolines). Override to add version for generic and PIC-specific entry
+	with reg args. */
 
 static void
 generateEnilopmarts(void)
@@ -7333,6 +7711,27 @@
 	cePrimReturnEnterCogCodeProfiling = methodZoneBase;
 	outputInstructionsForGeneratedRuntimeAt(cePrimReturnEnterCogCodeProfiling);
 	recordGeneratedRunTimeaddress("cePrimReturnEnterCogCodeProfiling", cePrimReturnEnterCogCodeProfiling);
+	
+#  if Debug
+	realCEEnterCogCodePopReceiverArg0Regs = genEnilopmartForandcalled(ReceiverResultReg, Arg0Reg, "realCEEnterCogCodePopReceiverArg0Regs");
+	ceEnterCogCodePopReceiverArg0Regs = enterCogCodePopReceiverArg0Regs;
+	realCEEnterCogCodePopReceiverArg1Arg0Regs = genEnilopmartForandandcalled(ReceiverResultReg, Arg0Reg, Arg1Reg, "realCEEnterCogCodePopReceiverArg1Arg0Regs");
+	ceEnterCogCodePopReceiverArg1Arg0Regs = enterCogCodePopReceiverArg1Arg0Regs;
+
+#  else /* Debug */
+	ceEnterCogCodePopReceiverArg0Regs = genEnilopmartForandcalled(ReceiverResultReg, Arg0Reg, "ceEnterCogCodePopReceiverArg0Regs");
+	ceEnterCogCodePopReceiverArg1Arg0Regs = genEnilopmartForandandcalled(ReceiverResultReg, Arg0Reg, Arg1Reg, "ceEnterCogCodePopReceiverArg1Arg0Regs");
+
+#  endif /* Debug */
+
+	ceEnter0ArgsPIC = genEnterPICEnilopmartNumArgs(0);
+	if ((numRegArgs()) >= 1) {
+		ceEnter1ArgsPIC = genEnterPICEnilopmartNumArgs(1);
+		if ((numRegArgs()) >= 2) {
+			ceEnter1ArgsPIC = genEnterPICEnilopmartNumArgs(2);
+			assert((numRegArgs()) == 2);
+		}
+	}
 }
 
 
@@ -7487,9 +7886,17 @@
 static void
 generateMissAbortTrampolines(void)
 {
-	ceMethodAbortTrampoline = genMethodAbortTrampoline();
-	cePICAbortTrampoline = genPICAbortTrampoline();
-	ceCPICMissTrampoline = genTrampolineForcalledargarg(ceCPICMissreceiver, "ceCPICMissTrampoline", ClassReg, ReceiverResultReg);
+    sqInt numArgs;	/* table index; each loop below covers 0 through numRegArgs() + 1 inclusive */
+
+	for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {	/* method-abort trampolines, one per index */
+		methodAbortTrampolines[numArgs] = (genMethodAbortTrampolineFor(numArgs));
+	}
+	for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {	/* PIC-abort trampolines, one per index */
+		picAbortTrampolines[numArgs] = (genPICAbortTrampolineFor(numArgs));
+	}
+	for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {	/* PIC-miss trampolines, looked up by cPICMissTrampolineFor */
+		picMissTrampolines[numArgs] = (genPICMissTrampolineFor(numArgs));
+	}
 	;
 }
 
@@ -7549,6 +7956,9 @@
 }
 
 
+/*	Override to generate code to push the register arg(s) for <= numRegArg
+	arity sends.
+ */
 /*	Slang needs these apparently superfluous asSymbol sends. */
 
 static void
@@ -7557,13 +7967,13 @@
     sqInt numArgs;
 
 	for (numArgs = 0; numArgs <= (NumSendTrampolines - 2); numArgs += 1) {
-		sendTrampolines[numArgs] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSend", numArgs), ClassReg, 0, ReceiverResultReg, numArgs));
+		sendTrampolines[numArgs] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, numArgs, trampolineNamenumArgs("ceSend", numArgs), ClassReg, 0, ReceiverResultReg, numArgs));
 	}
-	sendTrampolines[NumSendTrampolines - 1] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSend", -1), ClassReg, 0, ReceiverResultReg, SendNumArgsReg));
+	sendTrampolines[NumSendTrampolines - 1] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, (numRegArgs()) + 1, trampolineNamenumArgs("ceSend", -1), ClassReg, 0, ReceiverResultReg, SendNumArgsReg));
 	for (numArgs = 0; numArgs <= (NumSendTrampolines - 2); numArgs += 1) {
-		superSendTrampolines[numArgs] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSuperSend", numArgs), ClassReg, 1, ReceiverResultReg, numArgs));
+		superSendTrampolines[numArgs] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, numArgs, trampolineNamenumArgs("ceSuperSend", numArgs), ClassReg, 1, ReceiverResultReg, numArgs));
 	}
-	superSendTrampolines[NumSendTrampolines - 1] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSuperSend", -1), ClassReg, 1, ReceiverResultReg, SendNumArgsReg));
+	superSendTrampolines[NumSendTrampolines - 1] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, (numRegArgs()) + 1, trampolineNamenumArgs("ceSuperSend", -1), ClassReg, 1, ReceiverResultReg, SendNumArgsReg));
 	firstSend = sendTrampolines[0];
 	lastSend = superSendTrampolines[NumSendTrampolines - 1];
 }
@@ -7603,7 +8013,7 @@
 {
 	ceTraceLinkedSendTrampoline = genSafeTrampolineForcalledarg(ceTraceLinkedSend, "ceTraceLinkedSendTrampoline", ReceiverResultReg);
 	ceTraceBlockActivationTrampoline = genTrampolineForcalled(ceTraceBlockActivation, "ceTraceBlockActivationTrampoline");
-	ceTraceStoreTrampoline = genSafeTrampolineForcalledargarg(ceTraceStoreOfinto, "ceTraceStoreTrampoline", ClassReg, ReceiverResultReg);
+	ceTraceStoreTrampoline = genSafeTrampolineForcalledargarg(ceTraceStoreOfinto, "ceTraceStoreTrampoline", TempReg, ReceiverResultReg);
 }
 
 
@@ -8073,6 +8483,7 @@
 static sqInt
 genJumpBackTo(sqInt targetBytecodePC)
 {
+	ssFlushTo(simStackPtr);
 	gMoveAwR(stackLimitAddress(), TempReg);
 	gCmpRR(TempReg, SPReg);
 	gJumpAboveOrEqual(fixupAt(targetBytecodePC - initialPC));
@@ -8129,19 +8540,25 @@
 	return jumpToTarget;
 }
 
-
-/*	Cunning trick by LPD. If true and false are contiguous subtract the
-	smaller. Correct result is either 0 or the distance between them. If
-	result is not 0 or
-	their distance send mustBeBoolean. */
-
 static sqInt
 genJumpIfto(sqInt boolean, sqInt targetBytecodePC)
 {
+    CogSimStackEntry *desc;
     AbstractInstruction *ok;
 
+	ssFlushTo(simStackPtr - 1);
+	desc = ssTop();
+	ssPop(1);
+	if ((((desc->type)) == SSConstant)
+	 && ((((desc->constant)) == (trueObject()))
+ || (((desc->constant)) == (falseObject())))) {
+		annotateBytecode((((desc->constant)) == boolean
+			? gJump(ensureFixupAt(targetBytecodePC - initialPC))
+			: gLabel()));
+		return 0;
+	}
+	popToReg(desc, TempReg);
 	assert((objectAfter(falseObject())) == (trueObject()));
-	gPopR(TempReg);
 	annotateobjRef(gSubCwR(boolean, TempReg), boolean);
 	gJumpZero(ensureFixupAt(targetBytecodePC - initialPC));
 	gCmpCqR((boolean == (falseObject())
@@ -8172,6 +8589,7 @@
 static sqInt
 genJumpTo(sqInt targetBytecodePC)
 {
+	ssFlushTo(simStackPtr);	/* presumably flushes every simulation-stack entry up to simStackPtr before the jump is emitted - confirm against ssFlushTo */
 	gJump(ensureFixupAt(targetBytecodePC - initialPC));
 	return 0;
 }
@@ -8268,7 +8686,39 @@
 	return genJumpTo(targetpc);
 }
 
+static sqInt
+genMarshalledSendSupernumArgs(sqInt selector, sqInt numArgs)
+{	/* Super-send counterpart of genMarshalledSendnumArgs: identical except that the call goes through superSendTrampolines. */
+	if (isYoung(selector)) {
+		hasYoungReferent = 1;
+	}
+	assert(needsFrame);	/* this call sequence is only generated for frameful code */
+	if (numArgs > 2) {	/* the arg count is loaded into SendNumArgsReg only above two args */
+		gMoveCqR(numArgs, SendNumArgsReg);
+	}
+	gMoveCwR(selector, ClassReg);
+	CallSend(superSendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);	/* trampoline index is numArgs capped at NumSendTrampolines - 1 */
+	(optStatus.isReceiverResultRegLive = 0);	/* after the call ReceiverResultReg is no longer recorded as live in optStatus */
+	return ssPushRegister(ReceiverResultReg);	/* the simulation stack gets a register entry for ReceiverResultReg */
+}
 
+static sqInt
+genMarshalledSendnumArgs(sqInt selector, sqInt numArgs)
+{	/* Emit the call for an ordinary send; answers the result of ssPushRegister(ReceiverResultReg). */
+	if (isYoung(selector)) {
+		hasYoungReferent = 1;
+	}
+	assert(needsFrame);	/* this call sequence is only generated for frameful code */
+	if (numArgs > 2) {	/* the arg count is loaded into SendNumArgsReg only above two args */
+		gMoveCqR(numArgs, SendNumArgsReg);
+	}
+	gMoveCwR(selector, ClassReg);
+	CallSend(sendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);	/* trampoline index is numArgs capped at NumSendTrampolines - 1 */
+	(optStatus.isReceiverResultRegLive = 0);	/* after the call ReceiverResultReg is no longer recorded as live in optStatus */
+	return ssPushRegister(ReceiverResultReg);	/* the simulation stack gets a register entry for ReceiverResultReg */
+}
+
+
 /*	Generate the abort for a method. This abort performs either a call of
 	ceSICMiss: to handle a single-in-line cache miss or a call of
 	ceStackOverflow: to handle a
@@ -8280,7 +8730,7 @@
 	miss.  */
 
 static sqInt
-genMethodAbortTrampoline(void)
+genMethodAbortTrampolineFor(sqInt numArgs)
 {
     AbstractInstruction *jumpSICMiss;
 
@@ -8289,7 +8739,10 @@
 	jumpSICMiss = gJumpNonZero(0);
 	compileTrampolineForcallJumpBarnumArgsargargargargsaveRegsresultReg(ceStackOverflow, 1, 1, SendNumArgsReg, null, null, null, 0, null);
 	jmpTarget(jumpSICMiss, gLabel());
-	return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceSICMiss, "ceMethodAbort", 1, 1, ReceiverResultReg, null, null, null, 0, null, 1);
+	genPushRegisterArgsForAbortMissNumArgs(numArgs);
+	return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceSICMiss, trampolineNamenumArgs("ceMethodAbort", (numArgs <= (numRegArgs())
+		? numArgs
+		: -1)), 1, 1, ReceiverResultReg, null, null, null, 0, null, 1);
 }
 
 static void
@@ -8342,62 +8795,71 @@
 	ClassReg. If the register is zero then this is an MNU. */
 
 static sqInt
-genPICAbortTrampoline(void)
+genPICAbortTrampolineFor(sqInt numArgs)
 {
 	opcodeIndex = 0;
-	return genInnerPICAbortTrampoline("cePICAbort");
+	genPushRegisterArgsForAbortMissNumArgs(numArgs);	/* emitted ahead of the inner PIC-abort code */
+	return genInnerPICAbortTrampoline(trampolineNamenumArgs("cePICAbort", (numArgs <= (numRegArgs())
+		? numArgs
+		: -1)));	/* the name is built from numArgs while numArgs <= numRegArgs(), otherwise from -1 */
 }
 
 static sqInt
+genPICMissTrampolineFor(sqInt numArgs)
+{
+    sqInt startAddress;
+
+	startAddress = methodZoneBase;	/* captured before anything is generated; this, not the trampoline generator's result, is what is answered */
+
+	/* N.B. a closed PIC jumps to the miss routine, not calls it, so there is only one retpc on the stack. */
+
+	opcodeIndex = 0;
+	genPushRegisterArgsForNumArgs(numArgs);	/* emitted ahead of the trampoline proper; the final argument 1 below appears to be the appendOpcodes flag, going by the generator's name - confirm */
+	genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceCPICMissreceiver, trampolineNamenumArgs("cePICMiss", (numArgs <= (numRegArgs())
+		? numArgs
+		: -1)), 1, 2, ClassReg, ReceiverResultReg, null, null, 0, null, 1);
+	return startAddress;
+}
+
+static sqInt
 genPopStackBytecode(void)
 {
-	gAddCqR(BytesPerWord, SPReg);
+	if ((ssTop()->spilled)) {	/* SPReg is adjusted only when the top simulation-stack entry is flagged spilled */
+		gAddCqR(BytesPerWord, SPReg);
+	}
+	ssPop(1);	/* the simulation stack is popped in either case */
 	return 0;
 }
 
-
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
-	return address */
-
 static sqInt
 genPrimitiveAdd(void)
 {
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);	/* the argument now comes from Arg0Reg instead of being loaded from the stack; TempReg is the scratch copy for the tag test */
+	gMoveRR(Arg0Reg, ClassReg);	/* second copy, which has its tags removed and then receives the sum */
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
 	genRemoveSmallIntegerTagsInScratchReg(ClassReg);
-	gMoveRR(ReceiverResultReg, TempReg);
-	gAddRR(ClassReg, TempReg);
+	gAddRR(ReceiverResultReg, ClassReg);	/* the add targets ClassReg, so ReceiverResultReg and Arg0Reg are not written before the overflow jump */
 	jumpOvfl = gJumpOverflow(0);
-	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gMoveRR(ClassReg, ReceiverResultReg);	/* reached only when the overflow jump is not taken */
+	gRetN(0);	/* was gRetN(BytesPerWord * 2) */
 	jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
 	return 0;
 }
 
-
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	return address */
-
 static sqInt
 genPrimitiveAsFloat(void)
 {
     AbstractInstruction *jumpFailAlloc;
 
-	gMoveRR(ReceiverResultReg, ClassReg);
-	genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
-	gConvertRRd(ClassReg, DPFPReg0);
+	gMoveRR(ReceiverResultReg, TempReg);
+	genConvertSmallIntegerToIntegerInScratchReg(TempReg);
+	gConvertRRd(TempReg, DPFPReg0);
 	jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
 	gMoveRR(SendNumArgsReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord);
+	gRetN(0);
 	jmpTarget(jumpFailAlloc, gLabel());
 	compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
 	return 0;
@@ -8406,8 +8868,8 @@
 static sqInt
 genPrimitiveAt(void)
 {
-	gMoveMwrR(BytesPerWord, SPReg, Arg0Reg);
-	return genInnerPrimitiveAt(BytesPerWord * 2);
+	assert((numRegArgs()) >= 1);	/* Arg0Reg is no longer loaded from the stack here, so at least one register arg is required */
+	return genInnerPrimitiveAt(0);	/* the inner generator is now passed 0 instead of BytesPerWord * 2 */
 }
 
 static sqInt
@@ -8415,15 +8877,13 @@
 {
     AbstractInstruction *jumpNotSI;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
 
-	/* Whether the SmallInteger tags are zero or non-zero, anding them together will preserve them. */
+	/* Whether the SmallInteger tags are zero or non-zero, anding them together will preserve them. */
 
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
-	gAndRR(ClassReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gAndRR(Arg0Reg, ReceiverResultReg);
+	gRetN(0);
 	jmpTarget(jumpNotSI, gLabel());
 	return 0;
 }
@@ -8433,26 +8893,23 @@
 {
     AbstractInstruction *jumpNotSI;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
 
 	/* Whether the SmallInteger tags are zero or non-zero, oring them together will preserve them. */
 
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
-	gOrRR(ClassReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gOrRR(Arg0Reg, ReceiverResultReg);
+	gRetN(0);
 	jmpTarget(jumpNotSI, gLabel());
 	return 0;
 }
 
 
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
+/*	Receiver and arg in registers.
+	Stack looks like
 	return address
 	
-	rTemp := ArgOffset(SP)
+	rTemp := rArg0
 	rClass := tTemp
 	rTemp := rTemp & 1
 	jz nonInt
@@ -8494,8 +8951,9 @@
     AbstractInstruction *jumpOvfl;
     AbstractInstruction *jumpTooBig;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	assert((numRegArgs()) >= 1);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
 	genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
 	if (!(setsConditionCodesFor(lastOpcode(), JumpNegative))) {
@@ -8512,14 +8970,14 @@
 	genRemoveSmallIntegerTagsInScratchReg(ReceiverResultReg);
 	gLogicalShiftLeftRR(ClassReg, ReceiverResultReg);
 	genAddSmallIntegerTagsTo(ReceiverResultReg);
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpNegative, gNegateR(ClassReg));
 	gCmpCqR(numSmallIntegerBits(), ClassReg);
 	jumpInRange = gJumpLessOrEqual(0);
 	gMoveCqR(numSmallIntegerBits(), ClassReg);
 	jmpTarget(jumpInRange, gArithmeticShiftRightRR(ClassReg, ReceiverResultReg));
 	genSetSmallIntegerTagsIn(ReceiverResultReg);
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpNotSI, jmpTarget(jumpTooBig, jmpTarget(jumpOvfl, gLabel())));
 	return 0;
 }
@@ -8529,16 +8987,14 @@
 {
     AbstractInstruction *jumpNotSI;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
 
 	/* Clear one or the other tag so that xoring will preserve them. */
 
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
-	genRemoveSmallIntegerTagsInScratchReg(ClassReg);
-	gXorRR(ClassReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	genRemoveSmallIntegerTagsInScratchReg(Arg0Reg);
+	gXorRR(Arg0Reg, ReceiverResultReg);
+	gRetN(0);
 	jmpTarget(jumpNotSI, gLabel());
 	return 0;
 }
@@ -8550,6 +9006,13 @@
 	block entry or the no-context-switch entry, as appropriate, and we're
 	done. If not,
 	invoke the interpreter primitive. */
+/*	Check the argument count. Fail if wrong.
+	Get the method from the outerContext and see if it is cogged. If so, jump
+	to the
+	block entry or the no-context-switch entry, as appropriate, and we're
+	done. If not,
+	invoke the interpreter primitive.
+	Override to push the register args first. */
 
 static sqInt
 genPrimitiveClosureValue(void)
@@ -8559,6 +9022,7 @@
     void (*primitiveRoutine)();
     sqInt result;
 
+	genPushRegisterArgs();
 	genLoadSlotsourceRegdestReg(ClosureNumArgsIndex, ReceiverResultReg, TempReg);
 	gCmpCqR(((methodOrBlockNumArgs << 1) | 1), TempReg);
 	jumpFail = gJumpNonZero(0);
@@ -8591,9 +9055,9 @@
     AbstractInstruction *jumpSameSign;
     AbstractInstruction *jumpZero;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
-	gMoveRR(TempReg, Arg1Reg);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
+	gMoveRR(Arg0Reg, Arg1Reg);
 
 	/* We must shift away the tags, not just subtract them, so that the
 	 overflow case doesn't actually overflow the machine instruction. */
@@ -8621,8 +9085,7 @@
 	jmpTarget(jumpSameSign, convert = gLabel());
 	genConvertIntegerToSmallIntegerInScratchReg(TempReg);
 	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpExact, gCmpCqR(1 << ((numSmallIntegerBits()) - 1), TempReg));
 	gJumpLess(convert);
 	jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()));
@@ -8637,8 +9100,8 @@
     AbstractInstruction *jumpOverflow;
     AbstractInstruction *jumpZero;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
 
 	/* We must shift away the tags, not just subtract them, so that the
 	 overflow case doesn't actually overflow the machine instruction. */
@@ -8658,8 +9121,7 @@
 	jumpOverflow = gJumpGreaterOrEqual(0);
 	genConvertIntegerToSmallIntegerInScratchReg(TempReg);
 	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpOverflow, jmpTarget(jumpInexact, jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()))));
 	return 0;
 }
@@ -8671,9 +9133,8 @@
 }
 
 
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
+/*	Receiver and arg in registers.
+	Stack looks like
 	return address */
 
 static sqInt
@@ -8681,14 +9142,12 @@
 {
     AbstractInstruction *jumpFalse;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gCmpRR(TempReg, ReceiverResultReg);
+	gCmpRR(Arg0Reg, ReceiverResultReg);
 	jumpFalse = gJumpNonZero(0);
 	annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject());
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpFalse, annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject()));
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	return 0;
 }
 
@@ -8746,11 +9205,6 @@
 	return genDoubleComparisoninvert(gJumpFPNotEqual, 0);
 }
 
-
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	return address */
-
 static sqInt
 genPrimitiveFloatSquareRoot(void)
 {
@@ -8760,8 +9214,7 @@
 	gSqrtRd(DPFPReg0);
 	jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
 	gMoveRR(SendNumArgsReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord);
+	gRetN(0);
 	jmpTarget(jumpFailAlloc, gLabel());
 	compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
 	return 0;
@@ -8794,8 +9247,7 @@
 	jumpSI = genJumpSmallIntegerInScratchReg(ClassReg);
 	genGetHashFieldNonIntOfasSmallIntegerInto(ReceiverResultReg, TempReg);
 	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord);
+	gRetN(0);
 	jmpTarget(jumpSI, gLabel());
 	return 0;
 }
@@ -8820,8 +9272,8 @@
     AbstractInstruction *jumpSameSign;
     AbstractInstruction *jumpZero;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
 	genRemoveSmallIntegerTagsInScratchReg(ClassReg);
 	jumpZero = gJumpZero(0);
@@ -8844,8 +9296,7 @@
 	jmpTarget(jumpSameSign, jmpTarget(jumpExact, gLabel()));
 	genSetSmallIntegerTagsIn(ClassReg);
 	gMoveRR(ClassReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()));
 	return 0;
 }
@@ -8856,18 +9307,17 @@
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
+	gMoveRR(ReceiverResultReg, Arg1Reg);
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
 	genShiftAwaySmallIntegerTagsInScratchReg(ClassReg);
-	gMoveRR(ReceiverResultReg, TempReg);
-	genRemoveSmallIntegerTagsInScratchReg(TempReg);
-	gMulRR(TempReg, ClassReg);
+	genRemoveSmallIntegerTagsInScratchReg(Arg1Reg);
+	gMulRR(Arg1Reg, ClassReg);
 	jumpOvfl = gJumpOverflow(0);
 	genSetSmallIntegerTagsIn(ClassReg);
 	gMoveRR(ClassReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
 	return 0;
 }
@@ -8885,8 +9335,8 @@
     AbstractInstruction *jumpOverflow;
     AbstractInstruction *jumpZero;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
+	gMoveRR(Arg0Reg, ClassReg);
 
 	/* We must shift away the tags, not just subtract them, so that the
 	 overflow case doesn't actually overflow the machine instruction. */
@@ -8904,8 +9354,7 @@
 	jumpOverflow = gJumpGreaterOrEqual(0);
 	genConvertIntegerToSmallIntegerInScratchReg(TempReg);
 	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpOverflow, jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel())));
 	return 0;
 }
@@ -8913,38 +9362,30 @@
 static sqInt
 genPrimitiveSize(void)
 {
-	return genInnerPrimitiveSize(BytesPerWord);
+	return genInnerPrimitiveSize(0);
 }
 
 static sqInt
 genPrimitiveStringAt(void)
 {
-	gMoveMwrR(BytesPerWord, SPReg, Arg0Reg);
-	return genInnerPrimitiveStringAt(BytesPerWord * 2);
+	assert((numRegArgs()) >= 1);
+	return genInnerPrimitiveStringAt(0);
 }
 
-
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
-	return address */
-
 static sqInt
 genPrimitiveSubtract(void)
 {
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
 	jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
 	gMoveRR(ReceiverResultReg, TempReg);
-	gSubRR(ClassReg, TempReg);
+	gSubRR(Arg0Reg, TempReg);
 	jumpOvfl = gJumpOverflow(0);
 	genAddSmallIntegerTagsTo(TempReg);
 	gMoveRR(TempReg, ReceiverResultReg);
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
 	return 0;
 }
@@ -9018,9 +9459,10 @@
 genPushActiveContextBytecode(void)
 {
 	assert(needsFrame);
+	(optStatus.isReceiverResultRegLive = 0);
+	ssAllocateCallReg(ReceiverResultReg);
 	CallRT(cePushActiveContextTrampoline);
-	gPushR(ReceiverResultReg);
-	return 0;
+	return ssPushRegister(ReceiverResultReg);
 }
 
 
@@ -9054,18 +9496,18 @@
 
 	assert(needsFrame);
 	addBlockStartAtnumArgsnumCopiedspan(bytecodePointer + 4, byte1 & 15, numCopied = ((usqInt) byte1) >> 4, (byte2 << 8) + byte3);
+	if (numCopied > 0) {
+		ssFlushTo(simStackPtr);
+	}
+	(optStatus.isReceiverResultRegLive = 0);
+	ssAllocateCallRegand(SendNumArgsReg, ReceiverResultReg);
 	gMoveCqR(byte1 | ((bytecodePointer + 5) << 8), SendNumArgsReg);
 	CallRT(ceClosureCopyTrampoline);
 	if (numCopied > 0) {
-		if (numCopied > 1) {
-			gAddCqR((numCopied - 1) * BytesPerWord, SPReg);
-		}
-		gMoveRMwr(ReceiverResultReg, 0, SPReg);
+		gAddCqR(numCopied * BytesPerWord, SPReg);
+		ssPop(numCopied);
 	}
-	else {
-		gPushR(ReceiverResultReg);
-	}
-	return 0;
+	return ssPushRegister(ReceiverResultReg);
 }
 
 static sqInt
@@ -9114,22 +9556,25 @@
 genPushLiteralVariable(sqInt literalIndex)
 {
     sqInt association;
+    sqInt freeReg;
 
+	freeReg = ssAllocatePreferredReg(ClassReg);
 
 	/* N.B. Do _not_ use ReceiverResultReg to avoid overwriting receiver in assignment in frameless methods. */
+	/* So far descriptors are not rich enough to describe the entire dereference so generate the register
+	 load but don't push the result.  There is an order-of-evaluation issue if we defer the dereference. */
 
 	association = literalofMethod(literalIndex, methodObj);
-	annotateobjRef(gMoveCwR(association, ClassReg), association);
-	genLoadSlotsourceRegdestReg(ValueIndex, ClassReg, TempReg);
-	gPushR(TempReg);
+	annotateobjRef(gMoveCwR(association, TempReg), association);
+	genLoadSlotsourceRegdestReg(ValueIndex, TempReg, freeReg);
+	ssPushRegister(freeReg);
 	return 0;
 }
 
 static sqInt
 genPushLiteral(sqInt literal)
 {
-	annotateobjRef(gPushCw(literal), literal);
-	return 0;
+	return ssPushConstant(literal);
 }
 
 static sqInt
@@ -9139,14 +9584,16 @@
     AbstractInstruction *jmpSingle;
 
 	assert(needsFrame);
+	ssAllocateCallRegand(ReceiverResultReg, SendNumArgsReg);
+	ensureReceiverResultRegContainsSelf();
+	if ((registerMaskFor(ReceiverResultReg)) & callerSavedRegMask) {
+		(optStatus.isReceiverResultRegLive = 0);
+	}
 	if (slotIndex == InstructionPointerIndex) {
-		gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
 		gMoveCqR(slotIndex, SendNumArgsReg);
 		CallRT(ceFetchContextInstVarTrampoline);
-		gPushR(SendNumArgsReg);
-		return 0;
+		return ssPushRegister(SendNumArgsReg);
 	}
-	gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
 	genLoadSlotsourceRegdestReg(SenderIndex, ReceiverResultReg, TempReg);
 	jmpSingle = genJumpNotSmallIntegerInScratchReg(TempReg);
 	gMoveCqR(slotIndex, SendNumArgsReg);
@@ -9154,8 +9601,8 @@
 	jmpDone = gJump(0);
 	jmpTarget(jmpSingle, gLabel());
 	genLoadSlotsourceRegdestReg(slotIndex, ReceiverResultReg, SendNumArgsReg);
-	jmpTarget(jmpDone, gPushR(SendNumArgsReg));
-	return 0;
+	jmpTarget(jmpDone, gLabel());
+	return ssPushRegister(SendNumArgsReg);
 }
 
 static sqInt
@@ -9166,7 +9613,13 @@
     sqInt size;
 
 	assert(needsFrame);
-	popValues = byte1 > 127;
+	(optStatus.isReceiverResultRegLive = 0);
+	if ((popValues = byte1 > 127)) {
+		ssFlushTo(simStackPtr);
+	}
+	else {
+		ssAllocateCallRegand(SendNumArgsReg, ReceiverResultReg);
+	}
 	size = byte1 & 127;
 	gMoveCqR(size, SendNumArgsReg);
 	CallRT(ceCreateNewArrayTrampoline);
@@ -9175,9 +9628,9 @@
 			gPopR(TempReg);
 			genStoreSourceRegslotIndexintoNewObjectInDestReg(TempReg, i, ReceiverResultReg);
 		}
+		ssPop(size);
 	}
-	gPushR(ReceiverResultReg);
-	return 0;
+	return ssPushRegister(ReceiverResultReg);
 }
 
 static sqInt
@@ -9189,14 +9642,7 @@
 static sqInt
 genPushReceiverBytecode(void)
 {
-	if (needsFrame) {
-		gMoveMwrR(FoxMFReceiver, FPReg, TempReg);
-		gPushR(TempReg);
-	}
-	else {
-		gPushR(ReceiverResultReg);
-	}
-	return 0;
+	return ssPushDesc(simSelf);
 }
 
 static sqInt
@@ -9208,26 +9654,113 @@
 static sqInt
 genPushReceiverVariable(sqInt index)
 {
-    sqInt maybeErr;
+	ensureReceiverResultRegContainsSelf();
+	return genSSPushSlotreg(index, ReceiverResultReg);
+}
 
-	if (needsFrame) {
-		gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+
+/*	Ensure that the register args are pushed before the retpc for methods with
+	arity <= self numRegArgs.
+ */
+/*	This won't be as clumsy on a RISC. But putting the receiver and
+	args above the return address means the CoInterpreter has a
+	single machine-code frame format which saves us a lot of work. */
+
+static void
+genPushRegisterArgs(void)
+{
+	if (!(regArgsHaveBeenPushed
+		 || (methodOrBlockNumArgs > (numRegArgs())))) {
+		genPushRegisterArgsForNumArgs(methodOrBlockNumArgs);
+		regArgsHaveBeenPushed = 1;
 	}
-	maybeErr = genLoadSlotsourceRegdestReg(index, ReceiverResultReg, TempReg);
-	if (maybeErr < 0) {
-		return maybeErr;
+}
+
+
+/*	Ensure that the register args are pushed before the outer and
+	inner retpcs at an entry miss for arity <= self numRegArgs. The
+	outer retpc is that of a call at a send site. The inner is the call
+	from a method or PIC abort/miss to the trampoline. */
+/*	This won't be as clumsy on a RISC. But putting the receiver and
+	args above the return address means the CoInterpreter has a
+	single machine-code frame format which saves us a lot of work. */
+/*	Iff there are register args convert
+	base	->	outerRetpc		(send site retpc)
+	sp		->	innerRetpc		(PIC abort/miss retpc)
+	to
+	base	->	receiver
+	(arg0)
+	(arg1)
+	outerRetpc
+	sp		->	innerRetpc		(PIC abort/miss retpc) */
+
+static void
+genPushRegisterArgsForAbortMissNumArgs(sqInt numArgs)
+{
+	if (numArgs <= (numRegArgs())) {
+		assert((numRegArgs()) <= 2);
+		if (numArgs == 0) {
+			gMoveMwrR(0, SPReg, TempReg);
+			gPushR(TempReg);
+			gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+			gMoveRMwr(TempReg, BytesPerWord, SPReg);
+			gMoveRMwr(ReceiverResultReg, 2 * BytesPerWord, SPReg);
+			return;
+		}
+		if (numArgs == 1) {
+			gMoveMwrR(BytesPerWord, SPReg, TempReg);
+			gPushR(TempReg);
+			gMoveMwrR(BytesPerWord, SPReg, TempReg);
+			gPushR(TempReg);
+			gMoveRMwr(ReceiverResultReg, 3 * BytesPerWord, SPReg);
+			gMoveRMwr(Arg0Reg, 2 * BytesPerWord, SPReg);
+			return;
+		}
+		if (numArgs == 2) {
+			gPushR(Arg1Reg);
+			gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+			gPushR(TempReg);
+			gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+			gPushR(TempReg);
+			gMoveRMwr(ReceiverResultReg, 4 * BytesPerWord, SPReg);
+			gMoveRMwr(Arg0Reg, 3 * BytesPerWord, SPReg);
+			return;
+		}
 	}
-	gPushR(TempReg);
-	return 0;
 }
 
+
+/*	Ensure that the register args are pushed before the retpc for arity <=
+	self numRegArgs.
+ */
+/*	This won't be as clumsy on a RISC. But putting the receiver and
+	args above the return address means the CoInterpreter has a
+	single machine-code frame format which saves us a lot of work. */
+
+static void
+genPushRegisterArgsForNumArgs(sqInt numArgs)
+{
+	if (numArgs <= (numRegArgs())) {
+		gMoveMwrR(0, SPReg, TempReg);
+		gMoveRMwr(ReceiverResultReg, 0, SPReg);
+		assert((numRegArgs()) <= 2);
+		if (numArgs > 0) {
+			gPushR(Arg0Reg);
+			if (numArgs > 1) {
+				gPushR(Arg1Reg);
+			}
+		}
+		gPushR(TempReg);
+	}
+}
+
 static sqInt
 genPushRemoteTempLongBytecode(void)
 {
+	ssAllocateRequiredRegand(ClassReg, SendNumArgsReg);
 	gMoveMwrR(frameOffsetOfTemporary(byte2), FPReg, ClassReg);
-	genLoadSlotsourceRegdestReg(byte1, ClassReg, TempReg);
-	gPushR(TempReg);
-	return 0;
+	genLoadSlotsourceRegdestReg(byte1, ClassReg, SendNumArgsReg);
+	return ssPushRegister(SendNumArgsReg);
 }
 
 static sqInt
@@ -9239,9 +9772,7 @@
 static sqInt
 genPushTemporaryVariable(sqInt index)
 {
-	gMoveMwrR(frameOffsetOfTemporary(index), FPReg, TempReg);
-	gPushR(TempReg);
-	return 0;
+	return ssPushDesc(simStack[index]);
 }
 
 
@@ -9388,8 +9919,8 @@
 genReturnTopFromBlock(void)
 {
 	assert(inBlock);
-	flag("currently caller pushes result");
-	gPopR(ReceiverResultReg);
+	popToReg(ssTop(), ReceiverResultReg);
+	ssPop(1);
 	if (needsFrame) {
 		gMoveRR(FPReg, SPReg);
 		gPopR(FPReg);
@@ -9398,16 +9929,11 @@
 	return 0;
 }
 
-
-/*	Return pops receiver and arguments off the stack. Callee pushes the
-	result. 
- */
-
 static sqInt
 genReturnTopFromMethod(void)
 {
-	flag("currently caller pushes result");
-	gPopR(ReceiverResultReg);
+	popToReg(ssTop(), ReceiverResultReg);
+	ssPop(1);
 	return genUpArrowReturn();
 }
 
@@ -9490,37 +10016,32 @@
 static sqInt
 genSendSupernumArgs(sqInt selector, sqInt numArgs)
 {
-	assert(needsFrame);
-	if (isYoung(selector)) {
-		hasYoungReferent = 1;
-	}
-	gMoveMwrR(numArgs * BytesPerWord, SPReg, ReceiverResultReg);
-	if (numArgs > 2) {
-		gMoveCqR(numArgs, SendNumArgsReg);
-	}
-	gMoveCwR(selector, ClassReg);
-	CallSend(superSendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
-	flag("currently caller pushes result");
-	gPushR(ReceiverResultReg);
-	return 0;
+	marshallSendArguments(numArgs);
+	return genMarshalledSendSupernumArgs(selector, numArgs);
 }
 
+
+/*	Generate a trampoline with four arguments.
+	Hack: a negative value indicates an abstract register, a non-negative
+	value indicates a constant. */
+
 static sqInt
+genSendTrampolineFornumArgscalledargargargarg(void *aRoutine, sqInt numArgs, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3)
+{
+    sqInt startAddress;
+
+	startAddress = methodZoneBase;
+	opcodeIndex = 0;
+	genPushRegisterArgsForNumArgs(numArgs);
+	genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 4, regOrConst0, regOrConst1, regOrConst2, regOrConst3, 0, null, 1);
+	return startAddress;
+}
+
+static sqInt
 genSendnumArgs(sqInt selector, sqInt numArgs)
 {
-	if (isYoung(selector)) {
-		hasYoungReferent = 1;
-	}
-	assert(needsFrame);
-	gMoveMwrR(numArgs * BytesPerWord, SPReg, ReceiverResultReg);
-	if (numArgs > 2) {
-		gMoveCqR(numArgs, SendNumArgsReg);
-	}
-	gMoveCwR(selector, ClassReg);
-	CallSend(sendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
-	flag("currently caller pushes result");
-	gPushR(ReceiverResultReg);
-	return 0;
+	marshallSendArguments(numArgs);
+	return genMarshalledSendnumArgs(selector, numArgs);
 }
 
 static sqInt
@@ -9552,55 +10073,376 @@
 	return genJumpTo(target);
 }
 
-
-/*	Stack looks like
-	receiver (also in ResultReceiverReg)
-	arg
-	return address */
-
 static sqInt
 genSmallIntegerComparison(sqInt jumpOpcode)
 {
     AbstractInstruction *jumpFail;
     AbstractInstruction *jumpTrue;
 
-	gMoveMwrR(BytesPerWord, SPReg, TempReg);
-	gMoveRR(TempReg, ClassReg);
+	gMoveRR(Arg0Reg, TempReg);
 	jumpFail = genJumpNotSmallIntegerInScratchReg(TempReg);
-	gCmpRR(ClassReg, ReceiverResultReg);
+	gCmpRR(Arg0Reg, ReceiverResultReg);
 	jumpTrue = gen(jumpOpcode);
 	annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject());
-	flag("currently caller pushes result");
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpTrue, annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject()));
-	gRetN(BytesPerWord * 2);
+	gRetN(0);
 	jmpTarget(jumpFail, gLabel());
 	return 0;
 }
 
 static sqInt
+genSpecialSelectorArithmetic(void)
+{
+    sqInt argInt;
+    sqInt argIsInt;
+    AbstractInstruction *jumpContinue;
+    AbstractInstruction *jumpNotSmallInts;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrInt;
+    sqInt rcvrIsInt;
+    sqInt result;
+
+	primDescriptor = generatorAt(byte0);
+	argIsInt = (((ssTop()->type)) == SSConstant)
+	 && ((((argInt = (ssTop()->constant))) & 1));
+	rcvrIsInt = (((ssValue(1)->type)) == SSConstant)
+	 && ((((rcvrInt = (ssValue(1)->constant))) & 1));
+	if (argIsInt
+	 && (rcvrIsInt)) {
+		rcvrInt = (rcvrInt >> 1);
+		argInt = (argInt >> 1);
+		
+		switch ((primDescriptor->opcode)) {
+		case AddRR:
+						result = rcvrInt + argInt;
+			break;
+		case SubRR:
+						result = rcvrInt - argInt;
+			break;
+		case AndRR:
+						result = rcvrInt && argInt;
+			break;
+		case OrRR:
+						result = rcvrInt || argInt;
+			break;
+		default:
+			error("Case not found and no otherwise clause");
+		}
+		if (isIntegerValue(result)) {
+			annotateBytecode(gLabel());
+			return ssPop(2),ssPushConstant(((result << 1) | 1));
+		}
+		return genSpecialSelectorSend();
+	}
+	if (!(argIsInt
+		 || (rcvrIsInt))) {
+		return genSpecialSelectorSend();
+	}
+	if (argIsInt) {
+		ssFlushTo(simStackPtr - 2);
+		popToReg(ssValue(1), ReceiverResultReg);
+		ssPop(2);
+		gMoveRR(ReceiverResultReg, TempReg);
+	}
+	else {
+		marshallSendArguments(1);
+		gMoveRR(Arg0Reg, TempReg);
+		if (!(rcvrIsInt)) {
+			if (isSmallIntegerTagNonZero()) {
+				gAndRR(ReceiverResultReg, TempReg);
+			}
+			else {
+				gOrRR(ReceiverResultReg, TempReg);
+			}
+		}
+	}
+	jumpNotSmallInts = genJumpNotSmallIntegerInScratchReg(TempReg);
+	
+	switch ((primDescriptor->opcode)) {
+	case AddRR:
+				if (argIsInt) {
+			gAddCqR(argInt - ConstZero, ReceiverResultReg);
+
+			/* overflow; must undo the damage before continuing */
+
+			jumpContinue = gJumpNoOverflow(0);
+			gSubCqR(argInt - ConstZero, ReceiverResultReg);
+		}
+		else {
+			genRemoveSmallIntegerTagsInScratchReg(ReceiverResultReg);
+			gAddRR(Arg0Reg, ReceiverResultReg);
+
+			/* overflow; must undo the damage before continuing */
+
+			jumpContinue = gJumpNoOverflow(0);
+			if (rcvrIsInt) {
+				gMoveCqR(rcvrInt, ReceiverResultReg);
+			}
+			else {
+				gSubRR(Arg0Reg, ReceiverResultReg);
+				genSetSmallIntegerTagsIn(ReceiverResultReg);
+			}
+		}
+		break;
+	case SubRR:
+				if (argIsInt) {
+			gSubCqR(argInt - ConstZero, ReceiverResultReg);
+
+			/* overflow; must undo the damage before continuing */
+
+			jumpContinue = gJumpNoOverflow(0);
+			gAddCqR(argInt - ConstZero, ReceiverResultReg);
+		}
+		else {
+			genRemoveSmallIntegerTagsInScratchReg(Arg0Reg);
+			gSubRR(Arg0Reg, ReceiverResultReg);
+
+			/* overflow; must undo the damage before continuing */
+
+			jumpContinue = gJumpNoOverflow(0);
+			gAddRR(Arg0Reg, ReceiverResultReg);
+			genSetSmallIntegerTagsIn(Arg0Reg);
+		}
+		break;
+	case AndRR:
+				if (argIsInt) {
+			gAndCqR(argInt, ReceiverResultReg);
+		}
+		else {
+			gAndRR(Arg0Reg, ReceiverResultReg);
+		}
+		jumpContinue = gJump(0);
+		break;
+	case OrRR:
+				if (argIsInt) {
+			gOrCqR(argInt, ReceiverResultReg);
+		}
+		else {
+			gOrRR(Arg0Reg, ReceiverResultReg);
+		}
+		jumpContinue = gJump(0);
+		break;
+	default:
+		error("Case not found and no otherwise clause");
+	}
+	jmpTarget(jumpNotSmallInts, gLabel());
+	if (argIsInt) {
+		gMoveCqR(argInt, Arg0Reg);
+	}
+	genMarshalledSendnumArgs(specialSelector(byte0 - 176), 1);
+	jmpTarget(jumpContinue, gLabel());
+	return 0;
+}
+
+static sqInt
 genSpecialSelectorClass(void)
 {
-	gMoveMwrR(0, SPReg, SendNumArgsReg);
+	ssPop(1);
+	ssAllocateRequiredRegand(SendNumArgsReg, ClassReg);
+	ssPush(1);
+	popToReg(ssTop(), SendNumArgsReg);
 	genGetClassObjectOfintoscratchReg(SendNumArgsReg, ClassReg, TempReg);
-	gMoveRMwr(ClassReg, 0, SPReg);
-	return 0;
+	return ssPop(1),ssPushRegister(ClassReg);
 }
 
 static sqInt
+genSpecialSelectorComparison(void)
+{
+    sqInt argInt;
+    sqInt argIsInt;
+    sqInt branchBytecode;
+    BytecodeDescriptor *branchDescriptor;
+    sqInt branchPC;
+    sqInt inlineCAB;
+    AbstractInstruction *jumpNotSmallInts;
+    sqInt postBranchPC;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrInt;
+    sqInt rcvrIsInt;
+    sqInt result;
+    sqInt targetBytecodePC;
+
+	ssFlushTo(simStackPtr - 2);
+	primDescriptor = generatorAt(byte0);
+	argIsInt = (((ssTop()->type)) == SSConstant)
+	 && ((((argInt = (ssTop()->constant))) & 1));
+	rcvrIsInt = (((ssValue(1)->type)) == SSConstant)
+	 && ((((rcvrInt = (ssValue(1)->constant))) & 1));
+	if (argIsInt
+	 && (rcvrIsInt)) {
+		;
+		
+		switch ((primDescriptor->opcode)) {
+		case JumpLess:
+						result = rcvrInt < argInt;
+			break;
+		case JumpLessOrEqual:
+						result = rcvrInt <= argInt;
+			break;
+		case JumpGreater:
+						result = rcvrInt > argInt;
+			break;
+		case JumpGreaterOrEqual:
+						result = rcvrInt >= argInt;
+			break;
+		case JumpZero:
+						result = rcvrInt == argInt;
+			break;
+		case JumpNonZero:
+						result = rcvrInt != argInt;
+			break;
+		default:
+			error("Case not found and no otherwise clause");
+		}
+		annotateBytecode(gLabel());
+		ssPop(2);
+		return ssPushConstant((result
+			? trueObject()
+			: falseObject()));
+	}
+	branchPC = bytecodePointer + ((primDescriptor->numBytes));
+	branchBytecode = fetchByteofObject(branchPC, methodObj);
+
+	/* Only interested in inlining if followed by a conditional branch. */
+
+	branchDescriptor = generatorAt(branchBytecode);
+
+	/* Further, only interested in inlining = and ~= if there's a SmallInteger constant involved.
+	 The relational operators successfully statically predict SmallIntegers; the equality operators do not. */
+
+	inlineCAB = ((branchDescriptor->isBranchTrue))
+	 || ((branchDescriptor->isBranchFalse));
+	if (inlineCAB
+	 && ((((primDescriptor->opcode)) == JumpZero)
+ || (((primDescriptor->opcode)) == JumpNonZero))) {
+		inlineCAB = argIsInt
+		 || (rcvrIsInt);
+	}
+	if (!(inlineCAB)) {
+		return genSpecialSelectorSend();
+	}
+	targetBytecodePC = (branchPC + ((branchDescriptor->numBytes))) + (spanForatbyte0in(branchDescriptor, branchPC, branchBytecode, methodObj));
+	postBranchPC = branchPC + ((branchDescriptor->numBytes));
+	if (argIsInt) {
+		ssFlushTo(simStackPtr - 2);
+		popToReg(ssValue(1), ReceiverResultReg);
+		ssPop(2);
+		gMoveRR(ReceiverResultReg, TempReg);
+	}
+	else {
+		marshallSendArguments(1);
+		gMoveRR(Arg0Reg, TempReg);
+		if (!(rcvrIsInt)) {
+			if (isSmallIntegerTagNonZero()) {
+				gAndRR(ReceiverResultReg, TempReg);
+			}
+			else {
+				gOrRR(ReceiverResultReg, TempReg);
+			}
+		}
+	}
+	jumpNotSmallInts = genJumpNotSmallIntegerInScratchReg(TempReg);
+	if (argIsInt) {
+		gCmpCqR(argInt, ReceiverResultReg);
+	}
+	else {
+		gCmpRR(Arg0Reg, ReceiverResultReg);
+	}
+	genoperand(((branchDescriptor->isBranchTrue)
+		? (primDescriptor->opcode)
+		: inverseBranchFor((primDescriptor->opcode))), ((usqInt)(ensureNonMergeFixupAt(targetBytecodePC - initialPC))));
+	gJump(ensureNonMergeFixupAt(postBranchPC - initialPC));
+	jmpTarget(jumpNotSmallInts, gLabel());
+	if (argIsInt) {
+		gMoveCqR(argInt, Arg0Reg);
+	}
+	return genMarshalledSendnumArgs(specialSelector(byte0 - 176), 1);
+}
+
+static sqInt
 genSpecialSelectorEqualsEquals(void)
 {
+    sqInt argReg;
+    sqInt branchBytecode;
+    BytecodeDescriptor *branchDescriptor;
+    AbstractInstruction *jumpEqual;
     AbstractInstruction *jumpNotEqual;
-    AbstractInstruction *jumpPush;
+    sqInt nextPC;
+    sqInt postBranchPC;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrReg;
+    sqInt resultReg;
+    sqInt targetBytecodePC;
 
-	gPopR(TempReg);
-	gMoveMwrR(0, SPReg, ClassReg);
-	gCmpRR(TempReg, ClassReg);
-	jumpNotEqual = gJumpNonZero(0);
-	annotateobjRef(gMoveCwR(trueObject(), TempReg), trueObject());
-	jumpPush = gJump(0);
-	jmpTarget(jumpNotEqual, annotateobjRef(gMoveCwR(falseObject(), TempReg), falseObject()));
-	jmpTarget(jumpPush, gMoveRMwr(TempReg, 0, SPReg));
+	ssPop(2);
+	resultReg = availableRegisterOrNil();
+	if (!(resultReg)) {
+		ssAllocateRequiredReg(resultReg = Arg1Reg);
+	}
+	ssPush(2);
+	if ((((ssTop()->type)) == SSConstant)
+	 && (!((ssTop()->spilled)))) {
+		if (((ssValue(1)->type)) == SSRegister) {
+
+			/* if spilled we must generate a real pop */
+
+			rcvrReg = (ssValue(1)->registerr);
+		}
+		else {
+			popToReg(ssValue(1), rcvrReg = resultReg);
+		}
+		if (shouldAnnotateObjectReference((ssTop()->constant))) {
+			annotateobjRef(gCmpCwR((ssTop()->constant), rcvrReg), (ssTop()->constant));
+		}
+		else {
+			gCmpCqR((ssTop()->constant), rcvrReg);
+		}
+		ssPop(1);
+	}
+	else {
+		argReg = ssStorePoptoPreferredReg(1, TempReg);
+		rcvrReg = (argReg == resultReg
+			? TempReg
+			: resultReg);
+		popToReg(ssTop(), rcvrReg);
+		gCmpRR(argReg, rcvrReg);
+	}
+	ssPop(1);
+	ssPushRegister(resultReg);
+	primDescriptor = generatorAt(byte0);
+	nextPC = bytecodePointer + ((primDescriptor->numBytes));
+	branchBytecode = fetchByteofObject(nextPC, methodObj);
+	branchDescriptor = generatorAt(branchBytecode);
+	if (((branchDescriptor->isBranchTrue))
+	 || ((branchDescriptor->isBranchFalse))) {
+		ssFlushTo(simStackPtr - 1);
+		targetBytecodePC = (nextPC + ((branchDescriptor->numBytes))) + (spanForatbyte0in(branchDescriptor, nextPC, branchBytecode, methodObj));
+		postBranchPC = nextPC + ((branchDescriptor->numBytes));
+		if (((fixupAt(nextPC - initialPC)->targetInstruction)) == 0) {
+
+			/* The next instruction is dead.  we can skip it. */
+
+			deadCode = 1;
+			ssPop(1);
+			ensureFixupAt(targetBytecodePC - initialPC);
+			ensureFixupAt(postBranchPC - initialPC);
+		}
+		genoperand(((branchDescriptor->isBranchTrue)
+			? JumpZero
+			: JumpNonZero), ((usqInt)(ensureNonMergeFixupAt(targetBytecodePC - initialPC))));
+		gJump(ensureNonMergeFixupAt(postBranchPC - initialPC));
+	}
+	else {
+		jumpNotEqual = gJumpNonZero(0);
+		annotateobjRef(gMoveCwR(trueObject(), resultReg), trueObject());
+		jumpEqual = gJump(0);
+		jmpTarget(jumpNotEqual, annotateobjRef(gMoveCwR(falseObject(), resultReg), falseObject()));
+		jmpTarget(jumpEqual, gLabel());
+	}
+	if (resultReg == ReceiverResultReg) {
+		(optStatus.isReceiverResultRegLive = 0);
+	}
 	return 0;
 }
 
@@ -9618,6 +10460,12 @@
 }
 
 static sqInt
+genSSPushSlotreg(sqInt index, sqInt baseReg)
+{
+	return ssPushBaseoffset(baseReg, slotOffsetOfInstVarIndex(index));
+}
+
+static sqInt
 genStoreAndPopReceiverVariableBytecode(void)
 {
 	return genStorePopReceiverVariable(1, byte0 & 7);
@@ -9645,23 +10493,57 @@
 }
 
 static sqInt
+genStoreImmediateInSourceRegslotIndexdestReg(sqInt sourceReg, sqInt index, sqInt destReg)
+{
+	gMoveRMwr(sourceReg, (index * BytesPerWord) + BaseHeaderSize, destReg);
+	return 0;
+}
+
+static sqInt
 genStorePopLiteralVariable(sqInt popBoolean, sqInt litVarIndex)
 {
     sqInt association;
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
 
+	flag("with better register allocation this wouldn't need a frame.  e.g. use SendNumArgs instead of ReceiverResultReg");
 	assert(needsFrame);
 	association = literalofMethod(litVarIndex, methodObj);
-	annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
-	if (popBoolean) {
-		gPopR(ClassReg);
+	(optStatus.isReceiverResultRegLive = 0);
+	if ((((ssTop()->type)) == SSConstant)
+	 && (isImmediate((ssTop()->constant)))) {
+		constVal = (ssTop()->constant);
+		if (popBoolean) {
+			ssPop(1);
+		}
+		ssAllocateRequiredReg(ReceiverResultReg);
+		annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
+		gMoveCqR(constVal, TempReg);
+		if (traceStores > 0) {
+			CallRT(ceTraceStoreTrampoline);
+		}
+		return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, ValueIndex, ReceiverResultReg);
 	}
-	else {
-		gMoveMwrR(0, SPReg, ClassReg);
+	if ((((topReg = registerOrNil(ssTop()))) == null)
+	 || (topReg == ReceiverResultReg)) {
+		topReg = ClassReg;
 	}
+	ssPop(1);
+	ssAllocateRequiredReg(topReg);
+	ssPush(1);
+	flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+	valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+	if (valueReg == ReceiverResultReg) {
+		gMoveRR(valueReg, topReg);
+	}
+	ssAllocateCallReg(ReceiverResultReg);
+	annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
 	if (traceStores > 0) {
+		gMoveRR(topReg, TempReg);
 		CallRT(ceTraceStoreTrampoline);
 	}
-	return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, ValueIndex, ReceiverResultReg, TempReg);
+	return genStoreSourceRegslotIndexdestRegscratchReg(topReg, ValueIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
@@ -9671,21 +10553,28 @@
     AbstractInstruction *jmpSingle;
 
 	assert(needsFrame);
-	gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+	ssFlushUpThroughReceiverVariable(slotIndex);
+	ensureReceiverResultRegContainsSelf();
+	ssPop(1);
+	ssAllocateCallRegand(ClassReg, SendNumArgsReg);
+	ssPush(1);
 	genLoadSlotsourceRegdestReg(SenderIndex, ReceiverResultReg, TempReg);
-	gMoveMwrR(0, SPReg, ClassReg);
+	flag("why do we always pop??");
+	flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+	popToReg(ssTop(), ClassReg);
 	jmpSingle = genJumpNotSmallIntegerInScratchReg(TempReg);
 	gMoveCqR(slotIndex, SendNumArgsReg);
 	CallRT(ceStoreContextInstVarTrampoline);
 	jmpDone = gJump(0);
 	jmpTarget(jmpSingle, gLabel());
 	if (traceStores > 0) {
+		gMoveRR(ClassReg, TempReg);
 		CallRT(ceTraceStoreTrampoline);
 	}
 	genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
 	jmpTarget(jmpDone, gLabel());
 	if (popBoolean) {
-		gAddCqR(BytesPerWord, SPReg);
+		ssPop(1);
 	}
 	return 0;
 }
@@ -9693,48 +10582,100 @@
 static sqInt
 genStorePopReceiverVariable(sqInt popBoolean, sqInt slotIndex)
 {
-	if (needsFrame) {
-		gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
+
+	ssFlushUpThroughReceiverVariable(slotIndex);
+	if ((((ssTop()->type)) == SSConstant)
+	 && (isImmediate((ssTop()->constant)))) {
+		constVal = (ssTop()->constant);
+		if (popBoolean) {
+			ssPop(1);
+		}
+		ensureReceiverResultRegContainsSelf();
+		gMoveCqR(constVal, TempReg);
+		if (traceStores > 0) {
+			CallRT(ceTraceStoreTrampoline);
+		}
+		return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, slotIndex, ReceiverResultReg);
 	}
-	if (popBoolean) {
-		gPopR(ClassReg);
+	if ((((topReg = registerOrNil(ssTop()))) == null)
+	 || (topReg == ReceiverResultReg)) {
+		topReg = ClassReg;
 	}
-	else {
-		gMoveMwrR(0, SPReg, ClassReg);
+	ssPop(1);
+	ssAllocateCallReg(topReg);
+	ssPush(1);
+	flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+	valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+	if (valueReg == ReceiverResultReg) {
+		gMoveRR(valueReg, topReg);
 	}
+	ensureReceiverResultRegContainsSelf();
 	if (traceStores > 0) {
+		gMoveRR(topReg, TempReg);
 		CallRT(ceTraceStoreTrampoline);
 	}
-	return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
+	return genStoreSourceRegslotIndexdestRegscratchReg(topReg, slotIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
 genStorePopRemoteTempAt(sqInt popBoolean, sqInt slotIndex, sqInt remoteTempIndex)
 {
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
+
 	assert(needsFrame);
-	if (popBoolean) {
-		gPopR(ClassReg);
+	(optStatus.isReceiverResultRegLive = 0);
+	if ((((ssTop()->type)) == SSConstant)
+	 && (isImmediate((ssTop()->constant)))) {
+		constVal = (ssTop()->constant);
+		if (popBoolean) {
+			ssPop(1);
+		}
+		ssAllocateRequiredReg(ReceiverResultReg);
+		gMoveMwrR(frameOffsetOfTemporary(remoteTempIndex), FPReg, ReceiverResultReg);
+		gMoveCqR(constVal, TempReg);
+		if (traceStores > 0) {
+			CallRT(ceTraceStoreTrampoline);
+		}
+		return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, slotIndex, ReceiverResultReg);
 	}
-	else {
-		gMoveMwrR(0, SPReg, ClassReg);
+	if ((((topReg = registerOrNil(ssTop()))) == null)
+	 || (topReg == ReceiverResultReg)) {
+		topReg = ClassReg;
 	}
+	ssPop(1);
+	ssAllocateRequiredReg(topReg);
+	ssPush(1);
+	flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+	valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+	if (valueReg == ReceiverResultReg) {
+		gMoveRR(valueReg, topReg);
+	}
+	if (!(popBoolean)) {
+		ssPop(1);
+		ssPushRegister(topReg);
+	}
+	ssAllocateCallReg(ReceiverResultReg);
 	gMoveMwrR(frameOffsetOfTemporary(remoteTempIndex), FPReg, ReceiverResultReg);
 	if (traceStores > 0) {
+		gMoveRR(topReg, TempReg);
 		CallRT(ceTraceStoreTrampoline);
 	}
-	return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
+	return genStoreSourceRegslotIndexdestRegscratchReg(topReg, slotIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
 genStorePopTemporaryVariable(sqInt popBoolean, sqInt tempIndex)
 {
-	if (popBoolean) {
-		gPopR(TempReg);
-	}
-	else {
-		gMoveMwrR(0, SPReg, TempReg);
-	}
-	gMoveRMwr(TempReg, frameOffsetOfTemporary(tempIndex), FPReg);
+    sqInt reg;
+
+	ssFlushUpThroughTemporaryVariable(tempIndex);
+	reg = ssStorePoptoPreferredReg(popBoolean, TempReg);
+	gMoveRMwr(reg, frameOffsetOfTemporary(tempIndex), FPReg);
 	return 0;
 }
 
@@ -9827,28 +10768,6 @@
 }
 
 
-/*	Generate a trampoline with two arguments.
-	Hack: a negative value indicates an abstract register, a non-negative
-	value indicates a constant. */
-
-static sqInt
-genTrampolineForcalledargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1)
-{
-	return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 2, regOrConst0, regOrConst1, null, null, 0, null, 0);
-}
-
-
-/*	Generate a trampoline with four arguments.
-	Hack: a negative value indicates an abstract register, a non-negative
-	value indicates a constant. */
-
-static sqInt
-genTrampolineForcalledargargargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3)
-{
-	return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 4, regOrConst0, regOrConst1, regOrConst2, regOrConst3, 0, null, 0);
-}
-
-
 /*	Generate a trampoline with two arguments that answers a result.
 	Hack: a negative value indicates an abstract register, a non-negative
 	value indicates a constant. */
@@ -9918,7 +10837,6 @@
 static sqInt
 genUpArrowReturn(void)
 {
-	flag("currently caller pushes result");
 	if (inBlock) {
 		assert(needsFrame);
 		annotateBytecode(CallRT(ceNonLocalReturnTrampoline));
@@ -9927,8 +10845,14 @@
 	if (needsFrame) {
 		gMoveRR(FPReg, SPReg);
 		gPopR(FPReg);
+		gRetN((methodOrBlockNumArgs + 1) * BytesPerWord);
 	}
-	gRetN((methodOrBlockNumArgs + 1) * BytesPerWord);
+	else {
+		gRetN(((methodOrBlockNumArgs > (numRegArgs()))
+		 || (regArgsHaveBeenPushed)
+			? (methodOrBlockNumArgs + 1) * BytesPerWord
+			: 0));
+	}
 	return 0;
 }
 
@@ -10154,6 +11078,7 @@
 	(methodLabel->opcode = Label);
 	((methodLabel->operands))[0] = 0;
 	((methodLabel->operands))[1] = 0;
+	callerSavedRegMask = callerSavedRegisterMask(backEnd);
 }
 
 void
@@ -10178,6 +11103,9 @@
 
 /*	Make sure there's a flagged fixup at the targetIndex (pc relative to first
 	pc) in fixups.
+	These are the targets of backward branches. A backward branch fixup's
+	simStackPtr needs to be set when generating the code for the bytecode at
+	the targetIndex.
 	Initially a fixup's target is just a flag. Later on it is replaced with a
 	proper instruction. */
 
@@ -10187,7 +11115,8 @@
     BytecodeFixup *fixup;
 
 	fixup = fixupAt(targetIndex);
-	(fixup->targetInstruction = ((AbstractInstruction *) 1));
+	(fixup->targetInstruction = ((AbstractInstruction *) 2));
+	(fixup->simStackPtr = -2);
 	return fixup;
 }
 
@@ -10218,7 +11147,73 @@
 	return 3;
 }
 
+static void
+initSimStackForFramefulMethod(sqInt startpc)
+{
+    CogSimStackEntry *desc;
+    sqInt i;
 
+	(optStatus.isReceiverResultRegLive = 0);
+	(simSelf.type = SSBaseOffset);
+	(simSelf.registerr = FPReg);
+	(simSelf.offset = FoxMFReceiver);
+	(simSelf.spilled = 1);
+
+	/* N.B. Includes num args */
+
+	simSpillBase = methodOrBlockNumTemps;
+
+	/* args */
+
+	simStackPtr = simSpillBase - 1;
+	for (i = 0; i <= (methodOrBlockNumArgs - 1); i += 1) {
+		desc = simStackAt(i);
+		(desc->type = SSBaseOffset);
+		(desc->registerr = FPReg);
+		(desc->offset = FoxCallerSavedIP + ((methodOrBlockNumArgs - i) * BytesPerWord));
+		(desc->spilled = 1);
+		(desc->bcptr = startpc);
+	}
+	for (i = methodOrBlockNumArgs; i <= simStackPtr; i += 1) {
+		desc = simStackAt(i);
+		(desc->type = SSBaseOffset);
+		(desc->registerr = FPReg);
+		(desc->offset = FoxMFReceiver - (((i - methodOrBlockNumArgs) + 1) * BytesPerWord));
+		(desc->spilled = 1);
+		(desc->bcptr = startpc);
+	}
+}
+
+static void
+initSimStackForFramelessMethod(sqInt startpc)
+{
+    CogSimStackEntry *desc;
+
+	(simSelf.type = SSRegister);
+	(simSelf.registerr = ReceiverResultReg);
+	(simSelf.spilled = 0);
+	(optStatus.isReceiverResultRegLive = 1);
+	(optStatus.ssEntry = (&simSelf));
+	assert(methodOrBlockNumTemps == methodOrBlockNumArgs);
+	simStackPtr = simSpillBase = -1;
+	assert((numRegArgs()) <= 2);
+	if (((methodOrBlockNumArgs >= 1) && (methodOrBlockNumArgs <= (numRegArgs())))) {
+		desc = simStackAt(0);
+		(desc->type = SSRegister);
+		(desc->registerr = Arg0Reg);
+		(desc->spilled = 0);
+		(desc->bcptr = startpc);
+		if (methodOrBlockNumArgs > 1) {
+			desc = simStackAt(1);
+			(desc->type = SSRegister);
+			(desc->registerr = Arg1Reg);
+			(desc->spilled = 0);
+			(desc->bcptr = startpc);
+		}
+	}
+}
+
+
 /*	Answer the inline cache tag for the return address of a send. */
 
 static sqInt
@@ -10311,6 +11306,72 @@
 }
 
 static sqInt
+inverseBranchFor(sqInt opcode)
+{
+	
+	switch (opcode) {
+	case JumpLongZero:
+				return JumpLongNonZero;
+
+	case JumpLongNonZero:
+				return JumpLongZero;
+
+	case JumpZero:
+				return JumpNonZero;
+
+	case JumpNonZero:
+				return JumpZero;
+
+	case JumpNegative:
+				return JumpNonNegative;
+
+	case JumpNonNegative:
+				return JumpNegative;
+
+	case JumpOverflow:
+				return JumpNoOverflow;
+
+	case JumpNoOverflow:
+				return JumpOverflow;
+
+	case JumpCarry:
+				return JumpNoCarry;
+
+	case JumpNoCarry:
+				return JumpCarry;
+
+	case JumpLess:
+				return JumpGreaterOrEqual;
+
+	case JumpGreaterOrEqual:
+				return JumpLess;
+
+	case JumpGreater:
+				return JumpLessOrEqual;
+
+	case JumpLessOrEqual:
+				return JumpGreater;
+
+	case JumpBelow:
+				return JumpAboveOrEqual;
+
+	case JumpAboveOrEqual:
+				return JumpBelow;
+
+	case JumpAbove:
+				return JumpBelowOrEqual;
+
+	case JumpBelowOrEqual:
+				return JumpAbove;
+
+	default:
+		error("Case not found and no otherwise clause");
+	}
+	error("invalid opcode for inverse");
+	return 0;
+}
+
+static sqInt
 isAFixup(AbstractInstruction * self_in_isAFixup, void *fixupOrAddress)
 {
 	return addressIsInFixups(fixupOrAddress);
@@ -10396,6 +11457,12 @@
 	 || (((target >= methodZoneBase) && (target <= (zoneLimit()))));
 }
 
+static sqInt
+isSmallIntegerTagNonZero(void)
+{
+	return 1;
+}
+
 static AbstractInstruction *
 gJumpAboveOrEqual(void *jumpTarget)
 {
@@ -10503,6 +11570,12 @@
 }
 
 static AbstractInstruction *
+gJumpNoOverflow(void *jumpTarget)
+{
+	return genoperand(JumpNoOverflow, ((sqInt)jumpTarget));
+}
+
+static AbstractInstruction *
 gJumpOverflow(void *jumpTarget)
 {
 	return genoperand(JumpOverflow, ((sqInt)jumpTarget));
@@ -10728,7 +11801,20 @@
 	return ((((byteAt(followingAddress - 1)) << 24) + ((byteAt(followingAddress - 2)) << 16)) + ((byteAt(followingAddress - 3)) << 8)) + (byteAt(followingAddress - 4));
 }
 
+static sqInt
+liveRegisters(void)
+{
+    sqInt i;
+    sqInt regsSet;
 
+	regsSet = 0;
+	for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= simStackPtr; i += 1) {
+		regsSet = regsSet | (registerMask(simStackAt(i)));
+	}
+	return regsSet;
+}
+
+
 /*	Answer the byte size of a MoveCwR opcode's corresponding machine code */
 
 static sqInt
@@ -11430,6 +12516,45 @@
 	return 0;
 }
 
+
+/*	Spill everything on the simulated stack that needs spilling (i.e. the
+	entries below the receiver and arguments).
+	Marshall receiver and arguments to stack and/or registers depending on arg
+	count. If the args don't fit in registers push receiver and args (spill
+	everything), but still assign
+	the receiver to ReceiverResultReg. */
+
+static void
+marshallSendArguments(sqInt numArgs)
+{
+	if (numArgs > (numRegArgs())) {
+		ssFlushTo(simStackPtr);
+		storeToReg(simStackAt(simStackPtr - numArgs), ReceiverResultReg);
+	}
+	else {
+		ssFlushTo((simStackPtr - numArgs) - 1);
+		if (numArgs > 0) {
+			if (((numRegArgs()) > 1)
+			 && (numArgs > 1)) {
+				ssAllocateRequiredRegupThrough(Arg0Reg, simStackPtr - 2);
+				ssAllocateRequiredRegupThrough(Arg1Reg, simStackPtr - 1);
+			}
+			else {
+				ssAllocateRequiredRegupThrough(Arg0Reg, simStackPtr - 1);
+			}
+		}
+		if (((numRegArgs()) > 1)
+		 && (numArgs > 1)) {
+			popToReg(simStackAt(simStackPtr), Arg1Reg);
+		}
+		if (numArgs > 0) {
+			popToReg(simStackAt((simStackPtr - numArgs) + 1), Arg0Reg);
+		}
+		popToReg(simStackAt(simStackPtr - numArgs), ReceiverResultReg);
+	}
+	ssPop(numArgs + 1);
+}
+
 usqInt
 maxCogMethodAddress(void)
 {
@@ -11509,10 +12634,64 @@
 		: absPC);
 }
 
+
+/*	Discard type information because of a control-flow merge. */
+
+static void
+mergeAtfrom(CogSimStackEntry * self_in_mergeAtfrom, sqInt baseOffset, sqInt baseRegister)
+{
+	assert((self_in_mergeAtfrom->spilled));
+	if (((self_in_mergeAtfrom->type)) == SSSpill) {
+		assert((((self_in_mergeAtfrom->offset)) == baseOffset)
+		 && (((self_in_mergeAtfrom->registerr)) == baseRegister));
+	}
+	else {
+		(self_in_mergeAtfrom->type) = SSSpill;
+		(self_in_mergeAtfrom->offset) = baseOffset;
+		(self_in_mergeAtfrom->registerr) = baseRegister;
+	}
+}
+
+
+/*	Merge control flow at a fixup. The fixup holds the simStackPtr at the jump
+	to this target.
+	See stackToRegisterMapping on the class side for a full description. */
+
+static void
+mergeafterReturn(BytecodeFixup *fixup, sqInt mergeFollowsReturn)
+{
+    sqInt i;
+
+	traceMerge(fixup);
+	(optStatus.isReceiverResultRegLive = 0);
+	if (mergeFollowsReturn) {
+		assert((((usqInt)((fixup->targetInstruction)))) >= 2);
+		simStackPtr = (fixup->simStackPtr);
+	}
+	if ((((usqInt)((fixup->targetInstruction)))) <= 2) {
+		ssFlushTo(simStackPtr);
+		if (((fixup->simStackPtr)) <= -2) {
+			(fixup->simStackPtr = simStackPtr);
+		}
+		(fixup->targetInstruction = gLabel());
+	}
+	assert(simStackPtr >= ((fixup->simStackPtr)));
+	;
+	simStackPtr = (fixup->simStackPtr);
+
+	/* For now throw away all type information for values on the stack, but sometime consider
+	 the more sophisticated merge described in the class side stackToRegisterMapping. */
+
+	simSpillBase = methodOrBlockNumTemps;
+	for (i = methodOrBlockNumTemps; i <= simStackPtr; i += 1) {
+		mergeAtfrom(simStackAt(i), FoxMFReceiver - (((i - methodOrBlockNumArgs) + 1) * BytesPerOop), FPReg);
+	}
+}
+
 static sqInt
 methodAbortTrampolineFor(sqInt numArgs)
 {
-	return ceMethodAbortTrampoline;
+	return methodAbortTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))];
 }
 
 static CogMethod *
@@ -11567,7 +12746,13 @@
 	return genoperand(NegateR, reg);
 }
 
+static AbstractInstruction *
+gNop(void)
+{
+	return gen(Nop);
+}
 
+
 /*	Compute the distance to the logically subsequent bytecode, i.e. skip over
 	blocks. 
  */
@@ -11894,7 +13079,7 @@
 static sqInt
 picAbortTrampolineFor(sqInt numArgs)
 {
-	return cePICAbortTrampoline;
+	return picAbortTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))];
 }
 
 
@@ -11922,7 +13107,37 @@
 	}
 }
 
+static void
+popToReg(CogSimStackEntry * self_in_popToReg, sqInt reg)
+{
+	if ((self_in_popToReg->spilled)) {
+		gPopR(reg);
+		return;
+	}
+	
+	switch ((self_in_popToReg->type)) {
+	case SSBaseOffset:
+				gMoveMwrR((self_in_popToReg->offset), (self_in_popToReg->registerr), reg);
+		break;
+	case SSConstant:
+				if (shouldAnnotateObjectReference((self_in_popToReg->constant))) {
+			annotateobjRef(gMoveCwR((self_in_popToReg->constant), reg), (self_in_popToReg->constant));
+		}
+		else {
+			gMoveCqR((self_in_popToReg->constant), reg);
+		}
+		break;
+	case SSRegister:
+				if (reg != ((self_in_popToReg->registerr))) {
+			gMoveRR((self_in_popToReg->registerr), reg);
+		}
+		break;
+	default:
+		error("Case not found and no otherwise clause");
+	}
+}
 
+
 /*	If there is a generator for the current primitive then answer it;
 	otherwise answer nil. */
 
@@ -12110,15 +13325,45 @@
 }
 
 
-/*	Dummy implementation for CogFooCompiler>callerSavedRegisterMask
-	which doesn't get pruned due to Slang limitations. */
+/*	Answer a bit mask for the receiver's register, if any. */
 
 static sqInt
+registerMask(CogSimStackEntry * self_in_registerMask)
+{
+	return ((((self_in_registerMask->type)) == SSBaseOffset)
+	 || (((self_in_registerMask->type)) == SSRegister)
+		? registerMaskFor((self_in_registerMask->registerr))
+		: 0);
+}
+
+
+/*	Answer a bit mask identifying the symbolic register.
+	Registers are negative numbers. */
+
+static sqInt
+registerMaskFor(sqInt reg)
+{
+	return (((1 - reg) < 0) ? ((usqInt) 1 >> -(1 - reg)) : ((usqInt) 1 << (1 - reg)));
+}
+
+
+/*	Answer a bit mask identifying the symbolic registers.
+	Registers are negative numbers. */
+
+static sqInt
 registerMaskForandand(sqInt reg1, sqInt reg2, sqInt reg3)
 {
-	return 0;
+	return (((((1 - reg1) < 0) ? ((usqInt) 1 >> -(1 - reg1)) : ((usqInt) 1 << (1 - reg1)))) | ((((1 - reg2) < 0) ? ((usqInt) 1 >> -(1 - reg2)) : ((usqInt) 1 << (1 - reg2))))) | ((((1 - reg3) < 0) ? ((usqInt) 1 >> -(1 - reg3)) : ((usqInt) 1 << (1 - reg3))));
 }
 
+static sqInt
+registerOrNil(CogSimStackEntry * self_in_registerOrNil)
+{
+	return (((self_in_registerOrNil->type)) == SSRegister
+		? (self_in_registerOrNil->registerr)
+		: 0);
+}
+
 static void
 relocateAndPruneYoungReferrers(void)
 {
@@ -12411,12 +13656,21 @@
 }
 
 
-/*	See the subclass for explanation. */
+/*	We must ensure the ReceiverResultReg is live across the store check so
+	that we can store into receiver inst vars in a frameless method since self
+	exists only in ReceiverResultReg in a frameless method. So if
+	ReceiverResultReg is
+	caller-saved we use the fact that ceStoreCheck: answers its argument to
+	reload ReceiverResultReg cheaply. Otherwise we don't care about the result
+	and use the cResultRegister, effectively a no-op (see
+	compileTrampoline...)  */
 
 static sqInt
 returnRegForStoreCheck(void)
 {
-	return cResultRegister(backEnd);
+	return ((registerMaskFor(ReceiverResultReg)) & callerSavedRegMask
+		? ReceiverResultReg
+		: cResultRegister(backEnd));
 }
 
 
@@ -12562,6 +13816,7 @@
     BytecodeDescriptor *descriptor;
     sqInt end;
     sqInt pc;
+    sqInt pushingNils;
     sqInt stackDelta;
 
 	needsFrame = 0;
@@ -12569,6 +13824,8 @@
 	pc = (blockStart->startpc);
 	end = ((blockStart->startpc)) + ((blockStart->span));
 	stackDelta = 0;
+	pushingNils = 1;
+	(blockStart->numInitialNils = 0);
 	while (pc < end) {
 		byte0 = fetchByteofObject(pc, methodObj);
 		descriptor = generatorAt(byte0);
@@ -12580,12 +13837,20 @@
 				stackDelta += (descriptor->stackDelta);
 			}
 		}
+		if (pushingNils) {
+			if ((pushingNils = (((descriptor->generator)) == (genPushConstantNilBytecode))
+			 && (((fixupAt(pc - initialPC)->targetInstruction)) == 0))) {
+				assert(((descriptor->numBytes)) == 1);
+				(blockStart->numInitialNils = ((blockStart->numInitialNils)) + 1);
+			}
+		}
 		pc = nextBytecodePCForatbyte0in(descriptor, pc, byte0, methodObj);
 	}
 	if (!(needsFrame)) {
 		if (stackDelta < 0) {
 			error("negative stack delta in block; block contains bogus code or internal error");
 		}
+		(blockStart->numInitialNils = 0);
 		while (stackDelta > 0) {
 			descriptor = generatorAt(fetchByteofObject((blockStart->startpc), methodObj));
 			if (((descriptor->generator)) != (genPushConstantNilBytecode)) {
@@ -12908,7 +14173,289 @@
 	}
 }
 
+static void
+ssAllocateCallReg(sqInt requiredReg1)
+{
+	ssAllocateRequiredRegMaskupThrough(callerSavedRegMask | (registerMaskFor(requiredReg1)), simStackPtr);
+}
+
+static void
+ssAllocateCallRegand(sqInt requiredReg1, sqInt requiredReg2)
+{
+	ssAllocateRequiredRegMaskupThrough(callerSavedRegMask | ((registerMaskFor(requiredReg1)) | (registerMaskFor(requiredReg2))), simStackPtr);
+}
+
 static sqInt
+ssAllocatePreferredReg(sqInt preferredReg)
+{
+    sqInt i;
+    sqInt lastPreferred;
+    sqInt liveRegs;
+    sqInt preferredMask;
+    sqInt reg;
+
+
+	/* compute live regs while noting the last occurrence of preferredReg.
+	 If there are none free we must spill from simSpillBase to last occurrence. */
+
+	lastPreferred = -1;
+	preferredMask = registerMaskFor(preferredReg);
+	liveRegs = registerMaskForandand(TempReg, FPReg, SPReg);
+	for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= simStackPtr; i += 1) {
+		liveRegs = liveRegs | (registerMask(simStackAt(i)));
+		if ((liveRegs & preferredMask) != 0) {
+			lastPreferred = i;
+		}
+	}
+	if ((liveRegs & (registerMaskFor(preferredReg))) == 0) {
+		return preferredReg;
+	}
+	for (reg = GPRegMin; reg <= GPRegMax; reg += 1) {
+		if ((liveRegs & (registerMaskFor(reg))) == 0) {
+			return reg;
+		}
+	}
+	ssFlushTo(lastPreferred);
+	assert(((liveRegisters()) & preferredMask) == 0);
+	return preferredReg;
+}
+
+static void
+ssAllocateRequiredRegMaskupThrough(sqInt requiredRegsMask, sqInt stackPtr)
+{
+    sqInt i;
+    sqInt lastRequired;
+    sqInt liveRegs;
+
+
+	/* compute live regs while noting the last occurrence of required regs.
+	 If these are not free we must spill from simSpillBase to last occurrence.
+	 Note we are conservative here; we could allocate FPReg in frameless methods. */
+
+	lastRequired = -1;
+	liveRegs = registerMaskForandand(TempReg, FPReg, SPReg);
+	for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= stackPtr; i += 1) {
+		liveRegs = liveRegs | (registerMask(simStackAt(i)));
+		if ((liveRegs & requiredRegsMask) != 0) {
+			lastRequired = i;
+		}
+	}
+	if (!((liveRegs & requiredRegsMask) == 0)) {
+		ssFlushTo(lastRequired);
+		assert(((liveRegisters()) & requiredRegsMask) == 0);
+	}
+}
+
+static void
+ssAllocateRequiredReg(sqInt requiredReg)
+{
+	ssAllocateRequiredRegMaskupThrough(registerMaskFor(requiredReg), simStackPtr);
+}
+
+static void
+ssAllocateRequiredRegand(sqInt requiredReg1, sqInt requiredReg2)
+{
+	ssAllocateRequiredRegMaskupThrough((registerMaskFor(requiredReg1)) | (registerMaskFor(requiredReg2)), simStackPtr);
+}
+
+static void
+ssAllocateRequiredRegupThrough(sqInt requiredReg, sqInt stackPtr)
+{
+	ssAllocateRequiredRegMaskupThrough(registerMaskFor(requiredReg), stackPtr);
+}
+
+static void
+ssFlushTo(sqInt index)
+{
+    sqInt i;
+
+	for (i = methodOrBlockNumTemps; i <= (simSpillBase - 1); i += 1) {
+		assert((simStackAt(i)->spilled));
+	}
+	if (simSpillBase <= index) {
+		for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= index; i += 1) {
+			assert(needsFrame);
+			ensureSpilledAtfrom(simStackAt(i), frameOffsetOfTemporary(i), FPReg);
+		}
+		simSpillBase = index + 1;
+	}
+}
+
+
+/*	Any occurrences on the stack of the value being stored must be
+	flushed, and hence so must any values colder than them on the stack. */
+
+static void
+ssFlushUpThroughReceiverVariable(sqInt slotIndex)
+{
+    CogSimStackEntry *desc;
+    sqInt index;
+
+	for (index = simStackPtr; index >= (((simSpillBase < 0) ? 0 : simSpillBase)); index += -1) {
+		desc = simStackAt(index);
+		if ((((desc->type)) == SSBaseOffset)
+		 && ((((desc->registerr)) == ReceiverResultReg)
+ && (((desc->offset)) == (slotOffsetOfInstVarIndex(slotIndex))))) {
+			ssFlushTo(index);
+			return;
+		}
+	}
+}
+
+
+/*	Any occurrences on the stack of the value being stored must be
+	flushed, and hence so must any values colder than them on the stack. */
+
+static void
+ssFlushUpThroughTemporaryVariable(sqInt tempIndex)
+{
+    CogSimStackEntry *desc;
+    sqInt index;
+
+	for (index = simStackPtr; index >= simSpillBase; index += -1) {
+		desc = simStackAt(index);
+		if ((((desc->type)) == SSBaseOffset)
+		 && ((((desc->registerr)) == FPReg)
+ && (((desc->offset)) == (frameOffsetOfTemporary(tempIndex))))) {
+			ssFlushTo(index);
+			return;
+		}
+	}
+}
+
+static void
+ssPop(sqInt n)
+{
+	assert(((simStackPtr - n) >= (methodOrBlockNumTemps - 1))
+	 || ((!needsFrame)
+ && ((simStackPtr - n) >= -1)));
+	simStackPtr -= n;
+}
+
+static sqInt
+ssPushBaseoffset(sqInt reg, sqInt offset)
+{
+    CogSimStackEntry * cascade0;
+
+	ssPush(1);
+	if (simSpillBase > simStackPtr) {
+		simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+	}
+	cascade0 = ssTop();
+	(cascade0->type = SSBaseOffset);
+	(cascade0->registerr = reg);
+	(cascade0->offset = offset);
+	(cascade0->spilled = 0);
+	(cascade0->bcptr = bytecodePointer);
+	return 0;
+}
+
+static sqInt
+ssPushConstant(sqInt literal)
+{
+    CogSimStackEntry * cascade0;
+
+	ssPush(1);
+	if (simSpillBase > simStackPtr) {
+		simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+	}
+	cascade0 = ssTop();
+	(cascade0->type = SSConstant);
+	(cascade0->constant = literal);
+	(cascade0->spilled = 0);
+	(cascade0->bcptr = bytecodePointer);
+	return 0;
+}
+
+static sqInt
+ssPushDesc(CogSimStackEntry simStackEntry)
+{
+	if (((simStackEntry.type)) == SSSpill) {
+		(simStackEntry.type = SSBaseOffset);
+	}
+	(simStackEntry.spilled = 0);
+	(simStackEntry.bcptr = bytecodePointer);
+	simStack[(simStackPtr += 1)] = simStackEntry;
+	if (simSpillBase > simStackPtr) {
+		simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+	}
+	return 0;
+}
+
+static sqInt
+ssPushRegister(sqInt reg)
+{
+    CogSimStackEntry * cascade0;
+
+	ssPush(1);
+	if (simSpillBase > simStackPtr) {
+		simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+	}
+	cascade0 = ssTop();
+	(cascade0->type = SSRegister);
+	(cascade0->registerr = reg);
+	(cascade0->spilled = 0);
+	(cascade0->bcptr = bytecodePointer);
+	return 0;
+}
+
+static void
+ssPush(sqInt n)
+{
+	simStackPtr += n;
+}
+
+
+/*	Store or pop the top simulated stack entry to a register.
+	Pop to preferredReg if the entry is not itself a register.
+	Answer the actual register the result ends up in. */
+
+static sqInt
+ssStorePoptoPreferredReg(sqInt popBoolean, sqInt preferredReg)
+{
+    sqInt actualReg;
+
+	actualReg = preferredReg;
+	if (popBoolean) {
+		if ((((ssTop()->type)) == SSRegister)
+		 && (!((ssTop()->spilled)))) {
+			actualReg = (ssTop()->registerr);
+		}
+		else {
+			popToReg(ssTop(), preferredReg);
+		}
+		ssPop(1);
+	}
+	else {
+		if (((ssTop()->type)) == SSRegister) {
+			actualReg = (ssTop()->registerr);
+		}
+		else {
+			storeToReg(ssTop(), preferredReg);
+		}
+	}
+	return actualReg;
+}
+
+static CogSimStackEntry *
+ssTop(void)
+{
+	return simStackAt(simStackPtr);
+}
+
+static CogSimStackEntry
+ssTopDescriptor(void)
+{
+	return simStack[simStackPtr];
+}
+
+static CogSimStackEntry *
+ssValue(sqInt n)
+{
+	return simStackAt(simStackPtr - n);
+}
+
+static sqInt
 stackBytesForNumArgs(AbstractInstruction * self_in_stackBytesForNumArgs, sqInt numArgs)
 {
 	return numArgs * 4;
@@ -12952,6 +14499,33 @@
 	byteAtput(followingAddress - 4, literal & 255);
 }
 
+static void
+storeToReg(CogSimStackEntry * self_in_storeToReg, sqInt reg)
+{
+	
+	switch ((self_in_storeToReg->type)) {
+	case SSBaseOffset:
+	case SSSpill:
+				gMoveMwrR((self_in_storeToReg->offset), (self_in_storeToReg->registerr), reg);
+		break;
+	case SSConstant:
+				if (shouldAnnotateObjectReference((self_in_storeToReg->constant))) {
+			annotateobjRef(gMoveCwR((self_in_storeToReg->constant), reg), (self_in_storeToReg->constant));
+		}
+		else {
+			gMoveCqR((self_in_storeToReg->constant), reg);
+		}
+		break;
+	case SSRegister:
+				if (reg != ((self_in_storeToReg->registerr))) {
+			gMoveRR((self_in_storeToReg->registerr), reg);
+		}
+		break;
+	default:
+		error("Case not found and no otherwise clause");
+	}
+}
+
 static sqInt
 sib(AbstractInstruction * self_in_sib, sqInt scale, sqInt indexReg, sqInt baseReg)
 {

Modified: branches/Cog/src/vm/cogit.h
===================================================================
--- branches/Cog/src/vm/cogit.h	2011-01-01 20:18:49 UTC (rev 2337)
+++ branches/Cog/src/vm/cogit.h	2011-01-01 20:26:17 UTC (rev 2338)
@@ -11,7 +11,12 @@
 sqInt canMapBytecodePCsToNativePCs(void);
 extern void (*ceCaptureCStackPointers)();
 sqInt ceCPICMissreceiver(CogMethod *cPIC, sqInt receiver);
+extern void (*ceEnter0ArgsPIC)();
+extern void (*ceEnter1ArgsPIC)();
+extern void (*ceEnter2ArgsPIC)();
 extern void (*ceEnterCogCodePopReceiverAndClassRegs)();
+extern void (*ceEnterCogCodePopReceiverArg0Regs)();
+extern void (*ceEnterCogCodePopReceiverArg1Arg0Regs)();
 extern void (*ceEnterCogCodePopReceiverReg)();
 sqInt ceSICMiss(sqInt receiver);
 void checkAssertsEnabledInCogit(void);
@@ -26,6 +31,8 @@
 void compactCogCompiledCode(void);
 void enterCogCodePopReceiver(void);
 void enterCogCodePopReceiverAndClassRegs(void);
+void enterCogCodePopReceiverArg0Regs(void);
+void enterCogCodePopReceiverArg1Arg0Regs(void);
 CogBlockMethod * findEnclosingMethodForinHomeMethod(sqInt mcpc, CogMethod *cogMethod);
 CogBlockMethod * findMethodForStartBcpcinHomeMethod(sqInt startbcpc, CogMethod *cogMethod);
 sqInt genQuickReturnConst(void);
@@ -76,7 +83,12 @@
 sqInt ceCannotResumeTrampoline;
 void (*ceCaptureCStackPointers)(void);
 sqInt ceCheckForInterruptTrampoline;
+void (*ceEnter0ArgsPIC)(void);
+void (*ceEnter1ArgsPIC)(void);
+void (*ceEnter2ArgsPIC)(void);
 void (*ceEnterCogCodePopReceiverAndClassRegs)(void);
+void (*ceEnterCogCodePopReceiverArg0Regs)(void);
+void (*ceEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*ceEnterCogCodePopReceiverReg)(void);
 unsigned long (*ceGetSP)(void);
 sqInt ceReturnToInterpreterTrampoline;
@@ -87,6 +99,8 @@
 sqInt cmNoCheckEntryOffset;
 unsigned long debugPrimCallStackOffset;
 void (*realCEEnterCogCodePopReceiverAndClassRegs)(void);
+void (*realCEEnterCogCodePopReceiverArg0Regs)(void);
+void (*realCEEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*realCEEnterCogCodePopReceiverReg)(void);
 int traceLinkedSends ;
 sqInt traceStores;
@@ -101,7 +115,7 @@
 #define getCStackPointer() CStackPointer
 #define noCheckEntryOffset() cmNoCheckEntryOffset
 #define noContextSwitchBlockEntryOffset() blockNoContextSwitchOffset
-#define numRegArgs() 0
+#define numRegArgs() 1
 #define printOnTrace() (traceLinkedSends & 8)
 #define recordEventTrace() (traceLinkedSends & 4)
 #define recordPrimTrace() (traceLinkedSends & 2)



More information about the Vm-dev mailing list