[Vm-dev] frameless instance initialization

Eliot Miranda eliot.miranda at gmail.com
Thu Apr 23 19:14:30 UTC 2015


Hi All,

    here's a challenge.  Currently the JIT is naive about a sequence of
inst var assignments.  It treats each assignment separately, with each one
getting its own copy of the store check.  For example, when initializing an
Interval via:

setFrom: startInteger to: stopInteger by: stepInteger

start := startInteger.
stop := stopInteger.
step := stepInteger

it generates the following assembly on ARM:

A0D0
objhdr: 8000000A000035
nArgs: 3 type: 2
blksiz: 140
method: C03C10
mthhdr: 6180005
selctr: 6B71A0=#setFrom:to:by:
blkentry: 0
stackCheckOffset: 0
cmRefersToYoung: no
0000a0ec: mov r7, #0
0000a0f0: push {lr}
0000a0f4: bl 0x000009f8 = 16r9F8 = ceMethodAbortNArgs
0000a0f8: ands r0, r0, #1
0000a0fc: b 0x0000a114 = 16rA114 = setFrom:to:by:@44
entry:
0000a100: ands r0, r7, #3
0000a104: bne 0x0000a0f8 = 16rA0F8 = setFrom:to:by:@28
0000a108: ldr r0, [r7]
0000a10c: mvn ip, #0
0000a110: ands r0, r0, ip, lsr #10
0000a114: cmp r0, r8
0000a118: bne 0x0000a0f0 = 16rA0F0 = setFrom:to:by:@20
noCheckEntry:
0000a11c: ldr r5, [sp, #8]
         startInteger
0000a120: str r5, [r7, #8]
         start
0000a124: mov r0, r5
0000a128: ands r0, r0, #3
  startInteger immediate?
0000a12c: bne 0x0000a168 = 16rA168 = setFrom:to:by:@98
0000a130: mov r0, #0, 8
0000a134: orr r0, r0, #5308416 ; 0x510000
0000a138: orr r0, r0, #50944 ; 0xc700
0000a13c: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil
0000a140: cmp r7, r0
   self in oldSpace?  (self >= nil)
0000a144: bcc 0x0000a168 = 16rA168 = setFrom:to:by:@98
0000a148: cmp r5, r0
   startInteger young? (startInteger < nil)
0000a14c: bcs 0x0000a168 = 16rA168 = setFrom:to:by:@98
0000a150: ldrb r0, [r7, #3]
   self in remembered table?
0000a154: ands r0, r0, #32
0000a158: bne 0x0000a168 = 16rA168 = setFrom:to:by:@98
0000a15c: push {lr}
      add self to remembered table
0000a160: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline
IsRelativeCall:
0000a164: pop {lr}
0000a168: ldr r5, [sp, #4]
         stopInteger
0000a16c: str r5, [r7, #12]
0000a170: mov r0, r5
0000a174: ands r0, r0, #3
0000a178: bne 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4
0000a17c: mov r0, #0, 8
0000a180: orr r0, r0, #5308416 ; 0x510000
0000a184: orr r0, r0, #50944 ; 0xc700
0000a188: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil
0000a18c: cmp r7, r0
0000a190: bcc 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4
0000a194: cmp r5, r0
0000a198: bcs 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4
0000a19c: ldrb r0, [r7, #3]
0000a1a0: ands r0, r0, #32
0000a1a4: bne 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4
0000a1a8: push {lr}
0000a1ac: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline
IsRelativeCall:
0000a1b0: pop {lr}
0000a1b4: ldr r5, [sp]
     stepInteger
0000a1b8: str r5, [r7, #16]
0000a1bc: mov r0, r5
0000a1c0: ands r0, r0, #3
0000a1c4: bne 0x0000a200 = 16rA200 = setFrom:to:by:@130
0000a1c8: mov r0, #0, 8
0000a1cc: orr r0, r0, #5308416 ; 0x510000
0000a1d0: orr r0, r0, #50944 ; 0xc700
0000a1d4: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil
0000a1d8: cmp r7, r0
0000a1dc: bcc 0x0000a200 = 16rA200 = setFrom:to:by:@130
0000a1e0: cmp r5, r0
0000a1e4: bcs 0x0000a200 = 16rA200 = setFrom:to:by:@130
0000a1e8: ldrb r0, [r7, #3]
0000a1ec: ands r0, r0, #32
0000a1f0: bne 0x0000a200 = 16rA200 = setFrom:to:by:@130
0000a1f4: push {lr}
0000a1f8: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline
IsRelativeCall:
0000a1fc: pop {lr}
0000a200: add sp, sp, #16 ADD 16 to SP
0000a204: mov pc, lr
0000a208: nop ; (mov r0, r0)
startpc: 12
  16rA164 IsRelativeCall      (16rA20F)
  16rA1B0 IsRelativeCall      (16rA20E)
  16rA1FC IsRelativeCall      (16rA20D)

If the Cogit were smart enough to identify methods that contained only
instance initialization code (push arg or push constant, storePop inst var,
and return self) we could generate much more compact code, e.g.

noCheckEntry:
ldr r5, [sp, #8]
 startInteger
str r5, [r7, #8]
ldr r5, [sp, #4]
 stopInteger
str r5, [r7, #12]
ldr r5, [sp]
   stepInteger
str r5, [r7, #16]
mov r0, #0, 8
orr r0, r0, #5308416 ; 0x510000
orr r0, r0, #50944 ; 0xc700
orr r0, r0, #136 ; 0x88 = 16r51C788 = nil

cmp r7, r0
 self young?  if so, jump to return
bcc L4

ldr r5, [sp, #8]
   startInteger immediate?
tests r5, r5, #3
bne L1
cmp r5, r0
 startInteger old?
bcs L1

L2:
ldrb r0, [r7, #3]
 self in remembered table?
ands r0, r0, #32
bne L1
push {lr}
bl ceStoreCheckTrampoline
IsRelativeCall:
pop {lr}
add sp, sp, #16 ADD 16 to SP
mov pc, lr

L1:
ldr r5, [sp, #4]
 stopInteger
tests r5, r5, #3
bne L3
cmp r5, r0
bcs L3
ldrb r0, [r7, #3]
ands r0, r0, #32
beq L2

L3:
ldr r5, [sp]
 stepInteger
tests r5, r5, #3
bne L4
cmp r5, r0
bcs L4
ldrb r0, [r7, #3]
ands r0, r0, #32
beq L2
L4:
add sp, sp, #16 ADD 16 to SP
mov pc, lr

That's 5 times shorter, including method header, and does far less work in
common cases (self is compared with nil, which is also the old/new boundary,
only once, not once for every inst var), but in the case of an Interval
using SmallIntegers it may be slower because each variable is read from the
stack twice.  Interesting choices.  But I think the code density would
win.  Of course, this may make no difference to overall speed, but it would
be nice to know :-).  If you're up for this, let me know.
-- 
best,
Eliot
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.squeakfoundation.org/pipermail/vm-dev/attachments/20150423/0db4ed45/attachment-0001.htm


More information about the Vm-dev mailing list