[squeak-dev] The Trunk: Collections-ul.284.mcz

commits at source.squeak.org commits at source.squeak.org
Fri Jan 29 01:17:56 UTC 2010


Levente Uzonyi uploaded a new version of Collections to project The Trunk:
http://source.squeak.org/trunk/Collections-ul.284.mcz

==================== Summary ====================

Name: Collections-ul.284
Author: ul
Time: 29 January 2010, 1:37:36.08 am
UUID: 855b55a0-8aa2-ed4a-8612-d1153fa8f656
Ancestors: Collections-ul.282

- fix: ignore Byte Order Mark in ByteString >> #utf8ToSqueak

=============== Diff against Collections-ul.282 ===============

Item was changed:
  ----- Method: ByteString>>utf8ToSqueak (in category 'converting') -----
  utf8ToSqueak
  	"Convert the given string from UTF-8 using the fast path if converting to Latin-1.
  	Runs of bytes not selected by ByteString class>>findFirstInString:inSet:startingAt: with Latin1ToUtf8Map are copied through unchanged (presumably these are the plain ASCII bytes - confirm against the map's definition). Each selected byte is decoded as the lead byte of a two-, three- or four-byte sequence.
	Answers the receiver itself when no byte is selected, or when a malformed sequence is detected (the content is then presumed to be Latin-1). Otherwise answers the contents of a new stream holding the decoded characters.
  	A decoded code point of 16rFEFF (byte order mark) is not written to the result.
  	Delegates to super when Latin1ToUtf8Map is nil."
  	| outStream lastIndex nextIndex byte1 byte2 byte3 byte4 unicode |
  	Latin1ToUtf8Map ifNil:[^super utf8ToSqueak]. "installation guard"
  	lastIndex := 1.
  	"Locate the first byte that needs decoding; 0 means there is none, so the receiver is answered as is."
  	nextIndex := ByteString findFirstInString: self inSet: Latin1ToUtf8Map startingAt: lastIndex.
  	nextIndex = 0 ifTrue:[^self].
  	outStream := (String new: self size) writeStream.
  	"Each pass: copy the untouched run [lastIndex, nextIndex), then decode the sequence starting at nextIndex."
  	[outStream next: nextIndex-lastIndex putAll: self startingAt: lastIndex.
  	byte1 := self byteAt: nextIndex.
  	"NOTE(review): the continuation-byte reads below do not compare nextIndex with self size; what happens for a sequence truncated at the end of the string depends on byteAt: - confirm."
  	"192 = 16rC0: lead byte of the form 110xxxxx"
  	(byte1 bitAnd: 16rE0) = 192 ifTrue: [ "two bytes"
  		byte2 := self byteAt: (nextIndex := nextIndex+1).
  		(byte2 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		unicode := ((byte1 bitAnd: 31) bitShift: 6) + (byte2 bitAnd: 63)].
  	"224 = 16rE0: lead byte of the form 1110xxxx"
  	(byte1 bitAnd: 16rF0) = 224 ifTrue: [ "three bytes"
  		byte2 := self byteAt: (nextIndex := nextIndex+1).
  		(byte2 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		byte3 := self byteAt: (nextIndex := nextIndex+1).
  		(byte3 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		unicode := ((byte1 bitAnd: 15) bitShift: 12) + ((byte2 bitAnd: 63) bitShift: 6)
  			+ (byte3 bitAnd: 63)].
  	"240 = 16rF0: lead byte of the form 11110xxx"
  	(byte1 bitAnd: 16rF8) = 240 ifTrue: [ "four bytes"
  		byte2 := self byteAt: (nextIndex := nextIndex+1).
  		(byte2 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		byte3 := self byteAt: (nextIndex := nextIndex+1).
  		(byte3 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		byte4 := self byteAt: (nextIndex := nextIndex+1).
  		(byte4 bitAnd: 16rC0) = 16r80 ifFalse:[^self]. "invalid UTF-8; presume Latin-1"
  		unicode := ((byte1 bitAnd: 16r7) bitShift: 18) +
  						((byte2 bitAnd: 63) bitShift: 12) + 
  						((byte3 bitAnd: 63) bitShift: 6) +
  						(byte4 bitAnd: 63)].
  	"NOTE(review): unicode is assigned only inside the three branches above and is never reset to nil within the loop, so the check below can only fire on the first pass; on a later pass a lead byte matching none of the branches would leave the previous value in place - confirm whether that is intended."
  	unicode ifNil:[^self]. "invalid UTF-8; presume Latin-1"
  	"The byte order mark test applies to every decoded code point, so 16rFEFF is dropped wherever it occurs in the string."
+ 	unicode = 16rFEFF ifFalse: [ "Skip byte order mark"
+ 		outStream nextPut: (Unicode value: unicode) ].
- 	outStream nextPut: (Unicode value: unicode).
  	"Resume scanning just past the sequence that was consumed."
  	lastIndex := nextIndex + 1.
  	nextIndex := ByteString findFirstInString: self inSet: Latin1ToUtf8Map startingAt: lastIndex.
  	nextIndex = 0] whileFalse.
  	"Copy whatever remains after the last decoded sequence."
  	outStream next: self size-lastIndex+1 putAll: self startingAt: lastIndex.
  	^outStream contents
  !




More information about the Squeak-dev mailing list