'From Pharo1.4 of 18 April 2012 [Latest update: #14459] on 25 September 2012 at 1:33:21 am'!
Collection subclass: #BitmapCharacterSet
	instanceVariableNames: 'byteCharacters wideCharacters'
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Collections-BitmapCharacterSet'!
!BitmapCharacterSet commentStamp: '' prior: 0!
This class implements a set of Character objects that uses a bitmap internally for multibyte characters to quickly tell if they belong to it or not, rather than using a Dictionary like WideCharacterSet does. Byte characters are tested using a 256-element ByteArray.

Instance variables:
	byteCharacters - a 256-element ByteArray; the element at (value + 1) is 1 when the character with that value is present and 0 otherwise.
	wideCharacters - nil until it is first needed, then a ByteArray used as a bitmap. The character with value v is stored in the byte at index (v // 8 + 1) under the mask (16r80 bitShift: (v \\ 8) negated). Only characters with values above 255 are ever recorded here.!


!BitmapCharacterSet methodsFor: 'accessing' stamp: 'JAAyer 1/20/2011 01:05'!
capacity
	"Answer the number of character values the receiver can hold without growing. The first 256 bits of the wide bitmap address values that byteCharacters already covers, so they are subtracted to avoid counting them twice."

	^ byteCharacters size
		+ (wideCharacters
				ifNil: [0]
				ifNotNil: [wideCharacters size * 8 - 256]).! !


!BitmapCharacterSet methodsFor: 'adding' stamp: 'JAAyer 1/20/2011 01:05'!
add: aCharacter
	"Include aCharacter in the receiver and answer aCharacter. Values above 255 set a bit in the wide bitmap, which is grown to one and a half times the required byte index when it is missing or too small."

	| asciiValue |
	(asciiValue := aCharacter asciiValue) > 255
		ifTrue: [| byteIndex |
			byteIndex := asciiValue // 8 + 1.
			(wideCharacters isNil or: [byteIndex > wideCharacters size])
				ifTrue: [self growWideCharacterBitmapTo: byteIndex * 3 // 2].
			wideCharacters
				at: byteIndex
				put: ((wideCharacters at: byteIndex)
						bitOr: (16r80 bitShift: (asciiValue \\ 8) negated))]
		ifFalse: [byteCharacters at: asciiValue + 1 put: 1].
	^ aCharacter.! !


!BitmapCharacterSet methodsFor: 'comparing' stamp: 'JAAyer 1/20/2011 01:06'!
= anObject
	"Answer whether anObject is a set of the same class holding the same characters. Trailing zero bytes of the wide bitmap are ignored so that two sets with the same elements are equal regardless of how much bitmap space each has allocated."

	^ self == anObject
		or: [self class == anObject class
				and: [byteCharacters = anObject byteCharacters
						and: [self significantWideCharacters = anObject significantWideCharacters]]].! !

!BitmapCharacterSet methodsFor: 'comparing' stamp: 'JAAyer 1/20/2011 01:09'!
hash
	"Answer a hash that agrees with #=, i.e. one that depends on the stored characters only and not on the allocated size of the wide bitmap."

	| result significant |
	result := byteCharacters hash.
	(significant := self significantWideCharacters)
		ifNotNil: [result := result bitXor: significant hash].
	^ result.! !


!BitmapCharacterSet methodsFor: 'copying' stamp: 'JAAyer 1/20/2011 01:05'!
postCopy
	"Give the copy its own bitmaps so that changes to it do not affect the original."

	byteCharacters := byteCharacters copy.
	wideCharacters ifNotNil: [wideCharacters := wideCharacters copy].! !


!BitmapCharacterSet methodsFor: 'enumerating' stamp: 'JAAyer 1/20/2011 01:05'!
do: aBlock
	"Evaluate aBlock with each character of the receiver: first the byte characters in ascending order, then the wide characters in ascending order."

	byteCharacters doWithIndex: [:each :i |
		each = 1
			ifTrue: [aBlock value: (Character value: i - 1)]].
	wideCharacters ifNil: [^ self].
	wideCharacters doWithIndex: [:each :byteIndex |
		"Bit 0 of a byte is its most significant bit, matching the mask used by #add:"
		0 to: 7 do: [:shiftIndex |
			(each bitAnd: (16r80 bitShift: shiftIndex negated)) > 0
				ifTrue: [aBlock value: (Character value: (byteIndex - 1) * 8 + shiftIndex)]]]! !


!BitmapCharacterSet methodsFor: 'initialization' stamp: 'JAAyer 1/20/2011 01:05'!
initialize: aCapacity
	"Set up the byte table and, when aCapacity exceeds 256, preallocate a wide bitmap with one bit for each of aCapacity character values."

	byteCharacters := ByteArray new: 256.
	aCapacity > 256
		ifTrue: [self growWideCharacterBitmapTo: (aCapacity - 1) // 8 + 1].! !


!BitmapCharacterSet methodsFor: 'removing' stamp: 'JAAyer 1/20/2011 01:05'!
remove: aCharacter ifAbsent: aBlock
	"Remove aCharacter from the receiver and answer it. When it is not present, answer the value of aBlock instead and leave the receiver unchanged."

	| asciiValue |
	(asciiValue := aCharacter asciiValue) > 255
		ifTrue: [| byteIndex byte bitmask |
			byteIndex := asciiValue // 8 + 1.
			(wideCharacters isNil or: [byteIndex > wideCharacters size])
				ifTrue: [^ aBlock value].
			bitmask := 16r80 bitShift: (asciiValue \\ 8) negated.
			((byte := wideCharacters at: byteIndex) bitAnd: bitmask) > 0
				ifFalse: [^ aBlock value].
			wideCharacters at: byteIndex put: (byte bitAnd: bitmask bitInvert)]
		ifFalse: [
			(byteCharacters at: asciiValue + 1) = 1
				ifFalse: [^ aBlock value].
			byteCharacters at: asciiValue + 1 put: 0].
	^ aCharacter.! !


!BitmapCharacterSet methodsFor: 'testing' stamp: 'JAAyer 1/20/2011 01:05'!
includes: aCharacter
	"Answer whether aCharacter is in the receiver. Values above 255 that lie beyond the allocated wide bitmap are reported as absent."

	| asciiValue byteIndex |
	(asciiValue := aCharacter asciiValue) > 255
		ifFalse: [^ (byteCharacters at: asciiValue + 1) = 1].
	byteIndex := asciiValue // 8 + 1.
	(wideCharacters isNil or: [byteIndex > wideCharacters size])
		ifTrue: [^ false].
	^ ((wideCharacters at: byteIndex)
		bitAnd: (16r80 bitShift: (asciiValue \\ 8) negated)) > 0! !


!BitmapCharacterSet methodsFor: 'private' stamp: 'JAAyer 1/20/2011 01:05'!
byteCharacters
	"Answer the 256-element table for byte characters. Used by #= to compare with another instance."

	^ byteCharacters! !

!BitmapCharacterSet methodsFor: 'private' stamp: 'JAAyer 1/20/2011 01:05'!
growWideCharacterBitmapTo: aSize
	"Make the wide bitmap aSize bytes long, keeping its current contents. Senders are expected to pass a size that is not smaller than the current one."

	wideCharacters ifNil: [
		wideCharacters := ByteArray new: aSize.
		^ self].
	wideCharacters := (ByteArray new: aSize)
		replaceFrom: 1
		to: wideCharacters size
		with: wideCharacters
		startingAt: 1.! !

!BitmapCharacterSet methodsFor: 'private'!
significantWideCharacters
	"Answer a copy of the wide bitmap without its trailing zero bytes, or nil when no bit is set or no bitmap has been allocated. Used by #= and #hash so that they depend on the stored characters only."

	| last |
	wideCharacters ifNil: [^ nil].
	last := wideCharacters size.
	[last > 0 and: [(wideCharacters at: last) = 0]]
		whileTrue: [last := last - 1].
	^ last = 0
		ifTrue: [nil]
		ifFalse: [wideCharacters copyFrom: 1 to: last]! !

!BitmapCharacterSet methodsFor: 'private' stamp: 'JAAyer 1/20/2011 01:05'!
wideCharacters
	"Answer the raw wide bitmap, or nil when none has been allocated."

	^ wideCharacters! !

"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

BitmapCharacterSet class
	instanceVariableNames: ''!

!BitmapCharacterSet class methodsFor: 'instance creation' stamp: 'JAAyer 1/19/2011 23:03'!
new
	"Answer an empty set with room for byte characters only; the wide bitmap is allocated on demand."

	^ self new: 256! !

!BitmapCharacterSet class methodsFor: 'instance creation' stamp: 'JAAyer 1/19/2011 23:04'!
new: aCapacity
	"Answer an empty set able to hold aCapacity character values without growing."

	^ self basicNew initialize: aCapacity! !

!BitmapCharacterSet class methodsFor: 'instance creation' stamp: 'JAAyer 1/20/2011 00:52'!
newFrom: aCollection
	"Answer a new set containing every character of aCollection."

	^ self new
		addAll: aCollection;
		yourself! !