[squeak-dev] The Trunk: Regex-Core-ul.38.mcz

commits at source.squeak.org commits at source.squeak.org
Mon Aug 17 20:20:49 UTC 2015


Levente Uzonyi uploaded a new version of Regex-Core to project The Trunk:
http://source.squeak.org/trunk/Regex-Core-ul.38.mcz

==================== Summary ====================

Name: Regex-Core-ul.38
Author: ul
Time: 17 August 2015, 10:09:02.436 pm
UUID: 0c1d8e56-381a-4fe0-ad20-80ede67b4ba5
Ancestors: Regex-Core-ul.37

- further optimizations

=============== Diff against Regex-Core-ul.37 ===============

Item was changed:
  ----- Method: RxMatchOptimizer>>conditionTester (in category 'accessing') -----
  conditionTester
  	"#any condition is filtered at the higher level;
  	it cannot appear among the conditions here."
  
+ 	| matchConditions size |
+ 	(size := conditions size) = 0 ifTrue: [ ^nil ].
+ 	size = 1 ifTrue: [
- 	| matchConditions |
- 	conditions isEmpty ifTrue: [^nil].
- 	conditions size = 1 ifTrue: [
  		| matchCondition |
  		matchCondition := conditions anyOne.
  		"Special case all of the possible conditions."
  		#atBeginningOfLine == matchCondition ifTrue: [^[:c :matcher | matcher atBeginningOfLine]].
  		#atEndOfLine == matchCondition ifTrue: [^[:c :matcher | matcher atEndOfLine]].
  		#atBeginningOfWord == matchCondition ifTrue: [^[:c :matcher | matcher atBeginningOfWord]].
  		#atEndOfWord == matchCondition ifTrue: [^[:c :matcher | matcher atEndOfWord]].
  		#atWordBoundary == matchCondition ifTrue: [^[:c :matcher | matcher atWordBoundary]].
  		#notAtWordBoundary == matchCondition ifTrue: [^[:c :matcher | matcher notAtWordBoundary]].
  		RxParser signalCompilationException: 'invalid match condition'].
  	"More than one condition. Capture them as an array in scope."
  	matchConditions := conditions asArray.
  	^[ :c :matcher |
  		matchConditions anySatisfy: [ :conditionSelector |
  			matcher perform: conditionSelector ] ]!

Item was changed:
  ----- Method: RxMatchOptimizer>>initialize:ignoreCase: (in category 'initialize-release') -----
  initialize: aRegex ignoreCase: aBoolean 
  	"Set `testBlock' variable to a can-match predicate block:
  	two-argument block which accepts a lookahead character
  	and a matcher (presumably built from aRegex) and answers 
  	a boolean indicating whether a match could start at the given
  	lookahead. "
  
  	ignoreCase := aBoolean.
+ 	prefixes := IdentitySet new: 10.
+ 	nonPrefixes := IdentitySet new: 10.
+ 	conditions := IdentitySet new: 3.
- 	prefixes := Set new: 10.
- 	nonPrefixes := Set new: 10.
- 	conditions := Set new: 3.
  	methodPredicates := Set new: 3.
  	nonMethodPredicates := Set new: 3.
  	predicates := Set new: 3.
  	nonPredicates := Set new: 3.
  	lookarounds := Set new: 3.
  	aRegex dispatchTo: self.	"If the whole expression is nullable, 
  		end-of-line is an implicit can-match condition!!"
  	aRegex isNullable ifTrue: [conditions add: #atEndOfLine].
  	testBlock := self determineTestMethod!

Item was changed:
  ----- Method: RxMatchOptimizer>>methodPredicateTester (in category 'accessing') -----
  methodPredicateTester
  
  	| p size |
  	(size := methodPredicates size) = 0 ifTrue: [ ^nil ].
  	size = 1 ifTrue: [
+ 		| selector |
- 		|  selector |
  		"might be a pretty common case"
  		selector := methodPredicates anyOne.
  		^[ :char :matcher | 
  			RxParser doHandlingMessageNotUnderstood: [
  				char perform: selector ] ] ].
  	p := methodPredicates asArray.
  	^[ :char :matcher | 
  		RxParser doHandlingMessageNotUnderstood: [
  			p anySatisfy: [ :sel | char perform: sel ] ] ]!

Item was removed:
- ----- Method: RxMatchOptimizer>>optimizeSet: (in category 'private') -----
- optimizeSet: aSet
- 	"If a set is small, convert it to array to speed up lookup
- 	(Array has no hashing overhead, beats Set on small number
- 	of elements)."
- 
- 	^aSet size < 10 ifTrue: [aSet asArray] ifFalse: [aSet]!

Item was changed:
  ----- Method: RxMatchOptimizer>>syntaxRegex: (in category 'double dispatch') -----
  syntaxRegex: regexNode
  	"All prefixes of the regex's branches should be combined.
  	Therefore, just recurse."
  
  	regexNode branch dispatchTo: self.
+ 	regexNode regex ifNotNil: [ :regex |
+ 		regex dispatchTo: self ]!
- 	regexNode regex notNil
- 		ifTrue: [regexNode regex dispatchTo: self]!

Item was changed:
  ----- Method: RxMatcher>>allocateMarker (in category 'private') -----
  allocateMarker
  	"Answer an integer to use as an index of the next marker."
  
+ 	^markerCount := markerCount + 1!
- 	markerCount := markerCount + 1.
- 	^markerCount!

Item was changed:
  ----- Method: RxsCharSet>>enumerablePartPredicateIgnoringCase: (in category 'privileged') -----
  enumerablePartPredicateIgnoringCase: aBoolean
  
  	| enumeration |
+ 	enumeration := (self enumerableSetIgnoringCase: aBoolean) ifNil: [ ^nil ].
- 	enumeration := self enumerableSetIgnoringCase: aBoolean.
- 	enumeration ifNil: [ ^nil ].
  	negated ifTrue: [ ^[ :char | (enumeration includes: char) not ] ].
  	^[ :char | enumeration includes: char ]!

Item was changed:
  ----- Method: RxsCharSet>>enumerableSetIgnoringCase: (in category 'privileged') -----
  enumerableSetIgnoringCase: aBoolean
  	"Answer a collection of characters that make up the portion of me that can be enumerated, or nil if there are no such characters."
  
+ 	| highestCharacterCode set |
+ 	"Fold the per-element maxima into one integer, starting from -1 (the 'not enumerable' marker).
+ 	#detectMax: would answer the winning element itself (or nil when elements is empty), not its code."
+ 	highestCharacterCode := elements inject: -1 into: [ :max :each |
+ 		max max: (each maximumCharacterCodeIgnoringCase: aBoolean) ].
+ 	highestCharacterCode = -1 ifTrue: [ ^nil ].
+ 	set := highestCharacterCode <= 255
+ 		ifTrue: [ CharacterSet new ]
+ 		ifFalse: [ WideCharacterSet new ].
- 	| size set |
- 	size := elements detectSum: [ :each |
- 		each enumerateSizeIgnoringCase: aBoolean ].
- 	size = 0 ifTrue: [ ^nil ].
- 	set := Set new: size.
  	elements do: [ :each |
  		each enumerateTo: set ignoringCase: aBoolean ].
  	^set!

Item was removed:
- ----- Method: RxsCharacter>>enumerateSizeIgnoringCase: (in category 'accessing') -----
- enumerateSizeIgnoringCase: aBoolean
- 
- 	aBoolean ifFalse: [ ^1 ].
- 	character isLetter ifTrue: [ ^2 ].
- 	^1!

Item was added:
+ ----- Method: RxsCharacter>>maximumCharacterCodeIgnoringCase: (in category 'accessing') -----
+ maximumCharacterCodeIgnoringCase: aBoolean
+ 	"Return the largest character code among the characters I represent."
+ 
+ 	aBoolean ifFalse: [ ^character asInteger ].
+ 	^character asUppercase asInteger max: character asLowercase asInteger!

Item was removed:
- ----- Method: RxsPredicate>>enumerateSizeIgnoringCase: (in category 'accessing') -----
- enumerateSizeIgnoringCase: aBoolean
- 
- 	^0 "Not enumerable"!

Item was added:
+ ----- Method: RxsPredicate>>maximumCharacterCodeIgnoringCase: (in category 'accessing') -----
+ maximumCharacterCodeIgnoringCase: aBoolean
+ 	"Return the largest character code among the characters I represent."
+ 
+ 	^-1 "Not enumerable"!

Item was removed:
- ----- Method: RxsRange>>enumerateSizeIgnoringCase: (in category 'accessing') -----
- enumerateSizeIgnoringCase: aBoolean
- 	"Add all of the elements I represent to the collection."
- 
- 	| characterCount |
- 	characterCount := last asInteger - first asInteger + 1 max: 0.
- 	aBoolean ifFalse: [ ^characterCount ].
- 	(last isLetter or: [ first isLetter ]) ifTrue: [ ^characterCount * 2 "Assume many letters" ].
- 	^characterCount "Assume no letters"!

Item was added:
+ ----- Method: RxsRange>>maximumCharacterCodeIgnoringCase: (in category 'accessing') -----
+ maximumCharacterCodeIgnoringCase: aBoolean
+ 	"Return the largest character code among the characters I represent, or -1 if the range is empty."
+ 
+ 	first <= last ifFalse: [ ^-1 "Empty range" ].
+ 	aBoolean ifFalse: [ ^last asInteger ].
+ 	"Fold to an integer code over both cases of every character in the range.
+ 	#detectMax: would answer the winning Character rather than its code."
+ 	^(first to: last) inject: -1 into: [ :max :each |
+ 		(max max: each asLowercase asInteger) max: each asUppercase asInteger ]!



More information about the Squeak-dev mailing list