[squeak-dev] The Inbox: Regex-Core-tobe.86.mcz

commits at source.squeak.org commits at source.squeak.org
Mon Feb 13 09:30:33 UTC 2023


A new version of Regex-Core was added to project The Inbox:
http://source.squeak.org/inbox/Regex-Core-tobe.86.mcz

==================== Summary ====================

Name: Regex-Core-tobe.86
Author: tobe
Time: 13 February 2023, 10:30:32.667487 am
UUID: 5db039b9-778b-436b-b765-f71c29903d69
Ancestors: Regex-Core-tpr.85

For quantifiers, peek that we will get a valid expression (at least one number).

Note that the method is optimized for speed (e.g., omitting ensure:)

=============== Diff against Regex-Core-tpr.85 ===============

Item was changed:
  ----- Method: RxParser>>atom (in category 'recursive descent') -----
  atom
  	"An atom is one of a lot of possibilities, see below."
  
  	| atom |
  	(lookahead == nil 
  	or: [ lookahead == $| ] 
  	or: [ lookahead == $) ])
  		ifTrue: [ ^RxsEpsilon new ].
  	
+ 	"A quantifier here has no preceding atom to apply to, so it is a parse error. A '{' only counts as a quantifier when peekCanBeQuantifiedAtom answers true; otherwise it falls through to the end of this method and is parsed as a regular character."
  	(lookahead == $*
  	or: [ lookahead == $+ ]
  	or: [ lookahead == $? ]
+ 	or: [ self peekCanBeQuantifiedAtom ])
- 	or: [ lookahead == ${ ])
  		ifTrue: [ ^self signalParseError: 'nothing to repeat' ].
  	
  	lookahead == $( 
  		ifTrue: [
  			^ self group ].
  	
  	lookahead == $[
  		ifTrue: [
  			"<atom> ::= '[' <characterSet> ']' "
  			self match: $[.
  			atom := self characterSet.
  			self match: $].
  			^atom ].
  	
  	lookahead == $: 
  		ifTrue: [
  			"<atom> ::= ':' <messagePredicate> ':' "
  			self match: $:.
  			atom := self messagePredicate.
  			self match: $:.
  			^atom ].
  	
  	lookahead == $. 
  		ifTrue: [
  			"any non-whitespace character"
  			self next.
  			^RxsContextCondition new beAny].
  	
  	lookahead == $^ 
  		ifTrue: [
  			"beginning of line condition"
  			self next.
  			^RxsContextCondition new beBeginningOfLine].
  	
  	lookahead == $$ 
  		ifTrue: [
  			"end of line condition"
  			self next.
  			^RxsContextCondition new beEndOfLine].
  		
  	lookahead == $\ 
  		ifTrue: [
  			"<atom> ::= '\' <node>"
  			self match: $\.
  			^self backslashNode].
  		
  	"If passed through the above, the following is a regular character."
  	atom := RxsCharacter with: lookahead.
  	self next.
  	^atom!

Item was added:
+ ----- Method: RxParser>>peekCanBeQuantifiedAtom (in category 'private') -----
+ peekCanBeQuantifiedAtom
+ 	"Answer whether the lookahead starts a well-formed quantifier. The grammar is
+ 		{<number>,<number>} | {,<number>} | {<number>,} | {<number>}
+ 	At least one number is required, so '{}' and '{,}' answer false.
+ 	Scans ahead in source from its current position and restores that position before answering. The restore is done by hand on every exit path instead of with ensure:, for speed."
+ 
+ 	| old hasNumber isQuantifier |
+ 	lookahead == ${ ifFalse: [^ false].
+ 	hasNumber := false.
+ 	
+ 	old := source position.
+ 	"skip the first number if present"
+ 	[source atEnd not and: [source peek isDigit]] whileTrue: [hasNumber := true. source next].
+ 	
+ 	"without a comma, only a closing brace directly after a number is valid; this rejects '{}'"
+ 	source peek = $, ifFalse: [
+ 		isQuantifier := hasNumber and: [source peek = $}].
+ 		source position: old.
+ 		^ isQuantifier].
+ 	source next.
+ 	
+ 	"skip the second number if present"
+ 	[source atEnd not and: [source peek isDigit]] whileTrue: [hasNumber := true. source next].
+ 	
+ 	"now we need to have reached the closing brace and must have seen at least one number"
+ 	isQuantifier := source next = $} and: [hasNumber].
+ 	source position: old.
+ 	^ isQuantifier!

Item was changed:
  ----- Method: RxParser>>piece (in category 'recursive descent') -----
  piece
  	"<piece> ::= <atom>
  		| <atom> '*' | <atom> '+' | <atom> '?'
  		| <atom> '{' <number> ',' <number> '}' "
  
  	| atom |
  	atom := self atom.
  	
  	lookahead == $*
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializeStarAtom: atom ].
  
  	lookahead == $+
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializePlusAtom: atom ].
  
  	lookahead == $?
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializeOptionalAtom: atom ].
  	
+ 	"Only hand over to quantifiedAtom: when peekCanBeQuantifiedAtom answers true. Otherwise the '{' is left unconsumed in the lookahead and this piece is just the plain atom."
+ 	self peekCanBeQuantifiedAtom
- 	lookahead == ${
  		ifTrue: [
  			^ self quantifiedAtom: atom ].
  		
  	^ RxsPiece new initializeAtom: atom!



More information about the Squeak-dev mailing list