[squeak-dev] The Trunk: Regex-Core-ul.45.mcz

commits at source.squeak.org commits at source.squeak.org
Fri Sep 25 15:40:02 UTC 2015


Levente Uzonyi uploaded a new version of Regex-Core to project The Trunk:
http://source.squeak.org/trunk/Regex-Core-ul.45.mcz

==================== Summary ====================

Name: Regex-Core-ul.45
Author: ul
Time: 25 September 2015, 10:14:55.741 am
UUID: 0b7b582a-5091-43e4-95f2-981f236e991c
Ancestors: Regex-Core-ul.44

- Allow escaping any character in a character set.
- Use RxsCharacter instead of RxsPredicate for single character escapes like \r, \n, etc.
- Use nil instead of #epsilon for the extremal stream element in RxParser.
- RxParser >> #next returns the value of lookahead. Use it where it makes sense.
- RxsPredicate's class variables' initialization is thread-safe.
- Reinitialize EscapedLetterSelectors in the postscript.

=============== Diff against Regex-Core-ul.44 ===============

Item was changed:
  ----- Method: RxCharSetParser>>parseEscapeChar (in category 'parsing') -----
  parseEscapeChar
  
  	self match: $\.
+ 	elements add: ((RxsPredicate forEscapedLetter: lookahead)
+ 		ifNil: [ RxsCharacter with: lookahead ]).
- 	$- == lookahead
- 		ifTrue: [elements add: (RxsCharacter with: $-)]
- 		ifFalse: [elements add: (RxsPredicate forEscapedLetter: lookahead)].
  	self next!

Item was changed:
  ----- Method: RxParser>>atom (in category 'recursive descent') -----
  atom
  	"An atom is one of a lot of possibilities, see below."
  
  	| atom |
+ 	(lookahead == nil 
- 	(lookahead == #epsilon 
  	or: [ lookahead == $| 
  	or: [ lookahead == $)
  	or: [ lookahead == $*
  	or: [ lookahead == $+ 
  	or: [ lookahead == $? ]]]]])
  		ifTrue: [ ^RxsEpsilon new ].
  		
  	lookahead == $( 
  		ifTrue: [
  			"<atom> ::= '(' <regex> ')' "
  			self match: $(.
  			atom := self regex.
  			self match: $).
  			^atom ].
  	
  	lookahead == $[
  		ifTrue: [
  			"<atom> ::= '[' <characterSet> ']' "
  			self match: $[.
  			atom := self characterSet.
  			self match: $].
  			^atom ].
  	
  	lookahead == $: 
  		ifTrue: [
  			"<atom> ::= ':' <messagePredicate> ':' "
  			self match: $:.
  			atom := self messagePredicate.
  			self match: $:.
  			^atom ].
  	
  	lookahead == $. 
  		ifTrue: [
  			"any non-whitespace character"
  			self next.
  			^RxsContextCondition new beAny].
  	
  	lookahead == $^ 
  		ifTrue: [
  			"beginning of line condition"
  			self next.
  			^RxsContextCondition new beBeginningOfLine].
  	
  	lookahead == $$ 
  		ifTrue: [
  			"end of line condition"
  			self next.
  			^RxsContextCondition new beEndOfLine].
  		
  	lookahead == $\ 
  		ifTrue: [
  			"<atom> ::= '\' <character>"
+ 			self next ifNil: [ self signalParseError: 'bad quotation' ].
+ 			(BackslashConstants includesKey: lookahead) ifTrue: [
+ 				atom := RxsCharacter with: (BackslashConstants at: lookahead).
+ 				self next.
+ 				^atom].
- 			self next.
- 			lookahead == #epsilon 
- 				ifTrue: [ self signalParseError: 'bad quotation' ].
- 			(BackslashConstants includesKey: lookahead)
- 				ifTrue: [
- 					atom := RxsCharacter with: (BackslashConstants at: lookahead).
- 					self next.
- 					^atom].
  			self ifSpecial: lookahead
  				then: [:node | self next. ^node]].
  		
  	"If passed through the above, the following is a regular character."
  	atom := RxsCharacter with: lookahead.
  	self next.
  	^atom!

Item was changed:
  ----- Method: RxParser>>branch (in category 'recursive descent') -----
  branch
  	"<branch> ::= e | <piece> <branch>"
  
  	| piece branch |
  	piece := self piece.
+ 	(lookahead == nil
- 	(lookahead == #epsilon 
  	or: [ lookahead == $| 
  	or: [ lookahead == $) ]])
  		ifTrue: [ branch := nil ]
  		ifFalse: [ branch := self branch ].
  	^RxsBranch new 
  		initializePiece: piece 
  		branch: branch!

Item was changed:
  ----- Method: RxParser>>inputUpTo:errorMessage: (in category 'private') -----
  inputUpTo: aCharacter errorMessage: aString
  	"Accumulate input stream until <aCharacter> is encountered
  	and answer the accumulated chars as String, not including
  	<aCharacter>. Signal error if end of stream is encountered,
  	passing <aString> as the error description."
  
  	| accumulator |
  	accumulator := WriteStream on: (String new: 20).
+ 	[ lookahead == aCharacter or: [lookahead == nil ] ]
- 	[ lookahead == aCharacter or: [lookahead == #epsilon] ]
  		whileFalse: [
  			accumulator nextPut: lookahead.
  			self next].
+ 	lookahead ifNil: [ self signalParseError: aString ].
- 	lookahead == #epsilon
- 		ifTrue: [ self signalParseError: aString ].
  	^accumulator contents!

Item was changed:
  ----- Method: RxParser>>inputUpTo:nestedOn:errorMessage: (in category 'private') -----
  inputUpTo: aCharacter nestedOn: anotherCharacter errorMessage: aString 
  	"Accumulate input stream until <aCharacter> is encountered
  	and answer the accumulated chars as String, not including
  	<aCharacter>. Signal error if end of stream is encountered,
  	passing <aString> as the error description."
  
  	| accumulator nestLevel |
  	accumulator := WriteStream on: (String new: 20).
  	nestLevel := 0.
+ 	[ lookahead == aCharacter and: [ nestLevel = 0 ] ] whileFalse: [
+ 		lookahead ifNil: [ self signalParseError: aString ].
+ 		lookahead == $\
+ 			ifTrue: [ 
+ 				self next ifNil: [ self signalParseError: aString ].
+ 				BackslashConstants
+ 					at: lookahead
+ 					ifPresent: [ :unescapedCharacter | accumulator nextPut: unescapedCharacter ]
+ 					ifAbsent: [
+ 						accumulator
+ 							nextPut: $\;
+ 							nextPut: lookahead ] ]
+ 			ifFalse: [
+ 				accumulator nextPut: lookahead.
+ 				lookahead == anotherCharacter ifTrue: [ nestLevel := nestLevel + 1 ].
+ 				lookahead == aCharacter ifTrue: [ nestLevel := nestLevel - 1 ] ].
+ 		self next ].
- 	[lookahead == aCharacter and: [nestLevel = 0]] whileFalse: 
- 			[#epsilon == lookahead ifTrue: [self signalParseError: aString].
- 			accumulator nextPut: lookahead.
- 			lookahead == anotherCharacter ifTrue: [nestLevel := nestLevel + 1].
- 			lookahead == aCharacter ifTrue: [nestLevel := nestLevel - 1].
- 			self next].
  	^accumulator contents!

Item was changed:
  ----- Method: RxParser>>inputUpToAny:errorMessage: (in category 'private') -----
  inputUpToAny: aDelimiterString errorMessage: aString
  	"Accumulate input stream until any character from <aDelimiterString> is encountered
  	and answer the accumulated chars as String, not including the matched characters from the
  	<aDelimiterString>. Signal error if end of stream is encountered,
  	passing <aString> as the error description."
  
  	| accumulator |
  	accumulator := WriteStream on: (String new: 20).
+ 	[ lookahead == nil or: [ aDelimiterString includes: lookahead ] ]
- 	[ lookahead == #epsilon or: [ aDelimiterString includes: lookahead ] ]
  		whileFalse: [
  			accumulator nextPut: lookahead.
  			self next ].
+ 	lookahead ifNil: [ self signalParseError: aString ].
- 	lookahead == #epsilon
- 		ifTrue: [ self signalParseError: aString ].
  	^accumulator contents!

Item was changed:
  ----- Method: RxParser>>next (in category 'private') -----
  next
  	"Advance the input storing the just read character
  	as the lookahead."
  
+ 	^lookahead := input next!
- 	lookahead := input next ifNil: [ #epsilon ]!

Item was changed:
  ----- Method: RxParser>>parseStream: (in category 'accessing') -----
  parseStream: aStream
  	"Parse an input from a character stream <aStream>.
  	On success, answers an RxsRegex -- parse tree root.
  	On error, raises `RxParser syntaxErrorSignal' with the current
  	input stream position as the parameter."
  
  	| tree |
  	input := aStream.
+ 	self next.
- 	lookahead := nil.
- 	self match: nil.
  	tree := self regex.
+ 	self match: nil.
- 	self match: #epsilon.
  	^tree!

Item was changed:
  ----- Method: RxParser>>regex (in category 'recursive descent') -----
  regex
  	"<regex> ::= e | <branch> `|' <regex>"
  
  	| branch regex |
  	branch := self branch.
  	
+ 	(lookahead == nil 
- 	(lookahead == #epsilon 
  	or: [ lookahead == $) ])
  		ifTrue: [ regex := nil ]
  		ifFalse: [
  			self match: $|.
  			regex := self regex ].
  		
  	^RxsRegex new initializeBranch: branch regex: regex!

Item was changed:
  ----- Method: RxsPredicate class>>forEscapedLetter: (in category 'instance creation') -----
  forEscapedLetter: aCharacter
+ 	"Return a predicate instance for the given character, or nil if there's no such predicate."
  
+ 	^EscapedLetterSelectors
+ 		at: aCharacter
+ 		ifPresent: [ :selector | self new perform: selector ]!
- 	^self new perform:
- 		(EscapedLetterSelectors
- 			at: aCharacter
- 			ifAbsent: [RxParser signalSyntaxException: 'bad backslash escape'])!

Item was changed:
  ----- Method: RxsPredicate class>>initializeEscapedLetterSelectors (in category 'class initialization') -----
  initializeEscapedLetterSelectors
  	"self initializeEscapedLetterSelectors"
  
+ 	EscapedLetterSelectors := Dictionary new
- 	| newEscapedLetterSelectors |
- 	newEscapedLetterSelectors := Dictionary new
  		at: $w put: #beWordConstituent;
  		at: $W put: #beNotWordConstituent;
  		at: $d put: #beDigit;
  		at: $D put: #beNotDigit;
  		at: $s put: #beSpace;
  		at: $S put: #beNotSpace;
+ 		yourself!
- 		at: $\ put: #beBackslash;
- 		at: $r put: #beCarriageReturn;
- 		at: $n put: #beLineFeed;
- 		at: $t put: #beTab;
- 		yourself.
- 	EscapedLetterSelectors := newEscapedLetterSelectors!

Item was changed:
  ----- Method: RxsPredicate class>>initializeNamedClassSelectors (in category 'class initialization') -----
  initializeNamedClassSelectors
  	"self initializeNamedClassSelectors"
  
+ 	NamedClassSelectors := Dictionary new
- 	(NamedClassSelectors := Dictionary new)
  		at: 'alnum' put: #beAlphaNumeric;
  		at: 'alpha' put: #beAlphabetic;
  		at: 'cntrl' put: #beControl;
  		at: 'digit' put: #beDigit;
  		at: 'graph' put: #beGraphics;
  		at: 'lower' put: #beLowercase;
  		at: 'print' put: #bePrintable;
  		at: 'punct' put: #bePunctuation;
  		at: 'space' put: #beSpace;
  		at: 'upper' put: #beUppercase;
+ 		at: 'xdigit' put: #beHexDigit;
+ 		yourself!
- 		at: 'xdigit' put: #beHexDigit!

Item was removed:
- ----- Method: RxsPredicate>>beBackslash (in category 'initialize-release') -----
- beBackslash
- 
- 	self beCharacter: $\!

Item was removed:
- ----- Method: RxsPredicate>>beCarriageReturn (in category 'initialize-release') -----
- beCarriageReturn
- 
- 	self beCharacter: Character cr!

Item was removed:
- ----- Method: RxsPredicate>>beLineFeed (in category 'initialize-release') -----
- beLineFeed
- 
- 	self beCharacter: Character lf!

Item was removed:
- ----- Method: RxsPredicate>>beTab (in category 'initialize-release') -----
- beTab
- 
- 	self beCharacter: Character tab!

Item was changed:
+ (PackageInfo named: 'Regex-Core') postscript: 'RxsPredicate initializeEscapedLetterSelectors.'!
- (PackageInfo named: 'Regex-Core') postscript: 'RxsPredicate initializeEscapedLetterSelectors'!



More information about the Squeak-dev mailing list