[squeak-dev] The Trunk: Regex-Core-ct.81.mcz

commits at source.squeak.org commits at source.squeak.org
Wed Oct 12 12:42:13 UTC 2022


Christoph Thiede uploaded a new version of Regex-Core to project The Trunk:
http://source.squeak.org/trunk/Regex-Core-ct.81.mcz

==================== Summary ====================

Name: Regex-Core-ct.81
Author: ct
Time: 12 October 2022, 2:42:12.805412 pm
UUID: 5daa8bb4-0f9b-d34f-9cc0-2e0316ccd4ff
Ancestors: Regex-Core-ct.80

Documentation only: Minor refactoring to EBNF specification in parsers. Unifies use of spaces and escaping.

=============== Diff against Regex-Core-ct.80 ===============

Item was changed:
  ----- Method: RxAbstractParser>>codePoint: (in category 'recursive descent') -----
  codePoint: size
  	"Matches a character that has the given codepoint with the specified size of hex digits, unless braced.
+ 	<codePoint> ::= \x ( '{' <hex> '}' | size * <hex> ) "
- 	<codePoint> ::= \x ({<hex>} | <hex>[size])"
  
  	| braced codeString codePoint codeStream |
  	"Decide between the braced and the fixed-width form. Presumably tryMatch: consumes the opening brace when it is present and answers whether it did -- confirm against its definition."
  	braced := self tryMatch: ${.
  	"Fixed-width form: ask for size characters. Braced form: take everything up to the closing brace. Both helpers are given the message to report on failure."
  	codeString := braced
  		ifFalse: [self
  			input: size
  			errorMessage: 'invalid codepoint']
  		ifTrue: [self
  			inputUpTo: $}
  			errorMessage: 'no terminating "}"'].
  	
  	"Parse the collected text as a base-16 integer; a nil result from the number parser is reported as a parse error."
  	codeStream := codeString readStream.
  	codePoint := ((ExtendedNumberParser on: codeStream)
  		defaultBase: 16;
  		nextInteger "allow the stream to change the base -- beware: any new base is specified in base 16!!") ifNil: [
  			self signalParseError: 'invalid codepoint'].
  	"Anything the number parser left unread means the text was not a single valid number."
  	codeStream atEnd ifFalse: [
  		self signalParseError: 'invalid codepoint'].
  	
  	"In the braced form the closing brace still has to be matched here; apparently inputUpTo:errorMessage: stops in front of it -- confirm."
  	braced ifTrue: [
  		self match: $}].
  	
  	^ RxsCharacter with: (Character codePoint: codePoint)!

Item was changed:
  ----- Method: RxAbstractParser>>unicodeCategory (in category 'recursive descent') -----
  unicodeCategory
  	"Matches a character that belongs to the given unicode category.
+ 	<unicodeCategory> ::= \p '{' <categoryName> '}' "
- 	<unicodeCategory> ::= \p '{' <categoryName> '}'"
  
  	| category |
  	"The category name is the text between the mandatory opening and closing braces."
  	self match: ${.
  	category := self inputUpTo: $} errorMessage: 'no terminating "}"'.
  	self match: $}.
  	
  	"Both checks run only after the whole braced construct has been consumed, so a malformed construct is reported before missing unicode support or an unknown category name."
  	RxsPredicate supportsUnicode ifFalse:
  		[self signalParseError: 'unicode support is not available'].
  	(RxsPredicate isValidUnicodeCategory: category) ifFalse:
  		[self signalParseError: 'unknown unicode category: ' , category].
  	"Answer a new predicate node configured for the category."
  	^ RxsPredicate new beUnicodeCategory: category!

Item was changed:
  ----- Method: RxParser>>piece (in category 'recursive descent') -----
  piece
+ 	"<piece> ::= <atom>
+ 		| <atom> '*' | <atom> '+' | <atom> '?'
+ 		| <atom> '{' <number> ',' <number> '}' "
- 	"<piece> ::= <atom> | <atom>* | <atom>+ | <atom>? | <atom>{<number>,<number>}"
  
  	| atom |
  	"Parse the atom first, then inspect the lookahead character for an optional quantifier suffix."
  	atom := self atom.
  	
  	"Star suffix: consume it and answer a star piece."
  	lookahead == $*
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializeStarAtom: atom ].
  
  	"Plus suffix: consume it and answer a plus piece."
  	lookahead == $+
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializePlusAtom: atom ].
  
  	"Question mark suffix: consume it and answer an optional piece."
  	lookahead == $?
  		ifTrue: [ 
  			self next.
  			^ RxsPiece new initializeOptionalAtom: atom ].
  	
  	"Brace suffix: delegate to quantifiedAtom:. The brace is not consumed here, so that method starts with the brace still in the lookahead."
  	lookahead == ${
  		ifTrue: [
  			^ self quantifiedAtom: atom ].
  		
  	"No quantifier follows: answer a piece for the plain atom."
  	^ RxsPiece new initializeAtom: atom!

Item was changed:
  ----- Method: RxParser>>regex (in category 'recursive descent') -----
  regex
+ 	"<regex> ::= e | <branch> '|' <regex> "
- 	"<regex> ::= e | <branch> `|' <regex>"
  
  	| branch regex |
  	"NOTE(review): the grammar above lists e (presumably the empty production) as an alternative, but this method always parses a branch first -- check how branch handles empty input."
  	branch := self branch.
  	
  	"A nil lookahead or a closing parenthesis ends the alternation, leaving the rest as nil. Otherwise a vertical bar is required and the remainder is parsed recursively."
  	(lookahead == nil 
  	or: [ lookahead == $) ])
  		ifTrue: [ regex := nil ]
  		ifFalse: [
  			self match: $|.
  			regex := self regex ].
  		
  	"Answer a node holding the first branch and the remaining alternatives, or nil if there are none."
  	^RxsRegex new initializeBranch: branch regex: regex!



More information about the Squeak-dev mailing list