[squeak-dev] The Trunk: Regex-Tests-Core-ul.1.mcz

commits at source.squeak.org commits at source.squeak.org
Sat Aug 22 21:53:52 UTC 2015


Levente Uzonyi uploaded a new version of Regex-Tests-Core to project The Trunk:
http://source.squeak.org/trunk/Regex-Tests-Core-ul.1.mcz

==================== Summary ====================

Name: Regex-Tests-Core-ul.1
Author: ul
Time: 12 August 2015, 10:43:52.346 pm
UUID: 2538aebb-3e78-4bb8-ad6f-2b0df63e564b
Ancestors: 

- tests from Pharo

==================== Snapshot ====================

SystemOrganization addCategory: #'Regex-Tests-Core'!

"Declares RxMatcherTest as a TestCase subclass with no instance variables, class variables or pool dictionaries, filed under the Regex-Tests-Core category."
TestCase subclass: #RxMatcherTest
	instanceVariableNames: ''
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Regex-Tests-Core'!

!RxMatcherTest commentStamp: 'Tbn 11/12/2010 22:31' prior: 0!
This class provides tests for the regular expression matcher.!

----- Method: RxMatcherTest class>>packageNamesUnderTest (in category 'accessing') -----
packageNamesUnderTest
	"Answer the names of the packages these tests are meant to exercise.
	NOTE(review): this snapshot is published as Regex-Tests-Core; confirm that a package named VB-Regex actually exists in the target image, otherwise tools relying on this answer will look at nothing."
	^ #('VB-Regex')!

----- Method: RxMatcherTest>>compileRegex: (in category 'utilties') -----
compileRegex: aString
	"Parse aString with RxParser>>safelyParse: and answer a matcher (see #matcherClass) built from the resulting syntax tree. Answer nil when the parser hands back nil; #runRegex: treats that as a failed compilation."

	| tree |
	tree := RxParser safelyParse: aString.
	tree isNil ifTrue: [ ^ nil ].
	^ self matcherClass for: tree!

----- Method: RxMatcherTest>>henryReadme (in category 'testing-henry') -----
henryReadme
	"Documentation-only method for the testing-henry category: if it is ever sent, it signals an error whose message text states where those tests come from."
	self error: 'The tests in this category are based on the ones in Henry Spencer''s regexp.c package.'!

----- Method: RxMatcherTest>>matcherClass (in category 'accessing') -----
matcherClass
	"Answer the class used to build matchers in these tests. Every test goes through this accessor, so a subclass can redirect the whole suite by overriding it."
	^ RxMatcher!

----- Method: RxMatcherTest>>runMatcher:with:expect:withSubexpressions: (in category 'utilties') -----
runMatcher: aMatcher with: aString expect: aBoolean withSubexpressions: anArray
	"Check one subject string against an already compiled matcher.
	1) Copying aString while translating every match to itself must reproduce aString.
	2) Searching aString must answer aBoolean.
	3) If anArray is not nil and the matcher supports subexpressions, anArray is read as (index expectedSubstring) pairs and each indexed subexpression must equal its expected value."

	| roundTrip found cursor |
	roundTrip := aMatcher copy: aString translatingMatchesUsing: [ :match | match ].
	self
		assert: roundTrip = aString
		description: 'Copying: expected ' , aString printString , ', but got ' , roundTrip printString.
	found := aMatcher search: aString.
	self
		assert: found = aBoolean
		description: 'Searching: expected ' , aBoolean printString , ', but got ' , found printString.
	anArray isNil ifTrue: [ ^ self ].
	aMatcher supportsSubexpressions ifFalse: [ ^ self ].
	"Walk the flat pair list two slots at a time."
	cursor := 1.
	[ cursor <= anArray size ] whileTrue: [
		| key wanted actual |
		key := anArray at: cursor.
		wanted := anArray at: cursor + 1.
		actual := aMatcher subexpression: key.
		self
			assert: wanted = actual
			description: 'Subexpression ' , key printString , ': expected ' , wanted printString , ', but got ' , actual printString.
		cursor := cursor + 2 ]!

----- Method: RxMatcherTest>>runRegex: (in category 'utilties') -----
runRegex: anArray
	"Run one test clause. anArray starts with the regex source. If its second element is nil the regex is expected NOT to compile and nothing else is examined. Otherwise the remaining elements are consecutive triples of subject string, expected search result, and expected subexpressions (a flat array of index/substring pairs, or nil), each of which is handed to #runMatcher:with:expect:withSubexpressions:."

	| pattern matcher expectsSuccess |
	pattern := anArray first.
	matcher := self compileRegex: pattern.
	expectsSuccess := (anArray at: 2) notNil.
	matcher isNil ifTrue: [
		expectsSuccess ifTrue: [
			self signalFailure: 'Compilation failed, should have succeeded: ' , pattern printString ].
		^ self ].
	expectsSuccess ifFalse: [
		self signalFailure: 'Compilation succeeded, should have failed: ' , pattern printString.
		^ self ].
	(2 to: anArray size by: 3) do: [ :start |
		self
			runMatcher: matcher
			with: (anArray at: start)
			expect: (anArray at: start + 1)
			withSubexpressions: (anArray at: start + 2) ]!

----- Method: RxMatcherTest>>testCaseInsensitive (in category 'testing-protocol') -----
testCaseInsensitive
	"A matcher built with ignoreCase: true must find the phrase in any letter case, both as the whole subject and embedded in a longer one."
	| matcher |
	matcher := self matcherClass forString: 'the quick brown fox' ignoreCase: true.
	#(	'the quick brown fox'
		'The quick brown FOX'
		'What do you know about the quick brown fox?'
		'What do you know about THE QUICK BROWN FOX?' )
			do: [ :subject | self assert: (matcher search: subject) ]!

----- Method: RxMatcherTest>>testCaseSensitive (in category 'testing-protocol') -----
testCaseSensitive
	"A matcher built with ignoreCase: false must find the phrase only when the letter case agrees exactly."
	| matcher |
	matcher := self matcherClass forString: 'the quick brown fox' ignoreCase: false.
	self
		assert: (matcher search: 'the quick brown fox');
		deny: (matcher search: 'The quick brown FOX');
		assert: (matcher search: 'What do you know about the quick brown fox?');
		deny: (matcher search: 'What do you know about THE QUICK BROWN FOX?')!

----- Method: RxMatcherTest>>testCopyReplacingMatches (in category 'testing-protocol') -----
testCopyReplacingMatches
	"The match context has to survive the copying of text between matches: the digit in the middle of the first word does not start a word, so only the two word-initial runs are replaced."

	| matcher replaced |
	matcher := self matcherClass forString: '\<\d\D+'.
	replaced := matcher copy: '9aaa1bbb 8ccc' replacingMatchesWith: 'foo'.
	self assert: replaced = 'foo1bbb foo'!

----- Method: RxMatcherTest>>testCopyTranslatingMatches (in category 'testing-protocol') -----
testCopyTranslatingMatches
	"Each word is passed through the translation block (reversed here) while the blanks between and after the words are copied unchanged."
	| matcher translated |
	matcher := self matcherClass forString: '\w+'.
	translated := matcher
		copy: 'now is  the   time    '
		translatingMatchesUsing: [ :word | word reversed ].
	self assert: translated = 'won si  eht   emit    '!

----- Method: RxMatcherTest>>testEmptyStringAtBeginningOfLine (in category 'testing-empty') -----
testEmptyStringAtBeginningOfLine
	"Replacing the empty match of ^ must put a marker in front of each of the two cr-separated lines."
	| matcher subject expected |
	subject := 'foo1 bar1' , String cr , 'foo2 bar2'.
	expected := '*foo1 bar1' , String cr , '*foo2 bar2'.
	matcher := self matcherClass forString: '^'.
	self
		assert: (matcher copy: subject replacingMatchesWith: '*') = expected
		description: 'An empty string at the beginning of a line'!

----- Method: RxMatcherTest>>testEmptyStringAtBeginningOfWord (in category 'testing-empty') -----
testEmptyStringAtBeginningOfWord
	"Replacing the empty match of \< must put a marker in front of each word."
	| matcher marked |
	matcher := self matcherClass forString: '\<'.
	marked := matcher copy: 'foo bar' replacingMatchesWith: '*'.
	self
		assert: marked = '*foo *bar'
		description: 'An empty string at the beginning of a word'!

----- Method: RxMatcherTest>>testEmptyStringAtEndOfLine (in category 'testing-empty') -----
testEmptyStringAtEndOfLine
	"Replacing the empty match of $ must put a marker after each of the two cr-separated lines."
	| matcher subject expected |
	subject := 'foo1 bar1' , String cr , 'foo2 bar2'.
	expected := 'foo1 bar1*' , String cr , 'foo2 bar2*'.
	matcher := self matcherClass forString: '$'.
	self
		assert: (matcher copy: subject replacingMatchesWith: '*') = expected
		description: 'An empty string at the end of a line'!

----- Method: RxMatcherTest>>testEmptyStringAtEndOfWord (in category 'testing-empty') -----
testEmptyStringAtEndOfWord
	"Replacing the empty match of \> must put a marker after each word."
	| matcher marked |
	matcher := self matcherClass forString: '\>'.
	marked := matcher copy: 'foo bar' replacingMatchesWith: '*'.
	self
		assert: marked = 'foo* bar*'
		description: 'An empty string at the end of a word'!

----- Method: RxMatcherTest>>testEmptyStringAtWordBoundary (in category 'testing-empty') -----
testEmptyStringAtWordBoundary
	"Replacing the empty match of \b must put a marker on both sides of each word."
	| matcher marked |
	matcher := self matcherClass forString: '\b'.
	marked := matcher copy: 'foo bar' replacingMatchesWith: '*'.
	self
		assert: marked = '*foo* *bar*'
		description: 'An empty string at a word boundary'!

----- Method: RxMatcherTest>>testEmptyStringNotAtWordBoundary (in category 'testing-empty') -----
testEmptyStringNotAtWordBoundary
	"Replacing the empty match of \B must put a marker between the letters inside each word and nowhere else."
	| matcher marked |
	matcher := self matcherClass forString: '\B'.
	marked := matcher copy: 'foo bar' replacingMatchesWith: '*'.
	self
		assert: marked = 'f*o*o b*a*r'
		description: 'An empty string not at a word boundary'!

----- Method: RxMatcherTest>>testHenry001 (in category 'testing-henry') -----
testHenry001
	"Plain literal 'abc': expected to be found wherever that exact substring occurs and nowhere else; subexpression 1 holds the matched text."
	self runRegex: #('abc'
		'abc' true (1 'abc')
		'xbc' false nil
		'axc' false nil
		'abx' false nil
		'xabcy' true (1 'abc')
		'ababc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry002 (in category 'testing-henry') -----
testHenry002
	"Star quantifier: 'ab*c' is expected to match 'abc'."
	self runRegex: #('ab*c'
		'abc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry003 (in category 'testing-henry') -----
testHenry003
	"Star followed by the same literal: 'ab*bc' is expected to match subjects with one, two or four b's."
	self runRegex: #('ab*bc'
		'abc' true (1 'abc')
		'abbc' true (1 'abbc')
		'abbbbc' true (1 'abbbbc'))!

----- Method: RxMatcherTest>>testHenry004 (in category 'testing-henry') -----
testHenry004
	"Plus quantifier: 'ab+bc' is expected to match 'abbc' and 'abbbbc' but not 'abc' or 'abq'."
	self runRegex: #('ab+bc'	
		'abbc' true (1 'abbc')
		'abc' false nil
		'abq' false nil
		'abbbbc' true (1 'abbbbc'))!

----- Method: RxMatcherTest>>testHenry005 (in category 'testing-henry') -----
testHenry005
	"Optional quantifier: 'ab?bc' is expected to match 'abbc' and 'abc' but not 'abbbbc'. Note that the 'abc' case appears twice in the table."
	self runRegex: #('ab?bc'
		'abbc' true (1 'abbc')
		'abc' true (1 'abc')
		'abbbbc' false nil
		'abc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry006 (in category 'testing-henry') -----
testHenry006
	"Anchors at both ends: '^abc$' is expected to match 'abc' and to reject 'abcc' and 'aabc'."
	self runRegex: #('^abc$'
		'abc' true (1 'abc')
		'abcc' false nil
		'aabc' false nil)!

----- Method: RxMatcherTest>>testHenry007 (in category 'testing-henry') -----
testHenry007
	"Start anchor: '^abc' is expected to match the leading 'abc' of 'abcc'."
	self runRegex: #('^abc'
		'abcc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry008 (in category 'testing-henry') -----
testHenry008
	"End anchor: 'abc$' is expected to match the trailing 'abc' of 'aabc'."
	self runRegex: #('abc$'
		'aabc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry009 (in category 'testing-henry') -----
testHenry009
	"A lone '^' is expected to compile and to be found in 'abc'; no subexpressions are checked."
	self runRegex: #('^'
		'abc' true nil)!

----- Method: RxMatcherTest>>testHenry010 (in category 'testing-henry') -----
testHenry010
	"A lone '$' is expected to compile and to be found in 'abc'; no subexpressions are checked."
	self runRegex: #('$'
		'abc' true nil)!

----- Method: RxMatcherTest>>testHenry011 (in category 'testing-henry') -----
testHenry011
	"Dot wildcard: 'a.c' is expected to match both 'abc' and 'axc'."
	self runRegex: #('a.c'
		'abc' true (1 'abc')
		'axc' true (1 'axc'))!

----- Method: RxMatcherTest>>testHenry012 (in category 'testing-henry') -----
testHenry012
	"Need to get creative to include the null character: the clause is concatenated from a literal array, a one-element array holding a computed subject string with character value 0 in it, and a second literal array that supplies the remaining slots."
	self runRegex: #('a.*c'	
		'axyzc' true (1 'axyzc')
		'axy zc' true (1 'axy zc') "testing that a dot matches a space"
		), (Array with: 'axy', (String with: 0 asCharacter), 'zc'), #(false nil "testing that a dot does not match a null"
		'axyzd' false nil)!

----- Method: RxMatcherTest>>testHenry013 (in category 'testing-henry') -----
testHenry013
	"'.a.*' needs one character in front of the a: expected to match '4abc' inside '1234abc' and to find nothing in 'abcd'."
	self runRegex: #('.a.*'
		'1234abc' true (1 '4abc')
		'abcd' false nil)!

----- Method: RxMatcherTest>>testHenry014 (in category 'testing-henry') -----
testHenry014
	"\w+ between a and c: expected to match 'abbbbc' but not 'abb bc', where a space intervenes."
	self runRegex: #('a\w+c'
		' abbbbc ' true (1 'abbbbc')
		'abb bc' false nil)!

----- Method: RxMatcherTest>>testHenry015 (in category 'testing-henry') -----
testHenry015
	"\w+ is expected to find 'foobar' after leading blanks and a tab, and nothing in a subject made only of blanks and punctuation."
	self runRegex: #('\w+'
		'  	foobar	quux' true (1 'foobar')
		' 	~!!@#$%^&*()-+=\|/?.>,<' false nil)!

----- Method: RxMatcherTest>>testHenry016 (in category 'testing-henry') -----
testHenry016
	"\W+ between a and c: expected to match 'a   c' but not 'a bc'."
	self runRegex: #('a\W+c'
		'a   c' true (1 'a   c')
		'a bc' false nil)!

----- Method: RxMatcherTest>>testHenry017 (in category 'testing-henry') -----
testHenry017
	"\W+ is expected to pick the punctuation run out of the first subject and to find nothing in 'foobar'."
	self runRegex: #('\W+'
		'foo!!@#$bar' true (1 '!!@#$')
		'foobar' false nil)!

----- Method: RxMatcherTest>>testHenry018 (in category 'testing-henry') -----
testHenry018
	"\s* between a and c: expected to match 'a   c' but not 'a bc'."
	self runRegex: #('a\s*c'
		'a   c' true (1 'a   c')
		'a bc' false nil)!

----- Method: RxMatcherTest>>testHenry019 (in category 'testing-henry') -----
testHenry019
	"\s+ is expected to find the single space in the first subject and nothing in the second, which contains no whitespace."
	self runRegex: #('\s+'
		'abc3457 sd' true (1 ' ')
		'1234$^*^&asdfb' false nil)!

----- Method: RxMatcherTest>>testHenry020 (in category 'testing-henry') -----
testHenry020
	"\S* between a and c: expected to match 'aqwertyc' but not 'ab c'."
	self runRegex: #('a\S*c'
		'aqwertyc' true (1 'aqwertyc')
		'ab c' false nil)!

----- Method: RxMatcherTest>>testHenry021 (in category 'testing-henry') -----
testHenry021
	"\S+ is expected to find 'asdf' amid blanks and tabs, and nothing in a subject consisting only of whitespace (the second subject literal spans a line break)."
	self runRegex: #('\S+'
		'     	asdf		' true (1 'asdf')
		' 	
			' false nil)!

----- Method: RxMatcherTest>>testHenry022 (in category 'testing-henry') -----
testHenry022
	"\d+ between a and c: expected to match 'a0123456789c' but not 'a12b34c'."
	self runRegex: #('a\d+c'
		'a0123456789c' true (1 'a0123456789c')
		'a12b34c' false nil)!

----- Method: RxMatcherTest>>testHenry023 (in category 'testing-henry') -----
testHenry023
	"\d+ is expected to find '123' in the first subject and nothing in the second, which has no digits."
	self runRegex: #('\d+'
		'foo@#$%123ASD #$$%^&' true (1 '123')
		'foo!!@#$asdfl;' false nil)!

----- Method: RxMatcherTest>>testHenry024 (in category 'testing-henry') -----
testHenry024
	"\D+ between a and c: expected to match 'aqwertyc' but not 'aqw6ertc'."
	self runRegex: #('a\D+c'
		'aqwertyc' true (1 'aqwertyc')
		'aqw6ertc' false nil)!

----- Method: RxMatcherTest>>testHenry025 (in category 'testing-henry') -----
testHenry025
	"\D+ is expected to find ' abc ' between the digit runs and nothing in an all-digit subject."
	self runRegex: #('\D+'
		'1234 abc 456' true (1 ' abc ')
		'1234567890' false nil)!

----- Method: RxMatcherTest>>testHenry026 (in category 'testing-henry') -----
testHenry026
	"'(f|o)+\b': the repeated group must end at a word boundary; expected to match 'foo' with or without surrounding spaces."
	self runRegex: #('(f|o)+\b'
		'foo' true (1 'foo')
		' foo ' true (1 'foo'))!

----- Method: RxMatcherTest>>testHenry027 (in category 'testing-henry') -----
testHenry027
	"\b in front of a: expects the first word that starts with a, and no match when no word does."
	self runRegex: #('\ba\w+' "a word beginning with an A"
		'land ancient' true (1 'ancient')
		'antique vase' true (1 'antique')
		'goofy foobar' false nil)!

----- Method: RxMatcherTest>>testHenry028 (in category 'testing-henry') -----
testHenry028
	"'(f|o)+\B': the match must end inside a word; expected 'foo' in 'quuxfoobar' and only 'fo' in 'quuxfoo '."
	self runRegex: #('(f|o)+\B'
		'quuxfoobar' true (1 'foo')
		'quuxfoo ' true (1 'fo'))!

----- Method: RxMatcherTest>>testHenry029 (in category 'testing-henry') -----
testHenry029
	"\B in front of a: expects a match starting at an a that is not at the start of a word, and none in 'foonix is better'."
	self runRegex: #('\Ba\w+' "a word with an A in the middle, match at A and further"
		'land ancient' true (1 'and')
		'antique vase' true (1 'ase')
		'smalltalk shall overcome' true (1 'alltalk')
		'foonix is better' false nil)!

----- Method: RxMatcherTest>>testHenry030 (in category 'testing-henry') -----
testHenry030
	"'fooa\>.*': fooa must be followed by an end-of-word position; expected to fail when digits or letters follow directly. Subexpressions are not checked."
	self runRegex: #('fooa\>.*'
		'fooa ' true nil
		'fooa123' false nil
		'fooa bar' true nil
		'fooa' true nil
		'fooargh' false nil)!

----- Method: RxMatcherTest>>testHenry031 (in category 'testing-henry') -----
testHenry031
	"'\>.+abc': the match must begin at an end-of-word position; of the three subjects only 'foo abcde' is expected to match."
	self runRegex: #('\>.+abc'
		' abcde fg' false nil
		'foo abcde' true (1 ' abc')
		'abcde' false nil)!

----- Method: RxMatcherTest>>testHenry032 (in category 'testing-henry') -----
testHenry032
	"'\<foo.*': foo must start a word; expected to fail for ' 12foo' and 'barfoo', and to skip the embedded foo in 'qfoobarq'."
	self runRegex: #('\<foo.*'
		'foo' true nil
		'foobar' true nil
		'qfoobarq foonix' true (1 'foonix')
		' foo' true nil
		' 12foo' false nil
		'barfoo' false nil)!

----- Method: RxMatcherTest>>testHenry033 (in category 'testing-henry') -----
testHenry033
	"'.+\<foo': something must precede a foo that starts a word; of the three subjects only 'ab foo' is expected to match."
	self runRegex: #('.+\<foo'
		'foo' false nil
		'ab foo' true (1 'ab foo')
		'abfoo' false nil)!

----- Method: RxMatcherTest>>testHenry034 (in category 'testing-henry') -----
testHenry034
	"Character set: 'a[bc]d' is expected to match 'abd' but not 'abc'."
	self runRegex: #('a[bc]d'
		'abc' false nil
		'abd' true (1 'abd'))!

----- Method: RxMatcherTest>>testHenry035 (in category 'testing-henry') -----
testHenry035
	"Character range: 'a[b-d]e' is expected to match 'ace' but not 'abd'."
	self runRegex: #('a[b-d]e'
		'abd' false nil
		'ace' true (1 'ace'))!

----- Method: RxMatcherTest>>testHenry036 (in category 'testing-henry') -----
testHenry036
	"Character range: 'a[b-d]' is expected to match 'ac' inside 'aac'."
	self runRegex: #('a[b-d]'
		'aac' true (1 'ac'))!

----- Method: RxMatcherTest>>testHenry037 (in category 'testing-henry') -----
testHenry037
	"A hyphen placed first in a set is expected to be taken literally: 'a[-b]' matches 'a-'."
	self runRegex: #('a[-b]'
		'a-' true (1 'a-'))!

----- Method: RxMatcherTest>>testHenry038 (in category 'testing-henry') -----
testHenry038
	"A hyphen placed last in a set is expected to be taken literally: 'a[b-]' matches 'a-'."
	self runRegex: #('a[b-]'
		'a-' true (1 'a-'))!

----- Method: RxMatcherTest>>testHenry039 (in category 'testing-henry') -----
testHenry039
	"Expects 'a[a-b-c]' to be rejected at compile time (the nil in the second slot tells #runRegex: that compilation must fail)."
	self runRegex: #('a[a-b-c]' nil)!

----- Method: RxMatcherTest>>testHenry040 (in category 'testing-henry') -----
testHenry040
	"The single-character set '[k]' is expected to compile and to find nothing in 'ab'."
	self runRegex: #('[k]'
		'ab' false nil)!

----- Method: RxMatcherTest>>testHenry041 (in category 'testing-henry') -----
testHenry041
	"Expects the reversed range in 'a[b-a]' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a[b-a]' nil)!

----- Method: RxMatcherTest>>testHenry042 (in category 'testing-henry') -----
testHenry042
	"Expects 'a[]b' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a[]b' nil)!

----- Method: RxMatcherTest>>testHenry043 (in category 'testing-henry') -----
testHenry043
	"Expects the unterminated set in 'a[' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a[' nil)!

----- Method: RxMatcherTest>>testHenry044 (in category 'testing-henry') -----
testHenry044
	"A closing bracket outside a set is expected to be an ordinary character: 'a]' matches 'a]'."
	self runRegex: #('a]' 
		'a]' true (1 'a]'))!

----- Method: RxMatcherTest>>testHenry045 (in category 'testing-henry') -----
testHenry045
	"A closing bracket placed first in a set is expected to be a literal member: 'a[]]b' matches 'a]b'."
	self runRegex: #('a[]]b'
		'a]b' true (1 'a]b'))!

----- Method: RxMatcherTest>>testHenry046 (in category 'testing-henry') -----
testHenry046
	"Negated set: 'a[^bc]d' is expected to match 'aed' but not 'abd'."
	self runRegex: #('a[^bc]d'
		'aed' true (1 'aed')
		'abd' false nil)!

----- Method: RxMatcherTest>>testHenry047 (in category 'testing-henry') -----
testHenry047
	"Negated set with a leading hyphen: 'a[^-b]c' is expected to match 'adc' but not 'a-c'."
	self runRegex: #('a[^-b]c'
		'adc' true (1 'adc')
		'a-c' false nil)!

----- Method: RxMatcherTest>>testHenry048 (in category 'testing-henry') -----
testHenry048
	"Negated set with a leading closing bracket: 'a[^]b]c' is expected to match 'adc' but not 'a]c'."
	self runRegex: #('a[^]b]c'
		'a]c' false nil
		'adc' true (1 'adc'))!

----- Method: RxMatcherTest>>testHenry049 (in category 'testing-henry') -----
testHenry049
	"\d inside a set together with a literal e: expects runs of digits and e's, stopping at g."
	self runRegex: #('[\de]+'
		'01234' true (1 '01234')
		'0123e456' true (1 '0123e456')
		'0123e45g78' true (1 '0123e45'))!

----- Method: RxMatcherTest>>testHenry050 (in category 'testing-henry') -----
testHenry050
	"Literal e placed before \d inside the set: expects runs of digits and e's, stopping at g."
	self runRegex: #('[e\d]+' "reversal of the above, should be the same"
		'01234' true (1 '01234')
		'0123e456' true (1 '0123e456')
		'0123e45g78' true (1 '0123e45'))!

----- Method: RxMatcherTest>>testHenry051 (in category 'testing-henry') -----
testHenry051
	"\D inside a set: '[\D]+' is expected to find the first run of non-digits, 'abc'."
	self runRegex: #('[\D]+'
		'123abc45def78' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry052 (in category 'testing-henry') -----
testHenry052
	"Named class [:digit:] plus a literal e inside a set: expects runs of digits and e's, stopping at g."
	self runRegex: #('[[:digit:]e]+'
		'01234' true (1 '01234')
		'0123e456' true (1 '0123e456')
		'0123e45g78' true (1 '0123e45'))!

----- Method: RxMatcherTest>>testHenry053 (in category 'testing-henry') -----
testHenry053
	"\s inside a set: '[\s]+' is expected to find the two spaces."
	self runRegex: #('[\s]+'
		'2  spaces' true (1 '  '))!

----- Method: RxMatcherTest>>testHenry054 (in category 'testing-henry') -----
testHenry054
	"\S inside a set: expected to find the whole non-blank run between the surrounding spaces."
	self runRegex: #('[\S]+'
		'  word12!!@#$  ' true (1 'word12!!@#$'))!

----- Method: RxMatcherTest>>testHenry055 (in category 'testing-henry') -----
testHenry055
	"\w inside a set: expected to find 'foo123bar' between the blanks and tabs."
	self runRegex: #('[\w]+'
		' 	foo123bar	45' true (1 'foo123bar'))!

----- Method: RxMatcherTest>>testHenry056 (in category 'testing-henry') -----
testHenry056
	"\W inside a set: expected to find the punctuation run in the middle of the subject."
	self runRegex: #('[\W]+'
		'fii234!!@#$34f' true (1 '!!@#$'))!

----- Method: RxMatcherTest>>testHenry057 (in category 'testing-henry') -----
testHenry057
	"Negated [:alnum:] class: expected to find the punctuation run in the middle of the subject."
	self runRegex: #('[^[:alnum:]]+'
		'fii234!!@#$34f' true (1 '!!@#$'))!

----- Method: RxMatcherTest>>testHenry058 (in category 'testing-henry') -----
testHenry058
	"[:alnum:] combined with the literals % and &: expected to stop at the dollar sign and to find nothing in an all-punctuation subject."
	self runRegex: #('[%&[:alnum:]]+'
		'foo%3' true (1 'foo%3')
		'foo34&rt4$57a' true (1 'foo34&rt4')
		'!!@#$' false nil)!

----- Method: RxMatcherTest>>testHenry059 (in category 'testing-henry') -----
testHenry059
	"[:alpha:] class: expected to find 'foo' and to stop at digits."
	self runRegex: #('[[:alpha:]]+'
		' 123foo3 ' true (1 'foo')
		'123foo' true (1 'foo')
		'foo1b' true (1 'foo'))!

----- Method: RxMatcherTest>>testHenry060 (in category 'testing-henry') -----
testHenry060
	"[:cntrl:] class: expected to compile and to find nothing in a subject of letters, digits and spaces."
	self runRegex: #('[[:cntrl:]]+'
		' a 1234asdf' false nil)!

----- Method: RxMatcherTest>>testHenry061 (in category 'testing-henry') -----
testHenry061
	"[:lower:] class: expected to find the lowercase run 'lower' in both subjects."
	self runRegex: #('[[:lower:]]+'
		'UPPERlower1234' true (1 'lower')
		'lowerUPPER' true (1 'lower'))!

----- Method: RxMatcherTest>>testHenry062 (in category 'testing-henry') -----
testHenry062
	"[:upper:] class: expected to find the uppercase run 'UPPER' in both subjects."
	self runRegex: #('[[:upper:]]+'
		'UPPERlower1234' true (1 'UPPER')
		'lowerUPPER ' true (1 'UPPER'))!

----- Method: RxMatcherTest>>testHenry063 (in category 'testing-henry') -----
testHenry063
	"[:space:] class: expected to find the two spaces."
	self runRegex: #('[[:space:]]+'
		'2  spaces' true (1 '  '))!

----- Method: RxMatcherTest>>testHenry064 (in category 'testing-henry') -----
testHenry064
	"Negated [:space:] class: expected to find the whole non-blank run between the surrounding spaces."
	self runRegex: #('[^[:space:]]+'
		'  word12!!@#$  ' true (1 'word12!!@#$'))!

----- Method: RxMatcherTest>>testHenry065 (in category 'testing-henry') -----
testHenry065
	"[:graph:] class: expected to match all of 'abc'."
	self runRegex: #('[[:graph:]]+'
		'abc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry066 (in category 'testing-henry') -----
testHenry066
	"[:print:] class: expected to match all of 'abc'."
	self runRegex: #('[[:print:]]+'
		'abc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry067 (in category 'testing-henry') -----
testHenry067
	"Negated [:punct:] class: expected to find 'hello', the run between the leading exclamation mark and the comma."
	self runRegex: #('[^[:punct:]]+'
		'!!hello,world!!' true (1 'hello'))!

----- Method: RxMatcherTest>>testHenry068 (in category 'testing-henry') -----
testHenry068
	"[:xdigit:] class: expected to find runs of hexadecimal digits in either letter case, leaving out x, g, h, G and H."
	self runRegex: #('[[:xdigit:]]+'
		'  x10FCD  ' true (1 '10FCD')
		' hgfedcba0123456789ABCDEFGH '
			true (1 'fedcba0123456789ABCDEF'))!

----- Method: RxMatcherTest>>testHenry069 (in category 'testing-henry') -----
testHenry069
	"Alternation: 'ab|cd' is expected to find 'ab' first in both subjects."
	self runRegex: #('ab|cd'
		'abc' true (1 'ab')
		'abcd' true (1 'ab'))!

----- Method: RxMatcherTest>>testHenry070 (in category 'testing-henry') -----
testHenry070
	"Empty group: '()ef' is expected to match 'ef', with subexpression 2 being the empty string."
	self runRegex: #('()ef'
		'def' true (1 'ef' 2 ''))!

----- Method: RxMatcherTest>>testHenry071 (in category 'testing-henry') -----
testHenry071
	"Expects '()*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('()*' nil)!

----- Method: RxMatcherTest>>testHenry072 (in category 'testing-henry') -----
testHenry072
	"Expects the leading star in '*a' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('*a' nil)!

----- Method: RxMatcherTest>>testHenry073 (in category 'testing-henry') -----
testHenry073
	"Expects '^*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('^*' nil)!

----- Method: RxMatcherTest>>testHenry074 (in category 'testing-henry') -----
testHenry074
	"Expects '$*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('$*' nil)!

----- Method: RxMatcherTest>>testHenry075 (in category 'testing-henry') -----
testHenry075
	"Expects '(*)b' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(*)b' nil)!

----- Method: RxMatcherTest>>testHenry076 (in category 'testing-henry') -----
testHenry076
	"'$b' is expected to compile and to find nothing in 'b'."
	self runRegex: #('$b'	'b' false nil)!

----- Method: RxMatcherTest>>testHenry077 (in category 'testing-henry') -----
testHenry077
	"Expects the trailing backslash in 'a\' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a\' nil)!

----- Method: RxMatcherTest>>testHenry078 (in category 'testing-henry') -----
testHenry078
	"Escaped parenthesis: 'a\(b' is expected to match the literal text 'a(b'."
	self runRegex: #('a\(b'
		'a(b' true (1 'a(b'))!

----- Method: RxMatcherTest>>testHenry079 (in category 'testing-henry') -----
testHenry079
	"Star applied to an escaped parenthesis: expected to match with zero or with two literal opening parentheses."
	self runRegex: #('a\(*b'
		'ab' true (1 'ab')
		'a((b' true (1 'a((b'))!

----- Method: RxMatcherTest>>testHenry080 (in category 'testing-henry') -----
testHenry080
	"Escaped backslash: 'a\\b' is expected to match the literal text 'a\b'."
	self runRegex: #('a\\b'
		'a\b' true (1 'a\b'))!

----- Method: RxMatcherTest>>testHenry081 (in category 'testing-henry') -----
testHenry081
	"Expects the unmatched closing parenthesis in 'abc)' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('abc)' nil)!

----- Method: RxMatcherTest>>testHenry082 (in category 'testing-henry') -----
testHenry082
	"Expects the unmatched opening parenthesis in '(abc' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(abc' nil)!

----- Method: RxMatcherTest>>testHenry083 (in category 'testing-henry') -----
testHenry083
	"Nested groups: subexpressions 2 and 3 of '((a))' are both expected to be 'a', as is the whole match at index 1."
	self runRegex: #('((a))'
		'abc' true (1 'a' 2 'a' 3 'a'))!

----- Method: RxMatcherTest>>testHenry084 (in category 'testing-henry') -----
testHenry084
	"Two groups: '(a)b(c)' is expected to yield 'a' and 'c' as subexpressions 2 and 3."
	self runRegex: #('(a)b(c)'
		'abc' true (1 'abc' 2 'a' 3 'c'))!

----- Method: RxMatcherTest>>testHenry085 (in category 'testing-henry') -----
testHenry085
	"'a+b+c' is expected to match the final 'abc' of 'aabbabc', not the leading 'aabb' which is not followed by c."
	self runRegex: #('a+b+c'
		'aabbabc' true (1 'abc'))!

----- Method: RxMatcherTest>>testHenry086 (in category 'testing-henry') -----
testHenry086
	"Expects the doubled quantifier in 'a**' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a**' nil)!

----- Method: RxMatcherTest>>testHenry087 (in category 'testing-henry') -----
testHenry087
	"Expects 'a*?' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('a*?' nil)!

----- Method: RxMatcherTest>>testHenry088 (in category 'testing-henry') -----
testHenry088
	"Expects '(a*)*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(a*)*' nil)!

----- Method: RxMatcherTest>>testHenry089 (in category 'testing-henry') -----
testHenry089
	"Expects '(a*)+' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(a*)+' nil)!

----- Method: RxMatcherTest>>testHenry090 (in category 'testing-henry') -----
testHenry090
	"Expects '(a|)*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(a|)*' nil)!

----- Method: RxMatcherTest>>testHenry091 (in category 'testing-henry') -----
testHenry091
	"Expects '(a*|b)*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(a*|b)*' nil)!

----- Method: RxMatcherTest>>testHenry092 (in category 'testing-henry') -----
testHenry092
	"'(a+|b)*' on 'ab': the whole match is expected to be 'ab' and subexpression 2 to be 'b'."
	self runRegex: #('(a+|b)*'
		'ab' true (1 'ab' 2 'b'))!

----- Method: RxMatcherTest>>testHenry093 (in category 'testing-henry') -----
testHenry093
	"'(a+|b)+' on 'ab': the whole match is expected to be 'ab' and subexpression 2 to be 'b'."
	self runRegex: #('(a+|b)+'
		'ab' true (1 'ab' 2 'b'))!

----- Method: RxMatcherTest>>testHenry094 (in category 'testing-henry') -----
testHenry094
	"'(a+|b)?' on 'ab': both the whole match and subexpression 2 are expected to be 'a'."
	self runRegex: #('(a+|b)?'
		'ab' true (1 'a' 2 'a'))!

----- Method: RxMatcherTest>>testHenry095 (in category 'testing-henry') -----
testHenry095
	"Starred negated set: '[^ab]*' is expected to match all of 'cde'."
	self runRegex: #('[^ab]*'
		'cde' true (1 'cde'))!

----- Method: RxMatcherTest>>testHenry096 (in category 'testing-henry') -----
testHenry096
	"Expects '(^)*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(^)*' nil)!

----- Method: RxMatcherTest>>testHenry097 (in category 'testing-henry') -----
testHenry097
	"Expects '(ab|)*' to be rejected at compile time (nil in the second slot)."
	self runRegex: #('(ab|)*' nil)!

----- Method: RxMatcherTest>>testHenry098 (in category 'testing-henry') -----
testHenry098
	"Expects ')(' to be rejected at compile time (nil in the second slot)."
	self runRegex: #(')(' nil)!

----- Method: RxMatcherTest>>testHenry099 (in category 'testing-henry') -----
testHenry099
	"The empty pattern is expected to compile and to match the empty string in 'abc'."
	self runRegex: #('' 'abc' true (1 ''))!

----- Method: RxMatcherTest>>testHenry100 (in category 'testing-henry') -----
testHenry100
	"'abc' is expected to find nothing in the empty subject."
	self runRegex: #('abc' '' false nil)!

----- Method: RxMatcherTest>>testHenry101 (in category 'testing-henry') -----
testHenry101
	"'a*' is expected to match the empty string in an empty subject.
	The third slot of a test triple must be nil or a flat array of (index substring) pairs. It used to be the empty string, which #runMatcher:with:expect:withSubexpressions: iterated over zero times, so the matched text was never verified. It now states the expected whole match explicitly."
	self runRegex: #('a*'
		'' true (1 ''))!

----- Method: RxMatcherTest>>testHenry102 (in category 'testing-henry') -----
testHenry102
	"The literal 'abcd' is expected to match itself."
	self runRegex: #('abcd'
		'abcd' true (1 'abcd'))!

----- Method: RxMatcherTest>>testHenry103 (in category 'testing-henry') -----
testHenry103
	"Group inside literals: 'a(bc)d' is expected to yield 'bc' as subexpression 2."
	self runRegex: #('a(bc)d'
		'abcd' true (1 'abcd' 2 'bc'))!

----- Method: RxMatcherTest>>testHenry104 (in category 'testing-henry') -----
testHenry104
	"Starred single-character group: subexpression 2 is expected to be 'c', the last character consumed before d."
	self runRegex: #('([abc])*d'
		'abbbcd' true (1 'abbbcd' 2 'c'))!

----- Method: RxMatcherTest>>testHenry105 (in category 'testing-henry') -----
testHenry105
	"Starred group followed by the literal 'bcd': subexpression 2 is expected to be 'a', leaving 'bcd' for the literal tail."
	self runRegex: #('([abc])*bcd'
		'abcd' true (1 'abcd' 2 'a'))!

----- Method: RxMatcherTest>>testHenry106 (in category 'testing-henry') -----
testHenry106
	"Five-way alternation: expected to match through its last alternative, 'e'."
	self runRegex: #('a|b|c|d|e' 'e' true (1 'e'))!

----- Method: RxMatcherTest>>testHenry107 (in category 'testing-henry') -----
testHenry107
	"Grouped five-way alternation followed by f: subexpression 2 is expected to be 'e'. The trailing comment is a table row that was left out of the test data."
	self runRegex: #('(a|b|c|d|e)f'
		'ef' true (1 'ef' 2 'e'))
	"	((a*|b))*	-	c	-	-"!

----- Method: RxMatcherTest>>testHenry108 (in category 'testing-henry') -----
testHenry108
	"Star in the middle of a literal: 'abcd*efg' is expected to match 'abcdefg'."
	self runRegex: #('abcd*efg' 
		'abcdefg' true (1 'abcdefg'))!

----- Method: RxMatcherTest>>testHenry109 (in category 'testing-henry') -----
testHenry109
	"'ab*' is expected to report the first match in the subject: 'ab' in 'xabyabbbz' and a bare 'a' in 'xayabbbz'."
	self runRegex: #('ab*' 
		'xabyabbbz' true (1 'ab')
		'xayabbbz' true (1 'a'))!

----- Method: RxMatcherTest>>testHenry110 (in category 'testing-henry') -----
testHenry110
	"'(ab|cd)e' on 'abcde': the expected match is 'cde' with subexpression 2 being 'cd'."
	self runRegex: #('(ab|cd)e' 'abcde' true (1 'cde' 2 'cd'))!

----- Method: RxMatcherTest>>testHenry111 (in category 'testing-henry') -----
testHenry111
	"Set with members in no particular order: '[abhgefdc]ij' is expected to match 'hij'."
	self runRegex: #('[abhgefdc]ij' 'hij' true (1 'hij'))!

----- Method: RxMatcherTest>>testHenry112 (in category 'testing-henry') -----
testHenry112
	"Start-anchored group: '^(ab|cd)e' is expected to find nothing in 'abcde'."
	self runRegex: #('^(ab|cd)e' 'abcde' false nil)
	!

----- Method: RxMatcherTest>>testHenry113 (in category 'testing-henry') -----
testHenry113
	"Group with an empty alternative: '(abc|)def' is expected to be found in 'abcdef'; subexpressions are not checked."
	self runRegex: #('(abc|)def' 'abcdef' true nil)
	!

----- Method: RxMatcherTest>>testHenry114 (in category 'testing-henry') -----
testHenry114
	"'(a|b)c*d' on 'abcd': the expected match is 'bcd' with subexpression 2 being 'b'."
	self runRegex: #('(a|b)c*d' 'abcd' true (1 'bcd' 2 'b'))
	!

----- Method: RxMatcherTest>>testHenry115 (in category 'testing-henry') -----
testHenry115
	"'(ab|ab*)bc' on 'abc': subexpression 2 is expected to be 'a', leaving 'bc' for the literal tail."
	self runRegex: #('(ab|ab*)bc' 'abc' true (1 'abc' 2 'a'))
	!

----- Method: RxMatcherTest>>testHenry116 (in category 'testing-henry') -----
testHenry116
	"'a([bc]*)c*' on 'abc': subexpression 2 is expected to be 'bc', so nothing is left for the trailing c*."
	self runRegex: #('a([bc]*)c*' 'abc' true (1 'abc' 2 'bc'))
	!

----- Method: RxMatcherTest>>testHenry117 (in category 'testing-henry') -----
testHenry117
	"'a([bc]*)(c*d)' on 'abcd': subexpression 2 is expected to be 'bc' and subexpression 3 only 'd'."
	self runRegex: #('a([bc]*)(c*d)' 'abcd' true (1 'abcd' 2 'bc' 3 'd'))
	!

----- Method: RxMatcherTest>>testHenry118 (in category 'testing-henry') -----
testHenry118
	"'a([bc]+)(c*d)' on 'abcd': subexpression 2 is expected to be 'bc' and subexpression 3 only 'd'."
	self runRegex: #('a([bc]+)(c*d)' 'abcd' true (1 'abcd' 2 'bc' 3 'd'))
	!

----- Method: RxMatcherTest>>testHenry119 (in category 'testing-henry') -----
testHenry119
	"'a([bc]*)(c+d)' on 'abcd': the second group requires at least one c, so subexpression 2 is expected to be 'b' and subexpression 3 'cd'."
	self runRegex: #('a([bc]*)(c+d)' 'abcd' true (1 'abcd' 2 'b' 3 'cd'))
	!

----- Method: RxMatcherTest>>testHenry120 (in category 'testing-henry') -----
testHenry120
	"'a[bcd]*dcdcde' is expected to match all of 'adcdcde'."
	self runRegex: #('a[bcd]*dcdcde' 'adcdcde' true (1 'adcdcde'))
	!

----- Method: RxMatcherTest>>testHenry121 (in category 'testing-henry') -----
testHenry121
	"Same subject as for the starred variant, but with a plus on the set: 'a[bcd]+dcdcde' is expected to find nothing in 'adcdcde'."
	self runRegex: #('a[bcd]+dcdcde' 'adcdcde' false nil)
	!

----- Method: RxMatcherTest>>testHenry122 (in category 'testing-henry') -----
testHenry122
	"'(ab|a)b*c' is expected to match all of 'abc'; the group itself is not checked."
	self runRegex: #('(ab|a)b*c' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry123 (in category 'testing-henry') -----
testHenry123
	"Nested and sequential groups: checks the whole match and subexpressions 3, 4 and 5 ('a', 'b', 'd'); subexpression 2 is not checked."
	self runRegex: #('((a)(b)c)(d)' 'abcd' true (1 'abcd' 3 'a' 4 'b' 5 'd'))
	!

----- Method: RxMatcherTest>>testHenry124 (in category 'testing-henry') -----
testHenry124
	"A set holding the range from space to tilde: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry125 (in category 'testing-henry') -----
testHenry125
	"The space-to-tilde range written twice in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry126 (in category 'testing-henry') -----
testHenry126
	"The space-to-tilde range written three times in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry127 (in category 'testing-henry') -----
testHenry127
	"The space-to-tilde range written four times in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry128 (in category 'testing-henry') -----
testHenry128
	"The space-to-tilde range written five times in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~ -~ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry129 (in category 'testing-henry') -----
testHenry129
	"The space-to-tilde range written six times in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~ -~ -~ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry130 (in category 'testing-henry') -----
testHenry130
	"The space-to-tilde range written seven times in one set: expected to compile and to match all of 'abc'."
	self runRegex: #('[ -~ -~ -~ -~ -~ -~ -~]*' 'abc' true (1 'abc'))
	!

----- Method: RxMatcherTest>>testHenry131 (in category 'testing-henry') -----
testHenry131
	"Identifier-like pattern (letter or underscore, then letters, digits or underscores): expected to match all of 'alpha'."
	self runRegex: #('[a-zA-Z_][a-zA-Z0-9_]*' 'alpha' true (1 'alpha'))
	!

----- Method: RxMatcherTest>>testHenry132 (in category 'testing-henry') -----
testHenry132
	"Top-level alternation: on 'abh' the expected match is 'bh' (the '.h$' branch), and subexpression 2 is expected to be nil."
	self runRegex: #('^a(bc+|b[eh])g|.h$' 'abh' true (1 'bh' 2 nil))
	!

----- Method: RxMatcherTest>>testHenry133 (in category 'testing-henry') -----
testHenry133
	"Three alternatives inside one group, the last containing a nested group: subexpression 3 is expected to be nil for the 'effgz' matches and 'j' for 'ij'; 'effg' and 'bcdd' are expected not to match."
	self runRegex: #('(bc+d$|ef*g.|h?i(j|k))' 
		'effgz' true (1 'effgz' 2 'effgz' 3 nil)
		'ij' true (1 'ij' 2 'ij' 3 'j')
		'effg' false nil
		'bcdd' false nil
		'reffgz' true (1 'effgz' 2 'effgz' 3 nil))!

----- Method: RxMatcherTest>>testHenry134 (in category 'testing-henry') -----
testHenry134
	"Nine nested groups around a: expected to compile and match 'a'; only the whole match is checked."
	self runRegex: #('(((((((((a)))))))))' 'a' true (1 'a'))!

----- Method: RxMatcherTest>>testHenry135 (in category 'testing-henry') -----
testHenry135
	"Literal containing spaces: expected to be found at the start of the longer subject and not in 'uh-uh'."
	self runRegex: #('multiple words of text' 
		'uh-uh' false nil
		'multiple words of text, yeah' true (1 'multiple words of text'))!

----- Method: RxMatcherTest>>testHenry136 (in category 'testing-henry') -----
testHenry136
	"'(.*)c(.*)' on 'abcde': subexpressions 2 and 3 are expected to be the text before and after the c."
	self runRegex: #('(.*)c(.*)' 'abcde' true (1 'abcde' 2 'ab' 3 'de'))!

----- Method: RxMatcherTest>>testHenry137 (in category 'testing-henry') -----
testHenry137
	"Escaped parentheses around two groups: subexpressions 2 and 3 are expected to be 'a' and 'b' from '(a, b)'; the whole match is not checked."
	self runRegex: #('\((.*), (.*)\)' '(a, b)' true (2 'a' 3 'b'))!

----- Method: RxMatcherTest>>testMatches (in category 'testing-protocol') -----
testMatches
	"#matches: must accept a subject that is a single word and reject one that contains a space."
	| matcher |
	matcher := self matcherClass forString: '\w+'.
	self
		assert: (matcher matches: 'now');
		deny: (matcher matches: 'now is')!

----- Method: RxMatcherTest>>testMatchesIn (in category 'testing-protocol') -----
testMatchesIn
	"#matchesIn: must answer the four words of the subject, in order."
	| matcher words |
	matcher := self matcherClass forString: '\w+'.
	words := (matcher matchesIn: 'now is the time') asArray.
	self assert: words = #('now' 'is' 'the' 'time')!

----- Method: RxMatcherTest>>testMatchesInCollect (in category 'testing-protocol') -----
testMatchesInCollect
	"#matchesIn:collect: must answer the block results (reversed words here) for the four matches, in order."
	| matcher reversedWords |
	matcher := self matcherClass forString: '\w+'.
	reversedWords := (matcher
		matchesIn: 'now is the time'
		collect: [ :word | word reversed ]) asArray.
	self assert: reversedWords = #('won' 'si' 'eht' 'emit')!

----- Method: RxMatcherTest>>testMatchesInDo (in category 'testing-protocol') -----
testMatchesInDo
	"#matchesIn:do: must hand the four words to the block in order. Each match consumes the head of the pending queue, which must be empty at the end."
	| matcher pending |
	pending := OrderedCollection withAll: #('now' 'is' 'the' 'time').
	matcher := self matcherClass forString: '\w+'.
	matcher
		matchesIn: 'now is the time'
		do: [ :word | self assert: word = pending removeFirst ].
	self assert: pending isEmpty!

----- Method: RxMatcherTest>>testMatchesOnStream (in category 'testing-protocol') -----
testMatchesOnStream
	"#matchesOnStream: must answer the four words read from the stream, in order."
	| matcher words |
	matcher := self matcherClass forString: '\w+'.
	words := (matcher matchesOnStream: 'now is the time' readStream) asArray.
	self assert: words = #('now' 'is' 'the' 'time')!

----- Method: RxMatcherTest>>testMatchesOnStreamCollect (in category 'testing-protocol') -----
testMatchesOnStreamCollect
	"#matchesOnStream:collect: must answer the block results (reversed words here) for the four matches read from the stream, in order."
	| matcher reversedWords |
	matcher := self matcherClass forString: '\w+'.
	reversedWords := (matcher
		matchesOnStream: 'now is the time' readStream
		collect: [ :word | word reversed ]) asArray.
	self assert: reversedWords = #('won' 'si' 'eht' 'emit')!

----- Method: RxMatcherTest>>testMatchesOnStreamDo (in category 'testing-protocol') -----
testMatchesOnStreamDo
	"#matchesOnStream:do: must hand the four words to the block in order. Each match consumes the head of the pending queue, which must be empty at the end."
	| matcher pending |
	pending := OrderedCollection withAll: #('now' 'is' 'the' 'time').
	matcher := self matcherClass forString: '\w+'.
	matcher
		matchesOnStream: 'now is the time' readStream
		do: [ :word | self assert: word = pending removeFirst ].
	self assert: pending isEmpty!

----- Method: RxMatcherTest>>testMatchesStream (in category 'testing-protocol') -----
testMatchesStream
	"#matchesStream: must accept a stream holding a single word and reject one whose contents include a space."
	| matcher |
	matcher := self matcherClass forString: '\w+'.
	self
		assert: (matcher matchesStream: 'now' readStream);
		deny: (matcher matchesStream: 'now is' readStream)!

----- Method: RxMatcherTest>>testMatchingRangesIn (in category 'testing-protocol') -----
testMatchingRangesIn
	"Verify that matchingRangesIn: answers one range per \w+ match whose first and last elements are the match boundaries."
	| wordMatcher bounds |
	wordMatcher := self matcherClass forString: '\w+'.
	"Flattened first/last indices of 'now', 'is', 'the' and 'time' within the input."
	bounds := #(1 3 5 6 8 10 12 15) asOrderedCollection.
	(wordMatcher matchingRangesIn: 'now is the time') do: [ :interval |
		| start stop |
		start := bounds removeFirst.
		self assert: interval first = start.
		stop := bounds removeFirst.
		self assert: interval last = stop ].
	"No expected boundary may be left over."
	self assert: bounds isEmpty!

----- Method: RxMatcherTest>>testRegex001 (in category 'testing') -----
testRegex001
	"Expectation data for '^.*$': the empty string, 'a' and 'abc' are all expected to match in full.
	NOTE(review): the layout (regex first, then input / expected result / subexpression data triples) is inferred from the data; confirm against runRegex:."
	| spec |
	spec := #('^.*$'
		'' true (1 '')
		'a' true (1 'a')
		'abc' true (1 'abc')).
	self runRegex: spec!

----- Method: RxMatcherTest>>testRegex002 (in category 'testing') -----
testRegex002
	"Expectation data for 'a\w+c': the underscore counts as a word constituent, the dash does not.
	NOTE(review): the layout (regex first, then input / expected result / subexpression data triples) is inferred from the data; confirm against runRegex:."
	| spec |
	spec := #('a\w+c'
		' abb_bbc ' true (1 'abb_bbc')
		'abb-bc' false nil).
	self runRegex: spec!

----- Method: RxMatcherTest>>testRegex003 (in category 'testing') -----
testRegex003
	"Expectation data for 'a\W+c': the first two inputs are expected not to match, while an unbroken run of punctuation between the a and the c is expected to match in full.
	NOTE(review): the layout (regex first, then input / expected result / subexpression data triples) is inferred from the data; confirm against runRegex:."
	self runRegex: #('a\W+c'
		' abb_bbc ' false nil
		'abb-bc' false nil
		'a.,:;-&!!"#%/()={[]}+?\~*''c' true (1 'a.,:;-&!!"#%/()={[]}+?\~*''c'))!

----- Method: RxMatcherTest>>testRegex004 (in category 'testing') -----
testRegex004
	"Expectation data for ':isVowel:': 'aei' is expected to match and 'xyz' is not. No subexpression data is given for either input.
	NOTE(review): presumably the pattern tests characters with the unary selector isVowel; confirm against the Regex documentation. The array layout is inferred from the data; confirm against runRegex:."
	| spec |
	spec := #(':isVowel:'
		'aei' true nil
		'xyz' false nil).
	self runRegex: spec!

----- Method: RxMatcherTest>>testStringAllRangesOfRegexMatches (in category 'testing-extensions') -----
testStringAllRangesOfRegexMatches
	"Verify the string extension allRangesOfRegexMatches: the only b+ match in 'aabbcc' spans positions 3 through 4."
	| ranges onlyRange |
	ranges := 'aabbcc' allRangesOfRegexMatches: 'b+'.
	self assert: ranges size = 1.
	onlyRange := ranges first.
	self assert: onlyRange first = 3.
	self assert: onlyRange last = 4!

----- Method: RxMatcherTest>>testStringAllRegexMatches (in category 'testing-extensions') -----
testStringAllRegexMatches
	"Verify the string extension allRegexMatches: the only b+ match in 'aabbcc' is 'bb'."
	| found |
	found := 'aabbcc' allRegexMatches: 'b+'.
	self assert: found size = 1.
	self assert: found first = 'bb'!

----- Method: RxMatcherTest>>testStringAsRegex (in category 'testing-extensions') -----
testStringAsRegex
	"Verify that asRegex answers an instance of the matcher class preferred by RxParser."
	| regex |
	regex := 'b+' asRegex.
	self assert: regex class = RxParser preferredMatcherClass!

----- Method: RxMatcherTest>>testStringAsRegexIgnoringCase (in category 'testing-extensions') -----
testStringAsRegexIgnoringCase
	"Verify that asRegexIgnoringCase answers an instance of the matcher class preferred by RxParser."
	| regex |
	regex := 'b+' asRegexIgnoringCase.
	self assert: regex class = RxParser preferredMatcherClass!

----- Method: RxMatcherTest>>testStringCopyWithRegexMatchesReplacedWith (in category 'testing-extensions') -----
testStringCopyWithRegexMatchesReplacedWith
	"Verify that copyWithRegex:matchesReplacedWith: answers a copy in which the b+ match is replaced by the given string."
	| replaced |
	replaced := 'aabbcc' copyWithRegex: 'b+' matchesReplacedWith: 'X'.
	self assert: replaced = 'aaXcc'!

----- Method: RxMatcherTest>>testStringCopyWithRegexMatchesTranslatedUsing (in category 'testing-extensions') -----
testStringCopyWithRegexMatchesTranslatedUsing
	"Verify that copyWithRegex:matchesTranslatedUsing: hands each match to the block and substitutes the block's value into the copy."
	| translated |
	translated := 'aabbcc'
		copyWithRegex: 'b+'
		matchesTranslatedUsing: [ :match |
			"The block must receive the matched text itself."
			self assert: match = 'bb'.
			'X' ].
	self assert: translated = 'aaXcc'!

----- Method: RxMatcherTest>>testStringMatchesRegex (in category 'testing-extensions') -----
testStringMatchesRegex
	"matchesRegex: must cover the entire receiver: regexes matching only the beginning or only the end of 'aabbcc' are rejected."
	| subject |
	subject := 'aabbcc'.
	#('a+' 'b+c+') do: [ :partial |
		self deny: (subject matchesRegex: partial) ].
	self assert: (subject matchesRegex: 'a+b+c+')!

----- Method: RxMatcherTest>>testStringMatchesRegexIgnoringCase (in category 'testing-extensions') -----
testStringMatchesRegexIgnoringCase
	"matchesRegexIgnoringCase: must cover the entire receiver as well; lowercase regexes are matched against an uppercase receiver."
	| subject |
	subject := 'AABBCC'.
	#('a+' 'b+c+') do: [ :partial |
		self deny: (subject matchesRegexIgnoringCase: partial) ].
	self assert: (subject matchesRegexIgnoringCase: 'a+b+c+')!

----- Method: RxMatcherTest>>testStringPrefixMatchesRegex (in category 'testing-extensions') -----
testStringPrefixMatchesRegex
	"prefixMatchesRegex: succeeds when the regex matches at the start of the receiver; a regex that only matches further in is rejected."
	| subject |
	subject := 'aabbcc'.
	self assert: (subject prefixMatchesRegex: 'a+').
	self deny: (subject prefixMatchesRegex: 'b+')!

----- Method: RxMatcherTest>>testStringPrefixMatchesRegexIgnoringCase (in category 'testing-extensions') -----
testStringPrefixMatchesRegexIgnoringCase
	"prefixMatchesRegexIgnoringCase: behaves like the prefix match, with lowercase regexes applied to an uppercase receiver."
	| subject |
	subject := 'AABBCC'.
	self assert: (subject prefixMatchesRegexIgnoringCase: 'a+').
	self deny: (subject prefixMatchesRegexIgnoringCase: 'b+')!

----- Method: RxMatcherTest>>testStringRegexMatchesCollect (in category 'testing-extensions') -----
testStringRegexMatchesCollect
	"Verify that regex:matchesCollect: answers the block's value for the only b+ match in 'aabbcc'."
	| shouted |
	shouted := 'aabbcc'
		regex: 'b+'
		matchesCollect: [ :match | match asUppercase ].
	self assert: shouted size = 1.
	self assert: shouted first = 'BB'!

----- Method: RxMatcherTest>>testStringRegexMatchesDo (in category 'testing-extensions') -----
testStringRegexMatchesDo
	"Verify that regex:matchesDo: evaluates the block exactly once, with 'bb', for the regex b+ on 'aabbcc'."
	| seen |
	seen := OrderedCollection new.
	'aabbcc'
		regex: 'b+'
		matchesDo: [ :match | seen addLast: match ].
	self assert: seen size = 1.
	self assert: seen first = 'bb'!

----- Method: RxMatcherTest>>testSubexpressionCount (in category 'testing-protocol') -----
testSubexpressionCount
	"Each pair holds a regex and its expected subexpressionCount. Judging by the data, the whole match is counted too ('a' has a count of 1). Matchers that answer false to supportsSubexpressions are not checked."
	#(('a' 1) ('a(b)' 2) ('a(b(c))' 3) ('(a)(b)' 3) ('(a(b))*' 3)) do: [ :spec |
		| candidate |
		candidate := self matcherClass forString: spec first.
		candidate supportsSubexpressions ifTrue: [
			self assert: candidate subexpressionCount = spec last ] ]!

TestCase subclass: #RxParserTest
	instanceVariableNames: ''
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Regex-Tests-Core'!

!RxParserTest commentStamp: 'Tbn 11/12/2010 22:31' prior: 0!
This class provides tests for the regular expression parser.

The tests exercise the parser indirectly, through the String convenience protocol (matchesRegex:, asRegex, asRegexIgnoringCase), and their method comments walk through the regex syntax being checked.

Methods whose selectors do not begin with 'test' (DoesNotWorktestBackQuotesEscape, toDotestSpecialCharacterInSetRange) are parked; presumably SUnit does not run them automatically.!

----- Method: RxParserTest>>DoesNotWorktestBackQuotesEscape (in category 'tests') -----
DoesNotWorktestBackQuotesEscape
	"self debug: #DoesNotWorktestBackQuotesEscape"

	"Regular expressions can also include the following backquote escapes
to refer to popular classes of characters:
	\w	any word constituent character (same as [a-zA-Z0-9_])
	\W	any character but a word constituent
	\d	a digit (same as [0-9])
	\D	anything but a digit
	\s 	a whitespace character
	\S	anything but a whitespace character
These escapes are also allowed in character classes: '[\w+-]' means
'any character that is either a word constituent, or a plus, or a
minus'."

	"The selector is kept unchanged. It does not begin with 'test', so
presumably SUnit does not pick this method up automatically; rename it
to testBackQuotesEscape to enable it."

	"matchesRegex: succeeds only when the regex covers the WHOLE receiver.
The former assertions expected a single \w to match 'one word' and 'one',
which can never hold. A lone \w matches exactly one word character."
	self deny: ('one word' matchesRegex: '\w').
	self deny: ('one' matchesRegex: '\w').
	self assert: ('o' matchesRegex: '\w').

	"With a plus, \w covers a whole word, but never the space between two words."
	self assert: ('one' matchesRegex: '\w+').
	self deny: ('one word' matchesRegex: '\w+')!

----- Method: RxParserTest>>test (in category 'tests') -----
test
	"self debug: #test"

	"Same scenario as testTranslatingMatchesUsing. The case-insensitive regex
matches the words beginning with either an uppercase or a lowercase T, and
each match is replaced by its uppercase form."
	| wordsStartingWithT shouted |
	wordsStartingWithT := '\<t\w+' asRegexIgnoringCase.
	shouted := wordsStartingWithT
		copy: 'now is the Time'
		translatingMatchesUsing: [ :word | word asUppercase ].
	self assert: shouted = 'now is THE TIME'!

----- Method: RxParserTest>>testCadrMatching (in category 'tests') -----
testCadrMatching
	"self debug: #testCadrMatching"

	"A slightly more complex example: one expression that matches the name
of any of the Lisp-style `car', `cdr', `caar', `cadr', ... functions,
that is a c, one or more a or d characters, and a closing r."
	#('car' 'cdr' 'caar' 'cadr' 'caddar') do: [ :accessorName |
		self assert: (accessorName matchesRegex: 'c(a|d)+r') ]!

----- Method: RxParserTest>>testCharacterSet (in category 'tests') -----
testCharacterSet
	"self debug: #testCharacterSet"

	"Besides single characters, regular expressions have other, more
`interesting', components. A character set is a string of characters
enclosed in square brackets. It matches any single character that
appears between the brackets. For example, `[01]' matches either `0'
or `1'."
	| binaryDigit |
	binaryDigit := '[01]'.
	self assert: ('0' matchesRegex: binaryDigit).
	"'3' is not in the set; '11' and '01' fail because a set matches only one character."
	#('3' '11' '01') do: [ :rejected |
		self deny: (rejected matchesRegex: binaryDigit) ]!

----- Method: RxParserTest>>testCharacterSetBinaryNumber (in category 'tests') -----
testCharacterSetBinaryNumber
	"self debug: #testCharacterSetBinaryNumber"

	"Combining a character set with the plus operator gives a recognizer
for binary numbers. A single foreign digit makes the input fail."
	| binaryNumber |
	binaryNumber := '[01]+'.
	self assert: ('10010100' matchesRegex: binaryNumber).
	self deny: ('10001210' matchesRegex: binaryNumber)!

----- Method: RxParserTest>>testCharacterSetInversion (in category 'tests') -----
testCharacterSetInversion
	"self debug: #testCharacterSetInversion"

	"If the first character after the opening bracket is `^', the set is
inverted: it matches any single character *not* appearing between the
brackets."
	| neitherZeroNorOne |
	neitherZeroNorOne := '[^01]'.

	"0 appears in the set, so there is no match."
	self deny: ('0' matchesRegex: neitherZeroNorOne).

	"3 is not in the set, so it matches."
	self assert: ('3' matchesRegex: neitherZeroNorOne).

	"An inverted set still matches a single character only, and both of
these longer inputs end in a zero, so neither matches."
	#('30' '33333333333333333333333330') do: [ :rejected |
		self deny: (rejected matchesRegex: neitherZeroNorOne) ]!

----- Method: RxParserTest>>testCharacterSetRange (in category 'tests') -----
testCharacterSetRange
	"self debug: #testCharacterSetRange"

	"For convenience, a set may include ranges: pairs of characters
separated with `-'. A range is equivalent to listing all characters
between its ends: `[0-9]' is the same as `[0123456789]'."
	| digit |
	digit := '[0-9]'.
	#('0' '9') do: [ :accepted |
		self assert: (accepted matchesRegex: digit) ].
	"A letter is outside the range, and two digits are one character too many."
	#('a' '01') do: [ :rejected |
		self deny: (rejected matchesRegex: digit) ].
	self assert: ('01442128629839374565' matchesRegex: '[0-9]+')!

----- Method: RxParserTest>>testLookaround (in category 'tests') -----
testLookaround
	"The pattern places a negative lookahead for ABC in front of .* : 'A' and 'AB' are accepted, 'ABC' is rejected."
	| notStartingWithABC |
	notStartingWithABC := '(?!!ABC).*'.
	#('A' 'AB') do: [ :accepted |
		self assert: (accepted matchesRegex: notStartingWithABC) ].
	self deny: ('ABC' matchesRegex: notStartingWithABC)!

----- Method: RxParserTest>>testMatchesInwW (in category 'tests') -----
testMatchesInwW
	"self debug: #testMatchesInwW"

	"Backslash escapes similar to those in Perl are allowed in patterns:
	\w	any word constituent character (equivalent to [a-zA-Z0-9_])
	\W	any character but a word constituent (equivalent to [^a-zA-Z0-9_])"
	| words gaps letters |
	words := ('\w+' asRegex matchesIn: 'now is the time') asArray.
	self assert: words = #('now' 'is' 'the' 'time').

	"The complement picks up the three single spaces between the words."
	gaps := ('\W+' asRegex matchesIn: 'now is the time') asArray.
	self assert: gaps = #(' ' ' ' ' ').

	"Without a quantifier, \w matches one character at a time, so every letter is a match of its own."
	letters := ('\w' asRegex matchesIn: 'now') asArray.
	self assert: letters = #('n' 'o' 'w')!

----- Method: RxParserTest>>testOrOperator (in category 'tests') -----
testOrOperator
	"self debug: #testOrOperator"

	"The last operator is `|', meaning `or'. It is placed between two
regular expressions, and the resulting expression matches if one of
the two matches. It has the lowest possible precedence (lower than
sequencing). For example, `ab*|ba*' means `a followed by any number of
b's, or b followed by any number of a's'."
	| alternation |
	alternation := 'ab*|ba*'.
	self assert: ('abb' matchesRegex: alternation).
	self assert: ('baa' matchesRegex: alternation).
	self deny: ('baab' matchesRegex: alternation).

	"It is possible to write an expression matching an empty string, for
example `a|'. However, it is an error to apply `*', `+', or `?' to such
an expression: `(a|)*' is invalid."
	self
		should: [ '(a|)*' asRegex ]
		raise: Error!

----- Method: RxParserTest>>testQuantifier (in category 'tests') -----
testQuantifier
	"Check the brace quantifier forms {n}, {n,m}, {n,} and {,m} against runs of the letter a.
	Each row reads: input, regex, whether the whole input is expected to match."
	#(
		(''     'a{2}'   false)
		('a'    'a{2}'   false)
		('aa'   'a{2}'   true)
		('aaa'  'a{2}'   false)

		(''     'a{2,3}' false)
		('a'    'a{2,3}' false)
		('aa'   'a{2,3}' true)
		('aaa'  'a{2,3}' true)
		('aaaa' 'a{2,3}' false)

		(''     'a{2,}'  false)
		('a'    'a{2,}'  false)
		('aa'   'a{2,}'  true)
		('aaa'  'a{2,}'  true)
		('aaaa' 'a{2,}'  true)

		(''     'a{,3}'  true)
		('a'    'a{,3}'  true)
		('aa'   'a{,3}'  true)
		('aaa'  'a{,3}'  true)
		('aaaa' 'a{,3}'  false)
	) do: [ :row |
		self assert: (row first matchesRegex: row second) = row last ]!

----- Method: RxParserTest>>testQuantifierSimple (in category 'tests') -----
testQuantifierSimple
	"Test quantifier expressions that can be expressed with + or *.
	Each row reads: input, regex, whether the whole input is expected to match."
	#(
		('a'    'a{1}'     true)
		('aa'   'a{1}'     false)

		('a'    'a{1,1}'   true)
		('aa'   'a{1,1}'   false)

		('ab'   '(ab){1,}' true)
		('abab' '(ab){1,}' true)
		(''     '(ab){1,}' false)

		('ab'   '(ab){,1}' true)
		(''     '(ab){,1}' true)
		('abab' '(ab){,1}' false)
	) do: [ :row |
		self assert: (row first matchesRegex: row second) = row last ]!

----- Method: RxParserTest>>testQuotingOperators (in category 'tests') -----
testQuotingOperators
	"self debug: #testQuotingOperators"

	"The characters `*', `+', `?', `(', and `)' have a special meaning in
regular expressions. To use one of them literally, quote it by
preceding it with a backslash. The backslash is therefore special as
well and needs to be quoted for a literal match, as does any other
special character."
	| literalStar |
	literalStar := 'ab*'.

	"Unquoted, the star in the regex is an operator, so the literal text is not matched."
	self deny: (literalStar matchesRegex: 'ab*').

	"Quoted, the star and the backslash match themselves."
	self assert: (literalStar matchesRegex: 'ab\*').
	self assert: ('a\c' matchesRegex: 'a\\c')!

----- Method: RxParserTest>>testSimpleMatchesRegex (in category 'tests') -----
testSimpleMatchesRegex
	"self debug: #testSimpleMatchesRegex"

	"The simplest regular expression is a single character, which matches
exactly that character. A sequence of characters matches a string made
of exactly the same sequence of characters."
	| literal |
	literal := 'foobar'.
	self assert: ('a' matchesRegex: 'a').
	self assert: ('foobar' matchesRegex: literal).
	self deny: ('blorple' matchesRegex: literal)!

----- Method: RxParserTest>>testSimpleMatchesRegexWithStar (in category 'tests') -----
testSimpleMatchesRegexWithStar
	"self debug: #testSimpleMatchesRegexWithStar"

	"testSimpleMatchesRegex introduced a primitive regular expression (a
character) and an operator (sequencing). Operators are applied to
regular expressions to produce more complex regular expressions.
Sequencing (placing expressions one after another) is, in a certain
sense, an `invisible' operator, yet arguably the most common one.
A more `visible' operator is Kleene closure, usually just called
`a star'. A regular expression followed by an asterisk matches any
number (including 0) of matches of the original expression."
	| anyAsThenB |
	anyAsThenB := 'a*b'.
	"One, many and zero leading a's are all accepted."
	#('ab' 'aaaaab' 'b') do: [ :accepted |
		self assert: (accepted matchesRegex: anyAsThenB) ].
	self deny: ('aac' matchesRegex: anyAsThenB)!

----- Method: RxParserTest>>testSpecialCharacterInSetRange (in category 'tests') -----
testSpecialCharacterInSetRange
	"self debug: #testSpecialCharacterInSetRange"

	"Special characters within a set are `^', `-', and the `]' that closes
the set. These are the ways to use them literally in a set:
	[01^]		-- put the caret anywhere except the beginning
	[01-]		-- put the dash as the last character
	[]01]		-- put the closing bracket as the first character
	[^]01]			(thus, empty and universal sets cannot be specified)
Only the caret forms are exercised here."

	"A caret that is not in first position is an ordinary member of the set."
	#('0' '1' '^') do: [ :member |
		self assert: (member matchesRegex: '[01^]') ].

	"A leading caret inverts the set: [^01] matches everything except 0 and 1."
	#('0' '1') do: [ :excluded |
		self deny: (excluded matchesRegex: '[^01]') ].
	self assert: ('^' matchesRegex: '[^01]')!

----- Method: RxParserTest>>testStarPlusQuestionMark (in category 'tests') -----
testStarPlusQuestionMark
	"self debug: #testStarPlusQuestionMark"

	"Two other operators similar to `*' are `+' and `?'. `+' (positive
closure, or simply `plus') matches one or more occurrences of the
original expression. `?' (`optional') matches zero or one, but never
more, occurrences."
	| oneOrMoreBs |
	oneOrMoreBs := 'ab+c'.
	self assert: ('ac' matchesRegex: 'ab*c').

	"The plus needs at least one b."
	self deny: ('ac' matchesRegex: oneOrMoreBs).
	#('abbc' 'abbbbbbc') do: [ :accepted |
		self assert: (accepted matchesRegex: oneOrMoreBs) ].

	"Two b's are too many for the question mark."
	self deny: ('abbc' matchesRegex: 'ab?c')!

----- Method: RxParserTest>>testStarPrecedence (in category 'tests') -----
testStarPrecedence
	"self debug: #testStarPrecedence"

	"A star's precedence is higher than that of sequencing. A star applies
to the shortest possible subexpression that precedes it. For example,
'ab*' means `a followed by zero or more occurrences of b', not `zero
or more occurrences of ab'."
	| starOnB starOnGroup |
	starOnB := 'ab*'.
	self assert: ('abbb' matchesRegex: starOnB).
	self deny: ('abab' matchesRegex: starOnB).

	"To make a regex that matches `zero or more occurrences of ab',
enclose `ab' in parentheses."
	starOnGroup := '(ab)*'.
	self assert: ('abab' matchesRegex: starOnGroup).
	self deny: ('abcab' matchesRegex: starOnGroup)!

----- Method: RxParserTest>>testTranslatingMatchesUsing (in category 'tests') -----
testTranslatingMatchesUsing
	"self debug: #testTranslatingMatchesUsing"

	"The case-insensitive regex matches the words beginning with either an
uppercase or a lowercase T. copy:translatingMatchesUsing: answers a copy
of the input in which each match is replaced by the block's value."
	| regex upcasedCopy |
	regex := '\<t\w+' asRegexIgnoringCase.
	upcasedCopy := regex
		copy: 'now is the Time'
		translatingMatchesUsing: [ :matchedWord | matchedWord asUppercase ].
	self assert: upcasedCopy = 'now is THE TIME'!

----- Method: RxParserTest>>toDotestSpecialCharacterInSetRange (in category 'tests') -----
toDotestSpecialCharacterInSetRange
	"self debug: #toDotestSpecialCharacterInSetRange"

	"Special characters within a set are `^', `-', and the `]' that closes
the set. These are the ways to use them literally in a set:
	[01^]		-- put the caret anywhere except the beginning
	[01-]		-- put the dash as the last character
	[]01]		-- put the closing bracket as the first character
	[^]01]			(thus, empty and universal sets cannot be specified)
The dash and bracket forms are not exercised yet."
	| zero |
	zero := '0'.
	self assert: (zero matchesRegex: '[01^]').
	self assert: (zero matchesRegex: '[0-9]')!



More information about the Squeak-dev mailing list