Regex review

danil a. osipchuk danil at tsnet.ru
Mon Aug 30 14:47:53 UTC 2004


danil a. osipchuk wrote:

the code I used if somebody cares
-------------- next part --------------
'From Squeak3.8alpha of ''17 July 2004'' [latest update: #5976] on 30 August 2004 at 6:26:04 pm'!
Object subclass: #RegexCompairSuit
	uses: {}
	instanceVariableNames: ''
	classVariableNames: ''
	poolDictionaries: ''
	category: 'Regex-Compair'!

!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/29/2004 18:26'!
benchmark: aSymbol
	| ms result |
	ms := Time millisecondsToRun: [ result := self perform: aSymbol].
	^Array with: ms with: result! !

!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:32'!
testFileLinesLFDo: aBlock
	
	self testInFileDo: [:fstream | 
		[fstream atEnd] whileFalse: [ aBlock  value: (fstream upTo: Character lf)]]! !

!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:31'!
testInFileDo: aBlock
	| fstream | 
	
	fstream := FileStream readOnlyFileNamed: self testInFileName.
	[ aBlock value: fstream] ensure: [ fstream close].
	! !

!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:32'!
testInFileLinesDo: aBlock
	
	self testInFileDo: [:fstream | 
		[fstream atEnd] whileFalse: [ aBlock  value: fstream nextLine]]! !

!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:52'!
testInOutFilesDo: aBlock
	| out  | 
	
	self testInFileDo: [:in | 
		out := FileStream forceNewFileNamed: self testOutFileName.
		[ aBlock value: in value: out] ensure: [ out close]].
	! !


!RegexCompairSuit methodsFor: 'config' stamp: 'dao 8/30/2004 16:29'!
testInFileName

	^'/usr/home/danil/myDocs/Squeak/2004-August.txt'! !

!RegexCompairSuit methodsFor: 'config' stamp: 'dao 8/30/2004 16:29'!
testOutFileName

	^'/usr/home/danil/myDocs/Squeak/out.txt'! !


!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 15:34'!
reCollectMailAddresses
	| matcher mails |
	mails := Set new.
	matcher := '\w+@(\w+\.)+\w+' asRe.
	self testFileLinesLFDo: [:str | 
		(matcher collectFrom: str) ifNotNilDo: [:col | mails addAll: col]].
	^mails
	
	
	! !

!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 16:21'!
reCollectMaskedMailAddresses
	| matcher mails |
	mails := Set new.
	matcher := '\w+ at (\w+\.)+\w+' asRe.
	self testFileLinesLFDo: [:str | 
		(matcher collectFrom: str) ifNotNilDo: [:col | mails addAll: col]].
	^mails
	
	
	! !

!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 16:31'!
reCountWords
	| matcher num |
	num := 0.
	matcher := '\w+' asRe.
	self testInFileLinesDo: [:str | 
		num := num +  (matcher collectFrom: str) size].
	^num
	
	
	! !

!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 15:12'!
reCountWordsLF
	| matcher num |
	num := 0.
	matcher := '\w+' asRe.
	self testFileLinesLFDo: [:str | 
		(matcher collectFrom: str) ifNotNilDo: [:col | num := num + col size]].
	^num
	
	
	! !

!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 18:02'!
reRemaskMailAddresses
	| matcher str| 
	matcher  :=   Re on: '(\w+)( at )(\w+\.)+(\w+)'.
	self testInOutFilesDo: [:in :out |
			[in atEnd] whileFalse: 
				[ str := in upTo: Character lf.
				   out nextPutAll: (matcher search: str andReplace: [:m |
					String streamContents: [:stream|
						stream
							nextPutAll: (m matchAt: 1);
							nextPutAll: ' dog ';
							nextPutAll: ((m matchAt: 3) copyReplaceAll: '.' with: ' point ');
							nextPutAll: (m matchAt: 4)	]]).
					out nextPut: Character lf.]].

	
! !


!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCollectMailAddresses
	| mails matcher | 
	mails := Set new.
	matcher  :=   RxMatcher forString: '\w+@(\w+\.)+\w+'.
	self testInFileDo: [:fstream |
			 matcher matchesOnStream: fstream  do: [:s | mails add: s]].
	^mails
	
! !

!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCollectMaskedMailAddresses
	| mails matcher | 
	mails := Set new.
	matcher  :=   RxMatcher forString: '\w+ at (\w+\.)+\w+'.
	self testInFileDo: [:fstream |
			 matcher matchesOnStream: fstream  do: [:s | mails add: s]].
	^mails
	
! !

!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCountWords
	| num matcher | 
	num := 0.
	matcher  :=   RxMatcher forString: '\w+'.
	self testInFileDo: [:fstream |
			 matcher matchesOnStream: fstream  do: [:s | num := num + 1]].
	^num
	
! !

!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 17:30'!
rxRemaskMailAddresses
	| matcher | 
	matcher  :=   RxMatcher forString: '(\w+)( at )(\w+\.)+(\w+)'.
	self testInOutFilesDo: [:in :out |
			 matcher copyStream: in to: out
				translatingMatchesUsing: [:weWillUseMatcherCacheInstead |
					String streamContents: [:stream|
						stream
							nextPutAll: (matcher subexpression: 2);
							nextPutAll: ' dog ';
							nextPutAll: ((matcher subexpression: 4) copyReplaceAll: '.' with: ' point ');
							nextPutAll: (matcher subexpression: 5)	]]].

	
! !


!RegexCompairSuit methodsFor: 'misc' stamp: 'dao 8/30/2004 16:30'!
countLines
	| num |
	num := 0.
	self testInFileLinesDo: [:s | num := num + 1].
	^num! !

!RegexCompairSuit methodsFor: 'misc' stamp: 'dao 8/29/2004 20:18'!
countLinesLF
	| num |
	num := 0.
	self testFileLinesLFDo: [:s | num := num + 1].
	^num! !


More information about the Squeak-dev mailing list