Regex review
danil a. osipchuk
danil at tsnet.ru
Mon Aug 30 14:47:53 UTC 2004
danil a. osipchuk wrote:
the code I used if somebody cares
-------------- next part --------------
'From Squeak3.8alpha of ''17 July 2004'' [latest update: #5976] on 30 August 2004 at 6:26:04 pm'!
Object subclass: #RegexCompairSuit
uses: {}
instanceVariableNames: ''
classVariableNames: ''
poolDictionaries: ''
category: 'Regex-Compair'!
!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/29/2004 18:26'!
benchmark: aSymbol
| ms result |
ms := Time millisecondsToRun: [ result := self perform: aSymbol].
^Array with: ms with: result! !
!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:32'!
testFileLinesLFDo: aBlock
self testInFileDo: [:fstream |
[fstream atEnd] whileFalse: [ aBlock value: (fstream upTo: Character lf)]]! !
!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:31'!
testInFileDo: aBlock
| fstream |
fstream := FileStream readOnlyFileNamed: self testInFileName.
[ aBlock value: fstream] ensure: [ fstream close].
! !
!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:32'!
testInFileLinesDo: aBlock
self testInFileDo: [:fstream |
[fstream atEnd] whileFalse: [ aBlock value: fstream nextLine]]! !
!RegexCompairSuit methodsFor: 'support' stamp: 'dao 8/30/2004 16:52'!
testInOutFilesDo: aBlock
| out |
self testInFileDo: [:in |
out := FileStream forceNewFileNamed: self testOutFileName.
[ aBlock value: in value: out] ensure: [ out close]].
! !
!RegexCompairSuit methodsFor: 'config' stamp: 'dao 8/30/2004 16:29'!
testInFileName
^'/usr/home/danil/myDocs/Squeak/2004-August.txt'! !
!RegexCompairSuit methodsFor: 'config' stamp: 'dao 8/30/2004 16:29'!
testOutFileName
^'/usr/home/danil/myDocs/Squeak/out.txt'! !
!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 15:34'!
reCollectMailAddresses
| matcher mails |
mails := Set new.
matcher := '\w+@(\w+\.)+\w+' asRe.
self testFileLinesLFDo: [:str |
(matcher collectFrom: str) ifNotNilDo: [:col | mails addAll: col]].
^mails
! !
!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 16:21'!
reCollectMaskedMailAddresses
| matcher mails |
mails := Set new.
matcher := '\w+ at (\w+\.)+\w+' asRe.
self testFileLinesLFDo: [:str |
(matcher collectFrom: str) ifNotNilDo: [:col | mails addAll: col]].
^mails
! !
!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 16:31'!
reCountWords
| matcher num |
num := 0.
matcher := '\w+' asRe.
self testInFileLinesDo: [:str |
num := num + (matcher collectFrom: str) size].
^num
! !
!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 15:12'!
reCountWordsLF
| matcher num |
num := 0.
matcher := '\w+' asRe.
self testFileLinesLFDo: [:str |
(matcher collectFrom: str) ifNotNilDo: [:col | num := num + col size]].
^num
! !
!RegexCompairSuit methodsFor: 're-tests' stamp: 'dao 8/30/2004 18:02'!
reRemaskMailAddresses
| matcher str|
matcher := Re on: '(\w+)( at )(\w+\.)+(\w+)'.
self testInOutFilesDo: [:in :out |
[in atEnd] whileFalse:
[ str := in upTo: Character lf.
out nextPutAll: (matcher search: str andReplace: [:m |
String streamContents: [:stream|
stream
nextPutAll: (m matchAt: 1);
nextPutAll: ' dog ';
nextPutAll: ((m matchAt: 3) copyReplaceAll: '.' with: ' point ');
nextPutAll: (m matchAt: 4) ]]).
out nextPut: Character lf.]].
! !
!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCollectMailAddresses
| mails matcher |
mails := Set new.
matcher := RxMatcher forString: '\w+@(\w+\.)+\w+'.
self testInFileDo: [:fstream |
matcher matchesOnStream: fstream do: [:s | mails add: s]].
^mails
! !
!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCollectMaskedMailAddresses
| mails matcher |
mails := Set new.
matcher := RxMatcher forString: '\w+ at (\w+\.)+\w+'.
self testInFileDo: [:fstream |
matcher matchesOnStream: fstream do: [:s | mails add: s]].
^mails
! !
!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 16:31'!
rxCountWords
| num matcher |
num := 0.
matcher := RxMatcher forString: '\w+'.
self testInFileDo: [:fstream |
matcher matchesOnStream: fstream do: [:s | num := num + 1]].
^num
! !
!RegexCompairSuit methodsFor: 'rx-tests' stamp: 'dao 8/30/2004 17:30'!
rxRemaskMailAddresses
| matcher |
matcher := RxMatcher forString: '(\w+)( at )(\w+\.)+(\w+)'.
self testInOutFilesDo: [:in :out |
matcher copyStream: in to: out
translatingMatchesUsing: [:weWillUseMatcherCacheInstead |
String streamContents: [:stream|
stream
nextPutAll: (matcher subexpression: 2);
nextPutAll: ' dog ';
nextPutAll: ((matcher subexpression: 4) copyReplaceAll: '.' with: ' point ');
nextPutAll: (matcher subexpression: 5) ]]].
! !
!RegexCompairSuit methodsFor: 'misc' stamp: 'dao 8/30/2004 16:30'!
countLines
| num |
num := 0.
self testInFileLinesDo: [:s | num := num + 1].
^num! !
!RegexCompairSuit methodsFor: 'misc' stamp: 'dao 8/29/2004 20:18'!
countLinesLF
| num |
num := 0.
self testFileLinesLFDo: [:s | num := num + 1].
^num! !
More information about the Squeak-dev
mailing list
|