[squeak-dev] The Inbox: Regex-Tests-Core-ct.28

christoph.thiede at student.hpi.uni-potsdam.de christoph.thiede at student.hpi.uni-potsdam.de
Thu Oct 28 03:44:26 UTC 2021


Name: Regex-Tests-Core-ct.28
Author: ct
Time: 28 October 2021, 4:56:42.233233 am
UUID: 4baf515d-1baa-814c-b109-26781d0698b1
Ancestors: Regex-Tests-Core-mt.16

==================== Summary ====================

Complements Regex-Core-ct.71 (Unicode backslash atoms). Supersedes Regex-Tests-Core-ct.24. Merges Regex-Tests-Core-tobe.17.

=============== Diff against Regex-Core-mt.61 ===============

RxMatcherTest>>testHenry039 {testing-henry} · ct 10/28/2021 02:44 (changed)
testHenry039
+     "The expectations below treat 'a[a-c-d]' as: the letter a followed by one character out of the range a-c, a hyphen, or d.
+     NOTE(review): each row is presumably input / expected match result / expected subexpressions as consumed by #runRegex: - confirm against that helper."
-     self runRegex: #('a[a-b-c]' nil)
+     self runRegex: #('a[a-c-d]'
+         'aa' true nil
+         'ab' true nil
+         'ac' true nil
+         'ad' true nil
+         'a-' true nil
+         'ae' false nil)

RxParserTest>>testCharacterSetWithEscapedCharacters {tests} · ct 10/27/2021 23:17 (changed)
testCharacterSetWithEscapedCharacters
    "self debug: #testCharacterSetRange"
+     "Each group of three elements below is: a regex string, an input in which the regex must be found via #search:, and an input in which it must not be found.
+     NOTE(review): the debug hint above names #testCharacterSetRange rather than this selector - probably a copy-paste leftover."
    
    {
        '[\r]'. String cr. String space.
        '[\n]'. String lf. String space.
        '[\t]'. String tab. String space.
        '[\e]'. Character escape asString. String space.
        '[\f]'. Character newPage asString. String space.
        '[\]]+'. ']]]'. '[[['.
        '[\S]+[\s]+=[\s]+#[^\[(]'. 'foo = #bar'. 'foo = #[1 2 3]'.
        '[\d]+'. '123'. 'abc'.
        '[\D]+'. 'abc'. '123'.
        '[\w]+'. 'a1_b2'. '...'.
        '[\W]+'. '...'. 'a1_b2'.
+         '[\b]'. 'b'. ' '.
+         '[\p{L}\d]+'. 'tschüß123'. ':-)'.
+         '[\P{L}a]'. 'a'. 'b'.
    } groupsDo: [ :regexString :inputToAccept :inputToReject |
        | regex |
        regex := regexString asRegex.
        self
            assert: (regex search: inputToAccept);
            deny: (regex search: inputToReject) ]

RxParserTest>>testCodePointu {tests} · ct 10/28/2021 04:46
+ testCodePointu
+     "Exercises the \uXXXX and \u{...} code point atoms: standalone, followed by an ordinary character, and inside character sets and ranges. Escapes with too few or non-hex digits, and ranges with \d as an endpoint, are expected to raise RegexSyntaxError."
+ 
+     | string |
+     string := String value: 16r1f388.
+     self assert: [string matchesRegex: '\u{1f388}'].
+     self assert: ['A' matchesRegex: '\u0041'].
+     self assert: ['Aa' matchesRegex: '\u0041a'].
+     self assert: ['m' matchesRegex: '\u006D'].
+     self assert: ['m' matchesRegex: '\u006d'].
+     self should: ['\u004' asRegex] raise: RegexSyntaxError.
+     self should: ['\u0g41' asRegex] raise: RegexSyntaxError.
+     
+     "NOTE(review): ar101 is presumably read as a radix literal (hex digit a = radix 10, and 10r101 is the code of $e) - confirm against the parser."
+     self assert: ['e' matchesRegex: '\u{ar101}'].
+     self deny: [string matchesRegex: '\u{1f387}'].
+     self deny: ['\u{1f388}' matchesRegex: '\u{1f388}'].
+     self deny: ['1f388' matchesRegex: '\u{1f388}'].
+     self deny: ['u' matchesRegex: '\u{1}'].
+     self deny: [(String value: 16r1f389) matchesRegex: '\u{1f388}'].
+     self deny: [(WideString fromByteArray: #(16r17f3 16r88)) matchesRegex: '\u{1f388}'].
+     self deny: [(WideString fromByteArray: #(16r17f3 88)) matchesRegex: '\u{1f388}'].
+     
+     "Code point atoms as character set members and as range endpoints."
+     self assert: ['m' matchesRegex: '[\u006d]'].
+     self assert: ['3' matchesRegex: '[\u0032-4]'].
+     self deny: ['0' matchesRegex: '[\u0032-4]'].
+     self assert: ['3' matchesRegex: '[2-\u0034]'].
+     self deny: ['0' matchesRegex: '[2-\u0034]'].
+     self should: ['[\u006d-\d]' asRegex] raise: RegexSyntaxError.
+     self should: ['[\d-\u006d]' asRegex] raise: RegexSyntaxError.
+     self assert: ['A' matchesRegex: '[\u006d-\u006fA]'].

RxParserTest>>testCodePointx {tests} · ct 10/28/2021 04:47
+ testCodePointx
+     "Exercises the \xHH and \x{...} code point atoms: standalone, followed by an ordinary character, and inside a character set. A \x escape with a single hex digit is expected to raise RegexSyntaxError."
+ 
+     self assert: ['8' matchesRegex: '\x38'].
+     self deny: ['8' matchesRegex: '\x39'].
+     self deny: ['9' matchesRegex: '\x38'].
+     self deny: ['&' matchesRegex: '\x38'].
+     self deny: ['\x38' matchesRegex: '\x38'].
+     self deny: ['38' matchesRegex: '\x38'].
+     self assert: ['8a' matchesRegex: '\x38a'].
+     self should: ['\x3' asRegex] raise: RegexSyntaxError.
+     self deny: [(WideString fromByteArray: {3. 8}) matchesRegex: '\x38'].
+     self deny: [(WideString fromByteArray: {3. 38}) matchesRegex: '\x38'].
+     self deny: [(String new: 20 withAll: $x) matchesRegex: '\x20'].
+     
+     self assert: ['8' matchesRegex: '\x{38}'].
+     "Build the U+038A input from its code point instead of relying on a literal non-Latin-1 character in the source text; a literal question mark (16r3F) can never match 16r38A."
+     self assert: [(String value: 16r38a) matchesRegex: '\x{38a}'].
+     "2r111000 = 56 = 16r38, the code of $8."
+     self assert: ['8' matchesRegex: '\x{2r111000}'].
+     self deny: ['8' matchesRegex: '\x{39}'].
+     self deny: ['9' matchesRegex: '\x{38}'].
+     self deny: ['\x{38}' matchesRegex: '\x{38}'].
+     
+     self assert: ['8a' matchesRegex: '[\x38a]+'].

RxParserTest>>testRegexSyntaxErrorPosition {tests} · ct 10/28/2021 03:14
+ testRegexSyntaxErrorPosition
+     "Checks the value answered by #position on the RegexSyntaxError raised for several malformed patterns, including errors inside \x{} escapes nested in (negated) character sets.
+     Note: position is not reset between checks, so a pattern that raises no error leaves the previous value in place; the expected values of adjacent checks differ, so that case still fails the assertion."
+ 
+     | position |
+     ['a::z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 3 equals: position.
+     ['a[b[:space:_]y]z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 12 equals: position.
+     ['a[^][::]]z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 8 equals: position.
+     
+     "During nested parsing, the global position must be provided"
+     ['a\x{}z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 5 equals: position.
+     ['a[b\x{}y]z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 7 equals: position.
+     ['a[^b\x{}y]z' asRegex] on: RegexSyntaxError do: [:ex | position := ex position].
+     self assert: 8 equals: position.

RxParserTest>>testUnicodeCategory {tests} · ct 10/28/2021 04:47
+ testUnicodeCategory
+     "Exercises the \p{...} atom and its negation \P{...} with the categories L, Lu and Ll against an uppercase and a lowercase letter, standalone and inside plain and negated character sets."
+ 
+     self assert: ['X' matchesRegex: '\p{Lu}'].
+     self assert: ['X' matchesRegex: '\p{L}'].
+     self deny: ['X' matchesRegex: '\p{Ll}'].
+     self assert: ['x' matchesRegex: '\p{Ll}'].
+     self assert: ['x' matchesRegex: '\p{L}'].
+     self deny: ['x' matchesRegex: '\p{Lu}'].
+     
+     "Every expectation from the \p group above is inverted for \P."
+     self deny: ['X' matchesRegex: '\P{Lu}'].
+     self deny: ['X' matchesRegex: '\P{L}'].
+     self assert: ['X' matchesRegex: '\P{Ll}'].
+     self deny: ['x' matchesRegex: '\P{Ll}'].
+     self deny: ['x' matchesRegex: '\P{L}'].
+     self assert: ['x' matchesRegex: '\P{Lu}'].
+     
+     self assert: ['x' matchesRegex: '[\p{L}]'].
+     self deny: ['x' matchesRegex: '[\P{L}]'].
+     self assert: ['x' matchesRegex: '[^\P{L}]'].


---
Sent from Squeak Inbox Talk
["Regex-Tests-Core-ct.28.mcz"]
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.squeakfoundation.org/pipermail/squeak-dev/attachments/20211028/6f74c00c/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: Regex-Tests-Core-ct.28.mcz
Type: application/octet-stream
Size: 24737 bytes
Desc: not available
URL: <http://lists.squeakfoundation.org/pipermail/squeak-dev/attachments/20211028/6f74c00c/attachment.obj>


More information about the Squeak-dev mailing list