[Pkg] The Trunk: Network-pre.203.mcz

Wed Sep 6 17:55:15 UTC 2017

Patrick Rein uploaded a new version of Network to project The Trunk:
http://source.squeak.org/trunk/Network-pre.203.mcz

==================== Summary ====================

Name: Network-pre.203
Author: pre
Time: 6 September 2017, 7:55:04.916204 pm
UUID: a4494ca0-d9ef-1347-94b6-7719eb9a3e26
Ancestors: Network-tpr.202

Changes the mail address parser to only accept addresses in a valid string of email addresses. 
Adds logic to the mail address parser to parse out the names in front of email addresses.
Changes the tokenizer to accept non-ascii characters in email addresses which is allowed since 2012.

=============== Diff against Network-tpr.202 ===============

Item was changed:
  Object subclass: #MailAddressParser
+ 	instanceVariableNames: 'tokens addresses storeNames curAddrTokens'
- 	instanceVariableNames: 'tokens addresses curAddrTokens'
  	classVariableNames: ''
  	poolDictionaries: ''
  	category: 'Network-RFC822'!

  !MailAddressParser commentStamp: '<historical>' prior: 0!
  Parse mail addresses.  The basic syntax is:

  	addressList := MailAddressParser addressesIn: aString

  This currently only returns the bare addresses, but it could also return a list of the address "source codes".  For example, if you give it "Joe <joe at foo>, <jane>", it will currently return a list ('joe at foo' 'jane').  It would be nice to also get a list ('Joe <joe at foo>'  '<jane>').!

Item was added:
+ ----- Method: MailAddressParser class>>addressesAndNamePairsIn: (in category 'parsing') -----
+ addressesAndNamePairsIn: aString
+ 	"return a collection of the addresses and the corresponding names listed in aString"
+ 	| tokens |
+ 	tokens := MailAddressTokenizer tokensIn: aString.
+ 	^(self new initialize: tokens) grabAddressesAndNames!

Item was changed:
  ----- Method: MailAddressParser>>grabAddressWithRoute (in category 'parsing') -----
  grabAddressWithRoute
  	"grad an address of the form 'Descriptive Text <real.address at c.d.e>"

+ 	| name |
  	self startNewAddress.
+ 	
- 
  	tokens removeLast.	"remove the >"

  	"grab until we see a $<"
+ 	[ tokens isEmpty ifTrue: [self error: '<> are not matched' ].
+ 	  tokens last type = $<] 
+ 		whileFalse: [ self addToAddress ].
- 	[ 
- 		tokens isEmpty ifTrue: [
- 			self error: '<> are not matched' ].
- 		tokens last type = $<
- 	] whileFalse: [ self addToAddress ].

  	tokens removeLast.  "remove the <"
+ 	self finishAddress.
+ 	
+ 	name := self grabName.
+ 	
+ 	storeNames ifTrue: [addresses addFirst: {name . addresses removeFirst}].!
- 
- 
- 	self removePhrase.
- 
- 	self finishAddress!

Item was changed:
  ----- Method: MailAddressParser>>grabAddresses (in category 'parsing') -----
  grabAddresses
+ 	
- 	"grab all the addresses in the string"
  	| token |
  	"remove comments"
  	tokens removeAllSuchThat: [:t | t type == #Comment].
+ 	
  	"grab one address or address group each time through this loop"
+ 	[ [tokens isEmpty not and: [ tokens last type = $, ]] 
+ 		whileTrue: [ tokens removeLast ].
+ 	  tokens isEmpty ] whileFalse: [
+ 		
- 	[ 
- 		"remove commas"
- 		[
- 			tokens isEmpty not and: [ tokens last type = $, ]
- 		] whileTrue: [ tokens removeLast ].
- 
- 		"check whether any tokens are left"
- 		tokens isEmpty 
- 	] whileFalse: [
  		token := tokens last.

  		"delegate, depending on what form the address is in"
  		"the from can be determined from the last token"
+ 		token type = $> 
+ 			ifTrue: [self grabAddressWithRoute ]
+ 			ifFalse: [ (#(Atom DomainLiteral QuotedString) includes: token type)  
+ 						ifTrue: [self grabBasicAddress ]
+ 						ifFalse: [token type = $; 
+ 								ifTrue: [self grabGroupAddress ]
+ 								ifFalse: [self error: 'un-recognized address format' ]]]].

+ 	^ addresses!
- 		token type = $> ifTrue: [
- 			self grabAddressWithRoute ]
- 		ifFalse: [ 
- 			(#(Atom DomainLiteral QuotedString) includes: token type)  ifTrue: [
- 				self grabBasicAddress ]
- 		ifFalse: [
- 			token type = $; ifTrue: [
- 				self grabGroupAddress ]
- 		ifFalse: [
- 			^self error: 'un-recognized address format' ] ] ]
- 	].
- 
- 	^addresses!

Item was added:
+ ----- Method: MailAddressParser>>grabAddressesAndNames (in category 'parsing') -----
+ grabAddressesAndNames
+ 	
+ 	storeNames := true.
+ 	
+ 	self grabAddresses.
+ 	
+ 	addresses := addresses collect: [:a |
+ 		a isString 
+ 			ifTrue: [{'' . a}]
+ 			ifFalse: [a] ].
+ 
+ 	^ addresses!

Item was changed:
  ----- Method: MailAddressParser>>grabBasicAddress (in category 'parsing') -----
  grabBasicAddress
  	"grad an address of the form a.b at c.d.e"
+ 	| name |
  	self startNewAddress.
  	"grab either the domain if specified, or the domain if not"
  	self addToAddress.
  	[tokens isEmpty not and: [ tokens last type = $.] ] 
  		whileTrue: 
  			["add name-dot pairs of tokens"
  			self addToAddress.
  			(#(Atom QuotedString ) includes: tokens last type)
  				ifFalse: [self error: 'bad token in address: ' , tokens last text].
  			self addToAddress].
  	(tokens isEmpty or: [tokens last type ~= $@])
  		ifTrue: ["no domain specified"
  			self finishAddress]
  		ifFalse: 
  			["that was the domain.  check that no QuotedString's slipped in"
  			curAddrTokens do: [:tok | tok type = #QuotedString ifTrue: [self error: 'quote marks are not allowed within a domain name (' , tok text , ')']].
  			"add the @ sign"
  			self addToAddress.
  			"add the local part"
  			(#(Atom QuotedString ) includes: tokens last type)
  				ifFalse: [self error: 'invalid local part for address: ' , tokens last text].
  			self addToAddress.
  			"add word-dot pairs if there are any"
  			[tokens isEmpty not and: [tokens last type = $.]]
  				whileTrue: 
  					[self addToAddress.
  					(tokens isEmpty not and: [#(Atom QuotedString ) includes: tokens last type])
  						ifTrue: [self addToAddress]].
+ 			self finishAddress].
+ 	
+ 	name := self grabName.
+ 	storeNames ifTrue: [addresses addFirst: {name . addresses removeFirst}].!
- 			self finishAddress]!

Item was changed:
  ----- Method: MailAddressParser>>grabGroupAddress (in category 'parsing') -----
  grabGroupAddress
  	"grab an address of the form 'phrase : address, address, ..., address;'"
  	"I'm not 100% sure what this format means, so I'm just returningthe list of addresses between the : and ;   -ls  (if this sounds right to someone, feel free to remove this comment :)"

  	"remove the $; "
  	tokens removeLast.

  	"grab one address each time through this loop"
+ 	[     "remove commas"
+ 		[tokens isEmpty not and: [ tokens last type = $, ]] 
+ 			whileTrue: [ tokens removeLast ].
- 	[ 
- 		"remove commas"
- 		[
- 			tokens isEmpty not and: [ tokens last type = $, ]
- 		] whileTrue: [ tokens removeLast ].

+ 		tokens isEmpty ifTrue: ["no matching :" ^ self error: 'stray ; in address list'].
+ 		tokens last type = $:] 
+ 			whileFalse: [
+ 				
+ 				"delegate to either grabAddressWithRoute, or grabBasicAddress.  nested groups are not allowed"
+ 				tokens last type = $> 
+ 					ifTrue: [self grabAddressWithRoute ]
+ 					ifFalse: [ (#(Atom DomainLiteral QuotedString) includes: tokens last type)  
+ 								ifTrue: [self grabBasicAddress ]
+ 								ifFalse: [^ self error: 'un-recognized address format' ]]].
- 		tokens isEmpty ifTrue: [
- 			"no matching :"
- 			^self error: 'stray ; in address list'. ].

- 		tokens last type = $:
- 	] whileFalse: [
- 		"delegate to either grabAddressWithRoute, or grabBasicAddress.  nested groups are not allowed"
- 
- 		tokens last type = $> ifTrue: [
- 			self grabAddressWithRoute ]
- 		ifFalse: [ 
- 			(#(Atom DomainLiteral QuotedString) includes: tokens last type)  ifTrue: [
- 				self grabBasicAddress ]
- 		ifFalse: [
- 			^self error: 'un-recognized address format' ] ]
- 	].
- 
  	tokens removeLast.   "remove the :"

  	self removePhrase.!

Item was added:
+ ----- Method: MailAddressParser>>grabName (in category 'parsing') -----
+ grabName
+ 
+ 	| name |
+ 	name := ''.
+ 	[tokens isEmpty not and: [#(Atom QuotedString $. $@) includes: (tokens last type) ]] 
+ 		whileTrue: [ name := {tokens removeLast text copyWithoutAll: '"'. name} joinSeparatedBy: Character space ].
+ 	^ name trimBoth!

Item was added:
+ ----- Method: MailAddressParser>>initialize (in category 'private-initialization') -----
+ initialize
+ 
+ 	storeNames := false.!

Item was changed:
  ----- Method: MailAddressParser>>removePhrase (in category 'parsing') -----
  removePhrase
  	"skip most characters to the left of this"

+ 	[tokens isEmpty not and: [#(Atom QuotedString $. $@) includes: (tokens last type)]] 
+ 		whileTrue: [ tokens removeLast ].
- 	[
- 		tokens isEmpty not and: [
- 			#(Atom QuotedString $. $@) includes: (tokens last type) ]
- 	] whileTrue: [ tokens removeLast ].
  !

Item was changed:
  ----- Method: MailAddressTokenizer class>>nonAtomSet (in category 'class initialization') -----
  nonAtomSet
+ 	"(from RFC 6531)"
- 	"(from RFC 2822)"

+ 	^CSNonAtom ifNil: [ | notAtoms | 
+ 		notAtoms := CharacterSet new
- 	^CSNonAtom ifNil: [
- 		CSNonAtom := CharacterSet new
  			addAll: ($A to: $Z);
  			addAll: ($a to: $z);
  			addAll: ($0 to: $9);
  			addAll: '!!#$%&''*+-/=?^_`{|}~';
+ 			complement.
+ 		CSNonAtom := Set new.
+ 		0 to: 127 do: [:v | | c |
+ 			c := Character value: v.
+ 			(notAtoms includes: c) ifTrue: [CSNonAtom add: c]]]!
- 			complement ]!