[squeak-dev] The Trunk: Multilingual-ul.209.mcz

commits at source.squeak.org commits at source.squeak.org
Sun May 3 00:15:44 UTC 2015


Levente Uzonyi uploaded a new version of Multilingual to project The Trunk:
http://source.squeak.org/trunk/Multilingual-ul.209.mcz

==================== Summary ====================

Name: Multilingual-ul.209
Author: ul
Time: 1 May 2015, 7:48:22.654 pm
UUID: 966caead-fd54-4687-9369-3580c258e5db
Ancestors: Multilingual-ul.208

Refactored unicode data parsing.
Case conversion (ToUpper and ToLower) is parsed from UnicodeData.txt instead of CaseFolding.txt.
Reinitialize both CaseFolding and CompositionMapping in the postscript.

=============== Diff against Multilingual-ul.206 ===============

Item was added:
+ ----- Method: Unicode class>>caseFoldingData (in category 'casing') -----
+ caseFoldingData
+ 	
+ 	UIManager default informUserDuring: [ :bar |
+ 		| stream |
+ 		bar value: 'Downloading CaseFolding Unicode data'.
+ 		stream := HTTPClient httpGet: 'http://www.unicode.org/Public/UNIDATA/CaseFolding.txt'.
+ 		(stream isKindOf: RWBinaryOrTextStream) ifFalse: [
+ 			^self error: 'Download failed' ].
+ 		^stream reset; contents ]!

Item was added:
+ ----- Method: Unicode class>>initializeCaseFolding (in category 'casing') -----
+ initializeCaseFolding
+ 	" self initializeCaseFolding "
+ 	
+ 	self parseCaseFoldingFrom: self caseFoldingData!

Item was removed:
- ----- Method: Unicode class>>initializeCaseMappings (in category 'casing') -----
- initializeCaseMappings
- 	"Unicode initializeCaseMappings"
- 	ToCasefold := IdentityDictionary new.
- 	ToUpper := IdentityDictionary new.
- 	ToLower := IdentityDictionary new.
- 	UIManager default informUserDuring: [:bar|
- 		| stream |
- 		bar value: 'Downloading Unicode data'.
- 		stream := HTTPClient httpGet: 'http://www.unicode.org/Public/UNIDATA/CaseFolding.txt'.
- 		(stream isKindOf: RWBinaryOrTextStream) ifFalse:[^self error: 'Download failed'].
- 		stream reset.
- 		bar value: 'Updating Case Mappings'.
- 		self parseCaseMappingFrom: stream.
- 	].!

Item was changed:
  ----- Method: Unicode class>>initializeCompositionMappings (in category 'composing') -----
  initializeCompositionMappings
+ 	" self initializeCompositionMappings "
+ 
+ 	self parseCompositionMappingFrom: self unicodeData!
- 	"Unicode initializeCompositionMappings"
- 	Compositions := IdentityDictionary new.
- 	Decompositions := IdentityDictionary new.
- 	UIManager default informUserDuring:[:bar|
- 		| stream |
- 		bar value: 'Downloading Unicode data'.
- 		stream := HTTPClient httpGet: 'http://unicode.org/Public/UNIDATA/UnicodeData.txt'.
- 		(stream isKindOf: RWBinaryOrTextStream) ifFalse:[^self error: 'Download failed'].
- 		stream reset.
- 		bar value: 'Updating Composition Mappings'.
- 		self parseCompositionMappingFrom: stream.
- 	].!

Item was added:
+ ----- Method: Unicode class>>parseCaseFoldingFrom: (in category 'casing') -----
+ parseCaseFoldingFrom: caseFoldingData
+ 	"Parse the Unicode casing mappings from the given string."
+ 
+ 	| newToCasefold |
+ 	newToCasefold := PluggableDictionary integerDictionary.
+ 
+ 	"Filter the mappings (Simple and Common) to newToCasefold."
+ 	caseFoldingData linesDo: [ :line |
+ 		| lineWithoutComment fields sourceCode destinationCode |
+ 		lineWithoutComment := line copyUpTo: $#.
+ 		fields := lineWithoutComment findTokens: '; '.
+ 		(fields size > 2 and: [ #('C' 'S') includes: (fields at: 2) ]) ifTrue: [
+ 			sourceCode := Integer readFrom: (fields at: 1) base: 16.
+ 			destinationCode := Integer readFrom: (fields at: 3) base: 16.
+ 			newToCasefold at: sourceCode put: destinationCode ] ].
+ 
+ 	"Compact and save."
+ 	ToCasefold := newToCasefold compact
+ 	!

Item was removed:
- ----- Method: Unicode class>>parseCaseMappingFrom: (in category 'casing') -----
- parseCaseMappingFrom: stream
- 	"Parse the Unicode casing mappings from the given stream.
- 	Handle only the simple mappings"
- 	"
- 		Unicode initializeCaseMappings.
- 	"
- 
- 	ToCasefold := IdentityDictionary new: 2048.
- 	ToUpper := IdentityDictionary new: 2048.
- 	ToLower := IdentityDictionary new: 2048.
- 
- 	[stream atEnd] whileFalse:[
- 		| fields line srcCode dstCode |
- 		line := stream nextLine copyUpTo: $#.
- 		fields := line withBlanksTrimmed findTokens: $;.
- 		(fields size > 2 and: [#('C' 'S') includes: (fields at: 2) withBlanksTrimmed]) ifTrue:[
- 			srcCode := Integer readFrom: (fields at: 1) withBlanksTrimmed base: 16.
- 			dstCode := Integer readFrom: (fields at: 3) withBlanksTrimmed base: 16.
- 			ToCasefold at: srcCode put: dstCode.
- 		].
- 	].
- 
- 	ToCasefold keysAndValuesDo:
- 		[:k :v |
- 		(self isUppercaseCode: k)
- 			ifTrue:
- 				["In most cases, uppercase letter are folded to lower case"
- 				ToUpper at: v put: k.
- 				ToLower at: k put: v].
- 		(self isLowercaseCode: k)
- 			ifTrue:
- 				["In a few cases, two upper case letters are folded to the same lower case.
- 				We must find an upper case letter folded to the same letter"
- 				| up |
- 				up := ToCasefold keys detect: [:e | (self isUppercaseCode: e) and: [(ToCasefold at: e) = v]] ifNone: [nil].
- 				up ifNotNil: [ToUpper at: k put: up]]].!

Item was changed:
  ----- Method: Unicode class>>parseCompositionMappingFrom: (in category 'composing') -----
+ parseCompositionMappingFrom: unicodeData
- parseCompositionMappingFrom: stream
  	"Parse the Unicode composition mappings from the given stream"
  	"
  		Unicode initializeCompositionMappings.
  	"
+ 	| newCompositions newDecompositions newToUpper newToLower toNumber |
- 	| toNumber fields codePoint decomposed baseChar compChar line |
  
  	toNumber := [:quad | quad inject: 0 into:[:sum :ch| sum * 16 + ch digitValue]].
  
+ 	newCompositions := PluggableDictionary integerDictionary.
+ 	newDecompositions := PluggableDictionary integerDictionary.
+ 	newToUpper := PluggableDictionary integerDictionary.
+ 	newToLower := PluggableDictionary integerDictionary.
- 	Compositions := IdentityDictionary new: 2048.
- 	Decompositions := IdentityDictionary new: 2048.
  
+ 	unicodeData linesDo: [ :line |
+ 		| fields |
+ 		(fields := line splitBy: ';') size > 13 ifTrue: [
+ 			| codePoint lowercaseCodePoint uppercaseCodePoint decomposed baseChar compChar |
- 	[stream atEnd] whileFalse:[
- 		line := (stream nextLine copyUpTo: $#) withBlanksTrimmed readStream.
- 		fields := Array streamContents:[:s|[line atEnd] whileFalse:[s nextPut: (line upTo: $;)]].
- 		fields size > 6 ifTrue:[
  			codePoint := toNumber value: (fields at: 1).
+ 			uppercaseCodePoint := (fields at: 13) ifEmpty: [ codePoint ] ifNotEmpty: toNumber.
+ 			codePoint = uppercaseCodePoint ifFalse: [ newToUpper at: codePoint put: uppercaseCodePoint ].
+ 			lowercaseCodePoint := (fields at: 14) ifEmpty: [ codePoint ] ifNotEmpty: toNumber.
+ 			codePoint = lowercaseCodePoint ifFalse: [ newToLower at: codePoint put: lowercaseCodePoint ].
  			decomposed := (fields at: 6) findTokens: ' '.
  			(decomposed size = 2 and:[decomposed first first ~= $<]) ifTrue:[
+ 				decomposed replace: toNumber.
- 				decomposed := decomposed collect: toNumber.
  				baseChar := decomposed first.		"base character"
  				compChar := decomposed second.	"composition character"
+ 				newDecompositions at: codePoint put: { baseChar. compChar }.
+ 				(newCompositions at: baseChar ifAbsentPut: [  PluggableDictionary integerDictionary ])
+ 					at: compChar put: codePoint ] ] ].
+ 
+ 	"Compact the new dictionaries."
+ 	newCompositions compact.
+ 	newCompositions valuesDo: [ :each | each compact ].
+ 	newDecompositions compact.
+ 	newToUpper compact.
+ 	newToLower compact.
+ 	"Save atomically."
+ 	Compositions := newCompositions.
+ 	Decompositions := newDecompositions.
+ 	ToUpper := newToUpper.
+ 	ToLower := newToLower.
- 				Decompositions at: codePoint put: (Array with: baseChar with: compChar).
- 				(Compositions at: baseChar ifAbsentPut:[IdentityDictionary new])
- 					at: compChar put: codePoint.
- 			].
- 		].
- 	].
  !

Item was added:
+ ----- Method: Unicode class>>unicodeData (in category 'composing') -----
+ unicodeData
+ 	
+ 	UIManager default informUserDuring: [ :bar |
+ 		| stream |
+ 		bar value: 'Downloading Unicode data'.
+ 		stream := HTTPClient httpGet: 'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'.
+ 		(stream isKindOf: RWBinaryOrTextStream) ifFalse: [
+ 			^self error: 'Download failed' ].
+ 		^stream reset; contents ]!

Item was changed:
+ (PackageInfo named: 'Multilingual') postscript: 'Unicode
+ 	initializeCaseFolding;
+ 	initializeCompositionMappings'!
- (PackageInfo named: 'Multilingual') postscript: '"Remove CrLfFileStream from the startupList"
- (Smalltalk classNamed: ''CrLfFileStream'') ifNotNil: [ :class |
- 	Smalltalk removeFromStartUpList: class ].
- "Ensure toLower et al work on WideStrings"
- Unicode
- 	initialize;
- 	initializeCaseMappings.
- '!



More information about the Squeak-dev mailing list