77-- Test Latin character strings with Latin collation
88-- Set size limit of data types to be the same under Basic Multilingual Plane (BMP)
99-- Characters between 1-byte (ASCII) and 3-bytes (East Asian)
10+ USE master ;
11+ DROP DATABASE IF EXISTS UnicodeDatabase;
12+ CREATE DATABASE UnicodeDatabase COLLATE LATIN1_GENERAL_100_CI_AS_SC_UTF8
13+ GO
1014
15+ USE UnicodeDatabase
16+ GO
1117DROP TABLE IF EXISTS t1;
1218CREATE TABLE t1 (c1 varchar (24 ) COLLATE Latin1_General_100_CI_AI,
1319 c2 nvarchar (8 ) COLLATE Latin1_General_100_CI_AI);
4652
4753
4854-- uh-oh data loss on the varchar example. Why?
49- -- varchar is bound to code page encoding, and these code points cannot be found in the Latin code page.
55+ -- varchar is bound to code page encoding,
56+ -- and these code points cannot be found in the Latin code page.
5057SELECT ASCII (' 敏' COLLATE Latin1_General_100_CI_AI), CHAR (63 )
5158SELECT ASCII (' 捷' COLLATE Latin1_General_100_CI_AI), CHAR (63 )
5259
152159
153160-- But the majority of my data is set to Latin (ASCII)
154161DROP TABLE IF EXISTS t4;
155- CREATE TABLE t4 (c1 varchar (110 ) COLLATE Latin1_General_100_CI_AI_SC );
162+ CREATE TABLE t4 (c1 varchar (110 ) COLLATE Latin1_General_100_CI_AI_SC_UTF8 );
156163INSERT INTO t4 VALUES (N ' MyStringThequickbrownfoxjumpsoverthelazydogIsLatinAscii敏捷的棕色狐狸跳👶👦' )
157- SELECT LEN (c1) AS [varchar UTF16 LEN],
158- DATALENGTH(c1) AS [varchar UTF16 DATALENGTH], c1
164+ SELECT LEN (c1) AS [varchar UTF8 LEN],
165+ DATALENGTH(c1) AS [varchar UTF8 DATALENGTH], c1
159166FROM t4;
160167GO
161168
164171-- Where are the savings?
165172SELECT DATALENGTH(N ' MyStringThequickbrownfoxjumpsoverthelazydogIsLatinAscii' ) AS [Latin_UTF16_2bytes],
166173 DATALENGTH(N ' 敏捷的棕色狐狸跳' ) AS [Chinese_UTF16_2bytes],
167- DATALENGTH(N ' 👶👦 ' ) AS [SC_UTF16_4bytes]
174+ DATALENGTH(N ' 👶' ) AS [SC_UTF16_4bytes]
168175SELECT DATALENGTH(' MyStringThequickbrownfoxjumpsoverthelazydogIsLatinAscii' COLLATE Latin1_General_100_CI_AI_SC_UTF8) AS [Latin_UTF8_1byte],
169176 DATALENGTH(' 敏捷的棕色狐狸跳' COLLATE Latin1_General_100_CI_AI_SC_UTF8) AS [Chinese_UTF8_3bytes],
170- DATALENGTH(' 👶👦 ' COLLATE Latin1_General_100_CI_AI_SC_UTF8) AS [SC_UTF8_4bytes]
177+ DATALENGTH(' 👶' COLLATE Latin1_General_100_CI_AI_SC_UTF8) AS [SC_UTF8_4bytes]
171178GO
0 commit comments