文章目录
- 首先 UTF-8是一种可变长度的Unicode编码方式,它使用1到4个字节来表示一个字符。这种设计使得UTF-8既能表示全球所有的字符,又能保持与ASCII编码的兼容性。基本编码规则如下: 单字节字符:0xxxxxxx(ASCII字符) 双字节字符:110xxxxx 10xxxxxx 三字节字符:1110xxxx 10xxxxxx 10xxxxxx 四字节字符:11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- local function utf8_len(str) local fontSize = 20 local lenInByte = #str local count = 0 local i = 1 while true do local curByte = string.byte(str, i) if i > lenInByte then break end local byteCount = 1 if curByte > 0 and curByte < 128 then byteCount = 1 elseif curByte>=128 and curByte<224 then byteCount = 2 elseif curByte>=224 and curByte<240 then byteCount = 3 elseif curByte>=240 and curByte<=247 then byteCount = 4 else break end i = i + byteCount count = count + 1 end return count end
首先,UTF-8是一种可变长度的Unicode编码方式,它使用1到4个字节来表示一个字符。这种设计使得UTF-8既能表示全球所有的字符,又能保持与ASCII编码的兼容性。基本编码规则如下:
- 单字节字符:0xxxxxxx(ASCII字符)
- 双字节字符:110xxxxx 10xxxxxx
- 三字节字符:1110xxxx 10xxxxxx 10xxxxxx
- 四字节字符:11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
--- Count the characters in a UTF-8 encoded string.
-- Walks the string lead byte by lead byte: the value of each lead byte
-- decides how many bytes the character occupies, and the cursor skips that
-- many bytes before counting the next character.
-- Continuation bytes are not validated. Scanning stops at the first byte in
-- the range 248..255 (never a valid UTF-8 lead byte) and the number of
-- characters counted so far is returned.
-- @tparam string str UTF-8 encoded text
-- @treturn number number of characters counted
local function utf8_len(str)
  local len_in_bytes = #str
  local count = 0
  local i = 1
  while i <= len_in_bytes do
    local lead = string.byte(str, i)
    local width
    if lead < 128 then
      -- 0xxxxxxx: single byte. This includes NUL (0), which is a valid
      -- character and must not terminate the count.
      width = 1
    elseif lead < 224 then
      -- 110xxxxx: two bytes (a stray continuation byte 128..191 in lead
      -- position is also stepped over as a two-byte unit).
      width = 2
    elseif lead < 240 then
      -- 1110xxxx: three bytes
      width = 3
    elseif lead < 248 then
      -- 11110xxx: four bytes
      width = 4
    else
      -- 248..255 cannot start a UTF-8 sequence; stop counting here.
      break
    end
    i = i + width
    count = count + 1
  end
  return count
end
--- Count the characters in a UTF-8 encoded string.
-- Walks the string lead byte by lead byte: the value of each lead byte
-- decides how many bytes the character occupies, and the cursor skips that
-- many bytes before counting the next character.
-- Continuation bytes are not validated. Scanning stops at the first byte in
-- the range 248..255 (never a valid UTF-8 lead byte) and the number of
-- characters counted so far is returned.
-- @tparam string str UTF-8 encoded text
-- @treturn number number of characters counted
local function utf8_len(str)
  local len_in_bytes = #str
  local count = 0
  local i = 1
  while i <= len_in_bytes do
    local lead = string.byte(str, i)
    local width
    if lead < 128 then
      -- 0xxxxxxx: single byte. This includes NUL (0), which is a valid
      -- character and must not terminate the count.
      width = 1
    elseif lead < 224 then
      -- 110xxxxx: two bytes (a stray continuation byte 128..191 in lead
      -- position is also stepped over as a two-byte unit).
      width = 2
    elseif lead < 240 then
      -- 1110xxxx: three bytes
      width = 3
    elseif lead < 248 then
      -- 11110xxx: four bytes
      width = 4
    else
      -- 248..255 cannot start a UTF-8 sequence; stop counting here.
      break
    end
    i = i + width
    count = count + 1
  end
  return count
end
发表回复