From 52d06037d13afdc68384f1031766ac087cb5e70e Mon Sep 17 00:00:00 2001
From: hryx <codroid@gmail.com>
Date: Tue, 31 Dec 2013 04:37:04 -0800
Subject: Added utf8.encode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Here's a function that takes a Unicode code point and returns the corresponding UTF-8 encoded character bytes. Example:

    utf8.encode(0x265c) -- returns '♜'

Please feel free to edit or revert if it's not your style. It might not be relevant to Quickie, though it's a handy UTF-8 utility.
---
 utf8.lua | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/utf8.lua b/utf8.lua
index fcb5a6c..90a4ea0 100644
--- a/utf8.lua
+++ b/utf8.lua
@@ -123,6 +123,40 @@ local function reverse(s)
 	return table.concat(t)
 end
 
+-- Convert a Unicode code point to its UTF-8 byte sequence.
+-- Bit layout follows this page:
+-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
+--
+-- Arguments:
+--     code: number, the Unicode code point (e.g. 0x265c).
+--
+-- Returns:
+--     UTF-8 encoded string; '' when code is above 0x10ffff.
+--     Raises an error when code is negative.
+local function encode(code)
+	-- math.floor each quotient: Lua 5.3+ string.char rejects fractional values.
+	if code < 0 then
+		error('Code point must not be negative.')
+	elseif code <= 0x7f then
+		return string.char(code)
+	elseif code <= 0x7ff then
+		-- 110xxxxx 10xxxxxx
+		return string.char(math.floor(code / 64) + 192, code % 64 + 128)
+	elseif code <= 0xffff then
+		-- 1110xxxx 10xxxxxx 10xxxxxx
+		return string.char(math.floor(code / 4096) + 224,
+			math.floor(code % 4096 / 64) + 128,
+			code % 64 + 128)
+	elseif code <= 0x10ffff then
+		-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		return string.char(math.floor(code / 262144) + 240,
+			math.floor(code % 262144 / 4096) + 128,
+			math.floor(code % 4096 / 64) + 128,
+			code % 64 + 128)
+	end
+	return ''
+end
+
 return {
 	iter    = iter,
 	chars   = chars,
@@ -130,4 +164,5 @@ return {
 	sub     = sub,
 	split   = split,
 	reverse = reverse,
+	encode  = encode
 }
-- 
cgit v1.2.3-70-g09d2