Module:Make emoji zwj table
Appearance
This module creates a new version of emoji_t
for use in Module:Citation/CS1/Configuration.
To use this module:
- locate a copy of the new version of the Unicode file emoji-zwj-sequences.txt. This file might be found in https://unicode.org/Public/emoji/VV.V/ (where
VV.V
is the new Unicode version number).
- copy the whole content of emoji-zwj-sequences.txt to your clipboard
- edit this page (the module's documentation page)
- paste your clipboard into this page overwriting any previous version of the Unicode data; do not disturb the html comment tags.
- replace the url in the
{{#invoke:}}
with the url of the new emoji-zwj-sequences.txt file
- preview this page; if nothing is wrong with the rendering, save.
- copy
emoji_t
to your clipboard and paste it over emoji_t
in Module:Citation/CS1/Configuration/sandbox (always update the live module suite from its sandboxen)
emoji_t
[edit]use this table to overwrite same-named table in Module:Citation/CS1/Configuration/sandbox
-- list of emoji that use a zwj character (U+200D) to combine with another emoji -- from: https://unicode.org/Public/emoji/16.0/emoji-zwj-sequences.txt; version: 16.0; 2024-08-14 -- table created by: [[:en:Module:Make emoji zwj table]] local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx [8596] = true, -- U+2194 ↔ left right arrow [8597] = true, -- U+2195 ↕ up down arrow [9760] = true, -- U+2620 ☠ skull and crossbones [9792] = true, -- U+2640 ♀ female sign [9794] = true, -- U+2642 ♂ male sign [9877] = true, -- U+2695 ⚕ staff of aesculapius [9878] = true, -- U+2696 ⚖ scales [9895] = true, -- U+26A7 ⚧ male with stroke and male and female sign [9992] = true, -- U+2708 ✈ airplane [10052] = true, -- U+2744 ❄ snowflake [10084] = true, -- U+2764 ❤ heavy black heart [10145] = true, -- U+27A1 ➡ black rightwards arrow [11035] = true, -- U+2B1B ⬛ black large square [127752] = true, -- U+1F308 🌈 rainbow [127787] = true, -- U+1F32B 🌫 fog [127806] = true, -- U+1F33E 🌾 ear of rice [127859] = true, -- U+1F373 🍳 cooking [127868] = true, -- U+1F37C 🍼 baby bottle [127876] = true, -- U+1F384 🎄 christmas tree [127891] = true, -- U+1F393 🎓 graduation cap [127908] = true, -- U+1F3A4 🎤 microphone [127912] = true, -- U+1F3A8 🎨 artist palette [127979] = true, -- U+1F3EB 🏫 school [127981] = true, -- U+1F3ED 🏭 factory [128102] = true, -- U+1F466 👦 boy [128103] = true, -- U+1F467 👧 girl [128104] = true, -- U+1F468 👨 man [128105] = true, -- U+1F469 👩 woman [128139] = true, -- U+1F48B 💋 kiss mark [128165] = true, -- U+1F4A5 💥 collision symbol [128168] = true, -- U+1F4A8 💨 dash symbol [128171] = true, -- U+1F4AB 💫 dizzy symbol [128187] = true, -- U+1F4BB 💻 personal computer [128188] = true, -- U+1F4BC 💼 brief case [128293] = true, -- U+1F525 🔥 fire [128295] = true, -- U+1F527 🔧 wrench [128300] = true, -- U+1F52C 🔬 microscope [128488] = true, -- U+1F5E8 🗨 left speech bubble [128640] = true, -- U+1F680 🚀 rocket [128658] = true, -- U+1F692 🚒 fire engine [129001] = true, -- 
U+1F7E9 🟩 large green square [129003] = true, -- U+1F7EB 🟫 large brown square [129309] = true, -- U+1F91D 🤝 handshake [129455] = true, -- U+1F9AF 🦯 probing cane [129456] = true, -- U+1F9B0 🦰 emoji component red hair [129457] = true, -- U+1F9B1 🦱 emoji component curly hair [129458] = true, -- U+1F9B2 🦲 emoji component bald [129459] = true, -- U+1F9B3 🦳 emoji component white hair [129466] = true, -- U+1F9BA 🦺 safety vest [129468] = true, -- U+1F9BC 🦼 motorized wheelchair [129469] = true, -- U+1F9BD 🦽 manual wheelchair [129489] = true, -- U+1F9D1 🧑 adult [129490] = true, -- U+1F9D2 🧒 child [129657] = true, -- U+1FA79 🩹 adhesive bandage [129778] = true, -- U+1FAF2 🫲 leftwards hand }
emoji_names_t
[edit]use this table to overwrite same-named table in :en:Module:Make emoji zwj table; add missing names.
local emoji_names_t = { -- keys are hex values from U+xxxx code points ['2194'] = 'left right arrow', ['2195'] = 'up down arrow', ['2620'] = 'skull and crossbones', ['2640'] = 'female sign', ['2642'] = 'male sign', ['2695'] = 'staff of aesculapius', ['2696'] = 'scales', ['26A7'] = 'male with stroke and male and female sign', ['2708'] = 'airplane', ['2744'] = 'snowflake', ['2764'] = 'heavy black heart', ['27A1'] = 'black rightwards arrow', ['2B1B'] = 'black large square', ['1F308'] = 'rainbow', ['1F32B'] = 'fog', ['1F33E'] = 'ear of rice', ['1F373'] = 'cooking', ['1F37C'] = 'baby bottle', ['1F384'] = 'christmas tree', ['1F393'] = 'graduation cap', ['1F3A4'] = 'microphone', ['1F3A8'] = 'artist palette', ['1F3EB'] = 'school', ['1F3ED'] = 'factory', ['1F466'] = 'boy', ['1F467'] = 'girl', ['1F468'] = 'man', ['1F469'] = 'woman', ['1F48B'] = 'kiss mark', ['1F4A5'] = 'collision symbol', ['1F4A8'] = 'dash symbol', ['1F4AB'] = 'dizzy symbol', ['1F4BB'] = 'personal computer', ['1F4BC'] = 'brief case', ['1F525'] = 'fire', ['1F527'] = 'wrench', ['1F52C'] = 'microscope', ['1F5E8'] = 'left speech bubble', ['1F680'] = 'rocket', ['1F692'] = 'fire engine', ['1F7E9'] = 'large green square', ['1F7EB'] = 'large brown square', ['1F91D'] = 'handshake', ['1F9AF'] = 'probing cane', ['1F9B0'] = 'emoji component red hair', ['1F9B1'] = 'emoji component curly hair', ['1F9B2'] = 'emoji component bald', ['1F9B3'] = 'emoji component white hair', ['1F9BA'] = 'safety vest', ['1F9BC'] = 'motorized wheelchair', ['1F9BD'] = 'manual wheelchair', ['1F9D1'] = 'adult', ['1F9D2'] = 'child', ['1FA79'] = 'adhesive bandage', ['1FAF2'] = 'leftwards hand', }
--[[
This module creates an associative table of emoji code points that may follow a zero-width joiner character (U+200D).
The module reads a copy of the Unicode Emoji ZWJ Sequences for UTS (typically emoji-zwj-sequences.txt found in
https://unicode.org/Public/emoji/VV.V/ where VV.V is the Unicode version number). The copy of the unicode data
file is held inside html comments in the module's /doc page. From that file, the module extracts pairs of
<zwj> <emoji code point>. The module saves each unique code point, transformed as necessary, to build a new version
of emoji_t for use in Module:Citation/CS1/Configuration.
The module takes one positional parameter:
{{#invoke:make emoji zwj table|main|<url>}}
<url> is the url that matches the Unicode data file. Alas, Lua modules cannot read external data files so <url>
is merely used to document where the data may be found.
Use of this module is documented on its /doc page
]]
require('strict');
-- Names for the emoji code points that may follow a zero-width joiner (U+200D).
-- Keys are the hex digits of the U+xxxx code point, written exactly as they appear in the Unicode data file;
-- values are the lowercase names that main() copies into the comments of the generated emoji_t and into the
-- regenerated emoji_names_t.  A code point with no entry here is emitted with an empty name.
local emoji_names_t = {															-- keys are hex values from U+xxxx code points
	['2194'] = 'left right arrow',
	['2195'] = 'up down arrow',
	['2620'] = 'skull and crossbones',
	['2640'] = 'female sign',
	['2642'] = 'male sign',
	['2695'] = 'staff of aesculapius',
	['2696'] = 'scales',
	['26A7'] = 'male with stroke and male and female sign',
	['2708'] = 'airplane',
	['2744'] = 'snowflake',
	['2764'] = 'heavy black heart',
	['27A1'] = 'black rightwards arrow',
	['2B1B'] = 'black large square',
	['1F308'] = 'rainbow',
	['1F32B'] = 'fog',
	['1F33E'] = 'ear of rice',
	['1F373'] = 'cooking',
	['1F37C'] = 'baby bottle',
	['1F384'] = 'christmas tree',
	['1F393'] = 'graduation cap',
	['1F3A4'] = 'microphone',
	['1F3A8'] = 'artist palette',
	['1F3EB'] = 'school',
	['1F3ED'] = 'factory',
	['1F466'] = 'boy',
	['1F467'] = 'girl',
	['1F468'] = 'man',
	['1F469'] = 'woman',
	['1F48B'] = 'kiss mark',
	['1F4A5'] = 'collision symbol',
	['1F4A8'] = 'dash symbol',
	['1F4AB'] = 'dizzy symbol',
	['1F4BB'] = 'personal computer',
	['1F4BC'] = 'briefcase',													-- Unicode character name is BRIEFCASE (one word)
	['1F525'] = 'fire',
	['1F527'] = 'wrench',
	['1F52C'] = 'microscope',
	['1F5E8'] = 'left speech bubble',
	['1F680'] = 'rocket',
	['1F692'] = 'fire engine',
	['1F7E9'] = 'large green square',
	['1F7EB'] = 'large brown square',
	['1F91D'] = 'handshake',
	['1F9AF'] = 'probing cane',
	['1F9B0'] = 'emoji component red hair',
	['1F9B1'] = 'emoji component curly hair',
	['1F9B2'] = 'emoji component bald',
	['1F9B3'] = 'emoji component white hair',
	['1F9BA'] = 'safety vest',
	['1F9BC'] = 'motorized wheelchair',
	['1F9BD'] = 'manual wheelchair',
	['1F9D1'] = 'adult',
	['1F9D2'] = 'child',
	['1FA79'] = 'adhesive bandage',
	['1FAF2'] = 'leftwards hand',
	}
--[[--------------------------< M A I N >----------------------------------------------------------------------
]]
-- Build the wikitext for new versions of emoji_t and emoji_names_t from the Unicode data held in the ~/doc page.
--
-- frame:   Scribunto frame object; frame.args[1] is the url of the Unicode data file.  The url is only copied
--          into the generated '-- from:' comment; an empty string is used when it is omitted.
-- returns: preprocessed wikitext holding two <pre> blocks, emoji_t (decimal keys) followed by emoji_names_t
--          (hex keys), each listing the unique code points that follow '200D ' in ascending numerical order.
-- raises:  an error when the content of the ~/doc page cannot be read.
local function main (frame)
	local this_wiki = table.concat ({':', mw.language.getContentLanguage():getCode(), ':'});	-- content-language code wrapped in colons
	local title_obj = mw.title.getCurrentTitle();
	local this_module = table.concat ({this_wiki, title_obj.nsText, ':', title_obj.baseText});	-- link target naming this module

	local doc_title_obj = title_obj;											-- assume we are viewing the ~/doc page standalone
	if not title_obj.prefixedText:match ('/doc$') then							-- when viewing the module page
		doc_title_obj = mw.title.new (table.concat ({title_obj.prefixedText, '/doc'}));	-- get a title object for the ~/doc page; may be nil
	end

	local content = doc_title_obj and doc_title_obj:getContent();				-- nil when there is no title object or no content
	if not content then
		error ('make emoji zwj table: cannot read the Unicode data from the ~/doc page', 0);	-- fail with a meaningful message instead of indexing nil
	end

	-- header values default to empty strings because a nil inside a table.concat() sequence is an error
	local url = frame.args[1] or '';											-- documents where the data may be found
	local file_date = content:match ('# *Date: *(%d%d%d%d%-%d%d%-%d%d)') or '';	-- file date of the Unicode source
	local file_version = content:match ('# *Version: *([%d%.]+)') or '';		-- version of the Unicode source

	local seen_t = {};															-- hex code points that we have already recorded
	local code_points_t = {};													-- sequence of {hex = <text from file>, dec = <number>} records
	for code_point in content:gmatch ('200D (%x+)') do							-- find each <zwj> <code point> pair
		if not seen_t[code_point] then											-- if we have not seen this <code_point> before
			seen_t[code_point] = true;											-- remember that we have now seen this <code_point>
			table.insert (code_points_t, {hex = code_point, dec = tonumber (code_point, 16)});
		end
	end
	table.sort (code_points_t, function (a, b) return a.dec < b.dec; end);		-- one ascending numerical sort serves both output tables

	local tabs_15 = string.rep ('\t', 15);										-- for six-digit keys
	local tabs_16 = string.rep ('\t', 16);										-- for keys that have fewer than six digits

	local out_t = {																-- final output goes here; begins with the emoji_t heading and table prefix
		'==<span style="font-family: monospace, monospace;">emoji_t</span>==',
		'use this table to overwrite same-named table in [[Module:Citation/CS1/Configuration/sandbox]]',
		'<pre>-- list of emoji that use a zwj character (U+200D) to combine with another emoji',
		table.concat ({'-- from: ', url, '; version: ', file_version, '; ', file_date}),
		table.concat ({'-- table created by: [[', this_module, ']]'}),
		table.concat ({'local emoji_t = {', tabs_16, '-- indexes are decimal forms of the hex values in U+xxxx'}),
		};
	local names_out_t = {};														-- lines for the updated emoji_names_t

	for _, cp in ipairs (code_points_t) do
		local name = emoji_names_t[cp.hex] or '';								-- if we have a name for this code point, use it; empty string else

		table.insert (out_t, table.concat ({									-- build an emoji_t line for this code point
			'\t[',																-- open key markup
			cp.dec,																-- <code point> in decimal
			'] = true,',														-- close key and assign it the value 'true'
			(100000 <= cp.dec) and tabs_15 or tabs_16,							-- tabs between the k/v pair and its comment
			'-- U+',															-- start the comment; prefix for the hex <code point>
			cp.hex,
			' &#x',																-- hex html entity prefix for <code point>
			cp.hex,
			'; ',																-- finish the html entity
			name,
			}));

		table.insert (names_out_t, table.concat ({								-- build an emoji_names_t line for this code point
			'\t[\'',															-- open key markup
			cp.hex,																-- <code point> in hex
			'\'] = \'',															-- close key, open quote mark
			name,
			'\',',																-- closing quote mark and terminal comma
			}));
	end

	table.insert (out_t, '\t}</pre>');											-- close emoji_t and its <pre> tag

	table.insert (out_t, '==<span style="font-family: monospace, monospace;">emoji_names_t</span>==');
	table.insert (out_t, table.concat ({'use this table to overwrite same-named table in ', this_module, '; add missing names.'}));
	table.insert (out_t, table.concat ({'\n<pre>local emoji_names_t = {', tabs_15, '-- keys are hex values from U+xxxx code points'}));
	for _, line in ipairs (names_out_t) do
		table.insert (out_t, line);
	end
	table.insert (out_t, '\t}</pre>');											-- close emoji_names_t and its <pre> tag

	return frame:preprocess (table.concat (out_t, '\n'));						-- make a big string and done
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]
local p = {};																	-- package table handed back to Scribunto
p.main = main;																	-- entry point for {{#invoke:make emoji zwj table|main|<url>}}
return p;