Module:Unicode data/scripts/make
MyWikiBiz, Author Your Legacy — Friday January 03, 2025
local p = {}

local Array = require 'Module:Array'

-- Matches one data line: a hex code point, an optional "..<hex>" range end,
-- a semicolon, and a script name. The frontier pattern anchors the match to
-- the start of a line (or of the whole string).
local RANGE_PATTERN = '%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s*([%w_]+)'

--- Escape every punctuation character of `str` with "%" so that the result
-- can be embedded in a Lua pattern and match `str` literally.
-- @param str string to escape
-- @return the escaped string (only one value; gsub's count is discarded)
local function pattern_escape(str)
	return (str:gsub('%p', '%%%1'))
end

--- Fetch the wikitext of a page, raising a descriptive error on failure.
-- @param page_name full page title
-- @return page content as a string
local function get_page_content(page_name)
	local title = mw.title.new(page_name)
	if not title then
		error(('Invalid page title "%s"'):format(tostring(page_name)))
	end
	local content = title:getContent()
	if not content then
		error(('Could not get the content of "%s"'):format(tostring(page_name)))
	end
	return content
end

--- Build a map from script name to script code.
-- Reads the text between the "# Script (sc)" and "# Script_Extensions (scx)"
-- headings of the given page and collects every "sc ; <code> ; <name>" entry.
-- @param page_name title of the page holding the property value aliases
-- @return table mapping script name (underscored) to script code
function p.make_script_name_to_code(page_name)
	local property_value_aliases = get_page_content(page_name)
	local script_aliases = property_value_aliases:match(
		pattern_escape '# Script (sc)'
		.. '%s+(.-)%s+'
		.. pattern_escape '# Script_Extensions (scx)')
	if not script_aliases then
		error(('No "# Script (sc)" section found in "%s"'):format(tostring(page_name)))
	end

	local script_name_to_code = {}
	for code, name in script_aliases:gmatch 'sc%s+;%s+(%a+)%s+;%s+([%a_]+)' do
		script_name_to_code[name] = code
	end
	return script_name_to_code
end

--- Parse the data lines of `script_data` into { low, high, code } triples.
-- A line is folded into the previous triple when it names the same script
-- and starts exactly one code point after the previous line ended.
-- @param script_data text to scan
-- @param script_name_to_code lookup from script name to script code
-- @return Array of { low, high, script_code } in order of appearance
local function parse_script_ranges(script_data, script_name_to_code)
	local script_ranges = Array()
	local prev_high, prev_script_name, prev_range
	for low, high, script_name in script_data:gmatch(RANGE_PATTERN) do
		low = tonumber(low, 16)
		-- The second capture is empty when the line holds a single code point.
		high = high ~= '' and tonumber(high, 16) or low
		if prev_range and script_name == prev_script_name and low - prev_high == 1 then
			prev_range[2] = high
		else
			prev_range = { low, high, script_name_to_code[script_name] }
			script_ranges:insert(prev_range)
		end
		prev_high, prev_script_name = high, script_name
	end
	return script_ranges
end

--- Split parsed triples into single code points and true ranges.
-- Done in one pass instead of removing elements from the array in place.
-- @param script_ranges Array of { low, high, script_code }
-- @return table mapping code point to script code, for triples with low == high
-- @return Array of the remaining triples, sorted by their low end
local function split_singles(script_ranges)
	local singles = {}
	local ranges = Array()
	for _, range in ipairs(script_ranges) do
		if range[1] == range[2] then
			singles[range[1]] = range[3]
		else
			ranges:insert(range)
		end
	end
	ranges:sort(function (range1, range2)
		return range1[1] < range2[1]
	end)
	return singles, ranges
end

--- Generate the Lua source text of the script data table.
-- @param scripts_txt title of the page holding the code point/script lines
-- @param property_value_aliases_txt title of the page holding the aliases
-- @return string of Lua source with `singles`, `ranges` and `aliases` filled in
function p.make_script_data(scripts_txt, property_value_aliases_txt)
	local script_data = get_page_content(scripts_txt)
	local script_name_to_code = p.make_script_name_to_code(property_value_aliases_txt)
	-- Fail loudly if the data mentions a script that has no known code.
	setmetatable(script_name_to_code, {
		__index = function (self, k)
			error(('No code for "%s"'):format(k))
		end
	})

	local singles, script_ranges = split_singles(
		parse_script_ranges(script_data, script_name_to_code))

	local template = [[
local data = {
	singles = {
...
	},
	ranges = {
...
	},
	-- Scripts.txt gives full names; here we consider them aliases to save space.
	aliases = {
...
	},
}
]]

	local sorted_pairs = require('Module:TableTools').sortedPairs

	local printed_ranges = Array()
	for _, range in ipairs(script_ranges) do
		printed_ranges:insert(
			('\t\t{ 0x%05X, 0x%05X, "%s" },'):format(range[1], range[2], range[3]))
	end

	local printed_singles = Array()
	for codepoint, script_code in sorted_pairs(singles) do
		printed_singles:insert(('\t\t[0x%05X] = "%s",'):format(codepoint, script_code))
	end

	-- Aliases are listed in order of script code rather than of script name.
	local function by_code(name1, name2)
		return script_name_to_code[name1] < script_name_to_code[name2]
	end
	local printed_script_name_to_code = Array()
	for name, code in sorted_pairs(script_name_to_code, by_code) do
		-- Parentheses keep gsub's substitution count out of the argument list.
		printed_script_name_to_code:insert(
			('\t\t%s = "%s",'):format(code, (name:gsub('_', ' '))))
	end

	-- Fill the three "..." placeholders in order. A function replacement is
	-- used so the generated text is inserted verbatim ("%" is not special)
	-- and already-inserted text is never rescanned for placeholders.
	local sections = {
		printed_singles:concat('\n'),
		printed_ranges:concat('\n'),
		printed_script_name_to_code:concat('\n'),
	}
	local section_index = 0
	local data = template:gsub('%.%.%.', function ()
		section_index = section_index + 1
		return sections[section_index]
	end, #sections)

	return data
end

--- Wrap Lua source text in a <syntaxhighlight lang="lua"> extension tag.
-- @param lua_code source text to highlight
-- @return result of the extension tag call on the current frame
function p.highlight(lua_code)
	return mw.getCurrentFrame():extensionTag{
		name = "syntaxhighlight",
		content = lua_code,
		args = { lang = "lua" }
	}
end

--- Entry point: generate and highlight the script data from the pages under
-- User:Erutuon/Unicode.
-- @param frame frame object supplied by the caller (unused)
-- @return highlighted Lua source of the generated data table
function p.main(frame)
	local dir = 'User:Erutuon/Unicode'
	local scripts_txt = dir .. '/Scripts.txt'
	local property_value_aliases_txt = dir ..'/PropertyValueAliases.txt'

	return p.highlight(p.make_script_data(scripts_txt, property_value_aliases_txt))
end

return p