Modulo:Convert/makeunits
Questo è un modulo scritto in Lua. Le istruzioni che seguono sono contenute nella sottopagina Modulo:Convert/makeunits/man (modifica · cronologia)
Sandbox: Modulo:Convert/makeunits/sandbox (modifica · cronologia) · Sottopagine: lista · Test: Modulo:Convert/makeunits/test (modifica · cronologia · Esegui)
Questo modulo serve a preparare i dati usati dal Modulo:Convert per permettere la conversione tra unità di misura.
Uso: Inserire una delle due linee sotto (senza nient'altro) in una sandbox:
{{#invoke:convert/makeunits|makeunits}}
{{subst:#invoke:convert/makeunits|makeunits}}
L'anteprima della sandbox dovrebbe mostrae il codice che deve essere copiato e incollato in Modulo:Convert/data. Se si verifica un problema comparirà un messaggio di errore che indicherà un problema da correggere.
Per default il modulo legge la definizione da Template:Converti/Man/Conversion data. Allo scopo di test è possibile specificare che la definizione deve essere letta da un'altra pagina, specificando il titolo desiderato. Per esempio, User:Johnuniq/sandbox2:
{{#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}
{{subst:#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}
Il modulo contiene la tabella specials
che viene usata per inserire un insieme limitato di dati predefiniti che non sono al momento definiti nel wikitex in input.
Il modulo legge anche il contenuto di Modulo:Convert/text per permettere l'localizzazione delle tabelle delle unità di misura per l'uso su altre wiki.
-- This module generates the wikitext required at Module:Convert/data
-- by reading and processing the wikitext of the master list of units
-- (see conversion_data for the page title).
--
-- Script method:
-- * Read lines, ignoring everything before "== Conversions ==".
-- * Process the following lines:
-- * Find next level-3 heading like "=== Length ===".
-- * Parse each following line starting with "|"
-- (but ignore lines starting with "|-" or "|}".
-- * Split such lines into fields (delimiter "||") and trim
-- leading/trailing whitespace from each field.
-- Remove any "colspan" at front of second field (symbol).
-- * Remove thousand separators (commas) from the scale field.
-- If the scale is a number, do not change it.
-- Otherwise, it should be an expression like "5/9", in
-- which case it is replaced by the value of the expression.
-- * Remove wiki formatting '[[...]]' from the link field.
-- * Remove redundant fields from the unit to reduce size of data table.
-- * Create alternative forms of a unit such as an alias or a combination.
-- * Stop processing when encounter end of text or a line starting
-- with a level-2 heading ("==" but not "===").
-- * Repeat above for each heading listed at prepare_data().
-- * Output Lua source for the units table.
--
-- -- Output has the following form.
-- local all_units = {
-- ["unitcode"] = { -- standard format
-- name1 = "singular name", -- omitted if redundant
-- name1_us = "singular name sp=us", -- omitted if redundant
-- name2 = "plural name", -- omitted if redundant
-- name2_us = "plural name sp=us", -- omitted if redundant
-- symbol = "symbol",
-- sym_us = "symbol sp=us", -- omitted if redundant
-- usename = 1, -- omitted if empty
-- utype = "unit type", -- from level-3 heading
-- scale = 1, -- a value, if necessary from evaluating an expression
-- subdivs = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } } -- composite input; omitted if empty
-- link = "title of article for wikilink", -- omitted if empty or redundant
-- ... -- other values
-- },
-- ["unitcode"] = { -- alternative format to generate an alias
-- target = "unit code",
-- ... -- optional values to override those of target
-- },
-- ["unitcode"] = { -- alternative format to generate a "per" unit like $/acre or BTU/h
-- per = {u1, u2}, -- numbered table of unitcodes (u1 may be a currency symbol)
-- ... -- optional values
-- },
-- ["unitcode"] = { -- alternative format to generate an error message
-- shouldbe = "message that some other unit code should be used",
-- },
-- ["unitcode"] = { -- alternative format for combination outputs (like 'm ft')
-- combination = {u1, u2, ...}, -- numbered table of unitcodes
-- utype = "unit type", -- as for standard format
-- },
-- ["unitcode"] = { -- alternative format for output multiples (like 'ftin')
-- combination = {u1, u2, ...}, -- numbered table of unitcodes
-- multiple = {f1, f2, ...}, -- numbered table of integer factors
-- utype = "unit type", -- as for standard format
-- },
-- ...
-- }
local ulower = mw.ustring.lower
local usub = mw.ustring.sub
local text_code
local specials = {
-- This table is used to add extra fields when defining some units which
-- require exceptions to normal processing.
-- Each key is in the local language, while each value is fixed text.
-- However, this script should NOT be edited.
-- Instead, the translation_table in Module:Convert/text can be edited,
-- and this script will replace sections of the following with localized
-- definitions from Module:Convert/text, if given.
-- Ask for assistance at [[:en:Module talk:Convert]].
-- LATER: It would be better if this was defined in the conversion data.
utype = {
-- ["unit type in local language"] = "name_used_in_this_script"
["fuel efficiency"] = "type_fuel_efficiency",
["length"] = "type_length",
["temperature"] = "type_temperature",
["volume"] = "type_volume",
},
ucode = {
exception = {
-- ["unit code in local language"] = "name_used_in_module_convert"
["ft"] = "integer_more_precision",
["in"] = "subunit_more_precision",
["lb"] = "integer_more_precision",
},
istemperature = {
-- Common temperature scales (not keVT or MK).
-- ["unit code in local language"] = 1
["C"] = true,
["F"] = true,
["K"] = true,
["R"] = true,
},
usesymbol = {
-- Use unit symbol not name if abbr not specified.
-- ["unit code in local language"] = 1
["C"] = 1,
["F"] = 1,
["K"] = 1,
["R"] = 1,
["C-change"] = 1,
["F-change"] = 1,
["K-change"] = 1,
},
alttype = {
-- Unit has an alternate type that is a valid conversion.
-- ["unit code in local language"] = "alternate type in local language"
["Nm"] = "energy",
["ftlb"] = "torque",
["ftlb-f"] = "torque",
["ftlbf"] = "torque",
["inlb"] = "torque",
["inlb-f"] = "torque",
["inlbf"] = "torque",
["inoz-f"] = "torque",
["inozf"] = "torque",
},
},
}
-- Module text for the local language (localization).
-- A default table of text for enwiki is provided here.
-- If needed for another wiki, wanted sections from the table can be
-- copied into translation_table in Module:Convert/text.
-- For example, copying and modifying only the titles section may give:
--
-- local translation_table = {
-- ... -- other items
-- mtext = {
-- titles = {
-- -- name_used_in_this_script = 'Title of page'
-- conversion_data = 'Modul:Convert/documentation/conversion data/dok',
-- },
-- },
-- }
local mtext = {
section_names = {
-- name_used_in_this_script = 'Section title used in conversion data'
overrides = 'Overrides',
conversions = 'Conversions',
outmultiples = 'Output multiples',
combinations = 'Combinations',
inmultiples = 'Input multiples',
defaults = 'Defaults',
links = 'Links',
perunits = 'Automatic per units',
varnames = 'Variable names',
},
titles = {
-- name_used_in_this_script = 'Title of page'
conversion_data = 'Module:Convert/documentation/conversion data/doc',
},
messages = {
-- name_used_in_this_script = 'Error message ($1 = first parameter, $2 = second)'
m_als_bad = 'Alias has invalid text in field "$1".',
m_als_dup = 'Alias "$1" already defined.',
m_als_link = 'Alias "$1" must include a wikilink ("[[...]]") in the symlink text.',
m_als_mul = 'Alias "$1" has multiplier "$2" which is not a number.',
m_als_same = 'Should omit "$1" for alias "$2" because it is the same as its target.',
m_als_type = 'Target of alias "$1" has wrong type.',
m_als_undef = 'Primary unit must be defined before alias "=$1"',
m_cmb_miss = 'Missing unit code for a combination.',
m_cmb_none = 'No units specified for combination "$1"',
m_cmb_one = 'Only one unit specified for combination "$1"',
m_cmb_type = 'Unit "$1" in combination "$2" has wrong type.',
m_cmb_undef = 'Unit "$1" in combination "$2" not defined.',
m_cmp_def = 'Composite "$1" must specify a default unit code.',
m_cmp_int = 'Composite "$1" has components where scale ratios are not integers.',
m_cmp_inval = 'Composite "$1" has a component with an invalid scale, "$2".',
m_cmp_many = 'Composite "$1" has too many fields.',
m_cmp_miss = 'Missing unit code for a composite.',
m_cmp_order = 'Composite "$1" has components in wrong order or with invalid scales.',
m_cmp_scale = 'Alternate unit "$1" in composite "$2" has wrong scale.',
m_cmp_two = 'Composite "$1" must specify exactly two unit codes.',
m_cmp_type = 'Unit "$1" in composite "$2" has wrong type.',
m_cmp_undef = 'Unit "$1" in composite "$2" not defined.',
m_def_cond = 'Invalid condition in default "$1" for unit "$2".',
m_def_fmt = 'Default output "$1" for unit "$2" should have 2 or 3 "!".',
m_def_rpt = 'Default output "$1" for unit "$2" is repeated.',
m_def_same = 'Default output for unit "$1" is the same unit.',
m_def_type = 'Default output "$1" for unit "$2" has wrong type.',
m_def_undef = 'Default output "$1" for unit "$2" is not defined.',
m_dfs_code = 'Defaults section: no unit code specified.',
m_dfs_dup = 'Defaults section: unit "$1" has already been specified.',
m_dfs_none = 'Defaults section: unit "$1" has no default specified.',
m_dfs_sym = 'Defaults section: unit "$1" must have a symbol.',
m_dfs_two = 'Defaults section: unit "$1" should have two fields only.',
m_dfs_undef = 'Defaults section: unit "$1" is not defined.',
m_dup_code = 'Unit code "$1" has already been defined.',
m_error = 'Error:',
m_ftl_read = 'Could not read wikitext from "[[$1]]".',
m_ftl_table = '[[$1]] should export table "$2".',
m_ftl_type = 'Fatal error: unknown data type for "$1"',
m_hdg_lev2 = 'Level 2 heading "$1" not found.',
m_hdg_lev3 = 'No level 3 heading before: $1',
m_line_num = ' (line $1).',
m_lnk_brack = 'Link "$1" has wrong number of brackets.',
m_lnk_dup = 'Link exception "$1" is already defined.',
m_lnk_miss = 'Missing unit code for a link.',
m_lnk_none = 'No link defined for unit "$1".',
m_lnk_sym = 'Unit code "$1" for a link must have a symbol.',
m_lnk_two = 'Row for unit "$1" link should have two fields only.',
m_lnk_type = 'Link exception "$1" has wrong type.',
m_lnk_undef = 'Unit code "$1" for a link is not defined.',
m_miss_code = 'Missing unit code.',
m_miss_sym = 'Missing symbol.',
m_miss_type = 'Missing unit type.',
m_mul_int = 'Multiple "$1" has components where scale ratios are not integers.',
m_mul_miss = 'Missing unit code for a multiple.',
m_mul_none = 'No units specified for multiple "$1"',
m_mul_one = 'Only one unit specified for multiple "$1"',
m_mul_order = 'Multiple "$1" has components in wrong order or with invalid scales.',
m_mul_scale = 'Multiple "$1" has a component with an invalid scale, "$2".',
m_mul_std = 'Unit "$1" in multiple "$2" must be a standard unit.',
m_mul_type = 'Unit "$1" in multiple "$2" has wrong type.',
m_mul_undef = 'Unit "$1" in multiple "$2" not defined.',
m_no_title = 'Need title of page with unit definitions.',
m_ovr_dup = 'Override "$1" is already defined.',
m_ovr_miss = 'Missing unit code for an override.',
m_per_dup = 'Per unit "$1" already defined.',
m_per_empty = 'Unit "$1" has an empty field in the "per".',
m_per_fuel = 'Unit "$1" has invalid unit types for fuel efficiency.',
m_per_inv = 'Invalid field for a "per".',
m_per_two = 'Unit "$1" does not have exactly 2 fields in the "per".',
m_per_undef = 'Unit "$1" has undefined unit code "$2" in the "per".',
m_percent_s = 'Field "$1" must not contain "%s".',
m_pfx_bad = 'Unknown prefix: "$1".',
m_pfx_name = 'Unit with Prefix set must include Name.',
m_scl_bad = 'Scale expression is invalid: "$1".',
m_scl_miss = 'Missing scale.',
m_scl_oflow = 'Scale expression gives an invalid value: "$1".',
m_var_cnt = 'Variable names section: each row must have five fields.',
m_var_dup = 'Unit "$1" already has a variable name.',
m_var_miss = 'Missing field for a variable name.',
m_var_undef = 'Unit "$1" in variable names is not defined.',
m_warning = 'Warning:',
m_wrn_more = ' (and more not shown)',
m_wrn_nbsp = 'Line $1 contains a nonbreaking space.',
m_wrn_nodef = 'Units with the following unit codes have no default output.',
m_wrn_ucode = ' $1',
},
}
local function message(key, ...)
-- Return a message from the message table, which can be localized.
-- '$1', '$2', ... are replaced with the first, second, ... parameters,
-- each of which must be a string or a number.
-- The global variable is_test_run can be set by a testing program to
-- check the messages generated by this program.
local rep = {}
for i, v in ipairs({...}) do
rep['$' .. i] = v
end
key = key or '???'
local extra
if is_test_run and key ~= 'm_line_num' then
extra = key .. ': '
else
extra = ''
end
return extra .. string.gsub(mtext.messages[key] or key, '$%d+', rep)
end
local function quit(key, ...)
-- Use error() to pass an error message to the surrounding pcall().
error(message(key, ...), 0)
end
local function quit_no_message()
-- Throw an error.
-- This is used in some functions which can throw an error with a message,
-- but where the message is in fact never displayed because the calling
-- function uses pcall to catch errors, and any message is ignored.
-- Using this function documents that the message (which may be useful in
-- some other application) does not need translation as it never appears.
error('this message is not displayed', 0)
end
local function collection()
-- Return a table to hold items.
return {
n = 0,
add = function (self, item)
self.n = self.n + 1
self[self.n] = item
end,
pop = function (self, item)
if self.n > 0 then
local top = self[self.n]
self.n = self.n - 1
return top
end
end,
join = function (self, sep)
return table.concat(self, sep or '\n')
end,
}
end
local warnings = collection()
local function add_warning(key, ...)
-- Add a warning that will be inserted before the final result.
warnings:add(message(key, ...))
end
---Begin code to evaluate expressions-----------------------------------
-- This is needed because Lua's loadstring() is not available in Scribunto,
-- and each scale value can be specifed as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators: + - * / ^
local operators = {
['+'] = { precedence = 1, associativity = 1, func = function (a, b) return a + b end },
['-'] = { precedence = 1, associativity = 1, func = function (a, b) return a - b end },
['*'] = { precedence = 2, associativity = 1, func = function (a, b) return a * b end },
['/'] = { precedence = 2, associativity = 1, func = function (a, b) return a / b end },
['^'] = { precedence = 3, associativity = 2, func = function (a, b) return a ^ b end },
['('] = '(',
[')'] = ')',
}
local function tokenizer(text)
-- Function 'next' returns the next token which is one of:
-- number
-- table (operator)
-- string ('(' or ')')
-- nil (end of text)
-- If invalid, an error is thrown.
-- The number is unsigned (unary operators are not supported).
return {
pos = 1,
maxpos = #text,
text = text,
next = function(self)
if self.pos <= self.maxpos then
local p1, p2, hit = self.text:find('^%s*([+%-*/^()])', self.pos)
if hit then
self.pos = p2 + 1
return operators[hit]
end
p1, p2, hit = self.text:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', self.pos)
if not hit then
p1, p2, hit = self.text:find('^%s*(%d*%.?%d*)', self.pos)
end
local value = tonumber(hit)
if value then
self.pos = p2 + 1
return value
end
quit_no_message('invalid number "' .. self.text:sub(self.pos) .. '"')
end
end
}
end
local function evaluate_tokens(tokens, inparens)
-- Return the value from evaluating tokenized expression, or throw an error.
local numstack, opstack = collection(), collection()
local function perform_ops(precedence, associativity)
while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
(opstack[opstack.n].precedence == precedence and associativity == 1)) do
local rhs = numstack:pop()
local lhs = numstack:pop()
if not (rhs and lhs) then quit_no_message('missing number') end
local op = opstack:pop()
numstack:add(op.func(lhs, rhs))
end
end
local token_last
local function set_state(token_type)
if token_last == token_type then
local missing = (token_type == 'number') and 'operator' or 'number'
quit_no_message('missing ' .. missing)
end
token_last = token_type
end
while true do
local token = tokens:next()
if type(token) == 'number' then
set_state('number')
numstack:add(token)
elseif type(token) == 'table' then
set_state('operator')
perform_ops(token.precedence, token.associativity)
opstack:add(token)
elseif token == '(' then
set_state('number')
numstack:add(evaluate_tokens(tokens, true))
elseif token == ')' then
if inparens then
break
end
quit_no_message('unbalanced parentheses')
else
break
end
end
perform_ops(0)
if numstack.n > 1 then quit_no_message('missing operator') end
if numstack.n < 1 then quit_no_message('missing number') end
return numstack:pop()
end
local function evaluate(expression)
-- Return value (a number) from evaluating expression (a string),
-- or throw an error if invalid.
-- This is not bullet proof, but it should support the expressions used.
return evaluate_tokens(tokenizer(expression))
end
---End code to evaluate expressions-------------------------------------
---Begin code adapted from Module:Convert-------------------------------
local plural_suffix = 's' -- may be changed from translation.plural_suffix below
local function shallow_copy(t)
-- Return a shallow copy of t.
-- Do not need the features and overhead of mw.clone() provided by Scribunto.
local result = {}
for k, v in pairs(t) do
result[k] = v
end
return result
end
local function split(text, delimiter)
-- Return a numbered table with fields from splitting text.
-- The delimiter is used in a regex without escaping (for example, '.' would fail).
-- Each field has any leading/trailing whitespace removed.
local t = {}
text = text .. delimiter -- to get last item
for item in text:gmatch('%s*(.-)%s*' .. delimiter) do
table.insert(t, item)
end
return t
end
local unit_mt = {
-- Metatable to get missing values for a unit that does not accept SI prefixes.
-- Warning: The boolean value 'false' is returned for any missing field
-- so __index is not called twice for the same field in a given unit.
__index = function (self, key)
local value
if key == 'name1' or key == 'sym_us' then
value = self.symbol
elseif key == 'name2' then
value = self.name1 .. plural_suffix
elseif key == 'name1_us' then
value = self.name1
if not rawget(self, 'name2_us') then
-- If name1_us is 'foot', do not make name2_us by appending plural_suffix.
self.name2_us = self.name2
end
elseif key == 'name2_us' then
local raw1_us = rawget(self, 'name1_us')
if raw1_us then
value = raw1_us .. plural_suffix
else
value = self.name2
end
elseif key == 'link' then
value = self.name1
else
value = false
end
rawset(self, key, value)
return value
end
}
local function prefixed_name(unit, name, index)
-- Return unit name with SI prefix inserted at correct position.
-- index = 1 (name1), 2 (name2), 3 (name1_us), 4 (name2_us).
-- The position is a byte (not character) index, so use Lua's sub().
local pos = rawget(unit, 'prefix_position')
if type(pos) == 'string' then
pos = tonumber(split(pos, ',')[index])
end
if pos then
return name:sub(1, pos - 1) .. unit.si_name .. name:sub(pos)
end
return unit.si_name .. name
end
local unit_prefixed_mt = {
-- Metatable to get missing values for a unit that accepts SI prefixes.
-- Before use, fields si_name, si_prefix must be defined.
-- The unit must define _symbol, _name1 and
-- may define _sym_us, _name1_us, _name2_us
-- (_sym_us, _name2_us may be defined for a language using sp=us
-- to refer to a variant unrelated to U.S. units).
__index = function (self, key)
local value
if key == 'symbol' then
value = self.si_prefix .. self._symbol
elseif key == 'sym_us' then
value = rawget(self, '_sym_us')
if value then
value = self.si_prefix .. value
else
value = self.symbol
end
elseif key == 'name1' then
value = prefixed_name(self, self._name1, 1)
elseif key == 'name2' then
value = rawget(self, '_name2')
if value then
value = prefixed_name(self, value, 2)
else
value = self.name1 .. plural_suffix
end
elseif key == 'name1_us' then
value = rawget(self, '_name1_us')
if value then
value = prefixed_name(self, value, 3)
else
value = self.name1
end
elseif key == 'name2_us' then
value = rawget(self, '_name2_us')
if value then
value = prefixed_name(self, value, 4)
elseif rawget(self, '_name1_us') then
value = self.name1_us .. plural_suffix
else
value = self.name2
end
elseif key == 'link' then
value = self.name1
else
value = false
end
rawset(self, key, value)
return value
end
}
local function lookup(units, unitcode, sp, what)
-- Return a copy of the unit if found, or return nil.
-- In this cut-down code, sp is always nil, and what is ignored.
local t = units[unitcode]
if t then
if t.shouldbe then
return nil
end
local result = shallow_copy(t)
if result.prefixes then
result.si_name = ''
result.si_prefix = ''
return setmetatable(result, unit_prefixed_mt)
end
return setmetatable(result, unit_mt)
end
local SIprefixes = text_code.SIprefixes
for plen = SIprefixes[1] or 2, 1, -1 do
-- Look for an SI prefix; should never occur with an alias.
-- Check for longer prefix first ('dam' is decametre).
-- SIprefixes[1] = prefix maximum #characters (as seen by mw.ustring.sub).
local prefix = usub(unitcode, 1, plen)
local si = SIprefixes[prefix]
if si then
local t = units[usub(unitcode, plen+1)]
if t and t.prefixes then
local result = shallow_copy(t)
if (sp == 'us' or t.sp_us) and si.name_us then
result.si_name = si.name_us
else
result.si_name = si.name
end
result.si_prefix = si.prefix or prefix
-- In this script, each scale is a string.
result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes))
result.prefixes = nil -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units)
return setmetatable(result, unit_prefixed_mt)
end
end
end
local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
if exponent then
local engscale = text_code.eng_scales[exponent]
if engscale then
local result = lookup(units, baseunit, sp, 'no_combination')
if not result then return nil end
if not (result.offset or result.builtin or result.engscale) then
result.defkey = unitcode -- key to lookup default exception
result.engscale = engscale
-- Do not set result.scale as this code is called for units where that is not set.
return result
end
end
end
return nil
end
local function evaluate_condition(value, condition)
-- Return true or false from applying a conditional expression to value,
-- or throw an error if invalid.
-- A very limited set of expressions is supported:
-- v < 9
-- v * 9 < 9
-- where
-- 'v' is replaced with value
-- 9 is any number (as defined by Lua tonumber)
-- '<' can also be '<=' or '>' or '>='
-- In addition, the following form is supported:
-- LHS and RHS
-- where
-- LHS, RHS = any of above expressions.
local function compare(value, text)
local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
if arithop == nil then
quit_no_message('Invalid default expression.')
elseif arithop == '*' then
factor = tonumber(factor)
if factor == nil then
quit_no_message('Invalid default expression.')
end
value = value * factor
end
limit = tonumber(limit)
if limit == nil then
quit_no_message('Invalid default expression.')
end
if compop == '<' then
return value < limit
elseif compop == '<=' then
return value <= limit
elseif compop == '>' then
return value > limit
elseif compop == '>=' then
return value >= limit
end
quit_no_message('Invalid default expression.') -- should not occur
end
local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
if lhs == nil then
return compare(value, condition)
end
return compare(value, lhs) and compare(value, rhs)
end
---End adapted code-----------------------------------------------------
local function strip(text)
-- Return text with no leading/trailing whitespace.
return text:match("^%s*(.-)%s*$")
end
local function empty(text)
-- Return true if text is nil or empty (assuming a string).
return text == nil or text == ''
end
-- Tables of units: k = unit code, v = unit table.
local units_index = {} -- all units: normal, alias, per, combination, or multiple
local alias_index = {} -- all aliases (to detect attempts to define more than once)
local per_index = {} -- all "per" units (to detect attempts to define more than once)
local function get_unit(ucode, utype)
-- Look up unit code in our cache of units.
-- If utype == nil, the unit should already have been defined.
-- Otherwise, ucode may represent an automatically generated combination
-- where each component must have the given utype; a dummy unit is returned.
if empty(ucode) then
return nil
end
local unit = lookup(units_index, ucode)
if unit or not utype then
return unit
end
local combo = collection()
if ucode:find('+', 1, true) then
for item in (ucode .. '+'):gmatch('%s*(.-)%s*%+') do
if item ~= '' then
combo:add(item)
end
end
elseif ucode:find('%s') then
for item in ucode:gmatch('%S+') do
combo:add(item)
end
end
if combo.n > 1 then
local result = setmetatable({ utype = utype }, {
__index = function (self, key)
error('Bug: invalid use of automatically generated unit')
end })
for _, v in ipairs(combo) do
local component = lookup(units_index, v)
if not component or component.shouldbe or component.combination then
return nil
end
if utype ~= component.utype then
result.utype = component.utype -- set wrong type which caller will detect
break
end
end
return result
end
end
local overrides = {} -- read from input for unit codes that should not be checked for a duplicate
local function insert_unique_unit(data, unit, index)
-- After inserting any required built-in data, insert the unit into the
-- data table and (if index not nil) add to index,
-- but not if the unit code is already defined.
local ucode = unit.unitcode
local known = get_unit(ucode)
if known and not overrides[ucode] then
quit('m_dup_code', ucode)
end
for item, t in pairs(specials.ucode) do
unit[item] = t[ucode]
end
if index then
index[ucode] = unit
end
table.insert(data, unit)
end
local function check_condition(condition)
-- Return true if condition appears to be valid; otherwise return false.
for _, value in ipairs({ 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }) do
local success, result = pcall(evaluate_condition, value, condition)
if not success then
return false
end
end
return true
end
local function check_default_expression(default, ucode)
-- Return a numbered table of names present in param default
-- (two names if an expression, or one name (param default) otherwise).
-- Throw an error if a problem occurs.
-- An expression uses pipe-delimited fields with 'v' representing
-- the input value for the conversion.
-- Example (suffix is optional): 'v < 120 ! small ! big ! suffix'
-- returns { 'smallsuffix', 'bigsuffix' }.
if not default:find('!', 1, true) then
return { default }
end
local t = {}
for item in (default .. '!'):gmatch('%s*(.-)%s*!') do
t[#t+1] = item -- split on '!', removing leading/trailing whitespace
end
if not (#t == 3 or #t == 4) then
quit('m_def_fmt', default, ucode)
end
local condition, default1, default2 = t[1], t[2], t[3]
if #t == 4 then
default1 = default1 .. t[4]
default2 = default2 .. t[4]
end
if not check_condition(condition) then
quit('m_def_cond', default, ucode)
end
return { default1, default2 }
end
local function check_default(default, ucode, utype, unit_table)
-- Check the given name (or expression) of a default output.
-- Normally a unit must not define itself as its default. However,
-- some units are defined merely for use in per units, and they have
-- the same ucode, utype and default.
-- Example: unit cent which cannot be converted to anything other than
-- a cent, but which can work, for example, in cent/km and cent/mi.
-- Throw an error if a problem occurs.
local done = {}
for _, default in ipairs(check_default_expression(default, ucode)) do
if done[default] then
quit('m_def_rpt', default, ucode)
end
if default == ucode and ucode ~= utype then
quit('m_def_same', ucode)
end
local default_table = get_unit(default, utype)
if not default_table then
quit('m_def_undef', default, ucode)
end
if not (utype == unit_table.utype and utype == default_table.utype) then
quit('m_def_type', default, ucode)
end
done[default] = true
end
end
local function check_all_defaults(units, maxerrors)
-- Check each default in units and warn if needed.
-- This is done after all input data has been processed.
-- Throw an error if a problem occurs.
local errors = collection()
local missing = collection() -- unitcodes with missing defaults
for _, unit in ipairs(units) do
if not unit.shouldbe and not unit.combination then
-- This is a standard unit or an alias/per (not shouldbe, combo).
-- An alias may have a default defined, but it is optional.
local default = unit.default
local ucode = unit.unitcode
if empty(default) then
if not unit.target then -- unit should have a default
missing:add(ucode)
end
else
local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
if not ok then
errors:add(msg)
if errors.n >= maxerrors then
break
end
end
end
end
end
if errors.n > 0 then
error(errors:join(), 0)
end
if missing.n > 0 then
add_warning('m_wrn_nodef')
local limit = maxerrors
for _, v in ipairs(missing) do
limit = limit - 1
if limit < 0 then
add_warning('m_wrn_more')
break
end
add_warning('m_wrn_ucode', v)
end
end
end
local function check_all_pers(units, maxerrors)
-- Check each component of each "per" unit and warn if needed.
-- In addition, add any required extra fields for some types of units.
-- This is done after all input data has been processed.
-- Throw an error if a problem occurs.
local errors = collection()
local function errmsg(key, ...)
errors:add(message(key, ...))
end
for _, unit in ipairs(units) do
local per = unit.per
if per then
local ucode = unit.unitcode
if #per ~= 2 then
errmsg('m_per_two', ucode)
else
local types = {}
for i, v in ipairs(per) do
if empty(v) then
errmsg('m_per_empty', ucode)
end
if not text_code.currency[v] then
local t = get_unit(v)
if t then
types[i] = t.utype
else
errmsg('m_per_undef', ucode, v)
end
end
end
if specials.utype[unit.utype] == 'type_fuel_efficiency' then
local expected = { type_volume = 1, type_length = 2 }
local top_type = expected[specials.utype[types[1]]]
local bot_type = expected[specials.utype[types[2]]]
if top_type and bot_type and top_type ~= bot_type then
unit.iscomplex = true
if top_type == 1 then
unit.invert = 1
else
unit.invert = -1
end
else
errmsg('m_per_fuel', ucode)
end
end
end
end
if errors.n >= maxerrors then
break
end
end
if errors.n > 0 then
error(errors:join(), 0)
end
end
local function update_units(units, composites, varnames)
-- Update some unit definitions with extra data defined in other sections.
-- This is done after all input data has been processed.
for _, unit in ipairs(units) do
local comp = composites[unit.unitcode]
if comp then
unit.subdivs = '{ ' .. table.concat(comp.subdivs, ', ') .. ' }'
end
local vn = varnames[unit.unitcode]
if vn then
unit.varname = vn
end
end
end
local function make_override(data)
-- Return a function which, when called, stores a unit code that is not to be
-- checked for a duplicate. The table is stored in data (also a table).
return function (utype, fields)
local ucode = fields[1]
if empty(ucode) then
quit('m_ovr_miss')
end
if data[ucode] then
quit('m_ovr_dup', ucode)
end
data[ucode] = true
end
end
local function make_default(data)
-- Return a function which, when called, stores a table that defines a
-- default output unit. The table is stored in data (also a table).
local defaults_index = {} -- to detect attempts to define a default twice
return function (utype, fields)
-- Store a table defining a unit.
-- This is for a unit such as 'kg' that has a default output unit
-- different from what is defined for the base unit ('g').
-- Throw an error if a problem occurs.
local ucode = fields[1]
local default = fields[2]
if empty(ucode) then
quit('m_dfs_code')
end
if empty(default) then
quit('m_dfs_none', ucode)
end
if #fields ~= 2 then
quit('m_dfs_two', ucode)
end
local unit_table = get_unit(ucode)
if not unit_table then
quit('m_dfs_undef', ucode)
end
local symbol = unit_table.defkey or unit_table.symbol
if empty(symbol) then
quit('m_dfs_sym', ucode)
end
check_default(default, ucode, utype, unit_table)
if defaults_index[ucode] then
quit('m_dfs_dup', ucode)
end
defaults_index[ucode] = default
table.insert(data, { symbol = symbol, default = default })
end
end
local function clean_link(link, name)
-- Return link, customary where:
-- link = given link after removing any '[[...]]' wiki formatting
-- and removing any leading '+' or '*' or '@';
-- customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil
-- (for extra "US" or "U.S." or "Imperial" customary units link).
-- Result has leading/trailing whitespace removed, and is nil if empty
-- or if link matches the name, if a name is specified.
-- Exception: If the link is empty and the name starts with '[[',
-- the link is stored as '' (for a unit name which is always linked).
-- If the resulting link is nil, no link field is stored, and
-- if a link is required, it will be set from the unit's name.
local original = link
if empty(link) then
return (name and name:sub(1, 2) == '[[') and '' or nil
end
local prefixes = { ['+'] = 1, ['*'] = 2, ['@'] = 3 }
local customary = prefixes[link:sub(1, 1)]
if customary then
link = strip(link:sub(2))
end
if link:sub(1, 2) == '[[' then
link = link:sub(3)
end
if link:sub(-2) == ']]' then
link = link:sub(1, -3)
end
link = strip(link)
if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
quit('m_lnk_brack', original)
end
if link == '' then
link = nil
elseif name then
local l = ulower(usub(link, 1, 1)) .. usub(link, 2)
local n = ulower(usub(name, 1, 1)) .. usub(name, 2)
if l == n then
link = nil -- link == name, ignoring case of first letter
end
end
return link, customary
end
local function make_link(data)
-- Return a function which, when called, stores a table that defines a
-- link exception. The table is stored in data (also a table).
local links_index = {} -- to detect attempts to define a link twice
return function (utype, fields)
-- Store a table defining a unit.
-- This is for a unit such as 'kg' that has a linked article
-- different from what is defined for the base unit ('g').
-- Throw an error if a problem occurs.
local ucode = fields[1]
local link = clean_link(fields[2])
if empty(ucode) then
quit('m_lnk_miss')
end
if empty(link) then
quit('m_lnk_none', ucode)
end
if #fields ~= 2 then
quit('m_lnk_two', ucode)
end
local unit_table = get_unit(ucode)
if not unit_table then
quit('m_lnk_undef', ucode)
end
if utype ~= unit_table.utype then
quit('m_lnk_type', ucode)
end
local symbol = unit_table.symbol
if empty(symbol) then
quit('m_lnk_sym', ucode)
end
if links_index[ucode] then
quit('m_lnk_dup', ucode)
end
links_index[ucode] = link
table.insert(data, { symbol = symbol, link = link })
end
end
local function clean_scale(scale)
-- Return cleaned scale as a string, after evaluating any expression.
-- It would be better to retain scale expressions like "5/9" so that
-- the expression is evaluated on the server and maintains the full
-- resolution of the server. However, there are many such expressions
-- in the table of all units, and it seems pointless to require the
-- server to evaluate all of them just to do one convert.
if empty(scale) then
quit('m_scl_miss')
end
assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
scale = string.gsub(scale, ',', '') -- remove comma separators
if tonumber(scale) then -- not an expression
return scale
end
local status, value = pcall(evaluate, scale)
if not (status and type(value) == 'number') then
quit('m_scl_bad', scale)
end
local result = string.format('%.17g', value)
if result:find('[#n]') then
-- Lua can give results like "#INF" while Scribunto gives "inf". Either is an error.
quit('m_scl_oflow', scale)
end
-- Omit redundant zeros from results like '1.2e-005'.
-- Do not bother looking for results like '1.2e+005' as none occur in practice.
local lhs, zeros, rhs = result:match('^(.-e%-)(0+)(.*)')
if zeros then
result = lhs .. rhs
end
return result
end
local function add_alias_optional_fields(unit, start, fields, target)
-- Inspect fields[i] for i = start, start+1 ..., and extract any
-- definitions appropriate for an alias or "per", and add them to unit.
-- For an alias, target is a valid unit; for a "per", target is nil.
-- Throw error if encounter an invalid entry.
for i = start, #fields do
local field = fields[i]
if not empty(field) then
local lhs, rhs = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
local good
if not empty(rhs) then
for _, item in ipairs({ 'sp', 'default', 'link', 'multiplier', 'symbol', 'symlink' }) do
if lhs == item then
if item == 'sp' then
if rhs == 'us' then
unit.sp_us = true
good = true
end
elseif item == 'link' then
local tlink
if target then
tlink = target[item]
end
local link, customary = clean_link(rhs, tlink)
if link then
unit[item] = link
end
if customary then
unit.customary = customary
end
good = true
elseif item == 'symlink' then
local pos1 = rhs:find('[[', 1, true)
local pos2 = rhs:find(']]', 1, true)
if not (pos1 and pos2 and (pos1 < pos2)) then
quit('m_als_link', unit.unitcode)
end
unit.symlink = rhs
good = true
elseif item == 'multiplier' then
if not tonumber(rhs) then
quit('m_als_mul', unit.unitcode, rhs)
end
unit[item] = rhs
good = true
else
if target and rhs == target[item] then
quit('m_als_same', item, unit.unitcode)
end
unit[item] = rhs
good = true
end
break
end
end
end
if not good then
quit('m_als_bad', field)
end
end
end
end
local function make_alias(fields, ucode, utype, symbol)
-- Return a new alias unit, or return nil if symbol is not already
-- defined as the unit code of the target unit.
-- Throw an error if invalid.
local target = get_unit(symbol)
if not target then
return nil
end
local unit = { unitcode = ucode, utype = utype, target = symbol }
add_alias_optional_fields(unit, 3, fields, target)
if alias_index[ucode] then
quit('m_als_dup', ucode)
else
alias_index[ucode] = unit
end
if target.utype ~= utype then
quit('m_als_type', ucode)
end
return unit
end
local function make_per(fields, ucode, utype, symbol)
-- Return a new "per" unit, or return nil if symbol is not of form "x/y".
-- Throw an error if invalid.
-- The top, bottom unit codes are checked later, after all units are defined.
local top, bottom = symbol:match('^(.-)/(.*)$')
if not top then
return nil
end
local unit = { unitcode = ucode, utype = utype, per = { strip(top), strip(bottom) } }
add_alias_optional_fields(unit, 3, fields)
if per_index[ucode] then
quit('m_per_dup', ucode)
else
per_index[ucode] = unit
end
return unit
end
local function make_unit(data)
-- Return a function which, when called, stores a table that defines a
-- single unit. The table is stored in data (also a table).
local fieldnames = {
-- Fields in the Conversions section are assumed to be in the following order.
'unitcode',
'symbol',
'sym_us',
'scale',
'extra',
'name1',
'name2',
'name1_us',
'name2_us',
'prefixes',
'default',
'link',
}
return function (utype, fields)
-- Store a table defining a unit.
-- Throw an error if a problem occurs.
local ucode, symbol = fields[1], fields[2]
if empty(utype) then
quit('m_miss_type')
end
if empty(ucode) then
quit('m_miss_code')
end
if empty(symbol) then
quit('m_miss_sym')
end
local prefix = symbol:sub(1, 1)
if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then
if symbol:sub(1, 2) == '==' then
prefix = symbol:sub(1, 2)
end
symbol = strip(symbol:sub(#prefix + 1)) -- omit prefix and any following whitespace
fields[2] = symbol
else
prefix = nil -- not a valid prefix
end
if prefix == '=' or prefix == '==' then
-- ucode is an alias (a fake unit code used in a convert template), or
-- defines a "per" unit like "$/acre" or "BTU/h".
-- For an alias, symbol is the unit code of the actual unit.
-- For a "per", symbol is of form "x/y" where x and y are unit codes,
-- or x is a recognized currency symbol and y is a unit code.
-- Checking that x and y are valid is deferred until all units have
-- been defined so, for example, "BTU/h" can be defined before "h".
local unit
if prefix == '=' then
unit = make_alias(fields, ucode, utype, symbol)
else
unit = make_per(fields, ucode, utype, symbol)
end
if not unit then
-- Do not define an alias in terms of another alias.
quit('m_als_undef', symbol)
end
insert_unique_unit(data, unit, units_index)
return
elseif prefix == '!' then
-- ucode may be incorrectly entered as a unit code.
-- symbol is a message saying what unit code should be used.
local unit = { unitcode = ucode, shouldbe = symbol }
insert_unique_unit(data, unit, nil)
return
end
-- Make the unit.
local unit = { utype = utype }
for i, name in ipairs(fieldnames) do
if not empty(fields[i]) then
unit[name] = fields[i]
end
end
-- Remove redundancy from unit.
if unit.sym_us == symbol then
unit.sym_us = nil
end
local prefixes = unit.prefixes
local name1, name2 = unit.name1, unit.name2
if name1 then
if name1 == symbol and not prefixes then
-- A unit which takes an SI prefix must not have a nil name because,
-- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
-- The "not prefixes" test is needed for bnwiki where the
-- watt unit has the same name and symbol.
unit.name1 = nil
end
else
name1 = symbol
end
if name2 then
if name2 == name1 .. plural_suffix then
unit.name2 = nil
end
else
name2 = name1 .. plural_suffix
end
local name1_us, name2_us = unit.name1_us, unit.name2_us
if name1_us then
if name1_us == name1 then
unit.name1_us = nil
end
end
if name2_us then
if unit.name1_us then
if name2_us == unit.name1_us .. plural_suffix then
unit.name2_us = nil
end
elseif name2_us == name2 then
unit.name2_us = nil
end
end
-- Other changes to unit.
unit.scale = clean_scale(unit.scale)
local extra = unit.extra
if not empty(extra) then
-- Set appropriate fields for a unit that needs more than a simple
-- multiplication by a ratio of unit scales to convert values.
unit.iscomplex = true
if extra == 'volume/length' then
unit.invert = 1
elseif extra == 'length/volume' then
unit.invert = -1
elseif specials.utype[utype] == 'type_temperature' then
unit.offset = extra
elseif extra == 'invert' then
unit.invert = -1
else
unit.builtin = extra
end
end
if prefix == '~' then
-- Magic code for units like "acre" where the symbol is not really a
-- symbol, and output should use the singular or plural name instead.
unit.usename = 1
elseif prefix == '*' then
-- Magic code for units like "pitch" which have a symbol that is the same as
-- another unit with entries defined in the default or link exceptions tables.
unit.defkey = ucode -- key for default exceptions
unit.linkey = ucode -- key for link exceptions
end
local name_for_link
if prefixes then
if prefixes == 'SI' then
unit.prefixes = 1
elseif prefixes == 'SI2' then
unit.prefixes = 2
elseif prefixes == 'SI3' then
unit.prefixes = 3
else
quit('m_pfx_bad', prefixes)
end
else
-- Only units which do not accept SI prefixes have name_for_link set.
-- That is because, for example, if set name_for_link = name1 for unit g,
-- then the link is "kilogram" for kg, and "yottagram" for Yg, and so on
-- for all prefixes. That might be desirable for some units, but not all.
name_for_link = name1
end
unit.link, unit.customary = clean_link(unit.link, name_for_link)
if prefixes then
-- The SI prefix is always at the start (position = 1) for symbol and sym_us.
-- However, each name (name1, name2, name1_us, name2_us) can have the SI prefix
-- at any position, and that position can be different for each name.
-- For enwiki, the only units with names where the prefix is not at the start
-- are "square metre" and "cubic metre" ("square meter" and "cubic meter" for sp=us).
-- Some other wikis want the flexibility that the prefix position can be different
-- so the position is stored as nil (if always 1), or N (an integer, if always N),
-- or a string of four comma-separated numbers such as "5,7,9,11" which means the
-- prefix position for (name1, name2, name1_us, name2_us) is (5, 7, 9, 11)
-- respectively.
local name1, name1_us = unit.name1, unit.name1_us -- after redundancy removed
if not name1 then
quit('m_pfx_name')
end
local positions = collection()
for i, k in ipairs({ 'name1', 'name2', 'name1_us', 'name2_us' }) do
local name = unit[k]
local pos
if name then
pos = name:find('%s', 1, true)
if pos then
unit[k] = name:sub(1, pos - 1) .. name:sub(pos + 2)
end
elseif i == 2 or i == 3 then
pos = positions[1]
elseif i == 4 then
pos = positions[unit.name1_us and 3 or 2]
end
positions:add(pos or 1)
end
local pos = positions[1]
for i = 2, positions.n do
if pos ~= positions[i] then
pos = '"' .. positions:join(',') .. '"'
break
end
end
if pos ~= 1 then
unit.prefix_position = pos
end
for _, name in ipairs({ 'symbol', 'sym_us', 'name1', 'name1_us', 'name2', 'name2_us' }) do
unit['_' .. name] = unit[name]
unit[name] = nil -- force call to __index metamethod so any SI prefix can be handled
end
end
for name, v in pairs(unit) do
-- Reject if a string field includes "%s" (should not occur after above).
if type(v) == 'string' and v:find('%s', 1, true) then
quit('m_percent_s', name)
end
end
insert_unique_unit(data, unit, units_index)
end
end
local function make_combination(data)
-- Return a function which, when called, stores a table that defines a
-- single combination unit. The table is stored in data (also a table).
return function (utype, fields)
-- Store a table defining a unit.
-- This is for a combination unit that specifies more than one output.
-- The target units must be defined first.
-- Throw an error if a problem occurs.
local unit = { utype = utype, combination = {} }
for i, v in ipairs(fields) do
if i == 1 then -- unitcode
if v == '' then
quit('m_cmb_miss')
end
unit.unitcode = v
elseif v == '' then
-- Ignore empty fields.
else
local target = get_unit(v)
if not target then
quit('m_cmb_undef', v, unit.unitcode)
end
if target.utype ~= utype then
quit('m_cmb_type', v, unit.unitcode)
end
table.insert(unit.combination, v)
end
end
if #unit.combination < 2 then
quit(#unit.combination == 0 and 'm_cmb_none' or 'm_cmb_one', unit.unitcode)
end
insert_unique_unit(data, unit, units_index)
end
end
local function make_perunit(data)
-- Return a function which, when called, stores a table that defines a
-- fixup for an automatic per unit. The table is stored in data (also a table).
local pertype_index = {} -- to detect attempts to define a fixup twice
return function (utype, fields)
-- Store a table to define a fixup.
-- Typos or other errors in the input are not detected!
-- Parameter utype is ignored (it is nil).
-- Throw an error if a problem occurs.
local lhs, rhs, link, multiplier
for i, v in ipairs(fields) do
if v == '' then
-- Ignore empty fields.
elseif i == 1 then
lhs = v -- like "length/time"
elseif i == 2 then
rhs = v -- like "speed"
elseif i == 3 then
link = v
elseif i == 4 then
if not tonumber(v) then
quit('m_per_inv')
end
multiplier = v
else
quit('m_per_inv')
end
end
if lhs and (rhs or link or multiplier) then
if link or multiplier then
local parts = collection()
if rhs then
parts:add('utype = "' .. rhs .. '"')
end
if link then
parts:add('link = "' .. link .. '"')
end
if multiplier then
parts:add('multiplier = ' .. multiplier)
end
rhs = '{ ' .. parts:join(', ') .. ' }'
else
rhs = '"' .. rhs .. '"'
end
if pertype_index[lhs] then
quit('m_per_dup', lhs)
end
pertype_index[lhs] = rhs
table.insert(data, { lhs = lhs, rhs = rhs })
else
quit('m_per_inv')
end
end
end
local function make_varname(data)
-- Return a function which, when called, stores a table that defines a
-- variable name for a unit. The table is stored in data (also a table).
return function (utype, fields)
-- Set or update an entry in the data table to record that a unit has a variable name.
-- This is for slwiki where a unit name depends on the value.
-- The target units must be defined first.
-- Parameter utype is ignored (it is nil).
-- Throw an error if a problem occurs.
local count = #fields
if count ~= 5 then
quit('m_var_cnt')
end
local ucode
local names = {}
for i = 1, count do
local v = fields[i]
if empty(v) then
quit('m_var_miss')
end
if i == 1 then -- unitcode
ucode = v
if not get_unit(v) then
quit('m_var_undef', v)
end
else
table.insert(names, v)
end
end
if data[ucode] then
quit('m_var_dup', ucode)
end
data[ucode] = table.concat(names, '!')
end
end
local function reversed(t)
-- Return a numbered table in reverse order.
local reversed, count = {}, #t
for i = 1, count do
reversed[i] = t[count + 1 - i]
end
return reversed
end
local function make_inputmultiple(data)
-- Return a function which, when called, stores a table that defines a
-- single composite (multiple input) unit. The table is stored in data (also a table).
return function (utype, fields)
-- Set or update an entry in the data table to record that a unit
-- accepts subdivisions to make a composite input unit like '|2|ft|6|in'.
-- The target units must be defined first.
-- Throw an error if a problem occurs.
local unitcode -- dummy code required for simplicity, but which is not used in output
local alternate_code -- an alternative unit code can be specified to replace convert input
local fixed_name -- a fixed name can be specified to replace the unit's normal symbol/name
local default_code
local ucodes, scales = {}, {}
for i, v in ipairs(fields) do
-- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name
if i == 1 then
if v == '' then
quit('m_cmp_miss')
end
unitcode = v
elseif 2 <= i and i <= 5 then
if not (i == 5 and v == '') then
local target = get_unit(v, (i == 4) and utype or nil) -- the default may be an auto combination
if not target then
quit('m_cmp_undef', v, unitcode)
end
if target.utype ~= utype then
quit('m_cmp_type', v, unitcode)
end
if i < 4 then
if not target.scale then
quit('m_mul_std', v, unitcode)
end
table.insert(ucodes, v)
table.insert(scales, target.scale)
elseif i == 4 then
default_code = v
else
if scales[#scales] ~= target.scale then
quit('m_cmp_scale', v, unitcode)
end
alternate_code = v
end
end
elseif i == 6 then
if v ~= '' then
fixed_name = v
end
else
quit('m_cmp_many', unitcode)
end
end
if #ucodes ~= 2 then
quit('m_cmp_two', unitcode)
end
if not default_code then
quit('m_cmp_def', unitcode)
end
-- Component units must be specified from most-significant to least-significant,
-- and each ratio of a pair of scales must be very close to an integer.
-- Currently, there will be exactly two scales and one ratio.
local ratios, count = {}, #scales
for i = 1, count do
local scale = tonumber(scales[i])
if scale == nil or scale <= 0 then
quit('m_cmp_inval', unitcode, scales[i])
end
scales[i] = scale
end
for i = 1, count - 1 do
local ratio = scales[i] / scales[i + 1]
local rounded = math.floor(ratio + 0.5)
if rounded < 2 then
quit('m_cmp_order', unitcode)
end
if math.abs(ratio - rounded)/ratio > 1e-6 then
quit('m_cmp_int', unitcode)
end
ratios[i] = rounded
end
local text = { tostring(ratios[1]) }
local function add_text(key, value)
table.insert(text, string.format('%s = %q', key, value))
end
if default_code then
add_text('default', default_code)
end
if alternate_code then
add_text('unit', alternate_code)
end
if fixed_name then
add_text('name', fixed_name)
end
local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
local main_code = ucodes[1]
local item = data[main_code]
if item then
table.insert(item.subdivs, subdiv)
else
data[main_code] = { subdivs = { subdiv } }
end
end
end
local function make_outputmultiple(data)
-- Return a function which, when called, stores a table that defines a
-- single multiple output unit. The table is stored in data (also a table).
return function (utype, fields)
-- Store a table defining a unit.
-- This is for a multiple unit like 'ydftin' (result in yards, feet, inches).
-- The target units must be defined first.
-- Throw an error if a problem occurs.
local unit = { utype = utype }
local ucodes, scales = {}, {}
for i, v in ipairs(fields) do
if i == 1 then -- unitcode
if v == '' then
quit('m_mul_miss')
end
unit.unitcode = v
elseif v == '' then
-- Ignore empty fields.
else
local target = get_unit(v)
if not target then
quit('m_mul_undef', v, unit.unitcode)
end
if target.utype ~= utype then
quit('m_mul_type', v, unit.unitcode)
end
if not target.scale then
quit('m_mul_std', v, unit.unitcode)
end
table.insert(ucodes, v)
table.insert(scales, target.scale)
end
end
if #ucodes < 2 then
quit(#ucodes == 0 and 'm_mul_none' or 'm_mul_one', unit.unitcode)
end
-- Component units must be specified from most-significant to least-significant
-- (so scale values will be in descending order),
-- and each ratio of a pair of scales must be very close to an integer.
-- The componenets and ratios are stored in reverse order (least significant first).
-- This script stores a unit scale as a string (might be an expression like "5/9"),
-- but scales in a multiple are handled as numbers (should never be expressions).
local ratios, count = {}, #scales
for i = 1, count do
local scale = tonumber(scales[i])
if scale == nil or scale <= 0 then
quit('m_mul_scale', unit.unitcode, scales[i])
end
scales[i] = scale
end
for i = 1, count - 1 do
local ratio = scales[i] / scales[i + 1]
local rounded = math.floor(ratio + 0.5)
if rounded < 2 then
quit('m_mul_order', unit.unitcode)
end
if math.abs(ratio - rounded)/ratio > 1e-6 then
quit('m_mul_int', unit.unitcode)
end
ratios[i] = rounded
end
unit.combination = reversed(ucodes)
unit.multiple = reversed(ratios)
insert_unique_unit(data, unit, units_index)
end
end
-- To make updating the data module easier, this script inserts a preamble
-- and a postamble so the result can be used to replace the whole page.
local data_preamble = [=[
-- Conversion data used by [[Module:Convert]] which uses mw.loadData() for
-- read-only access to this module so that it is loaded only once per page.
-- See [[:en:Template:Convert/Transwiki guide]] if copying to another wiki.
--
-- These data tables follow:
-- all_units all properties for a unit, including default output
-- default_exceptions exceptions for default output ('kg' and 'g' have different defaults)
-- link_exceptions exceptions for links ('kg' and 'g' have different links)
--
-- These tables are generated by a script which reads the wikitext of a page that
-- documents the required properties of each unit; see [[:en:Module:Convert/doc]].
]=]
local data_postamble = [=[
return {
all_units = all_units,
default_exceptions = default_exceptions,
link_exceptions = link_exceptions,
per_unit_fixups = per_unit_fixups,
}]=]
local out_unit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above). --
---------------------------------------------------------------------------
local all_units = {]]
local out_unit_suffix = [[
}
]]
local out_default_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above). --
---------------------------------------------------------------------------
local default_exceptions = {
-- Prefixed units with a default different from that of the base unit.
-- Each key item is a prefixed symbol (unitcode for engineering notation).]]
local out_default_suffix = [[
}
]]
local out_default_item = [[
["{symbol}"] = "{default}",]]
local out_link_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above). --
---------------------------------------------------------------------------
local link_exceptions = {
-- Prefixed units with a linked article different from that of the base unit.
-- Each key item is a prefixed symbol (not unitcode).]]
local out_link_suffix = [[
}
]]
local out_link_item = [[
["{symbol}"] = "{link}",]]
local out_perunit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above). --
---------------------------------------------------------------------------
local per_unit_fixups = {
-- Automatically created per units of form "x/y" may have their unit type
-- changed, for example, "length/time" is changed to "speed".
-- Other adjustments can also be specified.]]
local out_perunit_suffix = [[
}
]]
local out_perunit_item = [[
["{lhs}"] = {rhs},]]
local combination_specification = { -- pure combination like 'm ft', or a multiple like 'ftin'
'combination',
'multiple',
'utype',
}
local alias_specification = {
'target',
'symbol',
'sp_us',
'default',
'link',
'symlink',
'customary',
'multiplier',
}
local per_specification = {
'per',
'symbol',
'sp_us',
'utype',
'invert',
'iscomplex',
'default',
'link',
'symlink',
'customary',
'multiplier',
}
local shouldbe_specification = {
'shouldbe',
}
local unit_specification = {
'_name1',
'_name1_us',
'_name2',
'_name2_us',
'_symbol',
'_sym_us',
'prefix_position',
'name1',
'name1_us',
'name2',
'name2_us',
'varname',
'symbol',
'sym_us',
'usename',
'usesymbol',
'utype',
'alttype',
'builtin',
'scale',
'offset',
'invert',
'iscomplex',
'istemperature',
'exception',
'prefixes',
'default',
'subdivs',
'defkey',
'linkey',
'link',
'customary',
'sp_us',
}
local no_quotes = {
combination = true,
customary = true,
multiple = true,
multiplier = true,
offset = true,
per = true,
prefix_position = true,
scale = true,
subdivs = true,
}
local function add_unit_lines(results, unit, spec)
-- Add lines of Lua source to define a unit to the results collection.
local function add_line(line)
-- Had planned to replace sequences of spaces with 4-column tabs here
-- (because the CodeEditor now assumes the use of such tabs).
-- However, 4-column tabs are only visible when editing a module
-- with browser scripting and the CodeEditor enabled, and that is rare.
-- A module is usually viewed (with 8-column tabs), and some indents
-- would be messed up unless 8-column tabs are used. Therefore,
-- have decided to simply replace 8 spaces at start of line with a single
-- tab which reduces the size of the module, and is correct for viewing.
if line:sub(1, 8) == string.rep(' ', 8) then
line = '\t' .. line:sub(9)
end
results:add(line)
end
local first_item = ' ["' .. unit.unitcode .. '"] = {'
local last_item = ' },'
add_line(first_item)
for _, k in ipairs(spec) do
local v = unit[k]
if v then
local want_quotes = (type(v) == 'string' and not no_quotes[k])
if type(v) == 'boolean' then
v = tostring(v)
elseif type(v) == 'number' or k == 'scale' then
-- Replace results like '1e-006' with '1e-6'.
v = string.gsub(tostring(v), '(e[+-])0+([1-9].*)', '%1%2', 1)
elseif type(v) ~= 'string' then
quit('m_ftl_type', unit.unitcode)
end
local fmt = string.format('%8s%%-9s= %%%s,', '', want_quotes and 'q' or 's')
add_line(fmt:format(k, v))
end
end
add_line(last_item)
end
local function numbered_table_as_string(data, unit)
local t = {}
for _, v in ipairs(data) do
if type(v) == 'string' then
table.insert(t, '"' .. v .. '"')
elseif type(v) == 'number' then
table.insert(t, tostring(v))
else
quit('m_ftl_type', unit.unitcode)
end
end
return '{ ' .. table.concat(t, ', ') .. ' }'
end
local function extract_heading(line)
-- Return n, s where n = heading level number (nil if none), and
-- s = heading text (with leading/trailing whitespace removed).
local pattern = '^(==+)%s*(.-)%s*(==+)%s*$'
local before, heading, after = line:match(pattern)
if heading and #heading > 0 then
-- Don't bother checking if before == after.
return #before, heading
end
end
local function fields(line)
-- Return a numbered table of fields split from line.
-- Items are delimited by "||".
-- Each item has leading/trailing whitespace removed, and any encoded pipe
-- characters are decoded.
-- The second field (for symbol when processing units) is adjusted to
-- remove any "colspan" at the front of lines like:
-- "| unitcode || colspan="11" | !Text to display for an error message".
local t = {}
line = line .. "||" -- to get last field
for item in line:gmatch("%s*(.-)%s*||") do
table.insert(t, (item:gsub('|', '|')))
end
if t[2] then
local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$')
if cleaned then
t[2] = cleaned
end
end
return t
end
local function prepare_section(maker, lines, section, maxerrors, need_section, need_utype)
-- Process the first level-two section with the given section name
-- in the given table of lines of wikitext.
-- If successful, maker inserts each item into a table.
-- Otherwise, an error is thrown.
local skip = true
local errors = collection()
local utype -- unit type (from level-three heading)
local nbsp = '\194\160' -- nonbreaking space is utf-8 encoded as hex c2 a0
for linenumber, line in ipairs(lines) do
if skip then
-- Skip down to and including the starting heading.
local level, heading = extract_heading(line)
if level == 2 and heading == section then
skip = false
end
else
-- Accummulate unit definitions.
local c1 = line:sub(1, 1)
local c2 = line:sub(2, 2)
if c1 == '|' and not (c2 == '-' or c2 == '}') then
if need_utype and empty(utype) then
quit('m_hdg_lev3', line)
end
if line:find(nbsp, 1, true) then
-- For example, "acre ft" does not work if it contains nbsp.
add_warning('m_wrn_nbsp', linenumber)
end
local ok, msg = pcall(maker, utype, fields(line:sub(2)))
if not ok then
if msg:sub(-1) == '.' then msg = msg:sub(1, -2) end
errors:add(msg .. message('m_line_num', linenumber))
if errors.n >= maxerrors then
break
end
end
else
local level, heading = extract_heading(line)
if level == 3 then
utype = ulower(heading)
elseif level == 2 then
break
end
end
end
end
if skip and need_section then
quit('m_hdg_lev2', section)
end
if errors.n > 0 then
error(errors:join(), 0)
end
end
local function get_page_lines(page_title)
-- Read the wikitext of the page at the given title; split the text into
-- lines with leading and trailing space removed from each line.
-- Return a numbered table of the lines, or throw an error.
if empty(page_title) then
quit('m_no_title')
end
local t = mw.title.new(page_title)
if t then
local content = t:getContent()
if content then
if content:sub(-1) ~= '\n' then
content = content .. '\n'
end
local lines = collection()
for line in string.gmatch(content, '[\t ]*(.-)[\t\r ]*\n') do
lines:add(line)
end
return lines
end
end
quit('m_ftl_read', page_title)
end
local function prepare_data(conversion_data_title, maxerrors, is_sandbox)
-- Read the page of conversion data, and process the wikitext
-- in the sections with wanted level-two headings.
-- Return units, defaults, links (three tables).
-- Throw an error if a problem occurs.
local composites, defaults, links, units, perunits, varnames = {}, {}, {}, {}, {}, {}
local sections = {
{ 'overrides' , make_override , overrides , 0 },
{ 'conversions' , make_unit , units , 0 },
{ 'outmultiples', make_outputmultiple, units , 0 },
{ 'combinations', make_combination , units , 0 },
{ 'inmultiples' , make_inputmultiple , composites, 0 }, -- after all units defined so default will be defined
{ 'defaults' , make_default , defaults , 0 },
{ 'links' , make_link , links , 0 },
{ 'perunits' , make_perunit , perunits , 1 },
{ 'varnames' , make_varname , varnames , 1 },
}
local lines = get_page_lines(conversion_data_title)
for _, section in ipairs(sections) do
local heading = mtext.section_names[section[1]]
local maker = section[2](section[3])
local code = section[4]
local need_section, need_utype
if code == 0 and not is_sandbox then
need_section = true
end
if code == 0 then
need_utype = true
end
prepare_section(maker, lines, heading, maxerrors, need_section, need_utype)
end
check_all_defaults(units, maxerrors)
check_all_pers(units, maxerrors)
update_units(units, composites, varnames)
return units, defaults, links, perunits
end
local function _makeunits(results, data_title, text_title)
-- Read the wikitext for the conversion data.
-- Append output to given results collection, or throw error if a problem.
text_code = require(text_title)
for _, name in ipairs({ 'SIprefixes', 'eng_scales', 'currency' }) do
if type(text_code[name]) ~= 'table' then
quit('m_ftl_table', text_title, name)
end
end
local translation = text_code.translation_table
if translation then
if translation.plural_suffix then
plural_suffix = translation.plural_suffix
end
local ts = translation.specials
if ts then
if ts.utype then
specials.utype = ts.utype
end
if ts.ucode then
specials.ucode = ts.ucode
end
end
local tm = translation.mtext
if tm then
if tm.section_names then
mtext.section_names = tm.section_names
end
if tm.titles then
mtext.titles = tm.titles
end
if tm.messages then
mtext.messages = tm.messages
end
end
end
local is_sandbox
local conversion_data_title = mtext.titles.conversion_data
if data_title and conversion_data_title ~= data_title then
conversion_data_title = data_title
if is_test_run then
is_sandbox = true
data_preamble = nil
data_postamble = nil
out_unit_prefix = 'local all_units = {'
out_unit_suffix = '}'
out_default_prefix = '\nlocal default_exceptions = {'
out_default_suffix = '}'
out_default_item = '\t["{symbol}"] = "{default}",'
out_link_prefix = '\nlocal link_exceptions = {'
out_link_suffix = '}'
out_link_item = '\t["{symbol}"] = "{link}",'
out_perunit_prefix = '\nlocal per_unit_fixups = {'
out_perunit_suffix = '}'
out_perunit_item = '\t["{lhs}"] = {rhs},'
end
end
local units, defaults, links, perunits = prepare_data(conversion_data_title, 20, is_sandbox)
if data_preamble then
results:add(data_preamble)
end
results:add(out_unit_prefix)
for _, unit in ipairs(units) do
local spec
if unit.target then
spec = alias_specification
elseif unit.per then
spec = per_specification
unit.per = numbered_table_as_string(unit.per, unit)
elseif unit.shouldbe then
spec = shouldbe_specification
elseif unit.combination then
spec = combination_specification
unit.combination = numbered_table_as_string(unit.combination, unit)
if unit.multiple then
unit.multiple = numbered_table_as_string(unit.multiple, unit)
end
else
spec = unit_specification
end
add_unit_lines(results, unit, spec)
end
results:add(out_unit_suffix)
for _, t in ipairs({
{ defaults, out_default_prefix, out_default_item, out_default_suffix },
{ links , out_link_prefix , out_link_item , out_link_suffix },
{ perunits, out_perunit_prefix, out_perunit_item, out_perunit_suffix } }) do
local data, prefix, item, suffix = t[1], t[2], t[3], t[4]
if #data > 0 or not is_sandbox then
results:add(prefix)
for _, unit in ipairs(data) do
results:add((item:gsub('{([%w_]+)}', unit)))
end
results:add(suffix)
end
end
if data_postamble then
results:add(data_postamble)
end
end
local function makeunits(frame)
local args = frame.args
local results = collection()
local ok, msg = pcall(_makeunits, results, args[1], args[2] or 'Module:Convert/text')
if not ok then
results:add(message('m_error'))
results:add('')
results:add(msg)
end
local warn = ''
if warnings.n > 0 then
warn = message('m_warning') .. '\n\n' .. warnings:join() .. '\n\n'
end
-- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
-- The following renders the text as is, and preserves tab characters.
return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n'
end
return { makeunits = makeunits }