Módulo:Footnotes/anchor id list/data

Origem: Wikipédia, a enciclopédia livre.
Documentação do módulo[ver] [editar] [histórico] [purgar]

Uso[editar código-fonte]

Este é um módulo de dados do Módulo:Footnotes; ele fornece as opções de dados usadas para detetar a citação que deve ter o link.

-- copied from Module:Ref info/data - maybe overkill here?
-- for this application make lists from the redirect lists and force all template names first character uppercase

require('strict');

--[[--------------------------< C S 1 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs1 templates and their redirects.  These patterns exclude vcite-like redirects;
those redirects should be deleted because vcite is not cs1.

]]

local cs1_template_patterns = {                                                    -- lua patterns of the canonical names and redirects
    -- Each group starts with its lead name; the indented entries that follow are redirects.
    -- Entries stick to simple constructs so that each one expands to plain template names:
    -- a leading [Xx], at most one further construct, ASCII-only bracket sets, and '%-'
    -- (not '[%-]') for a mandatory hyphen.  Accented/unaccented spellings are separate entries.

    '[Cc]itar ar[Xx]iv',                                                        -- arXiv is the canonical name
        '[Cc]itar ArXiv',

    '[Cc]itar vídeo',                                                            -- canonical
        '[Cc]itar AV media', '[Cc]ite AV media', '[Cc]ita vídeo', '[Cc]itar video',
--        '[Cc]ite cd', '[Cc]ite DVD', '[Cc]ite dvd', '[Cc]ite film',
--        '[Cc]ite image', '[Cc]ite media', '[Cc]ite movie',
--        '[Cc]ite music video', '[Cc]ite radio', '[Cc]ite song',
--        '[Cc]ite video', '[Cc]ite visual', '[Cc]ite You[Tt]ube',
--        '[Cc]ita vídeo',                                                        -- non-English redirect; TODO: tally separately?

    '[Cc]itar vídeo notas',                                                        -- canonical
        '[Cc]ite AV media notes', '[Cc]itar DVD notas',
        '[Cc]itar DVD[ %-]notes', '[Cc]ite av media',

--    '[Cc]ite bio[Rr]xiv',                                                        -- bioRxiv is the canonical form
    '[Cc]itar [Ll]ivro',                                                        -- book is the canonical form
        '[Rr]ef[ %-]livro', '[Cc]ita libro',
        '[Rr]eferência a livro', '[Ll]iteratur', '[Oo]uvrage', '[Cc]itarlivro',
--        '[Cc]ite chapter', '[Cc]ite manual',
--        '[Bb]okref', '[Cc]itace monografie', '[Cc]ite book',                    -- non-English redirects; TODO: tally separately?
--        '[Cc]iteer boek', '[Oo]uvrage', '[Rr]ef%-llibre', '서적 인용',

--    '[Cc]ite citeseerx',                                                        -- canonical
    '[Cc]itar conferência',                                                        -- canonical
        '[Cc]ite conference',                                                    -- cita conferenza is non-English; TODO: tally separately?

    '[Cc]itar enciclopédia',                                                    -- cite encyclopedia is the canonical name
        '[Cc]ite enciclopaedia', '[Cc]ita enciclopedia', '[Cc]ite dictionary',
        '[Cc]ite encyclopedia', 'OEDsub',

    '[Cc]ite [Ee]pisódio',                                                        -- episode is the canonical form
        '[Cc]itar [Ee]pisódio',                                                    -- NOTE(review): added; every other lead here is 'Citar ...' and the accented name was otherwise unlisted -- TODO confirm the template exists
        '[Cc]ite episode', '[Cc]ita tv', '[Cc]itar? episodio',
        '[Cc]itation épisode',

    '[Cc]itar entrevista',                                                        -- interview group
        '[Cc]ite interview',

    '[Cc]itar periódico',                                                        -- cite journal is the canonical form
        '[Cc]ite journal', '[Cc]ite paper', '[Cc]ite document',
        '[Aa]rticle', '[Cc]ita publicación periódica',
--        '[Cc]ite ?paper', '[Vv]cite2 journal',
--        '[Cc]ita pubblicazione', '[Cc]itace periodika', '[Cc]itar jornal',        -- non-English redirects; TODO: tally separately?
--        '[Cc]itar publicació', '[Cc]ytuj pismo', '[Tt]idskriftsref',
--        'Навод из стручног часописа', '저널 인용',

    '[Cc]itar revista',                                                            -- magazine is the canonical form
        '[Cc]ite magazine', '[Cc]itar publicação', '[Cc]ite periodical',

    '[Cc]itar lista de discussão',                                                -- mailing list is the canonical form
        '[Cc]ite mailing ?list',

    '[Cc]itar mapa',                                                            -- map is the canonical form
        '[Cc]ite map',

    '[Cc]itar? jornal',                                                            -- cite news is the canonical form; the optional 'r' keeps 'Cita jornal' and adds 'Citar jornal' -- NOTE(review): TODO confirm 'Citar jornal' is the intended lead
        '[Cc]ite[ %-]?news',
        '[Cc]itar notícias?', '[Cc]itar noticias?',                                -- singular/plural, with and without the accent
        '[Cc]itenews', '[Cc]ite%-news', '[Cc]ita noticia',
        '[Cc]ita news', '[Cc]ite article', '[Rr]ef%-notícia', '[Rr]ef%-publicació',
        '[Cc]ite newspaper', '[Cc]ita novas',

    '[Cc]itar grupo de notícias',                                                -- canonical
        '[Cc]ite newsgroup',

    '[Cc]itar podcast',                                                            -- canonical
        '[Cc]ite podcast',

    '[Cc]itar comunicado de imprensa',                                            -- press release is the canonical form
        '[Cc]ite press release', '[Cc]itar comunicados de imprensa', '[Cc]ite pr',

    '[Cc]itar relatório',                                                        -- canonical
        '[Cc]ite report',

    '[Cc]itar série',                                                            -- canonical
    '[Cc]itar sinal',                                                            -- canonical

    '[Cc]itar discurso',                                                        -- canonical
        '[Cc]ite speech',

    '[Cc]ite ssrn',                                                                -- canonical
        '[Cc]ite SSRN',

    '[Cc]itar relatório técnico',                                                -- techreport is the canonical form
        '[Cc]ite techreport',

    '[Cc]itar tese',                                                            -- canonical
        '[Cc]ite thesis', '[Cc]itar dissertação', '[Cc]ite dissertation',
        '[Cc]itar monografia',                                                    -- non-English redirect; TODO: tally separately?

    '[Cc]itar [Ww]eb',                                                            -- web is the canonical form
        '[Cc]ite ?web', '[Cc]iteer web', '[Cc]ita web', '[Ll]ien web', '[Cc]itar página',
        '[Rr]ef%-web', '[Ww]ebref',
    }


--[[--------------------------< C S 2 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs2 templates and their redirects.

]]

-- lua patterns of the canonical names and redirects, one entry per line
local cs2_template_patterns = {
    '[Cc]itation',                                                                -- first entry of the group
    '[Cc]ite',
    '[Cc]itar',
    '[Cc]ite citation',
    '[Cc]ite study',
    '[Cc]ite [Tt]echnical standard',
}


--[[--------------------------< V C I T E _ T E M P L A T E _ P A T T E R N S >--------------------------------

These are patterns for Vcite-family templates and their redirects.

]]

-- Vcite-family patterns; each group lists its canonical name first, then that name's redirects.
local vcite_template_patterns = {
    -- book group
    '[Vv]citar livro',                                                            -- canonical
    '[Vv]ancite book',
    '[Vv]ancite report',
    '[Vv]cite encyclopedia',
    '[Vv]cite report',

    -- journal group
    '[Vv]citar periódico',                                                        -- canonical
    '[Cc]it journal',
    '[Cc]it paper',
    '[Vv]ancite journal',
    '[Vv]cite paper',

    -- news group
    '[Vv]citar jornal',                                                            -- canonical
    '[Vv]ancite news',

    -- web group
    '[Vv]citar web',                                                            -- canonical
    '[Vv]ancite web',
}


--[[--------------------------< H A R V C _ T E M P L A T E _ P A T T E R N S >--------------------------------

These are patterns for the harvc template and its redirects.

]]

-- harvc: the canonical name followed by its single redirect
local harvc_template_patterns = {
    '[Hh]arvc',                                                                    -- canonical
    '[Cc]itec',                                                                    -- redirect
}


--[[--------------------------< C S 1 _ L I K E _ T E M P L A T E _ P A T T E R N S >--------------------------

These are patterns for miscellaneous templates and their redirects that 'look like' cs1 templates (begin with cite ...)

Because they 'look like' cs1 templates they are handled as if they were cs1 templates.  These templates are NOT
wrapper templates; names and dates are always to be extracted from the article instantiation of the template.

]]

-- templates treated as if they were cs1; currently a single canonical name
local cs1_like_template_patterns = {'[Cc]ite LSA'};


--[[--------------------------< W I K I C I T E _ T E M P L A T E _ P A T T E R N S >--------------------------

These are patterns for the wikicite template and its redirects.

]]

-- wikicite: only the canonical name is listed
local wikicite_template_patterns = {'[Ww]ikicite'};


--[[--------------------------< A N C H O R _ T E M P L A T E _ P A T T E R N S >------------------------------

These are patterns for the anchor template and its redirects.

]]

local anchor_template_patterns = {
    '[Aa]nchors?',                                                                -- anchor is canonical form
    '[Aa]nchor for redirect',
    '[Aa]nchro',
    '[Aa]ncor',
    -- The accented first letter is spelled out in both cases instead of using a bracket set:
    -- in a byte-oriented lua pattern a set of two multi-byte UTF-8 characters is a set of their
    -- individual bytes, and ASCII-only classes such as %a do not recognise those bytes as letters
    -- (assumes the default C locale -- TODO confirm for the host environment).
    'Âncora',
    'âncora',
    }


--[[--------------------------< S F N _ W H I T E L I S T _ P A T T E R N S >----------------------------------

These are patterns for the sfn whitelist template and its redirects.

]]

-- sfn whitelist: the canonical name followed by its redirect
local sfn_whitelist_patterns = {
    '[Ss]fn whitelist',                                                            -- canonical
    '[Hh]arv whitelist',                                                        -- redirect
}


--[[--------------------------< K N O W N _ T E M P L A T E S >------------------------------------------------

These tables are created from the *_template_patterns tables.  To make these tables, entries in the source tables
are evaluated to replace lua patterns with the appropriate characters to create names for the output tables.

The first character is always uppercase.

For example:
    [Cc]ite ar[Xx]iv
becomes
    Cite arXiv
    Cite arxiv

]]

-- The exported sets: each starts empty and is keyed by template name once the patterns are converted.
local known_templates_cs12 = {}                                                    -- cs1, cs2 and cs1-like names share this set
local known_templates_vcite = {}
local known_templates_harvc = {}
local known_templates_wikicite = {}
local known_templates_anchor = {}
local known_templates_sfn_whitelist = {}

    -- Record name as a key of the set list (value true).  Names already present are left untouched.
    local function add_stripped (list, name)
        if not list[name] then
            list[name] = true;
        end
    end


    -- Expand one simplified lua pattern into every template name it stands for and store each
    -- name as a key in list.
    --
    -- The pattern is read left to right as a sequence of tokens, and the result is the cross
    -- product of the alternatives of all tokens, so any number of constructs may be combined:
    --     x       an ordinary character (one whole UTF-8 character)
    --     %x      an escaped character; stands for the literal x ('%-' is a hyphen)
    --     [...]   a set; each member (ordinary or escaped character) is one alternative
    --     ?       after any of the above makes it optional
    -- Ranges and character classes inside a set are not interpreted.
    --
    -- When the pattern begins with a set that has an ASCII uppercase member, only that member is
    -- used, so the first character of the generated names is uppercase.  A leading set with no
    -- ASCII uppercase member (for example two accented letters) keeps all of its members because
    -- byte-oriented classes cannot tell which one is the capital.
    --
    -- @param pattern  string; the simplified pattern to expand
    -- @param list     table used as a set; receives list[name] = true for every expansion
    local function pattern_convert (pattern, list)
        local utf8_char = '^[^\128-\191][\128-\191]*';                            -- a lead (non-continuation) byte plus its continuation bytes
        local length = #pattern;

        -- Read the literal character that starts at byte position pos, skipping a '%' escape.
        -- Returns the character and the position that follows it.
        local function read_char (pos)
            if '%' == pattern:sub (pos, pos) and pos < length then
                pos = pos + 1;                                                    -- the escaped character is taken literally
            end
            local char = pattern:match (utf8_char, pos) or pattern:sub (pos, pos);    -- fall back to one byte on malformed UTF-8
            return char, pos + #char;
        end

        local names = {''};                                                        -- every expansion of the tokens read so far
        local pos = 1;

        while pos <= length do
            local at_start = (1 == pos);
            local alternatives = {};
            local char;

            if '[' == pattern:sub (pos, pos) and pattern:find (']', pos + 1, true) then
                pos = pos + 1;                                                    -- step over '['
                while pos <= length and ']' ~= pattern:sub (pos, pos) do
                    char, pos = read_char (pos);
                    alternatives[#alternatives + 1] = char;
                end
                pos = pos + 1;                                                    -- step over ']'

                if at_start then                                                -- force an uppercase first character when one is offered
                    for _, member in ipairs (alternatives) do
                        if member:match ('^%u$') then
                            alternatives = {member};
                            break;
                        end
                    end
                end
                if 0 == #alternatives then                                        -- empty set: contributes nothing rather than discarding the pattern
                    alternatives[1] = '';
                end
            else
                char, pos = read_char (pos);
                alternatives[1] = char;
            end

            if '?' == pattern:sub (pos, pos) then                                -- optional token: also allow its absence
                alternatives[#alternatives + 1] = '';
                pos = pos + 1;
            end

            local expanded = {};
            for _, prefix in ipairs (names) do
                for _, alternative in ipairs (alternatives) do
                    expanded[#expanded + 1] = prefix .. alternative;
                end
            end
            names = expanded;
        end

        for _, name in ipairs (names) do
            add_stripped (list, name);
        end
    end

    do
        -- {source pattern list, destination set} pairs, processed in the order listed;
        -- the three cs1/cs2/cs1-like lists all feed the same destination set
        local conversions = {
            {cs1_template_patterns, known_templates_cs12},
            {cs2_template_patterns, known_templates_cs12},
            {cs1_like_template_patterns, known_templates_cs12},
            {vcite_template_patterns, known_templates_vcite},
            {harvc_template_patterns, known_templates_harvc},
            {wikicite_template_patterns, known_templates_wikicite},
            {anchor_template_patterns, known_templates_anchor},
            {sfn_whitelist_patterns, known_templates_sfn_whitelist},
            };

        for _, conversion in ipairs (conversions) do
            local patterns, known_templates = conversion[1], conversion[2];
            for _, pattern in ipairs (patterns) do                                -- convert each pattern and store the names in its known_templates_* set
                pattern_convert (pattern, known_templates);
            end
        end
    end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

-- the module's value: the six known_templates_* sets, each exported under its own name
return {
    known_templates_anchor = known_templates_anchor,
    known_templates_cs12 = known_templates_cs12,
    known_templates_harvc = known_templates_harvc,
    known_templates_sfn_whitelist = known_templates_sfn_whitelist,
    known_templates_vcite = known_templates_vcite,
    known_templates_wikicite = known_templates_wikicite,
}