Модуль:WikidataSelectors: различия между версиями
Материал из Vita Nostra
Перейти к навигацииПерейти к поиску
ru>Putnik (исправление метода load()) |
Smoren (обсуждение | вклад) м (1 версия импортирована) |
(нет различий)
|
Текущая версия от 00:25, 23 февраля 2024
Для документации этого модуля может быть создана страница Модуль:WikidataSelectors/doc
local i18n = {
["errors"] = {
["rank-not-valid"] = "Некорретное значение приоритета (rank)",
["cant-parse-condition"] = "Не удалось разобрать условие"
}
}
local validRanks = {
'best',
'preferred',
'normal',
'deprecated'
}
--[[
Internal function for error message
Input: key in errors table
Output: error message
]]
local function throwError( key )
error( i18n.errors[key] )
end
local p = {}
--[[
Load property and filter statements
Input: entityId, selector string
Output: filtered statements table
]]
function p.load( entityId, propertySelector )
local propertyId = mw.ustring.match( propertySelector, '^[Pp]%d+' )
if not propertyId then
return nil
end
propertyId = string.upper( propertyId )
local allStatements = {}
allStatements[ propertyId ] = mw.wikibase.getAllStatements( entityId, propertyId )
return p.filter( allStatements, propertySelector )
end
--[[
Parse selectors and filter statements
Input: statements table, selector string
Output: filtered statements table
]]
function p.filter( allClaims, propertySelector )
propertySelector = mw.text.trim( propertySelector )
-- Get property ID from selector
local propertyId = mw.ustring.match( propertySelector, '^[Pp]%d+' )
if not propertyId then
propertyId = ''
end
local initPos = #propertyId + 1
propertyId = string.upper( propertyId )
if ( not allClaims ) then
return nil
end
local allPropertyClaims = allClaims[propertyId]
if ( not allPropertyClaims ) then
return nil
end
-- Gathering rules
local rules = p.matchSelectors( propertySelector, initPos )
-- If there is no rank filter, than default rank is 'best'
local isRanked = false
for i, subRules in ipairs( rules ) do
for j, rule in ipairs( subRules ) do
if rule['type'] == 'rank' then
isRanked = true
break
end
end
end
if not isRanked then
table.insert( rules, 1, { { type = 'rank', value = 'best' } } )
end
-- Execute rules
allPropertyClaims = p.applyRules( allPropertyClaims, rules )
return allPropertyClaims
end
--[[
Match and gather selector rules
Input: string with selectors rules, start position
Output: rules table
]]
function p.matchSelectors( selectorsString, initPos )
local rules = {}
local rawRulePattern = '^%s*%[%s*[^%[%]]+%s*%]%s*'
local rulePattern = '^%s*%[%s*([^%[%]]+)%s*%]%s*$'
if not initPos then
initPos = 1
end
local rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )
while rawRule do
initPos = initPos + #rawRule
rule = mw.ustring.match( rawRule, rulePattern )
rule = mw.text.trim( rule )
local subRules = mw.text.split( rule, '%s*,%s*' )
local commands = {}
local comm
for i, subRule in ipairs( subRules ) do
local isInversed = false
if mw.ustring.match( subRule, '^!' ) then
isInversed = true
subRule = mw.ustring.match( subRule, '^!%s*(.+)$' )
end
-- p123[1]
if mw.ustring.match( subRule, '^%d+$' ) then
table.insert( commands, {
type = 'position',
value = subRule,
inversed = isInversed
} )
-- p123[rank:preferred]
elseif mw.ustring.match( subRule, '^rank%s*:%s*(%a+)$' ) then
rank = mw.ustring.match( subRule, '^rank%s*:%s*(%a+)$' )
table.insert( commands, {
type = 'rank',
value = rank,
inversed = isInversed
} )
-- p123[language:xx]
elseif mw.ustring.match( subRule, '^language%s*:%s*([%a%-]+)$' ) then
value = mw.ustring.match( subRule, '^language%s*:%s*([%a%-]+)$' )
table.insert( commands, {
type = 'language',
value = value,
inversed = isInversed
} )
-- p123[language!:xx]
elseif mw.ustring.match( subRule, '^language%s*!:%s*([%a%-]+)$' ) then
value = mw.ustring.match( subRule, '^language%s*!:%s*([%a%-]+)$' )
table.insert( commands, {
type = 'language',
value = value,
inversed = not isInversed
} )
-- p123[min]
elseif mw.ustring.match( subRule, '^min$' ) then
table.insert( commands, { type = 'value_min' } )
-- p123[max]
elseif mw.ustring.match( subRule, '^max$' ) then
table.insert( commands, { type = 'value_max' } )
-- p123[min:p456]
elseif mw.ustring.match( subRule, '^min%s*:%s*[Pp]%d+$' ) then
value = mw.ustring.match( subRule, ':%s*([Pp]%d+)$' )
table.insert( commands, {
type = 'qualifier_min',
qualifier = value
} )
-- p123[max:p456]
elseif mw.ustring.match( subRule, '^max%s*:%s*[Pp]%d+$' ) then
value = mw.ustring.match( subRule, ':%s*([Pp]%d+)$' )
table.insert( commands, {
type = 'qualifier_max',
qualifier = value
} )
-- p123[unit:q789]
elseif mw.ustring.match( subRule, '^unit%s*:%s*[^%[%],:]+$' ) then
value = mw.ustring.match( subRule, ':%s*([^%[%],:]+)$' )
table.insert( commands, {
type = 'unit',
value = value,
inversed = isInversed
} )
-- p123[unit!:q789]
elseif mw.ustring.match( subRule, '^unit%s*!:%s*[^%[%],:]+$' ) then
value = mw.ustring.match( subRule, '!:%s*([^%[%],:]+)$' )
table.insert( commands, {
type = 'unit',
value = value,
inversed = not isInversed
} )
-- p123[p456]
elseif mw.ustring.match( subRule, '^[Pp]%d+$' ) then
qualifier = mw.ustring.match( subRule, '^[Pp]%d+' )
table.insert( commands, {
type = 'qualifier',
qualifier = qualifier,
value = nil,
inversed = isInversed
} )
-- p123[p456:q789]
elseif mw.ustring.match( subRule, '^[Pp]%d+%s*:%s*[^%[%],:]+$' ) then
qualifier = mw.ustring.match( subRule, '^([Pp]%d+)%s*:?' )
value = mw.ustring.match( subRule, ':%s*([^%[%],:]+)$' )
table.insert( commands, {
type = 'qualifier',
qualifier = qualifier,
value = value,
inversed = isInversed
} )
-- p123[p456!:q789]
elseif mw.ustring.match( subRule, '^[Pp]%d+%s*!:%s*[^%[%],:]+$' ) then
qualifier = mw.ustring.match( subRule, '^([Pp]%d+)%s*!:?' )
value = mw.ustring.match( subRule, '!:%s*([^%[%],:]+)$' )
table.insert( commands, {
type = 'qualifier',
qualifier = qualifier,
value = value,
inversed = not isInversed
} )
-- p123[q456]
elseif mw.ustring.match( subRule, '^[Qq]%d+$' ) then
value = mw.ustring.match( subRule, '^[Qq]%d+' )
table.insert( commands, {
type = 'value',
value = value,
inversed = isInversed
} )
else
throwError( 'cant-parse-condition' )
end
end
if #commands then
table.insert( rules, commands )
end
rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )
end
return rules
end
--[[
Intercept statements with selector rules
Input: statements table, selector rules
Output: filtered statements table
]]
function p.applyRules( claims, rules )
for i, subRules in ipairs( rules ) do
local newClaims = {}
for j, rule in ipairs( subRules ) do
if rule['type'] == 'rank' then
table.insert( newClaims, p.filterByRank( claims, rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'language' then
table.insert( newClaims, p.filterByLanguage( claims, rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'unit' then
table.insert( newClaims, p.filterByUnit( claims, rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'position' then
table.insert( newClaims, p.filterByPosition( claims, rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'qualifier' then
table.insert( newClaims, p.filterByQualifier( claims, rule['qualifier'], rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'qualifier_min' then
table.insert( newClaims, p.filterUtterByQualifier( claims, rule['qualifier'], true ) )
elseif rule['type'] == 'qualifier_max' then
table.insert( newClaims, p.filterUtterByQualifier( claims, rule['qualifier'], false ) )
elseif rule['type'] == 'value' then
table.insert( newClaims, p.filterByValue( claims, rule['value'], rule['inversed'] ) )
elseif rule['type'] == 'value_min' then
table.insert( newClaims, p.filterUtter( claims, true ) )
elseif rule['type'] == 'value_max' then
table.insert( newClaims, p.filterUtter( claims, false ) )
end
end
claims = {}
--[[
Merge all claims
TODO: It's not good
]]
for j, newSubClaims in ipairs( newClaims ) do
for k, newClaim in ipairs( newSubClaims ) do
local isNew = true
for l, oldClaim in ipairs( claims ) do
if oldClaim['id'] == newClaim['id'] then
isNew = false
break
end
end
if isNew then
table.insert( claims, newClaim )
end
end
end
end
return claims
end
--[[
Filter statements by rank
Input: claims table, rank value, inversion
Output: filtered statements table
]]
function p.filterByRank( claims, rank, inversed )
if not inversed then
inversed = false
end
if not rank then
rank = 'best'
end
-- Check if rank value is valid
local isValidRank = false
for i, validRank in ipairs( validRanks ) do
if rank == validRank then
isValidRank = true
break
end
end
if not isValidRank then
throwError( 'rank-not-valid' )
end
-- Find the best rank
if rank == 'best' then
rank = 'normal' -- default rank (don't use deprecated even if it's no more claims)
-- If we have at least one preferred rank, mark it as best
for i, statement in pairs( claims ) do
if (statement.rank == 'preferred') then
rank = 'preferred'
break
end
end
end
local resultClaims = {};
for i, statement in pairs( claims ) do
if ( statement.rank == rank ) ~= inversed then
table.insert( resultClaims, statement )
end
end
return resultClaims
end
--[[
Filter statements by language of value
Input: claims table, language, inversion
Output: filtered statements table
]]
function p.filterByLanguage( claims, language, inversed )
if not inversed then
inversed = false
end
local resultClaims = {}
local mulStatement = {}
for i, statement in ipairs( claims ) do
isMatchLanguage = false
if statement['mainsnak']
and statement['mainsnak']['datavalue']
and statement['mainsnak']['datavalue']['value']
and statement['mainsnak']['datavalue']['value']['language'] then
if statement['mainsnak']['datavalue']['value']['language'] == language then
isMatchLanguage = true
end
if statement['mainsnak']['datavalue']['value']['language'] == 'mul' then
mulStatement = statement
end
end
if isMatchLanguage ~= inversed then
table.insert( resultClaims, statement )
end
end
if next(resultClaims) == nil and next(mulStatement) ~= nil then
-- if specific language is not found, but there is Q20923490 value
table.insert( resultClaims, mulStatement )
end
return resultClaims
end
--[[
Filter statements by unit of value
Input: claims table, unit, inversion
Output: filtered statements table
]]
function p.filterByUnit( claims, unit, inversed )
if not inversed then
inversed = false
end
unit = 'http://www.wikidata.org/entity/' .. string.upper( unit )
local resultClaims = {}
for i, statement in ipairs( claims ) do
isMatchUnit = false
if statement['mainsnak']
and statement['mainsnak']['datavalue']
and statement['mainsnak']['datavalue']['value']
and statement['mainsnak']['datavalue']['value']['unit']
and statement['mainsnak']['datavalue']['value']['unit'] == unit then
isMatchUnit = true
end
if isMatchUnit ~= inversed then
table.insert( resultClaims, statement )
break
end
end
return resultClaims
end
--[[
Filter statements by position
Input: claims table, position, inversion
Output: filtered statements table
]]
function p.filterByPosition( claims, position, inversed )
if not inversed then
inversed = false
end
local resultClaims = {};
for statementPosition, statement in ipairs( claims ) do
if ( statementPosition == tonumber( position ) ) ~= inversed then
table.insert( resultClaims, statement )
break
end
end
return resultClaims
end
--[[
Filter statements by qualifier existance or it's value
Input: claims table, ID of qualifier's property, qualifier's value, inversion
Output: filtered statements table
]]
function p.filterByQualifier( claims, qualifierId, value, inversed )
if not inversed then
inversed = false
end
qualifierId = string.upper( qualifierId )
local resultClaims = {}
for i, statement in ipairs( claims ) do
if statement['qualifiers'] and statement['qualifiers'][qualifierId] then
if value == nil then
if ( #statement['qualifiers'][qualifierId] > 0 ) ~= inversed then
table.insert( resultClaims, statement )
end
else
local isQualifierFound = false
for j, qualifier in ipairs( statement['qualifiers'][qualifierId] ) do
if qualifier['datavalue'] then
local qualifierValue = qualifier['datavalue']['value']
if qualifier['datavalue']['type'] == 'wikibase-entityid' then
qualifierValue = qualifierValue.id
value = string.upper( value )
end
if qualifierValue == value then
isQualifierFound = true
break
end
end
end
if isQualifierFound ~= inversed then
table.insert( resultClaims, statement )
end
end
elseif inversed then
table.insert( resultClaims, statement )
end
end
return resultClaims
end
--[[
Filter statements by it's values
Input: claims table, value, inversed
Output: filtered statements table
]]
function p.filterByValue( claims, value, inversed )
inversed = inversed or false
local resultClaims = {}
for i, statement in ipairs( claims ) do
local statementValue
if statement['mainsnak']
and statement['mainsnak']['datavalue']
and statement['mainsnak']['datavalue']['type']
then
statementValue = statement['mainsnak']['datavalue']['value']
if statement['mainsnak']['datavalue']['type'] == 'quantity' then
statementValue = statementValue.amount
end
if statement['mainsnak']['datavalue']['type'] == 'time' then
statementValue = statementValue.time
end
if statement['mainsnak']['datavalue']['type'] == 'wikibase-entityid' then
statementValue = statementValue.id
value = string.upper( value )
end
end
if ( statementValue == value ) ~= inversed then
table.insert( resultClaims, statement )
end
end
return resultClaims
end
--[[
Find a statement with minimum or maximum value
Input: claims table, asc, inversed
Output: filtered statements table
]]
function p.filterUtter( claims, asc, inversed )
local resultValue = nil
for i, statement in ipairs( claims ) do
local statementValue
if statement['mainsnak'] and
statement['mainsnak']['datavalue'] and
statement['mainsnak']['datavalue']['type']
then
statementValue = statement['mainsnak']['datavalue']['value']
if statement['mainsnak']['datavalue']['type'] == 'quantity' then
statementValue = statementValue.amount
end
if statement['mainsnak']['datavalue']['type'] == 'time' then
statementValue = statementValue.time
end
if statement['mainsnak']['datavalue']['type'] == 'wikibase-entityid' then
statementValue = statementValue.id
end
if not resultValue or ( statementValue < resultValue ) == asc then
resultValue = statementValue
end
end
end
mw.logObject( resultValue, 'resultValue' )
return p.filterByValue( claims, resultValue, inversed )
end
--[[
Find a statement with minimum or maximum qualifier value
Input: claims table, qualifierId, asc
Output: filtered statements table
]]
function p.filterUtterByQualifier( claims, qualifierId, asc )
qualifierId = string.upper( qualifierId )
local resultValue = nil
local resultStatement = nil
for i, statement in ipairs( claims ) do
if not statement['qualifiers'] and not statement['qualifiers'][qualifierId] then
if resultStatement == nil then
resultStatement = statement
end
else
for _, qualifier in ipairs( statement['qualifiers'][qualifierId] ) do
if qualifier['datavalue'] then
local qualifierValue = qualifier['datavalue']['value']
if qualifier['datavalue']['type'] == 'quantity' then
qualifierValue = qualifierValue.amount
end
if qualifier['datavalue']['type'] == 'time' then
qualifierValue = qualifierValue.time
end
if qualifier['datavalue']['type'] == 'wikibase-entityid' then
qualifierValue = qualifierValue.id
end
if not resultValue or ( qualifierValue < resultValue ) == asc then
resultStatement = statement
resultValue = qualifierValue
end
end
end
end
end
return { resultStatement }
end
return p