Модуль:WikidataSelectors

Материал из Vita Nostra
Версия от 00:25, 23 февраля 2024; Smoren (обсуждение | вклад) (1 версия импортирована)
(разн.) ← Предыдущая версия | Текущая версия (разн.) | Следующая версия → (разн.)
Перейти к навигацииПерейти к поиску

Для документации этого модуля может быть создана страница Модуль:WikidataSelectors/doc

local i18n = {
    ["errors"] = {
        ["rank-not-valid"] = "Некорретное значение приоритета (rank)",
        ["cant-parse-condition"] = "Не удалось разобрать условие"
    }
}

local validRanks = {
	'best',
	'preferred',
	'normal',
	'deprecated'
}


--[[ 
  Internal function for error message
 
  Input: key in errors table
  Output: error message
]]
local function throwError( key )
    error( i18n.errors[key] )
end


local p = {}

--[[
  Load property and filter statements
 
  Input: entityId, selector string
  Output: filtered statements table
]]
function p.load( entityId, propertySelector )
	local propertyId = mw.ustring.match( propertySelector, '^[Pp]%d+' )
	if not propertyId then
		return nil
	end
	propertyId = string.upper( propertyId )

	local allStatements = {}
	allStatements[ propertyId ] = mw.wikibase.getAllStatements( entityId, propertyId )

	return p.filter( allStatements, propertySelector )
end

--[[
  Parse selectors and filter statements
 
  Input: statements table, selector string
  Output: filtered statements table
]]
function p.filter( allClaims, propertySelector )
	propertySelector = mw.text.trim( propertySelector )

	-- Get property ID from selector
	local propertyId = mw.ustring.match( propertySelector, '^[Pp]%d+' )
	if not propertyId then
		propertyId = ''
	end
	local initPos = #propertyId + 1
	propertyId = string.upper( propertyId )
    if ( not allClaims ) then
    	return nil
    end
	local allPropertyClaims = allClaims[propertyId]
    if ( not allPropertyClaims ) then
    	return nil
    end

	-- Gathering rules
	local rules = p.matchSelectors( propertySelector, initPos )

	-- If there is no rank filter, than default rank is 'best'
	local isRanked = false
	for i, subRules in ipairs( rules ) do
		for j, rule in ipairs( subRules ) do
			if rule['type'] == 'rank' then
				isRanked = true
				break
			end
		end
	end
	if not isRanked then
		table.insert( rules, 1, { { type = 'rank', value = 'best' } } )
	end

	-- Execute rules
	allPropertyClaims = p.applyRules( allPropertyClaims, rules )

	return allPropertyClaims
end


--[[
  Match and gather selector rules
 
  Input: string with selectors rules, start position
  Output: rules table
]]
function p.matchSelectors( selectorsString, initPos )
	local rules = {}
	local rawRulePattern = '^%s*%[%s*[^%[%]]+%s*%]%s*'
	local rulePattern = '^%s*%[%s*([^%[%]]+)%s*%]%s*$'
	
	if not initPos then
		initPos = 1
	end

	local rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )

	while rawRule do
		initPos = initPos + #rawRule
		rule = mw.ustring.match( rawRule, rulePattern )
		rule = mw.text.trim( rule )

		local subRules = mw.text.split( rule, '%s*,%s*' )
		local commands = {}
		local comm

		for i, subRule in ipairs( subRules ) do
			local isInversed = false
			if mw.ustring.match( subRule, '^!' ) then
				isInversed = true
				subRule = mw.ustring.match( subRule, '^!%s*(.+)$' )
			end

			-- p123[1]
			if mw.ustring.match( subRule, '^%d+$' ) then
				table.insert( commands, {
					type = 'position',
					value = subRule,
					inversed = isInversed
				} )
	
			-- p123[rank:preferred]
			elseif mw.ustring.match( subRule, '^rank%s*:%s*(%a+)$' ) then
				rank = mw.ustring.match( subRule, '^rank%s*:%s*(%a+)$' )
				table.insert( commands, {
					type = 'rank',
					value = rank,
					inversed = isInversed
				} )

			-- p123[language:xx]
			elseif mw.ustring.match( subRule, '^language%s*:%s*([%a%-]+)$' ) then
				value = mw.ustring.match( subRule, '^language%s*:%s*([%a%-]+)$' )
				table.insert( commands, {
					type = 'language',
					value = value,
					inversed = isInversed
				} )

			-- p123[language!:xx]
			elseif mw.ustring.match( subRule, '^language%s*!:%s*([%a%-]+)$' ) then
				value = mw.ustring.match( subRule, '^language%s*!:%s*([%a%-]+)$' )
				table.insert( commands, {
					type = 'language',
					value = value,
					inversed = not isInversed
				} )

			-- p123[min]
			elseif mw.ustring.match( subRule, '^min$' ) then
				table.insert( commands, { type = 'value_min' } )

			-- p123[max]
			elseif mw.ustring.match( subRule, '^max$' ) then
				table.insert( commands, { type = 'value_max' } )

			-- p123[min:p456]
			elseif mw.ustring.match( subRule, '^min%s*:%s*[Pp]%d+$' ) then
				value = mw.ustring.match( subRule, ':%s*([Pp]%d+)$' )
				table.insert( commands, {
					type = 'qualifier_min',
					qualifier = value
				} )

			-- p123[max:p456]
			elseif mw.ustring.match( subRule, '^max%s*:%s*[Pp]%d+$' ) then
				value = mw.ustring.match( subRule, ':%s*([Pp]%d+)$' )
				table.insert( commands, {
					type = 'qualifier_max',
					qualifier = value
				} )

			-- p123[unit:q789]
			elseif mw.ustring.match( subRule, '^unit%s*:%s*[^%[%],:]+$' ) then
				value = mw.ustring.match( subRule, ':%s*([^%[%],:]+)$' )
				table.insert( commands, {
					type = 'unit',
					value = value,
					inversed = isInversed
				} )

			-- p123[unit!:q789]
			elseif mw.ustring.match( subRule, '^unit%s*!:%s*[^%[%],:]+$' ) then
				value = mw.ustring.match( subRule, '!:%s*([^%[%],:]+)$' )
				table.insert( commands, {
					type = 'unit',
					value = value,
					inversed = not isInversed
				} )

			-- p123[p456]
			elseif mw.ustring.match( subRule, '^[Pp]%d+$' ) then
				qualifier = mw.ustring.match( subRule, '^[Pp]%d+' )
				table.insert( commands, {
					type = 'qualifier',
					qualifier = qualifier,
					value = nil,
					inversed = isInversed
				} )

			-- p123[p456:q789]
			elseif mw.ustring.match( subRule, '^[Pp]%d+%s*:%s*[^%[%],:]+$' ) then
				qualifier = mw.ustring.match( subRule, '^([Pp]%d+)%s*:?' )
				value = mw.ustring.match( subRule, ':%s*([^%[%],:]+)$' )
				table.insert( commands, {
					type = 'qualifier',
					qualifier = qualifier,
					value = value,
					inversed = isInversed
				} )

			-- p123[p456!:q789]
			elseif mw.ustring.match( subRule, '^[Pp]%d+%s*!:%s*[^%[%],:]+$' ) then
				qualifier = mw.ustring.match( subRule, '^([Pp]%d+)%s*!:?' )
				value = mw.ustring.match( subRule, '!:%s*([^%[%],:]+)$' )
				table.insert( commands, {
					type = 'qualifier',
					qualifier = qualifier,
					value = value,
					inversed = not isInversed
				} )

			-- p123[q456]
			elseif mw.ustring.match( subRule, '^[Qq]%d+$' ) then
				value = mw.ustring.match( subRule, '^[Qq]%d+' )
				table.insert( commands, {
					type = 'value',
					value = value,
					inversed = isInversed
				} )

			else
				throwError( 'cant-parse-condition' )
			end
		end

		if #commands then
			table.insert( rules, commands )

		end

		rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )
	end
	
	return rules
end


--[[
  Intercept statements with selector rules
 
  Input: statements table, selector rules
  Output: filtered statements table
]]
function p.applyRules( claims, rules )
	for i, subRules in ipairs( rules ) do
		local newClaims = {}
		for j, rule in ipairs( subRules ) do
			if rule['type'] == 'rank' then
				table.insert( newClaims, p.filterByRank( claims, rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'language' then
				table.insert( newClaims, p.filterByLanguage( claims, rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'unit' then
				table.insert( newClaims, p.filterByUnit( claims, rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'position' then
				table.insert( newClaims, p.filterByPosition( claims, rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'qualifier' then
				table.insert( newClaims, p.filterByQualifier( claims, rule['qualifier'], rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'qualifier_min' then
				table.insert( newClaims, p.filterUtterByQualifier( claims, rule['qualifier'], true ) )
			elseif rule['type'] == 'qualifier_max' then
				table.insert( newClaims, p.filterUtterByQualifier( claims, rule['qualifier'], false ) )
			elseif rule['type'] == 'value' then
				table.insert( newClaims, p.filterByValue( claims, rule['value'], rule['inversed'] ) )
			elseif rule['type'] == 'value_min' then
				table.insert( newClaims, p.filterUtter( claims, true ) )
			elseif rule['type'] == 'value_max' then
				table.insert( newClaims, p.filterUtter( claims, false ) )
			end
		end
		claims = {}

		--[[
			Merge all claims
			TODO: It's not good
		]]
		for j, newSubClaims in ipairs( newClaims ) do
			for k, newClaim in ipairs( newSubClaims ) do
				local isNew = true
				for l, oldClaim in ipairs( claims ) do
					if oldClaim['id'] == newClaim['id'] then
						isNew = false
						break
					end
				end
				if isNew then
					table.insert( claims, newClaim )
				end
			end
		end
	end

	return claims
end


--[[
  Filter statements by rank
 
  Input: claims table, rank value, inversion
  Output: filtered statements table
]]
function p.filterByRank( claims, rank, inversed )
	if not inversed then
		inversed = false
	end

	if not rank then
		rank = 'best'
	end
	
	-- Check if rank value is valid
	local isValidRank = false
	for i, validRank in ipairs( validRanks ) do
		if rank == validRank then
			isValidRank = true
			break
		end
	end
	if not isValidRank then
		throwError( 'rank-not-valid' )
	end

	-- Find the best rank
	if rank == 'best' then
		rank = 'normal' -- default rank (don't use deprecated even if it's no more claims)

		-- If we have at least one preferred rank, mark it as best
		for i, statement in pairs( claims ) do
			if (statement.rank == 'preferred') then
				rank = 'preferred'
				break
			end
		end
	end

	local resultClaims = {};
	for i, statement in pairs( claims ) do
		if ( statement.rank == rank ) ~= inversed then
			table.insert( resultClaims, statement )
		end
	end

	return resultClaims
end


--[[
  Filter statements by language of value
 
  Input: claims table, language, inversion
  Output: filtered statements table
]]
function p.filterByLanguage( claims, language, inversed )
	if not inversed then
		inversed = false
	end

	local resultClaims = {}
	local mulStatement = {}
	for i, statement in ipairs( claims ) do
		isMatchLanguage = false
		if statement['mainsnak']
				and statement['mainsnak']['datavalue']
				and statement['mainsnak']['datavalue']['value']
				and statement['mainsnak']['datavalue']['value']['language'] then
				
			if statement['mainsnak']['datavalue']['value']['language'] == language then
				isMatchLanguage = true
			end
			if statement['mainsnak']['datavalue']['value']['language'] == 'mul' then
				mulStatement = statement
			end
		end
		if isMatchLanguage ~= inversed then
			table.insert( resultClaims, statement )
		end
	end
	
	if next(resultClaims) == nil and next(mulStatement) ~= nil then
		-- if specific language is not found, but there is Q20923490 value
		table.insert( resultClaims, mulStatement )
	end

	return resultClaims
end


--[[
  Filter statements by unit of value
 
  Input: claims table, unit, inversion
  Output: filtered statements table
]]
function p.filterByUnit( claims, unit, inversed )
	if not inversed then
		inversed = false
	end

	unit = 'http://www.wikidata.org/entity/' .. string.upper( unit )

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		isMatchUnit = false
		if statement['mainsnak']
				and statement['mainsnak']['datavalue']
				and statement['mainsnak']['datavalue']['value']
				and statement['mainsnak']['datavalue']['value']['unit']
				and statement['mainsnak']['datavalue']['value']['unit'] == unit then
			isMatchUnit = true
		end
		if isMatchUnit ~= inversed then
			table.insert( resultClaims, statement )
			break
		end
	end

	return resultClaims
end


--[[
  Filter statements by position
 
  Input: claims table, position, inversion
  Output: filtered statements table
]]
function p.filterByPosition( claims, position, inversed )
	if not inversed then
		inversed = false
	end

	local resultClaims = {};
	for statementPosition, statement in ipairs( claims ) do
		if ( statementPosition == tonumber( position ) ) ~= inversed then
			table.insert( resultClaims, statement )
			break
		end
	end

	return resultClaims
end


--[[
  Filter statements by qualifier existance or it's value
 
  Input: claims table, ID of qualifier's property, qualifier's value, inversion
  Output: filtered statements table
]]
function p.filterByQualifier( claims, qualifierId, value, inversed )
	if not inversed then
		inversed = false
	end

	qualifierId = string.upper( qualifierId )

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		if statement['qualifiers'] and statement['qualifiers'][qualifierId] then
			if value == nil then
				if ( #statement['qualifiers'][qualifierId] > 0 ) ~= inversed then
					table.insert( resultClaims, statement )
				end
			else
				local isQualifierFound = false
				for j, qualifier in ipairs( statement['qualifiers'][qualifierId] ) do
					if qualifier['datavalue'] then
						local qualifierValue = qualifier['datavalue']['value']
						if qualifier['datavalue']['type'] == 'wikibase-entityid' then
							qualifierValue = qualifierValue.id
							value = string.upper( value )
						end
			
						if qualifierValue == value then
							isQualifierFound = true
							break
						end
					end
				end
				if isQualifierFound ~= inversed then
					table.insert( resultClaims, statement )
				end
			end
		elseif inversed then
			table.insert( resultClaims, statement )
		end
	end
	return resultClaims
end


--[[
  Filter statements by it's values
 
  Input: claims table, value, inversed
  Output: filtered statements table
]]
function p.filterByValue( claims, value, inversed )
	inversed = inversed or false

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		local statementValue
		if statement['mainsnak'] 
		and statement['mainsnak']['datavalue'] 
		and statement['mainsnak']['datavalue']['type']
		then 
			statementValue = statement['mainsnak']['datavalue']['value']
			if statement['mainsnak']['datavalue']['type'] == 'quantity' then
				statementValue = statementValue.amount
			end
			if statement['mainsnak']['datavalue']['type'] == 'time' then
				statementValue = statementValue.time
			end
			if statement['mainsnak']['datavalue']['type'] == 'wikibase-entityid' then
				statementValue = statementValue.id
				value = string.upper( value )
			end
		end

		if ( statementValue == value ) ~= inversed then
			table.insert( resultClaims, statement )
		end
	end

	return resultClaims
end


--[[
  Find a statement with minimum or maximum value
 
  Input: claims table, asc, inversed
  Output: filtered statements table
]]
function p.filterUtter( claims, asc, inversed )
	local resultValue = nil
	for i, statement in ipairs( claims ) do
		local statementValue
		if statement['mainsnak'] and
			statement['mainsnak']['datavalue'] and
			statement['mainsnak']['datavalue']['type']
		then
			statementValue = statement['mainsnak']['datavalue']['value']
			if statement['mainsnak']['datavalue']['type'] == 'quantity' then
				statementValue = statementValue.amount
			end
			if statement['mainsnak']['datavalue']['type'] == 'time' then
				statementValue = statementValue.time
			end
			if statement['mainsnak']['datavalue']['type'] == 'wikibase-entityid' then
				statementValue = statementValue.id
			end

			if not resultValue or ( statementValue < resultValue ) == asc then
				resultValue = statementValue
			end
		end
	end

	mw.logObject( resultValue, 'resultValue' )
	return p.filterByValue( claims, resultValue, inversed )
end


--[[
  Find a statement with minimum or maximum qualifier value
 
  Input: claims table, qualifierId, asc
  Output: filtered statements table
]]
function p.filterUtterByQualifier( claims, qualifierId, asc )
	qualifierId = string.upper( qualifierId )

	local resultValue = nil
	local resultStatement = nil
	for i, statement in ipairs( claims ) do
		if not statement['qualifiers'] and not statement['qualifiers'][qualifierId] then
			if resultStatement == nil then
				resultStatement = statement
			end
		else
			for _, qualifier in ipairs( statement['qualifiers'][qualifierId] ) do
				if qualifier['datavalue'] then
					local qualifierValue = qualifier['datavalue']['value']
					if qualifier['datavalue']['type'] == 'quantity' then
						qualifierValue = qualifierValue.amount
					end
					if qualifier['datavalue']['type'] == 'time' then
						qualifierValue = qualifierValue.time
					end
					if qualifier['datavalue']['type'] == 'wikibase-entityid' then
						qualifierValue = qualifierValue.id
					end

					if not resultValue or ( qualifierValue < resultValue ) == asc then
						resultStatement = statement
						resultValue = qualifierValue
					end
				end
			end
		end
	end

	return { resultStatement }
end


return p