Jump to content

Data Platform/Systems/Hive/Queries/Wikidata

From Wikitech

2018 - Wikidata editors that have also edited a Wikipedia

WITH
wikipedias AS (
  SELECT
    DISTINCT dbname
  FROM wmf_raw.mediawiki_project_namespace_map
  WHERE snapshot = '2018-02'
    AND hostname LIKE '%wikipedia.org'
),

wikidata_editors AS (
  SELECT
    DISTINCT event_user_text
  FROM wmf.mediawiki_history
  WHERE snapshot = '2018-02'
    AND event_entity = 'revision'
    AND event_type = 'create'
    AND wiki_db = 'wikidatawiki'
    AND NOT event_user_is_anonymous
    AND NOT ARRAY_CONTAINS(event_user_groups, 'bot')
)

SELECT
  mwh.wiki_db,
  COUNT(DISTINCT mwh.event_user_text) as wikidata_coeditors
FROM wmf.mediawiki_history mwh
  JOIN wikipedias w ON (mwh.wiki_db = w.dbname)
  JOIN wikidata_editors wde ON (mwh.event_user_text = wde.event_user_text)
WHERE snapshot = '2018-02'
  AND event_entity = 'revision'
  AND event_type = 'create'
  AND NOT mwh.event_user_is_anonymous
  AND NOT ARRAY_CONTAINS(mwh.event_user_groups, 'bot')
GROUP BY
  mwh.wiki_db
ORDER BY wikidata_coeditors DESC
LIMIT 1000
;

2015 - Special:EntityData calls per user agent in a given hour

SELECT
  count(*) as count, user_agent
FROM
  webrequest
WHERE
  year = 2015
  AND month = 10
  AND day = 12
  AND hour = 1
  AND uri_host = "www.wikidata.org"
  AND http_status = 200
  AND http_method = "GET"
  AND uri_path LIKE "/wiki/Special:EntityData%"
GROUP BY user_agent
ORDER BY count
LIMIT 999999;