Wikipedia:WikiProject Redirect/Inconsistent targets
Appearance
A more frequently updated version of this report is available on Toolforge at https://tb-dev.toolforge.org/ISR/.
Regenerating this report
[edit]-- First generate a list of applicable titles
DROP TABLE IF EXISTS redir_titles;
CREATE TABLE redir_titles (
rt_id int(8) unsigned NOT NULL,
rt_title varchar(255) BINARY NOT NULL,
rt_crushed varchar(255) BINARY,
PRIMARY KEY (rt_id)
);
INSERT INTO redir_titles ( rt_id, rt_title )
SELECT /* SLOW_OK */ page_id, page_title
FROM enwiki_p.page
WHERE page_namespace = 0
AND length( page_title ) >= 10
AND page_is_redirect = 1;
UPDATE redir_titles
SET rt_crushed = lcase( rt_title )
WHERE rt_title != lcase( rt_title );
DELETE FROM redir_titles WHERE rt_crushed IS NULL;
ALTER TABLE redir_titles ADD INDEX( rt_crushed );
-- Now find pairs of similar redirects
DROP TABLE IF EXISTS similar_redirects;
CREATE TABLE similar_redirects (
sr_src_id1 int(8) unsigned NOT NULL,
sr_src_title1 varchar(255) binary NOT NULL,
sr_targ_ns1 int(11),
sr_targ_title1 varchar(255) binary,
sr_src_id2 int(8) unsigned NOT NULL,
sr_src_title2 varchar(255) binary NOT NULL,
sr_targ_ns2 int(11),
sr_targ_title2 varchar(255) binary,
PRIMARY KEY (sr_src_id1,sr_src_id2)
);
-- This will have found most pairs in both directions (A v B / B v A) - eliminate duplicates of this type
-- by checking the A id is always less than the B.
INSERT INTO similar_redirects( sr_src_id1, sr_src_title1, sr_src_id2, sr_src_title2 )
SELECT /* SLOW_OK */ a.rt_id, a.rt_title, b.rt_id, b.rt_title
FROM redir_titles a
INNER JOIN redir_titles b ON a.rt_crushed = b.rt_crushed
WHERE a.rt_id < b.rt_id;
-- Fill in the redirect target for each of these and dismiss any with the same target
-- Doing it this way is ugly, but is SQL language neutral. Go standardisation!
UPDATE similar_redirects
SET sr_targ_ns1 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );
UPDATE similar_redirects
SET sr_targ_title1 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );
UPDATE similar_redirects
SET sr_targ_ns2 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );
UPDATE similar_redirects
SET sr_targ_title2 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );
DELETE
FROM similar_redirects
WHERE sr_targ_ns1 = sr_targ_ns2
AND sr_targ_title1 = sr_targ_title2;
-- See what we have left
SELECT count(*) FROM similar_redirects;
SELECT concat( '* [[' , sr_src_title1, ']] redirects to [[', sr_targ_title1, ']], but [[', sr_src_title2, ']] redirects to [[', sr_targ_title2, ']]' )
FROM similar_redirects
WHERE sr_targ_ns1 = 0
AND sr_targ_ns2 = 0
ORDER by sr_src_title1 ASC
LIMIT 200;