User:Opencooper/showKanji.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
This user script seems to have a documentation page at User:Opencooper/showKanji.
// This script shows, if found, the kanji and kana for an article
// It then calls another script, bindKana.js, to clean up the display of ruby
// For configuration, please see the documentation
// License: CC0
/**
 * Entry point, run once the DOM is ready.
 *
 * Bails out unless the page is a normal article view, reserves an empty
 * #kanjiInfo container inside the page heading, and then asks Wikidata for
 * the item's Japanese label. The response is handled by parseJaLabel.
 */
function setup() {
    // Only act on a regular article view: no other action, no "oldid=" in
    // the URL, not the main page, and not a wiki whose content language is
    // already Japanese
    if (mw.config.get("wgAction") !== "view") { return; }
    if (!mw.config.get("wgIsArticle")) { return; }
    if (location.search.split("oldid=")[1]) { return; }
    if (mw.config.get("wgIsMainPage")) { return; }
    if (mw.config.get("wgContentLanguage") === "ja") { return; }

    // No Wikidata item is taken to mean there are no 1:1 interlanguage
    // links; this also avoids pages that only link to a subsection of a
    // jawiki article
    if (wikidataId === null) { return; }

    // Locate the heading for the current skin
    var heading = $("#firstHeading"); // Vector
    if (!heading.length) {
        heading = $(".page-heading"); // Minerva
    }
    if (!heading.length) {
        console.error("showKanji.js: Couldn't find a page heading. This skin ("
            + mw.config.get("skin") + ") might not be supported.");
        return;
    }

    // Insert the placeholder now so elements added later don't push it down
    heading.append("<div id='kanjiInfo' lang='ja' dir='ltr'></div>");

    // Fetch the Japanese label from Wikidata
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    var labelQuery = {
        action: "wbgetentities",
        ids: wikidataId,
        props: "labels",
        languages: "ja",
        format: "json",
        origin: "*"
    };
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: labelQuery,
        success: parseJaLabel
    });
}
/**
 * Success handler for the wbgetentities request made in setup.
 *
 * Pulls the Japanese label out of the response, normalises it to half-width,
 * builds the regexes for it and displays it. When the label is not purely
 * kana, a reading is requested as well.
 *
 * Does nothing when the response carries no Japanese label, including error
 * responses and responses that lack the entity or its labels.
 *
 * @param {Object} response Parsed JSON returned by the Wikidata API
 */
function parseJaLabel(response) {
    // Walk down defensively: any level may be absent
    var entities = response ? response.entities : undefined;
    var wikidataInfo = entities ? entities[wikidataId] : undefined;
    var labels = wikidataInfo ? wikidataInfo.labels : undefined;
    var jaEntry = labels ? labels.ja : undefined;
    var jaLabel = jaEntry ? jaEntry.value : undefined;

    if (!jaLabel) {
        return;
    }

    jaLabel = jaLabel.toHalfWidth();
    buildRegexes(jaLabel);
    displayKanji(jaLabel);

    // If the Japanese title is not kana-only, get the reading
    if (!kanjiRegexes.kanaOnly.test(jaLabel)) {
        requestKana();
    }
}
/**
 * Populates kanjiRegexes with the patterns the rest of the script uses:
 * latinOnly, kanaOnly, hiraganaOnly, katakanaOnly and lead.
 *
 * Characters of the title that are neither kanji nor kana are allowed in the
 * kana classes as well. They are regex-escaped before being placed inside a
 * character class so that a title containing e.g. "]", "\" or "^" cannot end
 * or alter the class.
 *
 * @param {string} kanji The Japanese title
 */
function buildRegexes(kanji) {
    // Strip the title of all kanji and kana; whatever is left gets added to
    // the character classes below
    var reKanjiKana = /[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴー-]/g;
    var leftover = kanji.replace(reKanjiKana, "");

    // Always allow a space and a hyphen; the hyphen must be escaped since it
    // has special behavior in regex classes
    var classSuffix = " \\-";
    var kanjiStripped = mw.util.escapeRegExp(leftover) + classSuffix;
    // Same as above, minus word characters
    var kanjiAuxillary = mw.util.escapeRegExp(leftover.replace(/\w/g, "")) + classSuffix;

    kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
    kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxillary + "]+$");
    kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxillary + "]+$");
    kanjiRegexes.katakanaOnly = new RegExp("^[ァ-ヴーA-Za-z" + kanjiAuxillary + "]+$");

    // Add midpoint for Latin in titles
    if (/\w/.test(kanji)) { kanjiStripped += "・"; }
    var leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";

    var kanjiEscaped = mw.util.escapeRegExp(kanji);
    // Account for spaces, but ignore backslash and other misc characters:
    // every space becomes optional and an optional space is allowed after
    // each kanji, kana, Latin letter or digit
    var reKanjiKanaLatin = /([\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴーA-Za-z0-9])/g;
    var kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
    kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");

    // Add kanji to regex to make sure we're not getting the reading of some
    // other term: title, then anything but parentheses/newline, then "(" and
    // the captured reading
    kanjiRegexes.lead = new RegExp(kanjiSpaced + "[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}
/**
 * Shows the Japanese title inside #kanjiInfo and remembers it in
 * wikidataKanji for the later lookups.
 *
 * The title is inserted as text, never as markup: it comes from a remote,
 * user-editable source, so any "<" or "&" in it must not be parsed as HTML.
 *
 * A class and a tooltip describing the script of the title are added so
 * users can hide, for example, katakana-only titles in their CSS. Latin-only
 * titles are hidden outright.
 *
 * @param {string} kanji The Japanese title
 */
function displayKanji(kanji) {
    wikidataKanji = kanji;

    var info = $("#kanjiInfo");
    info.append($("<ruby>").text(kanji));

    if (kanjiRegexes.latinOnly.test(kanji)) {
        info.addClass("kanjiInfo-latin-only");
        info.prop("title", "Japanese title in Latin script");
        info.css("display", "none");
    } else if (kanjiRegexes.hiraganaOnly.test(kanji)) {
        info.addClass("kanjiInfo-hiragana-only");
        info.prop("title", "Japanese title in hiragana");
    } else if (kanjiRegexes.katakanaOnly.test(kanji)) {
        info.addClass("kanjiInfo-katakana-only");
        info.prop("title", "Japanese title in katakana");
    } else {
        info.prop("title", "Japanese title in kanji");
    }
}
/**
 * Requests every claim of the current Wikidata item; parseKanaClaim handles
 * the response.
 *
 * All claims are fetched rather than a single property because the kana may
 * be present as a qualifier on another claim.
 */
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    var claimsQuery = {
        action: "wbgetclaims",
        entity: wikidataId,
        format: "json",
        origin: "*"
    };
    var request = {
        url: "https://www.wikidata.org/w/api.php",
        data: claimsQuery,
        success: parseKanaClaim
    };
    $.ajax(request);
}
/**
 * Success handler for the wbgetclaims request made in requestKana.
 *
 * Looks for a "name in kana" (P1814) reading, first as a qualifier on a
 * name-like claim whose text equals the displayed title (ignoring spaces),
 * then as a claim of its own. If none is found, falls back to
 * getInterlanguage.
 *
 * Every statement of a property is examined, not only the first, and
 * statements or qualifiers without a data value are skipped instead of
 * throwing.
 *
 * @param {Object} response Parsed JSON returned by the Wikidata API
 */
function parseKanaClaim(response) {
    var properties = {
        title: "P1476",
        nativeLabel: "P1705",
        officialName: "P1448",
        nameInNativeLanguage: "P1559"
    };
    var nameInKana = "P1814";
    // An error response has no claims; treat it like an empty claim set
    var claims = (response && response.claims) || {};
    var wanted = wikidataKanji.replace(/ /g, "");
    var kana;
    var prop; // name of the source the reading came from, used in a class name

    // Value carried by a snak, or undefined when the snak has no datavalue
    function snakValue(snak) {
        return snak && snak.datavalue ? snak.datavalue.value : undefined;
    }

    // Try getting nameInKana as a qualifier to some properties
    for (var key in properties) {
        var statements = claims[properties[key]] || [];
        for (var i = 0; i < statements.length; i++) {
            var nameValue = snakValue(statements[i].mainsnak);
            var nameText = nameValue ? nameValue.text : undefined;
            if (typeof nameText !== "string" || nameText.replace(/ /g, "") !== wanted) {
                continue;
            }
            var qualifiers = statements[i].qualifiers;
            var kanaQualifiers = qualifiers ? qualifiers[nameInKana] : undefined;
            var reading = kanaQualifiers ? snakValue(kanaQualifiers[0]) : undefined;
            if (typeof reading === "string" && reading) {
                kana = reading;
                prop = key;
                break;
            }
        }
        if (kana) {
            break;
        }
    }

    // Try getting nameInKana as a general claim
    if (!kana) {
        var kanaStatements = claims[nameInKana] || [];
        for (var j = 0; j < kanaStatements.length; j++) {
            var direct = snakValue(kanaStatements[j].mainsnak);
            if (typeof direct === "string" && direct) {
                kana = direct;
                prop = "nameInKana";
                break;
            }
        }
    }

    // We couldn't find nameInKana
    if (!kana) {
        getInterlanguage();
        return;
    }

    kana = kana.toHalfWidth();
    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-wikidata");
    $("#kanjiInfo").addClass("kanjiInfo-wikidata-" + prop);
}
/**
 * Asks the local wiki for the current page's Japanese interlanguage link and
 * hands the linked title (without any "#anchor") to scrapeKana. Falls back
 * to getWiktionary when there is no such link or the response is unusable.
 *
 * The page is identified by wgPageName, i.e. including its namespace, so the
 * query refers to the page being viewed rather than a same-named page in the
 * main namespace.
 */
function getInterlanguage() {
    var apiUrl = location.origin + "/w/api.php";
    // Documentation: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Blanglinks
    $.ajax({
        url: apiUrl,
        data: {
            action: "query",
            format: "json",
            prop: "langlinks",
            lllang: "ja",
            titles: mw.config.get( 'wgPageName' )
        },
        success: function(response) {
            var pageId = mw.config.get( 'wgArticleId' );
            // An error response has no "query"; a page without a Japanese
            // link has no (or an empty) "langlinks"
            var pages = response && response.query ? response.query.pages : undefined;
            var page = pages ? pages[pageId] : undefined;
            var langlinks = page ? page.langlinks : undefined;
            if (!langlinks || !langlinks.length) {
                getWiktionary();
                return;
            }

            var jaLabel = langlinks[0]["*"];
            jaLabel = jaLabel.replace(/(.*)#.*/, "$1"); // rm anchors
            scrapeKana(jaLabel);
        }
    });
}
/**
 * Requests a plain-text extract of the intro of a Japanese Wikipedia
 * article; getFirstSentence handles the response.
 *
 * @param {string} jaLabel Title of the jawiki article
 */
function scrapeKana(jaLabel) {
    // API docs: https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bextracts
    var extractQuery = {
        action: "query",
        prop: "extracts",
        format: "json",
        redirects: true,
        exintro: true,
        exsentences: 2,
        exlimit: 1,
        explaintext: true,
        titles: jaLabel,
        origin: "*"
    };
    var request = {
        url: "https://ja.wikipedia.org/w/api.php",
        data: extractQuery,
        success: getFirstSentence
    };
    $.ajax(request);
}
/**
 * Success handler for the extract request made in scrapeKana.
 *
 * Searches the intro text of the Japanese article for the title followed by
 * a parenthesised reading (kanjiRegexes.lead) and displays that reading.
 * Falls back to getWiktionary when there is no extract, no match, or the
 * match is not a useful reading.
 *
 * A response without "query"/"pages", or with no page in it, is treated the
 * same as a missing extract.
 *
 * @param {Object} response Parsed JSON returned by the jawiki API
 */
function getFirstSentence(response) {
    var pages = response && response.query ? response.query.pages : undefined;
    // The jawiki page id is unknown, so take whichever page came back
    var pageId = pages ? Object.keys(pages)[0] : undefined;
    var introText = pageId === undefined ? undefined : pages[pageId].extract;
    if (!introText) {
        console.error("showKanji.js: TextExtracts failed to get a lead for the Japanese article.");
        getWiktionary();
        return;
    }

    var wikitext = introText.toHalfWidth();
    var kanaSearch = wikitext.match(kanjiRegexes.lead);
    // A successful match is [whole match, captured reading]
    if (!kanaSearch || kanaSearch.length !== 2) {
        getWiktionary();
        return;
    }

    // Rm trailing characters
    var kana = kanaSearch[1].replace(/[・、 ]$/, "");

    // Abort if our reading is only katakana (for non-Latin) or Latin
    if ((!kanjiRegexes.latinOnly.test(wikidataKanji) && kanjiRegexes.katakanaOnly.test(kana))
        || kanjiRegexes.latinOnly.test(kana)) {
        getWiktionary();
        return;
    }

    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-jawiki");
}
// Adapted from:
// http://ilog4.blogspot.com/2015/09/javascript-convert-full-width-and-half.html
// https://stackoverflow.com/a/20488304/1995949
// https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms
/**
 * Returns a copy of the string with full-width forms replaced by their
 * half-width equivalents: U+FF01..U+FF5E are shifted down by 0xFEE0, and the
 * ideographic space U+3000 becomes a regular space.
 *
 * @returns {string}
 */
String.prototype.toHalfWidth = function() {
    var halfWidth = this.replace(/[\uff01-\uff5e]/g, function(ch) {
        return String.fromCharCode(ch.charCodeAt(0) - 0xFEE0);
    });
    // Written as an escape so the full-width space can't be silently turned
    // into an ordinary one by an editor or copy/paste
    return halfWidth.replace(/\u3000/g, " ");
};
/**
 * Requests the section list of the English Wiktionary entry named after the
 * displayed title; findJapaneseSection handles the response.
 *
 * The English Wiktionary is used because it has more terms and better
 * structure.
 */
function getWiktionary() {
    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    var sectionsQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "sections",
        origin: "*"
    };
    var request = {
        url: "https://en.wiktionary.org/w/api.php",
        data: sectionsQuery,
        success: findJapaneseSection
    };
    $.ajax(request);
}
/**
 * Success handler for the section-list request made in getWiktionary.
 *
 * Finds the first section titled "Japanese" and requests its rendered text;
 * parseWiktionary handles that response. Does nothing for an error response
 * or when the entry has no such section.
 *
 * @param {Object} response Parsed JSON returned by the Wiktionary API
 */
function findJapaneseSection(response) {
    if (response.error) {
        return;
    }

    var japanese = response.parse.sections.find(function(section) {
        return section.line == "Japanese";
    });
    var sectionIndex = japanese ? japanese.index : undefined;
    if (sectionIndex == null) {
        return;
    }

    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    var textQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "text",
        section: sectionIndex,
        origin: "*"
    };
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: textQuery,
        success: parseWiktionary
    });
}
/**
 * Success handler for the section-text request made in findJapaneseSection.
 *
 * Extracts a reading from the rendered Japanese section, either from the
 * headword line or, failing that, from the first ruby element inside a
 * ".Jpan" element, and displays it. Does nothing for an error response, a
 * response without parsed text, or when no reading is found.
 *
 * @param {Object} response Parsed JSON returned by the Wiktionary API
 */
function parseWiktionary(response) {
    // Same early exit as findJapaneseSection: an error response has no "parse"
    if (!response || response.error || !response.parse || !response.parse.text) {
        return;
    }

    var html = response.parse.text["*"];
    var parsed = $($.parseHTML(html));

    // Wiktionary adds readings as furigana
    var headword = parsed.find(".headword:lang(ja)").first();
    var seeTable = parsed.find(".Jpan ruby").first();
    var kanji = "";
    var kana = "";
    if (headword.length) {
        // Wiktionary already binds their kana, so we have to undo the process to get
        // the constituent parts, at least with the current markup
        var childNodes = headword[0].childNodes;
        for (let i = 0; i < childNodes.length; i++) {
            if (childNodes[i].nodeName == "RUBY") {
                var ruby = $(childNodes[i]); // convert back to JQuery for convenience
                // Order matters: drop <rp>, pull the <rt> reading out, and
                // only then read what is left as the base text
                ruby.children("rp").remove();
                kana += ruby.children("rt").detach().text();
                kanji += ruby.text();
            } else if (childNodes[i].nodeType == 3) { // "#text"
                // Plain text belongs to both the base text and the reading
                kanji += childNodes[i].nodeValue;
                kana += childNodes[i].nodeValue;
            }
        }

        // Only accept the headword if it spells the displayed title
        if (kanji !== wikidataKanji) { return; }
    } else if (seeTable.length) {
        // NOTE: the base text read here is not compared against wikidataKanji
        kanji = seeTable.children("rb").text();
        kana = seeTable.children("rt").text();
    } else {
        return;
    }

    if (kana) {
        displayKana(kana);
        $("#kanjiInfo").addClass("kanjiInfo-wiktionary");
    }
}
/**
 * Adds the reading as an <rt> annotation to the ruby element inside
 * #kanjiInfo.
 *
 * The reading is inserted as text, never as markup: it comes from remote
 * sources, so any "<" or "&" in it must not be parsed as HTML.
 *
 * Unless the displayed title consists of kanji only, bindKana.js is loaded
 * afterwards to clean up redundant furigana.
 *
 * @param {string} kana The reading to show
 */
function displayKana(kana) {
    $("#kanjiInfo ruby").append($("<rt>").text(kana));

    // Cleanup redundant furigana with another script
    var kanjiOnlyRe = /^[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]+$/;
    if (!kanjiOnlyRe.test(wikidataKanji)) {
        mw.loader.load( '//en.wikipedia.org/w/index.php?title=User:Opencooper/bindKana.js&action=raw&ctype=text/javascript' );
    }
}
// State shared by the functions in this script
// Wikidata item id of the current page; setup() bails out when it is null
var wikidataId = mw.config.get( 'wgWikibaseItemId' );
// The Japanese title being displayed, set by displayKanji()
var wikidataKanji;
// Regexes tailored to the current title, filled in by buildRegexes()
var kanjiRegexes = {};

// buildRegexes() calls mw.util.escapeRegExp, so make sure the mediawiki.util
// module is available before running setup on DOM ready
mw.loader.using( 'mediawiki.util' ).then( function() {
    $(setup);
} );