User:Glrx/Phoneme.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
This user script seems to have a documentation page at User:Glrx/Phoneme. |
// Use SSML to say Wikipedia IPAc-en phoneme string
// Only works in Edge browser because it is the only browser that obeys SSML
// Currently sets speaker as en-US.
// The Wikipedia IPA templates create a span with class="IPA" to signal IPA strings
// the IPAc-he and IPAc-ka templates put the class on each phoneme!
// The templates do not set a language attribute
// For example, the IPAc-it template does not set lang="it" for the span.
// in a sense, it should not because the help information is in English.
// language may be inferred from child anchor element
// e.g., <a href="/wiki/Help:IPA/Arabic" title="Help:IPA/Arabic">...</a> allows the inference lang="ar"
// Non-English strings may cause XML to be verbalized
// see examples on https://en.wikipedia.org/wiki/Template:IPA
// Copyright 2018 https://en.Wikipedia.org/wiki/User:Glrx. Permission CC-BY-SA 3.0.
/*jslint browser:true, for:true, white:true, single:true */
/*global console mw speechSynthesis SpeechSynthesisUtterance document */
/*property
add, appendChild, color, createElement, find, firstChild, getAttribute,
hook, length, log, nodeName, replace, setAttribute, speak, style, text,
textContent, title
*/
/**
 * Lookup table: title of a Wikipedia IPA help page -> language tag used
 * for the SSML xml:lang attribute and for choosing a voice.
 *
 * The trailing notes are the author's test observations:
 *   "works"      - the phoneme string was spoken
 *   "speaks XML" - the markup was read aloud instead of the phonemes
 *   "dies"       - the click did not produce speech
 * Entries without a note carry no recorded observation.
 *
 * @type {Object.<string, string>}
 */
var langtagFromTitle = {
    "Help:IPA":              "en-US",
    "Help:IPA/Arabic":       "ar",     // works
    "Help:IPA/Cantonese":    "yue",
    "Help:IPA/English":      "en-US",
    "Help:IPA/French":       "fr-FR",  // fr speaks XML
    "Help:IPA/Hungarian":    "hu",     // works
    "Help:IPA/Irish":        "ga",     // speaks XML
    "Help:IPA/Italian":      "it-IT",  // it speaks XML
    "Help:IPA/Japanese":     "ja",     // works
    "Help:IPA/Korean":       "ko",     // works
    "Help:IPA/Mandarin":     "cmn",
    "Help:IPA/Māori":        "mi",     // works
    "Help:IPA/Polish":       "pl",     // works
    "Help:IPA/Portuguese":   "pt",     // works
    "Help:IPA/Romanian":     "ro",     // dies: bubbles up to anchor
    "Help:IPA/Spanish":      "es-ES",  // es speaks XML
    "Help:IPA for Georgian": "ka",
    "Help:IPA for Hebrew":   "he"
};
/**
 * Choose a local voice for the langtag.
 *
 * Scans speechSynthesis.getVoices() in order and returns the first voice
 * whose lang is exactly langtag and whose localService flag is set; this is
 * the first match, not necessarily the best match.
 *
 * There may be a problem with the first call in Chrome (see
 * SpeechSynthesis.onvoiceschanged): if the voice list is still empty,
 * no voice matches and null is returned.
 *
 * @param {string} langtag - language tag to match against each voice's lang
 * @returns {SpeechSynthesisVoice | null} - matching local voice, or null if none
 */
function voiceChoose(langtag) {
    "use strict";
    /* sequence of SpeechSynthesisVoice */
    var voices = speechSynthesis.getVoices();
    var i;
    var candidate;
    for (i = 0; i < voices.length; i += 1) {
        candidate = voices[i];
        // Strict comparison: with ==, a null langtag (attribute missing)
        // would match a voice whose lang is undefined.
        if (candidate.lang === langtag && candidate.localService) {
            return candidate;
        }
    }
    return null;
}
/**
 * Takes an element with attribute data-ph, builds a SpeechSynthesisUtterance
 * whose text is an SSML document containing that phoneme string, and speaks
 * the utterance.
 *
 * The element's data-langtag attribute supplies the SSML xml:lang value and
 * the language passed to voiceChoose(); when it is absent, "en-US" is used.
 * When data-ph is absent nothing is spoken.
 *
 * @param {Element} el - element carrying data-ph and (optionally) data-langtag
 * @returns {undefined}
 */
function speakPhoneme(el) {
    "use strict";
    /** @type {string | null} */
    var str = el.getAttribute("data-ph");
    /** @type {string | null} */
    var langtag = el.getAttribute("data-langtag");
    /** @type {SpeechSynthesisUtterance} */
    var u;
    /** @type {string} */
    var strXML;

    /**
     * Escape every occurrence of the five XML special characters so the
     * value can sit inside a double-quoted attribute.
     * The ampersand is handled first so the other entities are not re-escaped.
     * @param {string} s
     * @returns {string}
     */
    function escapeXml(s) {
        return s
            .replace(/&/g, "&amp;")
            .replace(/'/g, "&apos;")
            .replace(/"/g, "&quot;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
    }

    // getAttribute returns null for a missing attribute; there is nothing to say
    if (str === null) {
        console.log("speakPhoneme: element has no data-ph attribute");
        return;
    }
    // fall back to the default language rather than emitting xml:lang="null"
    if (langtag === null) {
        langtag = "en-US";
    }

    // use the Web Speech standard; some browsers will want a webkit... prefix
    u = new SpeechSynthesisUtterance();

    // <?xml version="1.0"?>
    // schemaLocation is recommended:
    // xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    // xsi:schemaLocation="http://www.w3.org/2001/10/synthesis http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
    // if I set <speak version="1.1"...>, then Edge speaks the markup
    //
    // The document is assembled by concatenation instead of by replacing
    // placeholders in a template: String.replace with a string pattern
    // interprets "$&"-style sequences in the replacement text.
    strXML = '<?xml version="1.0"?>\r\n' +
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="' +
        escapeXml(langtag) + '">\r\n' +
        ' <phoneme alphabet="ipa" ph="' + escapeXml(str) +
        '">Phoneme speech not available.</phoneme>\r\n' +
        '</speak>';

    // language is set in the SSML, so do not set it here
    // Some Web Speech implementations are finicky and will not accept "en"
    // u.lang = "en-US";

    // try choosing a local voice. Windows local voices may do SSML's phoneme
    u.voice = voiceChoose(langtag);
    // Specification says .text is a DOMString
    u.text = strXML;
    // speak the SSML
    speechSynthesis.speak(u);
}
mw.hook( "wikipage.content" ).add( function( $content ) {
    "use strict";
    // every element the IPA templates marked with class="IPA"
    var ipaSpans = $content.find(".IPA");
    /** @type {number} */
    var idx;
    /** @type {Element} */
    var ipaEl;
    /** @type {string} */
    var ipaText;
    /** @type {string} */
    var tag;
    /** @type {string | undefined} */
    var mappedTag;
    /** @type {Node} */
    var firstNode;
    /** @type {Element} */
    var speaker;

    for (idx = 0; idx < ipaSpans.length; idx += 1) {
        ipaEl = ipaSpans[idx];

        // textContent also picks up the text of nested spans.
        // Strip the notation that is not part of the phoneme string, on the
        // assumption that what remains is a valid IPA string:
        //   /      delimiters (not anchored to the start/end of the string)
        //   [ ]    delimiters used for foreign languages
        //   ( )    parentheses found in OED strings
        //   ,      separator between alternatives
        //   -      hyphen used as a join
        //   ˔      Edge complains about this phoneme; silently remove it
        ipaText = ipaEl.textContent.replace(/[\/\[\](),\-˔]/g, "");

        // Hack -- infer the language from a leading help-page anchor,
        // e.g. title "Help:IPA/Arabic"; otherwise stay with en-US
        tag = "en-US";
        firstNode = ipaEl.firstChild;
        if (firstNode && firstNode.nodeName === "A") {
            mappedTag = langtagFromTitle[firstNode.title];
            if (mappedTag) {
                tag = mappedTag;
            } else {
                console.log("Missing title language: " + firstNode.title);
            }
        }

        // build the clickable speaker prompt: ([speaker])
        speaker = document.createElement("span");
        speaker.textContent = "(\uD83D\uDD0A)";
        // speakPhoneme reads the IPA string and language back from these
        speaker.setAttribute("data-ph", ipaText);
        speaker.setAttribute("data-langtag", tag);
        // tooltip shows what will be spoken and with which language tag
        speaker.setAttribute("title", ipaText + " using langtag " + tag);
        // clicking the prompt speaks the phoneme string
        speaker.setAttribute("onclick", "speakPhoneme(this);");
        // make the prompt stand out in red
        speaker.style.color = "red";

        // the prompt goes at the end of the class IPA element
        ipaEl.appendChild(speaker);
    }
});