Jump to content

User:Glrx/Phoneme.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// Use SSML to say Wikipedia IPAc-en phoneme string
//   Only works in Edge browser because it is the only browser that obeys SSML

// Currently sets speaker as en-US.
// The Wikipedia IPA templates create a span with class="IPA" to signal IPA strings
//   the IPAc-he and IPAc-ka templates put the class on each phoneme!
// The templates do not set a language attribute
//   For example, the IPAc-it template does not set lang="it" for the span.
//     in a sense, it should not because the help information is in English.
//   language may be inferred from child anchor element
//     e.g., <a href="/wiki/Help:IPA/Arabic" title="Help:IPA/Arabic">...</a> allows the inference lang="ar"
//   Non-English strings may cause XML to be verbalized
//     see examples on https://en.wikipedia.org/wiki/Template:IPA

// Copyright 2018 https://en.Wikipedia.org/wiki/User:Glrx.  Permission CC-BY-SA 3.0.

/*jslint browser:true, for:true, white:true, single:true */
/*global console mw speechSynthesis SpeechSynthesisUtterance document */
/*property
    add, appendChild, color, createElement, find, firstChild, getAttribute,
    hook, length, log, nodeName, replace, setAttribute, speak, style, text,
    textContent, title
*/

/**
 * Lookup table: title attribute of a Help:IPA anchor -> language tag used
 * when speaking the adjacent IPA string.
 * Trailing notes are the author's observations for that language tag.
 * @type {Object.<string, string>}
 */
var langtagFromTitle = {
    // generic and English help pages
    "Help:IPA":            "en-US",
    // "Help:IPA/<Language>" style titles
    "Help:IPA/Arabic":     "ar",      // works
    "Help:IPA/Cantonese":  "yue",
    "Help:IPA/English":    "en-US",
    "Help:IPA/French":     "fr-FR",   // plain "fr" speaks the XML
    "Help:IPA/Hungarian":  "hu",      // works
    "Help:IPA/Irish":      "ga",      // speaks the XML
    "Help:IPA/Italian":    "it-IT",   // plain "it" speaks the XML
    "Help:IPA/Japanese":   "ja",      // works
    "Help:IPA/Korean":     "ko",      // works
    "Help:IPA/Mandarin":   "cmn",
    "Help:IPA/Māori":      "mi",      // works
    "Help:IPA/Polish":     "pl",      // works
    "Help:IPA/Portuguese": "pt",      // works
    "Help:IPA/Romanian":   "ro",      // dies: click bubbles up to the anchor
    "Help:IPA/Spanish":    "es-ES",   // plain "es" speaks the XML
    // "Help:IPA for <Language>" style titles
    "Help:IPA for Georgian": "ka",
    "Help:IPA for Hebrew":   "he"
};

/**
 * Choose a local voice for the langtag.
 *
 * Only voices with localService set are considered.  A case-insensitive exact
 * match on the voice's lang wins; otherwise the first local voice whose lang
 * extends the requested tag is used (requesting "ja" selects a "ja-JP" voice,
 * but requesting "en-US" never selects "en-GB").  Within each category the
 * first match is returned rather than the best match.
 *
 * There may be a problem with the first call and Chrome: see
 * SpeechSynthesis.onvoiceschanged (the voice list may not be populated yet).
 * @param {string} langtag - language tag such as "en-US" or "ja"
 * @returns {SpeechSynthesisVoice | null} - chosen voice, or null when no
 *     local voice matches (or langtag is empty)
 */
function voiceChoose(langtag) {
    "use strict";
    /* sequence of SpeechSynthesisVoice */
    var voices;
    var wanted;
    var prefix;
    var fallback = null;
    var i;
    var v;
    var lang;

    // nothing sensible can match an empty or missing tag
    if (!langtag) {
        return null;
    }

    voices = speechSynthesis.getVoices();

    // language tags compare case-insensitively
    wanted = String(langtag).toLowerCase();
    prefix = wanted + "-";

    for (i = 0; i < voices.length; i += 1) {
        v = voices[i];

        if (v.localService) {
            // some platforms reportedly use an underscore separator (en_US)
            lang = String(v.lang).replace(/_/g, "-").toLowerCase();

            if (lang === wanted) {
                // exact match: done
                return v;
            }
            if (fallback === null && lang.indexOf(prefix) === 0) {
                // remember the first more-specific voice, e.g. ja-JP for ja
                fallback = v;
            }
        }
    }

    return fallback;
}

/**
 * Takes an element with attribute data-ph, builds a SpeechSynthesisUtterance
 * whose text is an SSML document wrapping that phoneme string, and speaks it.
 * The optional data-langtag attribute sets xml:lang in the SSML and is used
 * to pick a voice; "en-US" is used when it is absent.
 * Does nothing when the element has no data-ph attribute.
 * @param {Element} el - element carrying data-ph (and optionally data-langtag)
 * @returns {void}
 */
function speakPhoneme(el) {
    "use strict";

    /** @type {string | null} */
    var str = el.getAttribute("data-ph");
    /** @type {string} */
    var langtag = el.getAttribute("data-langtag") || "en-US";
    /** XML predefined entities for characters unsafe in an attribute value
     * @type {Object.<string, string>} */
    var entities = {
        "&": "&amp;",
        "'": "&apos;",
        "\"": "&quot;",
        "<": "&lt;",
        ">": "&gt;"
    };
    var u;
    /** @type {string} */
    var strXML;

    /**
     * Escape every XML special character in s (all occurrences, single pass,
     * so an already produced "&amp;" is never escaped twice).
     * @param {string} s
     * @returns {string}
     */
    function escapeXml(s) {
        return s.replace(/[&'"<>]/g, function (c) {
            return entities[c];
        });
    }

    // nothing to say
    if (str === null) {
        return;
    }

    // use the Web Speech standard; some browsers will want webkit... prefix
    u = new SpeechSynthesisUtterance();

    // <?xml version="1.0"?>
    // schemaLocation is recommended:
    //   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    //   xsi:schemaLocation="http://www.w3.org/2001/10/synthesis http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
    // if I set <speak version="1.1"...>, then Edge speaks the markup
    //
    // The document is built by concatenation rather than String.replace so
    // that "$" sequences in the phoneme string are not treated as
    // replacement patterns.
    strXML = '<?xml version="1.0"?>\r\n' +
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="' +
        escapeXml(langtag) + '">\r\n' +
        '  <phoneme alphabet="ipa" ph="' + escapeXml(str) +
        '">Phoneme speech not available.</phoneme>\r\n' +
        '</speak>';

    // language is set in the SSML, so do not set it here
    // Some Web Speech implementations are finicky and will not accept "en"
    // u.lang = "en-US";

    // try choosing a local voice. Windows local voices may do SSML's phoneme
    u.voice = voiceChoose(langtag);

    // Specification says .text is a DOMSTRING
    u.text = strXML;

    // speak the SSML
    speechSynthesis.speak(u);
}

// Whenever page content is added, append a red "(speaker)" prompt to every
// element with class IPA.  Clicking the prompt speaks the element's IPA
// string via speakPhoneme.
mw.hook( "wikipage.content" ).add( function( $content ) {
    "use strict";

    // get a list of elements with class IPA
    var clist = $content.find(".IPA");

    /** @type {number} */
    var i;
    /** @type {Element} */
    var e;
    /** @type {string} */
    var langtag;
    /** @type {string} */
    var strContent;
    /** @type {Element} */
    var ch;
    /** @type {Element} */
    var spanSpeak;

    /**
     * Click handler for a speaker prompt.
     * The prompt may sit inside a link (the IPA span can be wrapped by an
     * anchor), so stop the click from bubbling up and navigating away.
     * @param {Event} event
     */
    function onSpeakClick(event) {
        event.preventDefault();
        event.stopPropagation();
        speakPhoneme(event.currentTarget);
    }

    // process each element
    for (i = 0; i < clist.length; i += 1) {
        e = clist[i];
        langtag = "en-US";

        // textContent will extract text from internal spans...
        // Strip, assuming the result is a valid IPA string:
        //   /     slashes (should anchor leading and trailing)
        //   [ ]   square brackets used by foreign languages
        //   ( )   parens in OED strings
        //   ,     comma was for alternatives
        //   -     hyphen was for join
        //   ˔     Edge complains about some phonemes; silently remove them
        strContent = e.textContent.replace(/[\/\[\](),\-˔]/g, "");

        // Hack -- look for language
        ch = e.firstChild;

        // if the first child is an anchor
        if (ch && ch.nodeName === "A") {
            // if the title is something like Help:IPA/Arabic, then we have a language
            // (own-property test so titles such as "constructor" cannot
            // pick up values inherited from Object.prototype)
            if (Object.prototype.hasOwnProperty.call(langtagFromTitle, ch.title)) {
                langtag = langtagFromTitle[ch.title];
            } else {
                console.log("Missing title language: " + ch.title);
            }
        }

        // create a span for the phoneme speaker prompt
        spanSpeak = document.createElement("span");

        // text for the prompt - ([speaker] speak)
        spanSpeak.textContent = "(\uD83D\uDD0A)";

        // save the IPA string in the data-ph attribute
        spanSpeak.setAttribute("data-ph", strContent);
        spanSpeak.setAttribute("data-langtag", langtag);

        // show the IPA string as a tooltip
        spanSpeak.setAttribute("title", strContent + " using langtag " + langtag);

        // set the click action; a listener (rather than an onclick attribute
        // string) does not require speakPhoneme to be reachable as a global
        spanSpeak.addEventListener("click", onSpeakClick);

        // color the span red
        spanSpeak.style.color = "red";

        // add the span to class IPA span
        e.appendChild(spanSpeak);
    }
});