Create a wordcloud from any page with search function

Building off of my last post, “Create a wordcloud from any page”, I wanted to add some search functionality.
So now clicking a word highlights the word on the page.

Here’s the example: Word Cloud w/ Search

  1. Old:
    spans.push("<span style='font-size: "+freqs[i].fontSize+"px'>"+freqs[i].word+"</span> ");

    New:

    spans.push("<span style='margin: auto 4px;font-size: "+freqs[i].fontSize+"px;line-height: "+freqs[i].fontSize+"px'>"+freqs[i].word+"</span> ");

    Here I added line height and a margin to each word’s span. But before I was finished I also added an href that referred to two new functions. I thought it would be nice if clicking words highlighted them on the page.

    Newer:

    spans.push("<span style='margin: auto 4px;font-size: "+freqs[i].fontSize+"px;line-height: "+freqs[i].fontSize+"px'><a href='#"+i+"'>"+freqs[i].word+"</a></span> ");

    Now inside the GM_wait function I added:

    setOtherQueries()

    This points to

    		/**
    		 * Registers two jQuery plugins: `highlight` (wrap matches of a term in
    		 * span.highlight) and `removeHighlight` (unwrap those spans again).
    		 */
    		function setOtherQueries(){
    			//thanks: http://weblogtoolscollection.com/archives/2009/04/10/how-to-highlight-search-terms-with-jquery/
    			jQuery.fn.extend({
    				/**
    				 * Highlights every occurrence of searchTerm that starts at a word boundary.
    				 * @param {string} searchTerm text to look for (regex metacharacters are escaped)
    				 * @param {boolean} insensitive true for a case-insensitive search
    				 * @returns {jQuery} the set this was called on
    				 */
    				highlight: function(searchTerm, insensitive){
    					$('body').removeHighlight();
    					// An empty term would match at every word boundary and flood the page with empty spans.
    					if (!searchTerm) {
    						return this;
    					}
    					// First alternative swallows whole tags so text inside attributes is never touched;
    					// second alternative is the escaped search term anchored at a word boundary.
    					var escapedTerm = String(searchTerm).replace(/([-.*+?^${}()|[\]\/\\])/g, "\\$1");
    					var regex = new RegExp("(<[^>]*>)|(\\b" + escapedTerm + ")", insensitive ? "ig" : "g");
    					return this.html(this.html().replace(regex, function(a, b, c){
    						return (a.charAt(0) == "<") ? a : "<span class=\"highlight\" style=\"font-weight:bold;background: #D3E18A\">" + c + "</span>";
    					}));
    				}
    			});

    			//thanks: http://johannburkard.de/blog/programming/javascript/highlight-javascript-text-higlighting-jquery-plugin.html
    			/**
    			 * Replaces every descendant span.highlight with its own contents and
    			 * merges the resulting adjacent text nodes.
    			 * @returns {jQuery} the set this was called on
    			 */
    			jQuery.fn.removeHighlight = function() {
    				return this.find("span.highlight").each(function() {
    					var parent = this.parentNode;
    					// Move the children out one by one so an empty span cannot break replaceChild.
    					while (this.firstChild) {
    						parent.insertBefore(this.firstChild, this);
    					}
    					parent.removeChild(this);
    					parent.normalize();
    				}).end();
    			};
    		}
    
  2. New:
    timeoutID = window.setTimeout(GM_wait,100);

    Old:

    window.setTimeout(GM_wait,100);

    In the GM_wait function: I wanted to make sure we were setting an id on the timeout so we could clear it below in the else statement:

    if(typeof timeoutID != 'undefined') { window.clearTimeout(timeoutID);}
  3. New:
    function makeCloud(freqs) {
      if($('#floatingCloud').length==0){...}}

    Old:

    function makeCloud(freqs) {...}

    I added that to accommodate the highlighting function. This if statement avoids regenerating the cloud each time.

  4. New:
    $('head').append(' .highlight {font-weight:bold; background: #D3E18A;} #floatingCloud .highlight {font-weight:normal; background: none;} #floatingCloud span a, #floatingCloud span a:hover {text-decoration:none; margin: auto 4px;} #floatingCloud span a i {font-style:normal; font-size:10px; visibility:hidden;} #floatingCloud span a:hover i {visibility:visible;} ');

    I consolidated and added the styles to the document

  5. New:
    spans.push("<span style='font-size: "+freqs[i].fontSize+"px;line-height: "+freqs[i].fontSize+"px'><a href='#"+i+"'>"+freqs[i].word+"<i> ("+freqs[i].freq+")</i></a></span> ");

    Old:

                spans.push("<span style='margin: auto 4px;font-size: "+freqs[i].fontSize+"px;line-height: "+freqs[i].fontSize+"px'><a href='#"+i+"'>"+freqs[i].word+"</a></span> ");

    I added the frequency of the word in a hidden tag that becomes visible on mouseover.

My to do list:

  • I’m adding a var for minimum Frequency too, as I don’t want to waste space on single occurrences: minFreq = 2;
  • Adding random colors or colors from a scheme for each word
  • Change the background color
  • Make the background closeable
  • Potentially creating images of each word and adding rotation
  • Potentially making the background draggable

The current version of the code:

//wordle_jquery_search
// Based on impeachGod code at http://impeachgod.webs.com/wordle.js
// for the word frequency handling
(function(){
		// A "word" is a run of three or more ASCII letters or hyphens
		// (digits, "_" and "." were removed from the character class).
		var wordMatcher = /[A-Za-z-]{3,}/g;
    // Words that are removed from the frequency table before the cloud is built.
    var commonWords = ["the", "of", "to", "and", "a", "in", "is", "it",
        "that", "was", "for", "on", "are", "with", "as", "be", "at",
        "one", "have", "this", "from", "or", "had", "by", "but", "some",
        "out", "were", "all", "when", "an", "each"];
    // Planned lower bound on word frequency; no function in this script reads it yet.
    var minFreq = 2;
    // Highest word frequency seen; written by countFrequencies and read by scale.
		var maxFreq = 0;
    // Font size range in pixels that frequencies are mapped onto.
    var fontMin = 10;
    var fontMax = 70;
    // Upper bound on the number of words shown in the cloud.
    var maxWords = 100;

    // Loaded over HTTPS: the bookmarklet runs on arbitrary pages, and an http://
    // script injected into an https:// page is blocked as mixed content.
    var JQUERY_URL = 'https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js';
    // Only referenced by the commented-out stylesheet loader below.
    var CSS_URL = '';

    /* Bookmarklet code (change the url of the script to where you're hosting it):
javascript:function%20loadScript(scriptURL)%20{%20var%20scriptElem%20=%20document.createElement('SCRIPT');%20scriptElem.setAttribute('language',%20'JavaScript');%20scriptElem.setAttribute('src',%20scriptURL);%20document.body.appendChild(scriptElem);}%20loadScript('http://odt.uoregon.edu/v_includes/wordle_jquery_search.js');

Orig:
javascript:function%20loadScript(scriptURL)%20{%20var%20scriptElem%20=%20document.createElement('SCRIPT');%20scriptElem.setAttribute('language',%20'JavaScript');%20scriptElem.setAttribute('src',%20scriptURL);%20document.body.appendChild(scriptElem);}%20loadScript('http://fsavard.com/code/wordcloud.js');
    */

    //////////////////////////////////////////////////////////////////////////
    // Load CSS

    /*var headID = document.getElementsByTagName("head")[0];
    var cssNode = document.createElement('link');
    cssNode.type = 'text/css';
    cssNode.rel = 'stylesheet';
    cssNode.href = CSS_URL;
    cssNode.media = 'screen';
    headID.appendChild(cssNode);*/

    //////////////////////////////////////////////////////////////////////////
    // Jquery loading
    // Check if jQuery's loaded
    // Handle of the most recent polling timer, kept in this closure rather than as a global.
    var timeoutID;
    // Becomes true once the jQuery <script> tag has been injected, so that
    // repeated polling never injects a second copy.
    var jQueryRequested = false;

    /**
     * Polls every 100 ms until window.jQuery exists, injecting the jQuery
     * script tag on the first pass if needed. Once jQuery is available it
     * builds the cloud, registers the highlight plugins and adds the
     * stylesheet used by the cloud and the highlights.
     */
    function GM_wait() {
        if (typeof window.jQuery == 'undefined') {
            if (!jQueryRequested) {
                // Add jQuery
                var GM_JQ = document.createElement('script');
                GM_JQ.src = JQUERY_URL;
                GM_JQ.type = 'text/javascript';
                document.getElementsByTagName('head')[0].appendChild(GM_JQ);
                jQueryRequested = true;
            }
            timeoutID = window.setTimeout(GM_wait, 100);
            return;
        }

        // The other functions in this script call the global `$`.
        window.$ = window.jQuery;
        letsJQuery();
        if (typeof timeoutID != 'undefined') { window.clearTimeout(timeoutID); }
        setOtherQueries();

        // The rules must live inside a <style> element; appended to <head> as
        // bare text they are never applied.
        if (!document.getElementById('floatingCloudStyles')) {
            var css = ' .highlight {font-weight:bold; background: #D3E18A;} #floatingCloud .highlight {font-weight:normal; background: none;} #floatingCloud span a, #floatingCloud span a:hover {text-decoration:none; margin: auto 4px;} #floatingCloud span a i {font-style:normal; font-size:10px; visibility:hidden;} #floatingCloud span a:hover i {visibility:visible;} ';
            var styleNode = document.createElement('style');
            styleNode.type = 'text/css';
            styleNode.id = 'floatingCloudStyles';
            if (styleNode.styleSheet) {
                // Old Internet Explorer exposes the rules through styleSheet.cssText.
                styleNode.styleSheet.cssText = css;
            } else {
                styleNode.appendChild(document.createTextNode(css));
            }
            document.getElementsByTagName('head')[0].appendChild(styleNode);
        }
    }
    // Start polling for jQuery as soon as this script is evaluated.
    GM_wait();

	   //////////////////////////////////////////////////////////////////////////
    // Actual functionality


    // Based on http://refactormycode.com/codes/341-jquery-all-descendent-text-nodes-within-a-node
    /**
     * Collects the value of every text node under document.body, in document
     * order, joined with single spaces.
     *
     * Subtrees of script, style, noscript and iframe elements are skipped so
     * that source code and CSS do not end up in the word counts, and so is an
     * already-rendered #floatingCloud.
     *
     * @returns {string} the concatenated page text
     */
    function extractText() {
        var skippedTags = { SCRIPT: true, STYLE: true, NOSCRIPT: true, IFRAME: true };
        var ret = [];

        // Depth-first walk; a named function replaces arguments.callee.
        function collect(node) {
            var children = node.childNodes;
            var child;
            var i;
            for (i = 0; i < children.length; i++) {
                child = children[i];
                if (child.nodeType == 3) {
                    ret.push(child.nodeValue);
                } else if (child.nodeType == 1 &&
                        skippedTags[String(child.nodeName).toUpperCase()] !== true &&
                        child.id != 'floatingCloud') {
                    collect(child);
                }
            }
        }

        collect(document.body);
        return ret.join(' ');
    }

    // Based on impeachGod code at http://impeachgod.webs.com/wordle.js


    /**
     * Splits text into lowercase words using wordMatcher.
     *
     * @param {string} txt text to tokenize
     * @returns {string[]} lowercase words in order of appearance; empty when
     *     the text contains no match
     */
    function parseWords(txt) {
        // String.match returns null (not an empty array) when nothing matches.
        var words = txt.match(wordMatcher) || [];
        var i;
        // convert to lowercase
        for (i = 0; i < words.length; i++) {
            words[i] = words[i].toLowerCase();
        }
        return words;
    }

    /**
     * Counts how often each word occurs, drops the entries listed in
     * commonWords, and records the highest remaining count in maxFreq.
     *
     * @param {string[]} words lowercase words, e.g. the result of parseWords
     * @returns {Array<{word: string, freq: number}>} one entry per distinct
     *     remaining word
     */
    function countFrequencies(words) {
        var hasOwn = Object.prototype.hasOwnProperty;
        var freqs = {};
        var freqsArray = [];
        var word;
        var i;

        // count word occurrence frequencies; the own-property check keeps words
        // such as "constructor" from picking up inherited Object members
        for (i = 0; i < words.length; i++) {
            word = words[i];
            freqs[word] = hasOwn.call(freqs, word) ? freqs[word] + 1 : 1;
        }
        // remove common words
        for (i = 0; i < commonWords.length; i++) {
            delete freqs[commonWords[i]];
        }
        // convert to array; maxFreq is measured only over the words that are
        // kept, so a removed common word cannot shrink every font size
        maxFreq = 0;
        for (word in freqs) {
            if (hasOwn.call(freqs, word)) {
                freqsArray.push({"word": word, "freq": freqs[word]});
                if (freqs[word] > maxFreq) {
                    maxFreq = freqs[word];
                }
            }
        }
        return freqsArray;
    }

    // Adapted to scale
    /**
     * Assigns a fontSize to every entry, mapping its freq linearly onto the
     * fontMin..fontMax range relative to maxFreq. Entries are modified in place.
     *
     * @param {Array<{freq: number}>} freqs entries to annotate
     */
    function scale(freqs) {
        var span = fontMax - fontMin;
        var entry;
        for (var idx = 0; idx < freqs.length; idx += 1) {
            entry = freqs[idx];
            entry.fontSize = Math.round(entry.freq/maxFreq * span + fontMin);
        }
    }


    /**
     * Renders the word cloud as a fixed-position div#floatingCloud appended
     * to the page body. Does nothing when an element with that id already
     * exists, so the cloud is not regenerated.
     *
     * @param {Array<{word: string, freq: number, fontSize: number}>} freqs
     *     entries to render, in display order
     */
    function makeCloud(freqs) {
        if ($('#floatingCloud').length != 0) {
            return;
        }

        // One span per word: sized by fontSize, linking to "#<index>", with the
        // frequency in an <i> element.
        var spans = [];
        for (var i = 0; i < freqs.length; i++) {
            spans.push("<span style='font-size: "+freqs[i].fontSize+"px;line-height: "+freqs[i].fontSize+"px'><a href='#"+i+"'>"+freqs[i].word+"<i> ("+freqs[i].freq+")</i></a></span> ");
        }

        var div = $(document.createElement('div'));
        div.attr('style','position: fixed; left: 15px; top: 15px; z-index: 999; display: block; width: 90%; border: 1px solid black; padding: 30px; margin: 10px; background-color: #74adcb;	filter:alpha(opacity=90);	-moz-opacity:0.9;	-khtml-opacity: 0.9;	opacity: 0.9;');
        div.attr('id','floatingCloud');
        div.append(spans.join(" "));

        $(document.body).append(div);
    }


		/**
		 * Registers two jQuery plugins: `highlight` (wrap matches of a term in
		 * span.highlight) and `removeHighlight` (unwrap those spans again).
		 */
		function setOtherQueries(){
			//thanks: http://weblogtoolscollection.com/archives/2009/04/10/how-to-highlight-search-terms-with-jquery/
			jQuery.fn.extend({
				/**
				 * Highlights every occurrence of searchTerm that starts at a word boundary.
				 * @param {string} searchTerm text to look for (regex metacharacters are escaped)
				 * @param {boolean} insensitive true for a case-insensitive search
				 * @returns {jQuery} the set this was called on
				 */
				highlight: function(searchTerm, insensitive){
					$('body').removeHighlight();
					// An empty term would match at every word boundary and flood the page with empty spans.
					if (!searchTerm) {
						return this;
					}
					// First alternative swallows whole tags so text inside attributes is never touched;
					// second alternative is the escaped search term anchored at a word boundary.
					var escapedTerm = String(searchTerm).replace(/([-.*+?^${}()|[\]\/\\])/g, "\\$1");
					var regex = new RegExp("(<[^>]*>)|(\\b" + escapedTerm + ")", insensitive ? "ig" : "g");
					return this.html(this.html().replace(regex, function(a, b, c){
						return (a.charAt(0) == "<") ? a : "<span class=\"highlight\">" + c + "</span>";
					}));
				}
			});

			//thanks: http://johannburkard.de/blog/programming/javascript/highlight-javascript-text-higlighting-jquery-plugin.html
			/**
			 * Replaces every descendant span.highlight with its own contents and
			 * merges the resulting adjacent text nodes.
			 * @returns {jQuery} the set this was called on
			 */
			jQuery.fn.removeHighlight = function() {
				return this.find("span.highlight").each(function() {
					var parent = this.parentNode;
					// Move the children out one by one so an empty span cannot break replaceChild.
					while (this.firstChild) {
						parent.insertBefore(this.firstChild, this);
					}
					parent.removeChild(this);
					parent.normalize();
				}).end();
			};
		}

    // All your GM code must be inside this function
    /**
     * Entry point once jQuery is available: extracts the page text, counts
     * word frequencies, keeps the maxWords most frequent words, orders them
     * alphabetically, computes font sizes and renders the cloud.
     */
    function letsJQuery() {
        var txt = extractText();
        var words = parseWords(txt);
        var freqs = countFrequencies(words);

        // Most frequent first, so the truncation below keeps the top words.
        freqs.sort(function(a, b) {
            return b.freq - a.freq;
        });
        // Keep only the first maxWords entries
        if (freqs.length > maxWords) {
            freqs.splice(maxWords, freqs.length - maxWords);
        }
        // Alphabetical display order. A sort comparator must return a negative,
        // zero or positive number; a boolean never signals "a before b".
        freqs.sort(function(a, b) {
            if (a.word < b.word) {
                return -1;
            }
            return a.word > b.word ? 1 : 0;
        });

        scale(freqs);

        makeCloud(freqs);
    }

})();
Leave a Comment