189 lines
4.4 KiB
JavaScript
189 lines
4.4 KiB
JavaScript
/**
 * A node of a character trie.
 *
 * Child nodes are stored directly on the node object, as properties named
 * after the single character that leads to them.
 *
 * @constructor
 * @param {string} [key] Character this node stands for (omitted for a root).
 */
function Trie(key) {
    this.key = key;
    // Payload of the entry ending at this node; undefined until put() sets it.
    this.value = undefined;
}

/**
 * Stores `value` under `name`, creating any missing nodes along the path.
 * A value previously stored under the same name is overwritten.
 *
 * @param {string} name  Key to store the value under.
 * @param {*}      value Payload to attach to the final node of the path.
 */
Trie.prototype.put = function (name, value) {
    var node = this,
        nameLength = name.length,
        currentLetter,
        i;

    for (i = 0; i < nameLength; i++) {
        currentLetter = name[i];
        // Descend into the child for this character, creating it on demand.
        node = node[currentLetter] || (node[currentLetter] = new Trie(currentLetter));
    }

    node.value = value;
    // Remember the complete key on the node that terminates it.
    node.name = name;
};

/**
 * Looks up the value stored under `name`.
 *
 * @param {string} name Key to look up.
 * @returns {*} The stored value, or null when nothing is stored under
 *     `name` -- both when the path does not exist and when `name` is only
 *     a prefix of longer keys.
 */
Trie.prototype.get = function (name) {
    var node = this,
        nameLength = name.length,
        i;

    for (i = 0; i < nameLength; i++) {
        node = node[name[i]];
        if (!node) {
            return null;
        }
    }

    // Compare against undefined so falsy payloads such as 0 or "" survive.
    return (node.value === undefined) ? null : node.value;
};
|
|
|
|
/**
 * Dictionary facade. Instances carry no state of their own: both properties
 * below live on the prototype and are therefore shared by every instance.
 *
 * @constructor
 */
function Chinese() {}

// Handle created by the global await() helper for the "dict-loaded" event;
// the dictionary loader calls loaded.keep("dict-loaded") once parsing is done.
// NOTE(review): presumably a promise-like object from an await library -- confirm.
Chinese.prototype.loaded = await("dict-loaded");

// Shared trie holding every dictionary entry, keyed by "<word><index>".
Chinese.prototype.dictionary = new Trie();
|
|
|
|
/**
 * Returns the English definition text of the first dictionary entry whose
 * headword is exactly `value`.
 *
 * @param {string} value Word to look up.
 * @returns {?string} The entry's "en" field, or null when no entry matches.
 */
Chinese.prototype.toEnglish = function(value) {
    var firstEntry = this.getFirstMatchingEntry(value);

    return firstEntry ? firstEntry["en"] : null;
};
|
|
|
|
/**
 * Transliterates the Chinese parts of `value` into pinyin.
 *
 * Characters outside the Chinese code range are copied through unchanged.
 * At each Chinese character the longest dictionary word starting there
 * (up to four characters) is looked up; its pinyin is appended with all
 * whitespace removed. A character with no entry, or whose entry has no
 * pinyin, is copied through unchanged.
 *
 * @param {string} value Text to transliterate.
 * @returns {string} The transliterated text.
 */
Chinese.prototype.toPinyin = function(value) {
    // Code units treated as Chinese: 19968 (U+4E00) through 64041 (U+FA29).
    var HANZI_FIRST = 19968;
    var HANZI_LAST = 64041;
    // Longest word, in characters, that is tried at each position.
    var MAX_WORD_LENGTH = 4;

    var result = "";
    var pos = 0;
    var length = value.length;

    while (pos < length) {
        var currentChar = value[pos];
        var code = currentChar.charCodeAt(0);
        var match = null;
        var matchLength = 0;

        if (code >= HANZI_FIRST && code <= HANZI_LAST) {
            // Try the longest candidate first and shrink down to a single
            // hanzi. The window is clamped to the remaining input so the
            // same substring is never looked up twice and matchLength is
            // always the real length of the matched word.
            for (var j = Math.min(MAX_WORD_LENGTH, length - pos); j > 0; j--) {
                match = this.getFirstMatchingEntry(value.substring(pos, pos + j));
                if (match) {
                    matchLength = j;
                    break;
                }
            }
        }

        if (match && match["pin"]) {
            result += match["pin"].replace(/\s/g, '');
            pos += matchLength;
        }
        else {
            // Not Chinese, no entry found, or the entry has no pinyin.
            result += currentChar;
            pos += 1;
        }
    }

    return result;
};
|
|
|
|
/**
 * Returns the traditional form recorded on the first dictionary entry whose
 * headword is exactly `value`.
 *
 * @param {string} value Word to look up.
 * @returns {?string} The entry's "trad" field, or null when no entry matches.
 */
Chinese.prototype.toTraditional = function(value) {
    var found = this.getFirstMatchingEntry(value);

    if (found) {
        return found["trad"];
    }

    return null;
};
|
|
|
|
/**
 * Returns the simplified form recorded on the first dictionary entry whose
 * headword is exactly `value`.
 *
 * @param {string} value Word to look up.
 * @returns {?string} The entry's "simp" field, or null when no entry matches.
 */
Chinese.prototype.toSimplified = function(value) {
    var found = this.getFirstMatchingEntry(value);

    return found ? found["simp"] : null;
};
|
|
|
|
/**
 * Finds the longest prefix of `value` that is a dictionary headword.
 *
 * @param {string} value Text whose beginning is examined.
 * @returns {number} Length of the longest matching prefix, or 0 when no
 *     prefix of `value` has an entry.
 */
Chinese.prototype.determineBeginningWord = function(value) {
    var prefixLength = value.length;

    // Shrink the candidate prefix one character at a time, longest first.
    while (prefixLength > 0) {
        if (this.getFirstMatchingEntry(value.substring(0, prefixLength))) {
            return prefixLength;
        }
        prefixLength -= 1;
    }

    return 0;
};
|
|
|
|
/**
 * Fetches the first entry stored for `value`.
 *
 * Entries live in the trie under "<word><index>", so the first entry of a
 * word is the one whose key ends in "0".
 *
 * @param {string} value Word to look up.
 * @returns {*} Whatever the dictionary trie returns for that key; a falsy
 *     value when the word has no entry.
 */
Chinese.prototype.getFirstMatchingEntry = function(value) {
    var firstKey = value + "0";

    return this.dictionary.get(firstKey);
};
|
|
|
|
/**
 * Collects every entry stored for `value`.
 *
 * Keys "<value>0", "<value>1", ... are probed in order and collection stops
 * at the first index that has no entry.
 *
 * @param {string} value Word to look up.
 * @returns {Array} The entries found, in index order; empty when there are none.
 */
Chinese.prototype.getMatchingEntries = function(value) {
    var results = [];
    var entry;

    for (var index = 0; (entry = this.dictionary.get(value + index.toString())); index++) {
        results.push(entry);
    }

    return results;
};
|
|
|
|
|
|
// Fetch the CC-CEDICT file as text, index every entry in the shared
// dictionary trie under both its simplified and its traditional headword,
// then signal "dict-loaded".
// NOTE(review): no failure callback is registered, so "dict-loaded" is never
// signalled if the request fails -- confirm whether callers need that.
$.get('src/cedict_ts.u8', function(myContentFile) {
    var dictionary = Chinese.prototype.dictionary;

    // Stores `entry` under the first unused "<word><index>" key, so several
    // entries sharing one headword are kept side by side as word0, word1, ...
    function addEntry(word, entry) {
        var index = 0;
        while (dictionary.get(word + index.toString())) {
            index += 1;
        }
        dictionary.put(word + index.toString(), entry);
    }

    // Accept both CRLF and LF line endings.
    var lines = myContentFile.split(/\r?\n/);

    // Build a simple Trie structure
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];

        // Skip empty lines and comments
        if (!line || line.substring(0, 1) === "#") {
            continue;
        }

        // CC-CEDICT format:
        // Traditional Simplified [pin1 yin1] /English equivalent 1/equivalent 2/
        var headwords = line.split(" ", 2);
        if (headwords.length < 2) {
            // Malformed line without both headwords: nothing usable to index.
            continue;
        }

        // Everything after the opening bracket: "pin1 yin1] /English.../"
        var rest = line.substring(line.indexOf("[") + 1, line.length);

        var line_data = {};
        line_data["trad"] = headwords[0];
        line_data["simp"] = headwords[1];
        line_data["pin"] = rest.substring(0, rest.indexOf("]"));
        // Text between the first and the last slash; inner slashes are kept.
        line_data["en"] = rest.substring(rest.indexOf("/") + 1, rest.lastIndexOf("/"));

        addEntry(line_data["simp"], line_data);

        if (line_data["simp"] !== line_data["trad"]) {
            // also add lookup for this entry via trad word
            addEntry(line_data["trad"], line_data);
        }
    }

    Chinese.prototype.loaded.keep("dict-loaded");
}, 'text');
|