You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
3.5 KiB
127 lines
3.5 KiB
var ucs2 = require('./ucs2');
|
|
|
|
/**
 * Public API: converts between ordinary JavaScript strings and UTF-8
 * "byte strings" (strings whose characters each carry one UTF-8 byte value).
 */
exports = {
  /**
   * Encodes a string as UTF-8.
   *
   * @param {string} str - Text to encode; it is first split into code points
   *   by `ucs2.decode` (presumably merging surrogate pairs - confirm in ./ucs2).
   * @returns {string} Concatenation of the encoded form of every code point.
   */
  encode: function(str) {
    var points = ucs2.decode(str);
    var total = points.length;
    var pieces = [];
    var pos = 0;

    while (pos < total) {
      pieces.push(encodeCodePoint(points[pos]));
      pos += 1;
    }

    return pieces.join('');
  },

  /**
   * Decodes a UTF-8 byte string back into text.
   *
   * @param {string} str - Byte string to decode.
   * @param {boolean} [safe] - When truthy, malformed input does not throw;
   *   the offending lead byte is passed through unchanged instead.
   * @returns {string} Result of `ucs2.encode` applied to the decoded code points.
   * @throws {Error} On malformed input when `safe` is falsy.
   */
  decode: function(str, safe) {
    // Re-initialise the module-level decoder state that decodeCodePoint()
    // and goBack() read and mutate.
    byteArr = ucs2.decode(str);
    byteCount = byteArr.length;
    byteIdx = 0;
    bytesNeeded = 0;
    bytesSeen = 0;
    codePoint = 0;
    lowerBoundary = 0x80;
    upperBoundary = 0xbf;

    var decoded = [];

    // decodeCodePoint() signals end of input by returning `false`.
    for (;;) {
      var next = decodeCodePoint(safe);
      if (next === false) {
        break;
      }
      decoded.push(next);
    }

    return ucs2.encode(decoded);
  }
};
|
|
// Local alias for String.fromCharCode, used by encodeCodePoint().
var fromCharCode = String.fromCharCode;

/**
 * Encodes a single code point as a UTF-8 byte string (one character per byte).
 *
 * Surrogate code points (0xD800-0xDFFF) are not rejected; they are encoded
 * with the regular three-byte pattern, exactly as before.
 *
 * @param {number} codePoint - Code point in the range 0..0x10FFFF.
 * @returns {string} One to four characters, each holding one byte value.
 * @throws {RangeError} If `codePoint` is negative or greater than 0x10FFFF.
 *   Previously such values fell through every size branch and silently
 *   produced "\0" or an invalid byte sequence.
 */
function encodeCodePoint(codePoint) {
  if (codePoint < 0 || codePoint > 0x10ffff) {
    throw new RangeError('Invalid code point: ' + codePoint);
  }

  // 7-bit values are emitted as a single byte.
  if ((codePoint & 0xffffff80) === 0) {
    return fromCharCode(codePoint);
  }

  var count; // number of continuation bytes that follow the lead byte
  var offset; // marker bits of the lead byte

  if ((codePoint & 0xfffff800) === 0) {
    // Up to 11 bits: two-byte sequence.
    count = 1;
    offset = 0xc0;
  } else if ((codePoint & 0xffff0000) === 0) {
    // Up to 16 bits: three-byte sequence.
    count = 2;
    offset = 0xe0;
  } else {
    // Remaining range 0x10000..0x10FFFF: four-byte sequence.
    count = 3;
    offset = 0xf0;
  }

  // Lead byte: marker bits plus the most significant payload bits.
  var ret = fromCharCode((codePoint >> (6 * count)) + offset);

  // Continuation bytes: 10xxxxxx, six payload bits each, high bits first.
  while (count > 0) {
    var tmp = codePoint >> (6 * (count - 1));
    ret += fromCharCode(0x80 | (tmp & 0x3f));
    count--;
  }

  return ret;
}
|
|
// Decoder state shared by decode(), decodeCodePoint() and goBack().
// decode() assigns every one of these before it starts a run.

// Input units being decoded (the result of ucs2.decode on the input string).
var byteArr;
// Index of the next unit of byteArr to read.
var byteIdx;
// Length of byteArr.
var byteCount;
// Payload bits accumulated so far for the sequence being decoded.
var codePoint;
// Continuation bytes consumed so far for the current sequence.
var bytesSeen;
// Continuation bytes the current lead byte requires (0 when between sequences).
var bytesNeeded;
// Inclusive lower bound accepted for the next continuation byte.
var lowerBoundary;
// Inclusive upper bound accepted for the next continuation byte.
var upperBoundary;
|
|
/**
 * Decodes the next code point from the module-level input buffer.
 *
 * Reads `byteArr` starting at `byteIdx` and advances the shared decoder state
 * (`byteIdx`, `codePoint`, `bytesSeen`, `bytesNeeded`, `lowerBoundary`,
 * `upperBoundary`).
 *
 * Only 0xC2-0xDF, 0xE0-0xEF and 0xF0-0xF4 are accepted as lead bytes.
 * 0xC0/0xC1 (overlong two-byte forms) and 0xF5-0xF7 (code points above
 * U+10FFFF) used to be accepted by the bit-mask tests and are now treated
 * like any other invalid lead byte.
 *
 * @param {boolean} [safe] - When truthy, malformed input is not an error:
 *   the sequence is abandoned via goBack() and its first byte is returned
 *   unchanged as the result.
 * @returns {number|boolean} The decoded code point, or `false` once the
 *   whole input has been consumed.
 * @throws {Error} When `safe` is falsy and the input is truncated
 *   ('Invalid byte index'), starts with an invalid lead byte
 *   ('Invalid UTF-8 detected') or contains a continuation byte outside the
 *   allowed range ('Invalid continuation byte').
 */
function decodeCodePoint(safe) {
  while (true) {
    // Input ended in the middle of a multi-byte sequence.
    if (byteIdx >= byteCount && bytesNeeded) {
      if (safe) return goBack();
      throw new Error('Invalid byte index');
    }

    // Clean end of input.
    if (byteIdx === byteCount) return false;

    var byte = byteArr[byteIdx];
    byteIdx++;

    if (!bytesNeeded) {
      // Bit 7 clear: the value stands for itself.
      if ((byte & 0x80) === 0) {
        return byte;
      }

      if (byte >= 0xc2 && byte <= 0xdf) {
        // Two-byte sequence (0xC0/0xC1 would be overlong, so they are excluded).
        bytesNeeded = 1;
        codePoint = byte & 0x1f;
      } else if (byte >= 0xe0 && byte <= 0xef) {
        // Three-byte sequence. Narrow the range of the first continuation
        // byte to exclude overlong forms (E0) and surrogates (ED).
        if (byte === 0xe0) lowerBoundary = 0xa0;
        if (byte === 0xed) upperBoundary = 0x9f;
        bytesNeeded = 2;
        codePoint = byte & 0xf;
      } else if (byte >= 0xf0 && byte <= 0xf4) {
        // Four-byte sequence. Narrow the range of the first continuation
        // byte to exclude overlong forms (F0) and values above U+10FFFF (F4).
        if (byte === 0xf0) lowerBoundary = 0x90;
        if (byte === 0xf4) upperBoundary = 0x8f;
        bytesNeeded = 3;
        codePoint = byte & 0x7;
      } else {
        // Stray continuation byte or a value that can never start a sequence.
        if (safe) return goBack();
        throw new Error('Invalid UTF-8 detected');
      }

      continue;
    }

    if (byte < lowerBoundary || byte > upperBoundary) {
      if (safe) {
        // Un-read the offending byte so it is decoded again on its own.
        byteIdx--;
        return goBack();
      }
      throw new Error('Invalid continuation byte');
    }

    // Only the first continuation byte has a narrowed range.
    lowerBoundary = 0x80;
    upperBoundary = 0xbf;

    codePoint = (codePoint << 6) | (byte & 0x3f);
    bytesSeen++;

    if (bytesSeen !== bytesNeeded) continue;

    // Sequence complete: reset the per-sequence state and hand back the result.
    var tmp = codePoint;
    codePoint = 0;
    bytesNeeded = 0;
    bytesSeen = 0;
    return tmp;
  }
}
|
|
/**
 * Abandons the sequence currently being decoded (used in "safe" mode).
 *
 * Rewinds `byteIdx` to the position just after the sequence's first byte,
 * resets the per-sequence decoder state to its defaults, and returns that
 * first byte so the caller can pass it through unchanged.
 *
 * @returns {number} The first byte of the abandoned sequence.
 */
function goBack() {
  // Undo the continuation bytes already consumed; the lead byte is then the
  // element immediately before the new read position.
  byteIdx -= bytesSeen;
  var leadByte = byteArr[byteIdx - 1];

  bytesSeen = 0;
  bytesNeeded = 0;
  codePoint = 0;
  upperBoundary = 0xbf;
  lowerBoundary = 0x80;

  return leadByte;
}
|
|
|
|
// Publish the API object assigned to `exports` earlier in this file as the
// module's export (reassigning `exports` alone does not change module.exports).
module.exports = exports;
|
|
|