HTML 中数字字符实体(&#…; 形式)的解码

我们用爬虫抓取回来的一些页面中,中文等非 ASCII 字符可能被编码成了 HTML 数字字符实体(十进制如 `&#22825;`,十六进制如 `&#x5929;`),这时候我们需要对它们进行解码。

1
// Largest valid Unicode code point; String.fromCodePoint throws a RangeError
// for anything above it.
const MAX_CODE_POINT = 0x10ffff;

/**
 * Converts a numeric code point to its character.
 *
 * @param {number} codePoint - Non-negative integer parsed from an entity.
 * @param {string} fallback - Text returned when the code point is out of range.
 * @returns {string} The character, or `fallback` if `codePoint` exceeds
 *   MAX_CODE_POINT.
 */
function codePointToChar(codePoint, fallback) {
  return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : fallback;
}

/**
 * Decodes decimal numeric character references (`&#22825;`) in `text`.
 * References whose value is not a valid code point are left untouched
 * instead of throwing.
 *
 * @param {string} text - Text that may contain `&#NNN;` references.
 * @returns {string} Text with the references replaced by their characters.
 */
function decodeDecimalEntities(text) {
  return text.replace(/&#(\d+);/g, (entity, digits) =>
    codePointToChar(Number.parseInt(digits, 10), entity),
  );
}

/**
 * Encodes every character outside U+0000..U+00FF as a hexadecimal numeric
 * character reference (`&#x5929;`).
 *
 * @param {string} text - Plain text.
 * @returns {string} Text with the non-Latin-1 characters replaced by references.
 */
function encodeHexEntities(text) {
  // The `u` flag makes the class match whole code points, so a character
  // outside the BMP (e.g. an emoji) becomes one reference rather than two
  // references to its surrogate halves.
  return text.replace(
    /[^\u0000-\u00ff]/gu,
    (char) => `&#x${char.codePointAt(0).toString(16)};`,
  );
}

/**
 * Decodes hexadecimal numeric character references (`&#x5929;`) in `text`.
 * Only real hex digits are accepted, and references whose value is not a
 * valid code point are left untouched instead of throwing.
 *
 * @param {string} text - Text that may contain `&#xHHH;` references.
 * @returns {string} Text with the references replaced by their characters.
 */
function decodeHexEntities(text) {
  // Case-insensitive: HTML allows both `&#x` and `&#X`, and either digit case.
  return text.replace(/&#x([0-9a-f]+);/gi, (entity, hex) =>
    codePointToChar(Number.parseInt(hex, 16), entity),
  );
}

// Decimal-encoded sample input. It has to contain entities, otherwise the
// decimal decoding step below has nothing to decode.
const code10 = '&#22825;&#22530;&#21521;&#24038;,&#28145;&#22323;&#21521;&#21491;';

// Decimal entities -> readable text.
let zh = decodeDecimalEntities(code10);

console.log(zh);

// Readable text -> hexadecimal entities.
const code16 = encodeHexEntities(zh);

console.log(code16);

// Hexadecimal entities -> readable text again (round trip).
zh = decodeHexEntities(code16);

console.log(zh);

文章目录
,