功能其实很简单:先通过 phantomjs.exe 采集目标 url 加载的所有资源地址,再以子进程的方式启动 nodejs 下载这些资源;对于 css 资源,还会匹配 css 内容,下载其中 url() 引用的资源。
当然,功能还是很简单的:在响应式设计和异步加载的情况下,仍会有很多资源没能下载下来,需要根据实际情况另行处理。
首先当然是下载 nodejs 和 phantomjs
下面是 phantomjs.exe 执行的 down.js
// down.js -- run with: phantomjs down.js URL
//
// Opens the given page in PhantomJS, records the URL of every resource the
// page starts receiving, then hands the list to `node downhtml.js` (expected
// in the current working directory) as one comma-separated argument.
//
// Kept in ES5 on purpose: the PhantomJS 2.0 build referenced by this article
// does not understand let/const or arrow functions.
var page = require('webpage').create();
var system = require('system');
var spawn = require('child_process').spawn;

if (system.args.length === 1) {
    console.log('usage: phantomjs down.js URL');
    phantom.exit(1);
} else {
    var urls = [];
    var seen = {}; // URLs already collected, so each one is passed on once
    page.address = system.args[1];

    // The callback name is case-sensitive; PhantomJS only invokes
    // `onResourceReceived`.
    page.onResourceReceived = function (res) {
        // A resource is reported in several stages; 'start' is used so it is
        // counted a single time.
        if (res.stage !== 'start') {
            return;
        }
        // Only http(s) resources can be downloaded by the node side. This also
        // drops data: URIs, whose embedded commas would corrupt the
        // comma-separated argument built below.
        // NOTE(review): an http(s) URL that itself contains a comma is still
        // split incorrectly by the receiving script.
        if (!/^https?:\/\//i.test(res.url)) {
            return;
        }
        if (seen.hasOwnProperty(res.url)) {
            return;
        }
        seen[res.url] = true;
        urls.push(res.url);
    };

    page.open(page.address, function (status) {
        if (status !== 'success') {
            console.log('fail to load the address');
            phantom.exit(1);
            return;
        }

        console.log('down resource ' + urls.length + ' urls.');

        var child = spawn('node', ['--harmony', 'downhtml.js', urls.join(',')]);

        // Relay the downloader's output so progress is visible in this console.
        child.stdout.on('data', function (data) {
            console.log(data);
        });
        child.stderr.on('data', function (data) {
            console.log(data);
        });
        // Leave PhantomJS only after the downloader has finished, and pass its
        // exit code through.
        child.on('exit', function (code) {
            phantom.exit(code);
        });
    });
}
下面是对应的node运行的 downhtml.js
'use strict';

// downhtml.js -- run with: node downhtml.js url1,url2,...
//
// Downloads every URL of the comma-separated list given as the first
// command-line argument into <cwd>/<hostname>/<pathname>. Responses that look
// like CSS are additionally scanned for url(...) references, and those
// resources are downloaded the same way.

const fs = require('fs');
const http = require('http');
const https = require('https');
const path = require('path');
const r_url = require('url');

// Upper bound on redirect hops followed for a single resource.
const MAX_REDIRECTS = 5;

// Directories already known to exist; avoids a filesystem call per download.
const dircache = Object.create(null);

// Absolute URLs already scheduled, so every resource is fetched only once.
const isdownmap = Object.create(null);

const cwd = process.cwd();

/**
 * Creates `pathstr` and any missing parent directories.
 *
 * @param {string} pathstr - Directory to create.
 * @param {function(Error=): void} callback - Called without arguments on
 *   success (including "already exists"), or with the error on failure.
 */
function makedir(pathstr, callback) {
  if (dircache[pathstr] === 1) {
    callback();
    return;
  }

  // Attempt the mkdir first and react to the error code instead of probing
  // with the deprecated fs.exists, which races with concurrent downloads.
  fs.mkdir(pathstr, function (err) {
    if (!err || err.code === 'EEXIST') {
      dircache[pathstr] = 1;
      callback();
      return;
    }

    const parent = path.dirname(pathstr);
    // ENOENT means a parent is missing. `parent === pathstr` means the
    // filesystem root was reached, so there is nothing left to create.
    if (err.code !== 'ENOENT' || parent === pathstr) {
      callback(err);
      return;
    }

    makedir(parent, function (parentErr) {
      if (parentErr) {
        callback(parentErr);
        return;
      }
      fs.mkdir(pathstr, function (retryErr) {
        if (retryErr && retryErr.code !== 'EEXIST') {
          callback(retryErr);
          return;
        }
        dircache[pathstr] = 1;
        callback();
      });
    });
  });
}

/**
 * Collects the distinct http(s) resources referenced through url(...) in a
 * stylesheet.
 *
 * @param {string} body - CSS source text.
 * @param {string} baseUrl - Absolute URL of the stylesheet; relative
 *   references are resolved against it.
 * @returns {string[]} Absolute URLs in order of first appearance, with the
 *   fragment removed.
 */
function extractCssUrls(body, baseUrl) {
  // Group 1 is the optional opening quote, which must be repeated to close.
  // The regex is created per call so no lastIndex state leaks between calls.
  const pattern = /url\(\s*(['"]?)(.*?)\1\s*\)/g;
  const found = [];
  const seen = Object.create(null);
  let match;

  while ((match = pattern.exec(body)) !== null) {
    const ref = match[2].trim();
    // Inline data and fragment-only references have nothing to download.
    if (ref === '' || ref.charAt(0) === '#' || /^data:/i.test(ref)) {
      continue;
    }

    let absolute;
    try {
      absolute = new r_url.URL(ref, baseUrl);
    } catch (err) {
      // Not a parseable URL reference; skip it and keep scanning.
      continue;
    }
    if (absolute.protocol !== 'http:' && absolute.protocol !== 'https:') {
      continue;
    }

    // "font.svg#id" and "font.svg" are the same file on disk.
    absolute.hash = '';
    if (!seen[absolute.href]) {
      seen[absolute.href] = 1;
      found.push(absolute.href);
    }
  }

  return found;
}

/**
 * Maps a resource URL to the local file it is saved as.
 *
 * @param {string} resourceUrl - Absolute URL of the resource.
 * @param {string} rootDir - Directory under which host folders are created.
 * @returns {string} rootDir/<hostname>/<pathname>; a pathname ending in "/"
 *   gets "index.html" appended. The query string is ignored.
 * @throws {TypeError} If `resourceUrl` is not a valid absolute URL.
 */
function localPath(resourceUrl, rootDir) {
  const parsed = new r_url.URL(resourceUrl);
  let pathname = parsed.pathname;
  // A directory-style path cannot be opened as a file.
  if (pathname === '' || pathname.charAt(pathname.length - 1) === '/') {
    pathname += 'index.html';
  }
  return path.join(rootDir, parsed.hostname, pathname);
}

/**
 * Tells whether a response should be scanned as a stylesheet.
 *
 * @param {string} resourceUrl - URL the response was requested from.
 * @param {Object} headers - Response headers with lower-cased names.
 * @returns {boolean} True when the URL contains ".css" or the content type
 *   contains "text/css".
 */
function isCss(resourceUrl, headers) {
  const contentType = String(headers['content-type'] || '');
  return resourceUrl.includes('.css') || contentType.includes('text/css');
}

/**
 * Issues a GET request over http or https, following redirects.
 *
 * @param {string} resourceUrl - Absolute http(s) URL.
 * @param {number} redirectsLeft - Redirect hops still allowed.
 * @param {function(Error|null, Object=, string=): void} callback - Called
 *   exactly once with (err) or with (null, response, finalUrl) for a 2xx
 *   response.
 */
function httpGet(resourceUrl, redirectsLeft, callback) {
  let target;
  try {
    target = new r_url.URL(resourceUrl);
  } catch (err) {
    callback(err);
    return;
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    callback(new Error('unsupported protocol ' + target.protocol));
    return;
  }

  // A request can still emit 'error' after its response arrived; make sure
  // the caller is notified only once.
  let finished = false;
  function finish(err, res, finalUrl) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, res, finalUrl);
  }

  const client = target.protocol === 'https:' ? https : http;
  client.get(target.href, function (res) {
    const status = res.statusCode;

    if (status >= 300 && status < 400 && res.headers.location) {
      res.resume(); // discard the body so the socket is released
      if (redirectsLeft <= 0) {
        finish(new Error('too many redirects'));
        return;
      }
      let next;
      try {
        next = new r_url.URL(res.headers.location, target.href).href;
      } catch (err) {
        finish(err);
        return;
      }
      finished = true; // the follow-up request owns the callback from here
      httpGet(next, redirectsLeft - 1, callback);
      return;
    }

    if (status < 200 || status >= 300) {
      res.resume();
      finish(new Error('HTTP ' + status));
      return;
    }

    finish(null, res, target.href);
  }).on('error', finish);
}

/**
 * Downloads one resource to its local path. When the response is CSS, the
 * resources it references are downloaded as well.
 *
 * @param {string} resourceUrl - Absolute URL of the resource.
 */
function saveResource(resourceUrl) {
  if (isdownmap[resourceUrl]) {
    return;
  }
  isdownmap[resourceUrl] = 1;

  let filepath;
  try {
    filepath = localPath(resourceUrl, cwd);
  } catch (err) {
    console.error('skip invalid url: ' + resourceUrl);
    return;
  }

  makedir(path.dirname(filepath), function (dirErr) {
    if (dirErr) {
      console.error('fail to create folder for ' + resourceUrl + ': ' + dirErr.message);
      return;
    }

    httpGet(resourceUrl, MAX_REDIRECTS, function (getErr, res, finalUrl) {
      if (getErr) {
        console.error('fail to download ' + resourceUrl + ': ' + getErr.message);
        return;
      }

      const out = fs.createWriteStream(filepath);
      out.on('error', function (writeErr) {
        console.error('fail to write ' + filepath + ': ' + writeErr.message);
      });
      res.pipe(out);

      if (!isCss(resourceUrl, res.headers)) {
        return;
      }

      console.log('down images form css file:' + resourceUrl + '.');

      // Collect raw chunks next to the pipe: the stylesheet is fetched once
      // and the bytes written to disk are not re-encoded.
      const chunks = [];
      res.on('data', function (chunk) {
        chunks.push(chunk);
      });
      res.on('end', function () {
        const css = Buffer.concat(chunks).toString('utf8');
        // Relative references resolve against the URL that finally served
        // the stylesheet, which differs from resourceUrl after a redirect.
        extractCssUrls(css, finalUrl).forEach(function (assetUrl) {
          saveResource(assetUrl);
        });
      });
    });
  });
}

/**
 * Entry point: downloads every URL of the comma-separated list in argv[2].
 *
 * @param {string[]} argv - Process arguments, as in `process.argv`.
 */
function main(argv) {
  const urls = String(argv[2] || '').split(',').filter(function (item) {
    return item !== '';
  });

  if (urls.length === 0) {
    console.error('usage: node downhtml.js url1,url2,...');
    process.exitCode = 1;
    return;
  }

  urls.forEach(function (item) {
    saveResource(item);
  });
}

// Run only when executed directly, so the helpers can be loaded without
// starting any download.
if (typeof module !== 'undefined' && require.main === module) {
  main(process.argv);
}
将 down.js 和 downhtml.js 放在同一个文件夹下,然后在该文件夹中通过下列 cmd 命令运行:
d:\phantomjs-2.0.0-windows\bin\phantomjs.exe down.js http://www.youku.com/
以上所述就是本文的全部内容了,希望大家能够喜欢。