Node.js HTTP 实例

配置文件

// Runtime configuration consumed by the main server file via require('./config').
module.exports = {
	// Address the HTTP server binds to (second argument of listen()).
	server: '127.0.0.1',
	// TCP port the HTTP server listens on (first argument of listen()).
	port: 9999,
	// Pool of User-Agent strings; the main file picks one at random for each outbound request.
	uas: [
		'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0',
		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
		'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:43.0) Gecko/20100101 Firefox/43.0',
		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
		'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) DumpRenderTree/0.0.0.0 Safari/536.11',
		'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8) Presto/2.10.289 Version/12.00',
		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36'
	]
}

以下是主文件:

var config = require('./config');

var http = require('http');
var https = require('https');
var querystring = require('querystring');
var url = require('url');
var zlib = require('zlib');
var cheerio = require('cheerio');

/**
 * Extracts purchase rows from an HTML page.
 *
 * The last table whose text contains "Date of Purchase" is scanned row by row.
 * When the first row has a <th> containing "Variation", each row is read as
 * variation / price / qty / date from <td> cells 2-5; otherwise it is read as
 * price / qty / date from cells 2-4 and the variation is reported as '-'.
 * Rows whose price cell is empty or reads "Price" are skipped.
 *
 * @param {string} html   HTML document to parse.
 * @param {*}      itemid Item id, echoed back unchanged in the result.
 * @returns {{itemid: *, list: Array<{variation: string, price: string, qty: string, date_purchase: string}>}}
 *          `list` is an empty array when no matching row is found.
 */
var parseHtml = function(html, itemid) {
	var $ = cheerio.load(html);

	var trs = $("table:contains('Date of Purchase')").last().find("tr");

	// A "Variation" header means one extra column, which shifts
	// price / qty / date one cell to the right.
	var hasVar = trs.first().find("th:contains('Variation')").length > 0;
	var offset = hasVar ? 1 : 0;

	var data = [];
	trs.each(function(){
		var colmns = $(this).find("td");

		// trim() strips BOTH ends; the previous non-global regex
		// /^\s+|\s+$/ replaced only the first match and therefore left
		// trailing whitespace behind whenever leading whitespace existed.
		var cell = function(idx) {
			return colmns.eq(idx).text().trim();
		};

		var price = cell(2 + offset);
		if(!price || (price === 'Price')) {
			// Returning anything but false continues with the next row.
			return true;
		}

		data.push({
			'variation': hasVar ? cell(2) : '-',
			'price': price,
			'qty': cell(3 + offset),
			'date_purchase': cell(4 + offset)
		});
	});

	return {'itemid': itemid, 'list': data};
};

/**
 * Logs the process memory usage (heap total, heap used and RSS), each
 * expressed in megabytes with two decimals, followed by a separator line.
 */
var showMem = function () {
	var usage = process.memoryUsage();

	// Turns a byte count into a "<n.nn> MB" label.
	function toMegabytes(byteCount) {
		var megabytes = byteCount / 1024 / 1024;
		return megabytes.toFixed(2) + ' MB';
	}

	var heapTotal = toMegabytes(usage.heapTotal);
	var heapUsed = toMegabytes(usage.heapUsed);
	var rss = toMegabytes(usage.rss);

	var summary = 'Process: heapTotal ' + heapTotal + ' heapUsed ' + heapUsed + ' rss ' + rss;
	console.log(summary);
	console.log('-----------------------------------------------------------');
};

/**
 * HTTP entry point of the crawling proxy.
 *
 * Routes:
 *   /ping                    replies with the body "online" (liveness check)
 *   /craw?u=<absolute URL>   fetches that URL; it must contain "item=<digits>"
 *   /craw?item=<id>          fetches the ViewBidsLogin page for <id> from offer.ebay.com
 *   anything else            404
 *
 * The fetched page is decompressed when the upstream sends gzip/deflate,
 * parsed with parseHtml() and returned as JSON:
 *   {"success": 1|0, "message": "", "data": {...}}
 * The upstream status code is passed through. A failed or timed-out (10 s)
 * upstream request yields status 599.
 *
 * NOTE(review): /craw?u= makes this server fetch any http(s) URL the caller
 * supplies; restrict the allowed hosts if untrusted clients can reach it.
 */
var server = http.createServer(function (req, res) {
	// Uncomment to log memory usage on every request.
	//showMem();

	/*
	`req` is an IncomingMessage, which implements the Readable Stream interface
	(https://nodejs.org/dist/latest-v4.x/docs/api/stream.html#stream_class_stream_readable).

	Data arriving from a client is an IncomingMessage. When this server issues
	its own outbound request, the remote response is delivered the same way,
	so it is an IncomingMessage as well.

	Reading request data from a client and reading response data from a remote
	server are therefore both reads from a Readable Stream.

	console.log(req.headers);
	{
		host: '127.0.0.1:9999',
		connection: 'keep-alive',
		'cache-control': 'max-age=0',
		accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*\/*;q=0.8',
		'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/...',
		'accept-encoding': 'gzip,deflate,sdch',
		'accept-language': 'zh-CN,zh;q=0.8,en;q=0.6'
	}
	console.log(req.httpVersion);		// HTTP version
	console.log(req.method);			// request method: GET, POST, PUT ...
	console.log(req.rawHeaders);		// raw headers; `headers` has duplicates merged, this list does not
	console.log(req.rawTrailers);
	console.log(req.statusCode);		// only for responses obtained from http.ClientRequest
	console.log(req.statusMessage);		// only for responses obtained from http.ClientRequest
	console.log(req.url);				// only for requests received by http.Server
	*/

	// Ends the response with a 404.
	function replyNotFound() {
		res.statusCode = 404;
		res.statusMessage = 'Not found';
		res.end();
	}

	// Sends the JSON envelope for the fetched page. `err` is a decompression
	// error (or null) and `decoded` is the page body as a Buffer.
	function replyJson(err, decoded) {
		if(res.headersSent) {
			// The error handler already answered this request.
			return;
		}
		var rej;
		if(!err) {
			rej = {"success": 1, "message": '', "data": parseHtml(decoded.toString(), itemid)};
		} else {
			rej = {"success": 0, "message": '', "data": {}};
		}
		res.setHeader('Content-Type', 'application/json;charset=UTF-8');
		res.write(JSON.stringify(rej));
		res.end();
	}

	// The second argument makes url.parse() parse the query string too.
	var ur = url.parse(req.url, true);
	if(typeof ur.query.u !== 'undefined') {
		// Take everything after "u=" verbatim from the raw request URL, so the
		// target URL keeps its own "&"-separated parameters.
		ur.query.u = req.url.replace(/[^=]+u\=/i, '');
	}
	/*
	Shape of `ur` for "/craw?u=xxxx":
	{
	    protocol: null,
	    slashes: null,
	    auth: null,
	    host: null,
	    port: null,
	    hostname: null,
	    hash: null,
	    search: '?u=xxxx',
	    query: { u: 'xxxx' },
	    pathname: '/craw',
	    path: '/craw?u=xxxx',
	    href: '/craw?u=xxxx'
	}
	*/

	// Liveness check.
	if(ur.pathname === '/ping') {
		res.write("online");
		res.end();
		return;
	}

	// Only /craw is served.
	if(ur.pathname !== '/craw') {
		replyNotFound();
		return;
	}

	var ghost = '';
	var gport = null;
	var gpath = '/';
	var protocol = 'http';
	var itemid = '';
	if(typeof ur.query.u !== 'undefined') {
		// Explicit target, e.g. u=http://offer.xxx.com/ws/g.php?a=xxx&item=123
		var m = ur.query.u.match(/item=([0-9]+)/);
		// match() returns null when the URL carries no numeric item id.
		if(!m) {
			replyNotFound();
			return;
		}
		itemid = m[1];

		var furl = url.parse(ur.query.u);
		if(!furl.protocol || !furl.host) {
			replyNotFound();
			return;
		}
		if(furl.protocol === 'https:') {
			protocol = 'https';
		}
		// `host` would include ":port"; the request options need the bare
		// hostname, the port and an origin-form path instead.
		ghost = furl.hostname;
		gport = furl.port;
		gpath = furl.path || '/';
	} else if(typeof ur.query.item !== 'undefined') {
		itemid = ur.query.item;
		ghost = 'offer.ebay.com';
		// `item` was percent-decoded by url.parse(); re-encode it so that
		// spaces or control characters cannot make http.request() throw.
		gpath = '/ws/eBayISAPI.dll?ViewBidsLogin&item='+encodeURIComponent(ur.query.item)+'&rt=nc&_trksid=p2047675.l2564';
	} else {
		replyNotFound();
		return;
	}

	// Random User-Agent, with a fixed fallback when the pool is empty.
	var ua = config.uas[Math.floor(Math.random() * config.uas.length)];
	if(typeof ua === 'undefined' || ua === '') {
		ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0';
	}

	// `options` carries the request line and the request headers (cookies
	// would be part of the headers). A request body, e.g. for a POST, would be
	// written with reqc.write(querystring.stringify({...})) before reqc.end().
	var options = {
			host: ghost,
			path: gpath,
			method: 'GET',
			headers:{
				'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
				'Accept-Encoding': 'gzip,deflate',
				'Accept-Language': 'en-US,en;q=0.8',
				'Cache-Control': 'max-age=0',
				'Connection': 'keep-alive',
				'User-Agent': ua
			}
	};
	var q = http;
	if(protocol === 'https') {
		options['port'] = 443;
		q = https;
	}
	if(gport) {
		// An explicit port in the target URL wins over the scheme default.
		options['port'] = parseInt(gport, 10);
	}

	// Outbound request: `reqc` is a ClientRequest (writable stream) and
	// `resc` is an IncomingMessage (readable stream).
	var reqc = q.request(options, function(resc){
		var chunks = [];
		// The body may arrive in several chunks.
		resc.on('data', function(data){
			chunks.push(data);
		});

		// Body complete. Because the request advertised gzip,deflate the
		// upstream may answer compressed, which shortens the transfer.
		resc.on('end', function(){
			// Pass the upstream status (302, 404, ...) through.
			res.statusCode = resc.statusCode;
			var buffer = Buffer.concat(chunks);
			chunks = null;

			var encoding = resc.headers['content-encoding'];
			if (encoding === 'gzip') {
				zlib.gunzip(buffer, replyJson);
			} else if (encoding === 'deflate') {
				zlib.inflate(buffer, replyJson);
			} else {
				// Uncompressed body: parse it as is.
				replyJson(null, buffer);
			}
		});
	});

	// Abort the outbound request after 10 s of socket inactivity.
	reqc.setTimeout(10000, function() {
		var timeout = (new Date()).toLocaleString() + " Timeout";
		console.log(timeout);

		// Triggers the 'error' event below ("socket hang up").
		reqc.abort();
	});
	// Upstream failure.
	reqc.on('error', function(e) {
		var msg = (new Date()).toLocaleString() + " Error: "+e.message;
		console.log(msg);
		if(res.headersSent) {
			return;
		}
		res.statusCode = 599;
		// A status line only accepts a restricted character set, and
		// toLocaleString() may emit characters outside it.
		res.statusMessage = msg.replace(/[^\x20-\x7e]/g, ' ');
		res.end();
	});

	// No body to send; finish the outbound request.
	reqc.end();

}).listen(config.port, config.server);

/////////////////////////////////////
// Socket inactivity timeout of the server, in milliseconds.
server.timeout = 20000;

// Log a timestamped line when the server starts listening and when it closes.
server
	.on('listening', function() {
		console.log((new Date()).toLocaleString() + " Server Listening");
	})
	.on('close', function() {
		console.log((new Date()).toLocaleString() + " Server Closed");
	});

这个例子中,HTTP模块的多数功能都已经使用到。类比其它语言,并不简单多少(甚至更加复杂),JS语法也不见得有何优势。实际上,之所以选择Node.js来做HTTP代理,原因只有一个:部署非常简单。