-
Notifications
You must be signed in to change notification settings - Fork 59
/
Copy pathgoogle.js
104 lines (88 loc) · 3.13 KB
/
google.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
var request = require('request')
var cheerio = require('cheerio')
var querystring = require('querystring')
var util = require('util')
// CSS selectors used to scrape the result page.
// linkSel and descSel are looked up inside each element matched by itemSel.
var linkSel = 'h3.r a'
var descSel = 'div.s'
var itemSel = 'div.g'
// The text of the last element matching nextSel is compared with `google.nextText`
// to decide whether a further page of results is available.
var nextSel = 'td.b a span'
// util.format template for the search URL. The %s placeholders are filled, in order, with:
// protocol, tld, language (hl), escaped query (q), start offset (start), results per page (num).
var URL = '%s://www.google.%s/search?hl=%s&q=%s&start=%s&sa=N&num=%s&ie=UTF-8&oe=UTF-8&gws_rd=ssl'
// Warning printed when a non-English language is selected but `google.nextText` is still the default 'Next'.
var nextTextErrorMsg = 'Translate `google.nextText` option to selected language to detect next results link.'
// Warning printed when `google.protocol` is neither 'http' nor 'https' and gets reset to 'https'.
var protocolErrorMsg = "Protocol `google.protocol` needs to be set to either 'http' or 'https', please use a valid protocol. Setting the protocol to 'https'."
/**
 * Public entry point: run a search and pass the parsed results to `callback`.
 *
 * The `start` parameter is optional, so both call shapes are accepted:
 *   google(query, callback)
 *   google(query, start, callback)
 *
 * @param {string} query - search terms
 * @param {number|function} [start] - result offset, or the callback when only two arguments are given
 * @param {function} [callback] - forwarded to `igoogle`, which invokes it with the outcome of the request
 */
function google (query, start, callback) {
  var startIndex = 0
  if (typeof callback === 'undefined') {
    // Two-argument form: the second argument is the callback.
    callback = start
  } else if (start != null) {
    // Only honour an explicit offset; undefined/null would otherwise be
    // formatted into the URL literally as "start=undefined" / "start=null".
    startIndex = start
  }
  igoogle(query, startIndex, callback)
}
// Default settings, stored as properties on the `google` function and read on every search.
// An optional `google.timeSpan` is also consulted at request time but has no default here.
google.resultsPerPage = 3 // sent as the `num` URL parameter; values above 100 are reduced to 100 at request time
google.tld = 'com' // host suffix: www.google.<tld>
google.lang = 'en' // sent as the `hl` URL parameter
google.requestOptions = {} // copied over the generated request options (url, method) before the request is made
google.nextText = 'Next' // expected text of the "next page" link; a warning is printed if lang is changed but this is not
google.protocol = 'https' // must be 'http' or 'https'; any other value is reset to 'https' with a warning
/**
 * Fetch one page of search results and pass the parsed outcome to `callback`.
 *
 * Settings are read from the properties on `google` (resultsPerPage, tld, lang,
 * protocol, nextText, requestOptions and the optional timeSpan) at call time.
 *
 * On success `callback(null, res)` is called, where `res` holds:
 *   url, query, start - what was requested
 *   links             - one { title, link, description, href } object per result item
 *   $, body           - the loaded cheerio document and the raw response body
 *   next              - present only when a next-page link was detected; calling it
 *                       requests the following page and invokes the same callback
 * On failure (request error or non-200 status) `callback(error, null, null)` is called.
 *
 * @param {string} query - search terms; escaped before being placed in the URL
 * @param {number} start - result offset sent as the `start` URL parameter
 * @param {function} callback - receives the outcome as described above
 */
var igoogle = function (query, start, callback) {
  if (google.resultsPerPage > 100) google.resultsPerPage = 100 // Google won't allow greater than 100 anyway
  if (google.lang !== 'en' && google.nextText === 'Next') console.warn(nextTextErrorMsg)
  if (google.protocol !== 'http' && google.protocol !== 'https') {
    google.protocol = 'https'
    console.warn(protocolErrorMsg)
  }

  var newUrl = util.format(URL, google.protocol, google.tld, google.lang, querystring.escape(query), start, google.resultsPerPage)

  // timeframe is optional. Append it to this request's URL only: the shared URL
  // template must stay untouched, otherwise a time filter set for one search
  // would stick to every later search even after `google.timeSpan` is cleared.
  if (google.timeSpan) {
    newUrl += '&tbs=qdr:' + google.timeSpan
  }

  var requestOptions = {
    url: newUrl,
    method: 'GET'
  }

  // Caller-supplied options win over the generated defaults.
  for (var k in google.requestOptions) {
    requestOptions[k] = google.requestOptions[k]
  }

  request(requestOptions, function (err, resp, body) {
    if ((err == null) && resp.statusCode === 200) {
      var $ = cheerio.load(body)
      var res = {
        url: newUrl,
        query: query,
        start: start,
        links: [],
        $: $,
        body: body
      }

      $(itemSel).each(function (i, elem) {
        var linkElem = $(elem).find(linkSel)
        var descElem = $(elem).find(descSel)
        var item = {
          title: $(linkElem).first().text(),
          link: null,
          description: null,
          href: null
        }

        // The href is parsed as a query string, so for hrefs of the form
        // "/url?q=<target>&..." the target ends up under the literal key '/url?q'.
        // Any other href shape leaves link/href as null.
        var qsObj = querystring.parse($(linkElem).attr('href'))
        if (qsObj['/url?q']) {
          item.link = qsObj['/url?q']
          item.href = item.link
        }

        // Drop nested <div>s so only the description's own text remains.
        $(descElem).find('div').remove()
        item.description = $(descElem).text()

        res.links.push(item)
      })

      if ($(nextSel).last().text() === google.nextText) {
        res.next = function () {
          // Request the following page, reusing the same query and callback.
          igoogle(query, start + google.resultsPerPage, callback)
        }
      }

      callback(null, res)
    } else {
      // `resp` is not assumed to exist here, hence the guard around the status code.
      callback(new Error('Error on response' + (resp ? ' (' + resp.statusCode + ')' : '') + ':' + err + ' : ' + body), null, null)
    }
  })
}
module.exports = google