// fetcher.harel.js
var zombie = require('zombie'),
fc = require("./fetcher.common.js"),
fs = require("fs"),
urlParse = require("url").parse,
levDist = require("./LevDistance.js").calc;
// Site root: the first page fetchOne visits, and the default prefix that
// processsAssetsList puts in front of file links when no page URL is supplied.
var baseUrl = "http://www.harel-gemel.co.il/";
/**
 * Trace hook used throughout this file. It is intentionally a no-op; re-enable
 * the console.log line inside to get step-by-step progress output.
 *
 * @param {string} str - Message describing the current step.
 */
var DEBUG = function(str) {
    // console.log("DEBUG", str);
    return undefined;
};
/**
 * Collects the spreadsheet links on the page currently loaded in `browser`
 * and passes them to fc.fetchFunds.
 *
 * Every "a.AttFileGallery" anchor whose href contains ".xls" becomes an entry
 * of the form { body, number, url, quarter, year }. Quarter and year are taken
 * from a date in the link text, written either as d/m/y or as d.m.y. Links
 * without an href, or whose text holds no such date, are skipped (the latter
 * with a log line) instead of aborting the whole list.
 *
 * @param {Object} fund - Fund descriptor; `body` and `number` are copied into every entry.
 * @param {Object} browser - Browser object whose `document` holds the file list.
 * @param {Function} onDone - Completion callback, forwarded untouched to fc.fetchFunds.
 * @param {string} [urlStr] - URL of the loaded page. When given, file links are
 *     prefixed with its protocol and host instead of baseUrl.
 */
var processsAssetsList = function(fund, browser, onDone, urlStr) {
    DEBUG("PROCESSING ASSETS LIST " + fund.number);
    var files = browser.document.querySelectorAll("a.AttFileGallery");
    var urlPrefix = baseUrl;
    if (urlStr !== undefined) {
        var parsed = urlParse(urlStr);
        urlPrefix = parsed.protocol + "//" + parsed.host + "/";
    }
    var processedFiles = [];
    // Index loop instead of for...in: for...in over a node list also yields
    // non-element keys such as "length".
    for (var i = 0; i < files.length; ++i) {
        var file = files[i];
        var hrefAttr = file && file.attributes ? file.attributes.href : undefined;
        if (!hrefAttr || hrefAttr.value.indexOf(".xls") === -1) {
            continue;
        }
        // NOTE(review): urlPrefix already ends with "/", so the result contains
        // "//" after the host. Left unchanged in case consumers key on the exact URL.
        var url = urlPrefix + "/" + hrefAttr.value;
        var text = file.textContent;
        // match() returns null (never undefined) when nothing matches, so try the
        // slash-separated form first and fall back to the dot-separated one.
        var m = text.match(/\d+\/(\d+)\/(\d+)/) || text.match(/\d+\.(\d+)\.(\d+)/);
        if (m === null) {
            console.log("Unable to parse a date for fund " + fund.number + " from link text: " + text);
            continue;
        }
        // Months 1-3 -> quarter 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        var month = parseInt(m[1].slice(0, 2), 10);
        var q = Math.floor((month - 1) / 3) + 1;
        processedFiles.push({ body: fund.body, number: fund.number, url: url, quarter: q, year: m[2] });
    }
    if (files.length === 0) {
        console.log("Unable to fetch fund: " + fund.number);
    }
    DEBUG("DONE " + fund.number);
    fc.fetchFunds(processedFiles, onDone);
};
exports.fetchOne = function(fund, onDone) {
DEBUG("START " + fund.number);
var browser = new zombie();
browser.debug = false;
browser.runScripts = false; // this disables executing javascript
browser.on("error", function(error) {
console.error("Error with fund " + fund.number + ": ", error);
});
var cont = true;
var fundName = "";
browser.visit(baseUrl).then(function() {
DEBUG("FIRST VISIT " + fund.number);
// Search by fund number.
browser.fill("input[name='SearchParam']", fund.number);
return browser.pressButton("#image1");
}).then(function() {
// Click find the right found link in the results list & click it.
DEBUG("AFTER SEARCH " + fund.number);
var links = browser.document.querySelectorAll("a.SearchResult");
for (var i = 0; i < links.length; ++i) {
var link = links[i];
if (link.childNodes.length > 0 && link.childNodes[0].nodeName == "#text" && link.childNodes[0].nodeValue.indexOf(fund.number) !== -1) {
fundName = link.childNodes[0].nodeValue.replace(fund.number.toString(), "");
return browser.clickLink(link);
}
}
console.log("Fund " + fund.number + " was not found via search, going to try url(" + fund.url + ") instead.");
return browser.visit(fund.url).then(function() {
// We could try to check if the page is "OK" here, but we don't have really anything to do if it isn't, so just try to parse it.
processsAssetsList(fund, browser, onDone, fund.url);
cont = false;
});
}).then(function() {
if (!cont) {
return;
}
// Go to assets list
DEBUG("FUND FOUND, GOING TO ASSETS LIST " + fund.number);
return browser.clickLink("רשימות נכסים");
}).then(function() {
if (!cont) {
return;
}
DEBUG("ASSETS LIST " + fund.number);
if (browser.location.href.indexOf("ArticleID") == -1) {
DEBUG("NOT FILE LIST, FIND THE RIGHT FUND");
var lists = browser.document.querySelectorAll("a.ArticlesListTitle");
var minDist = Number.MAX_VALUE;
var bestLink = null;
for (var id in lists) {
var list = lists[id];
if (list["attributes"] !== undefined && list.childNodes.length > 0) {
var cleanLink = list.childNodes[0].nodeValue.replace("רשימות", "").replace("נכסים", "");
var dist = levDist(fundName, cleanLink);
if (dist < minDist) {
minDist = dist;
bestLink = list;
}
}
}
if (bestLink === null) {
onDone();
console.log("****Couldn't find files list for fund ", fund.number, "*****");
} else {
return browser.clickLink(bestLink);
}
}
}).then(function() {
if (!cont) {
return;
}
DEBUG("YEAH! " + fund.number);
processsAssetsList(fund, browser, onDone);
});
};
/**
 * Emits the "END" trace marker through DEBUG. Takes no arguments and returns nothing.
 */
var endFunc = function() {
    var marker = "END";
    DEBUG(marker);
};
//exports.fetchOne({ number: 566});