-
-
Notifications
You must be signed in to change notification settings - Fork 16
/
filter-urls-not-in-changelog.js
executable file
·216 lines (171 loc) · 5.6 KB
/
filter-urls-not-in-changelog.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env node
// TODO: add an option that allows us to choose whether we filter the json (as json), or just extract the URLs from it (and return them as newline text)
const { parseArgs } = require("util");
const fs = require("fs");
const path = require("path");
const { createInterface } = require("readline");
const { once } = require("events");
// Changelog files whose contents are searched for URLs that were already recorded.
const changelogFilenames = [
  "CHANGELOG.md",
  "CHANGELOG-2023.md",
];

// Prefixes that are swapped for one another when looking a URL up in the changelogs.
const urlPrefixes = [
  "https://chat.openai.com/",
  "https://cdn.oaistatic.com/",
];

// Every URL emitted so far during this run; used to drop duplicates.
const processedUrls = new Set();
/**
 * Print the command-line usage summary to stdout, one line per option.
 *
 * @param {string} scriptName - Name of the script, interpolated into the usage line.
 */
const displayUsage = (scriptName) => {
  const helpLines = [
    `Usage: ${scriptName} [--json] [--json-with-urls]`,
    ' --json: Process input as JSON array of objects with "url" keys',
    ' --json-with-urls: Output both the filtered JSON and a list of URLs [implies --json]',
    "-h, --help: Display this help message",
  ];

  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
};
/**
 * Parse the command-line flags of this script.
 *
 * Unknown flags or positional arguments print the usage text plus an error
 * and terminate the process with status 1; `-h`/`--help` prints the usage
 * text and terminates with status 0.
 *
 * @returns {{ isJsonMode: boolean, isJsonWithUrlsMode: boolean }} The
 *   effective modes; `isJsonMode` is also true whenever `--json-with-urls`
 *   was given.
 */
const parseArguments = () => {
  const scriptName = path.basename(process.argv[1]);

  const cliOptions = {
    json: { type: "boolean", default: false },
    "json-with-urls": { type: "boolean", default: false },
    help: { type: "boolean", short: "h" },
  };

  let parsedArgs;
  try {
    parsedArgs = parseArgs({
      strict: true,
      allowPositionals: false,
      options: cliOptions,
    });
  } catch (error) {
    displayUsage(scriptName);
    console.error("\nError: Invalid arguments provided.", error.message);
    process.exit(1);
  }

  const { values } = parsedArgs;

  if (values.help) {
    displayUsage(scriptName);
    process.exit(0);
  }

  const isJsonWithUrlsMode = values["json-with-urls"];
  // --json-with-urls implies --json
  const isJsonMode = values.json || isJsonWithUrlsMode;

  return { isJsonMode, isJsonWithUrlsMode };
};
/**
 * Read standard input to the end and return it split into lines.
 *
 * @returns {Promise<string[]>} Every line received on stdin, in order,
 *   resolved once the readline interface emits "close".
 */
const readAllStdin = async () => {
  const collectedLines = [];

  const readerOptions = {
    input: process.stdin,
    output: process.stdout,
    terminal: false,
    crlfDelay: Infinity,
  };
  const reader = createInterface(readerOptions);

  reader.on("line", (text) => {
    collectedLines.push(text);
  });

  await once(reader, "close");
  return collectedLines;
};
/**
 * Script entry routine: read stdin, drop every URL that is invalid, repeated,
 * or already mentioned in the changelogs, and print what remains.
 *
 * - Plain mode: stdin is one URL per line; surviving URLs are printed one per line.
 * - JSON mode (`--json`): stdin is a JSON array of objects with a `url` key;
 *   the filtered array is printed as JSON.
 * - `--json-with-urls`: as JSON mode, followed by the surviving URLs one per line.
 *
 * Exits with status 1 when the input looks like JSON but `--json` was not given.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const { isJsonMode, isJsonWithUrlsMode } = parseArguments();
  const inputData = await readAllStdin();

  if (!isJsonMode && looksLikeJson(inputData)) {
    console.error(
      "Error: Input data looks like JSON. Did you forget to use the --json flag?"
    );
    process.exit(1);
  }

  const combinedChangelogContent =
    loadAndCombineChangelogContent(changelogFilenames);

  if (!isJsonMode) {
    const outputUrls = inputData
      .map((url) => processUrl(url, combinedChangelogContent))
      .filter((url) => !!url);
    printUrls(outputUrls);
    return;
  }

  const filteredItems = processJsonInput(inputData, combinedChangelogContent);

  // Serialize explicitly: console.log(object) prints Node's inspect format
  // (single quotes, unquoted keys, truncated nesting), which is not valid JSON.
  console.log(JSON.stringify(filteredItems, null, 2));

  if (isJsonWithUrlsMode) {
    printUrls(filteredItems.map(({ url }) => url));
  }
};
/**
 * Read the given changelog files from the directory above this script and
 * concatenate their contents.
 *
 * @param {string[]} filenames - File names, resolved against the parent of `__dirname`.
 * @returns {string} The file contents joined with "\n"; a file that does not
 *   exist contributes an empty string.
 */
const loadAndCombineChangelogContent = (filenames) => {
  const parentDirectory = path.join(__dirname, "..");
  const contents = [];

  for (const filename of filenames) {
    const changelogPath = path.join(parentDirectory, filename);
    if (fs.existsSync(changelogPath)) {
      contents.push(fs.readFileSync(changelogPath, "utf8"));
    } else {
      contents.push("");
    }
  }

  return contents.join("\n");
};
/**
 * Heuristically decide whether the input lines contain JSON.
 *
 * The first line that is neither blank nor a `//` / `#` comment is inspected;
 * the input counts as JSON when that line, ignoring surrounding whitespace,
 * starts with `{` or `[`.
 *
 * @param {string[]} inputData - Input split into lines.
 * @returns {boolean} True when the first meaningful line opens a JSON object
 *   or array; false otherwise, including when there is no meaningful line.
 */
const looksLikeJson = (inputData) => {
  for (const line of inputData) {
    const trimmedLine = line.trim();
    const isBlankOrComment =
      trimmedLine === "" ||
      trimmedLine.startsWith("//") ||
      trimmedLine.startsWith("#");
    if (isBlankOrComment) continue;

    // Test the trimmed text so that indented JSON is detected as well.
    return trimmedLine.startsWith("{") || trimmedLine.startsWith("[");
  }
  return false;
};
/**
 * Filter a JSON array of objects (given as input lines) down to the entries
 * whose `url` passes `processUrl`.
 *
 * A warning is printed when the input does not look like JSON; parsing is
 * still attempted.
 *
 * @param {string[]} inputLines - Lines that together form the JSON document.
 * @param {string} combinedChangelogContent - Text searched for known URLs.
 * @returns {object[]} Copies of the surviving entries with `url` replaced by
 *   the value `processUrl` returned.
 * @throws {SyntaxError} When the joined input is not valid JSON.
 * @throws {Error} When the parsed JSON is not an array.
 */
const processJsonInput = (inputLines, combinedChangelogContent) => {
  if (!looksLikeJson(inputLines)) {
    console.warn(
      "Warning: Input data doesn't look like JSON. Are you sure you want to use --json mode on this data?"
    );
  }

  const parsedItems = JSON.parse(inputLines.join("\n"));
  if (!Array.isArray(parsedItems)) {
    throw new Error("JSON input is not an array");
  }

  // TODO: add an option to choose between filtering the JSON (as JSON) and
  // only extracting its URLs (returned as newline-separated text)
  const keptItems = [];
  for (const item of parsedItems) {
    const url = processUrl(item?.url, combinedChangelogContent);
    if (url) {
      keptItems.push({ ...item, url });
    }
  }
  return keptItems;
};
/**
 * Decide whether a single URL should be emitted.
 *
 * @param {*} inputUrl - Candidate URL.
 * @param {string} combinedChangelogContent - Text searched for known URLs.
 * @returns {string|null} The cleaned URL, or null when the input is not a
 *   valid URL, was already emitted during this run, or one of its prefix
 *   variations occurs in the changelog content.
 */
const processUrl = (inputUrl, combinedChangelogContent) => {
  if (!isValidUrl(inputUrl)) {
    return null;
  }

  const url = cleanUrl(inputUrl);
  if (processedUrls.has(url)) {
    return null;
  }

  for (const variation of generateUrlVariations(url, urlPrefixes)) {
    if (combinedChangelogContent.includes(variation)) {
      return null;
    }
  }

  // Remember the URL so a later duplicate is dropped
  processedUrls.add(url);
  return url;
};
/**
 * Check whether a value is a parseable absolute http(s) URL.
 *
 * @param {*} url - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} True only for strings that parse as a URL whose scheme
 *   is exactly `http:` or `https:`.
 */
const isValidUrl = (url) => {
  // Reject non-strings up front: values such as arrays can stringify to a
  // parseable URL but have no string methods.
  if (typeof url !== "string" || !URL.canParse(url)) return false;

  // Compare the parsed scheme instead of a text prefix so that look-alike
  // schemes (e.g. "httpx:") are not accepted.
  const { protocol } = new URL(url);
  return protocol === "http:" || protocol === "https:";
};
/**
 * Normalize a URL and strip its query string.
 *
 * @param {string} url - An absolute URL.
 * @returns {string} The serialized URL with the query string removed.
 * @throws {TypeError} When `url` cannot be parsed.
 */
const cleanUrl = (url) => {
  const withoutQuery = new URL(url);
  withoutQuery.search = "";
  return withoutQuery.toString();
};
/**
 * Build the forms under which a URL may appear when only its prefix differs.
 *
 * A leading known prefix is stripped (when several match, the last one in
 * `prefixes` wins) and the remainder is re-attached to every prefix. When the
 * trimmed input is not among those results - i.e. it starts with none of the
 * prefixes - it is added as well, so it can still be matched verbatim.
 *
 * @param {string} inputUrl - URL or path; surrounding whitespace is ignored.
 * @param {string[]} prefixes - Interchangeable URL prefixes.
 * @returns {string[]} One entry per prefix, preceded by the trimmed input
 *   itself when it is not already one of them.
 */
const generateUrlVariations = (inputUrl, prefixes) => {
  const url = inputUrl.trim();

  // Remove any existing prefix from the URL
  const urlWithoutPrefixes = prefixes.reduce(
    (acc, prefix) => (url.startsWith(prefix) ? url.slice(prefix.length) : acc),
    url
  );

  const variations = prefixes.map((prefix) => `${prefix}${urlWithoutPrefixes}`);

  // Without this, a URL under an unknown prefix would never be compared as-is.
  return variations.includes(url) ? variations : [url, ...variations];
};
/**
 * Write each truthy entry of `urls` to stdout on its own line.
 *
 * @param {Array} urls - Values to print; falsy entries are skipped.
 */
const printUrls = (urls) => {
  for (const entry of urls) {
    if (!entry) continue;
    console.log(entry);
  }
};
// Entry point: run the script and report any unhandled failure.
main().catch((err) => {
  console.error("error:", err);
  // Make the failure visible to the calling shell or pipeline; setting
  // exitCode (rather than calling process.exit) lets pending output flush.
  process.exitCode = 1;
});