Product Engineer, CTO & a Beer Enthusiast
Experiments, thoughts and scripts documented for posterity.
Feb 19, 2018
const chrome = require("chrome-aws-lambda");
const fetch = require("node-fetch");
// Choose which Puppeteer package to load from NODE_ENV. Both require() calls
// keep literal module names so bundlers/dependency tracers can still see them.
// NOTE(review): "puppeteer" is loaded in production and "puppeteer-core"
// otherwise; setups built on chrome-aws-lambda commonly do the opposite —
// confirm this mapping is intentional.
const isProd = process.env.NODE_ENV === "production";
const puppeteer = isProd ? require("puppeteer") : require("puppeteer-core");

// Publish node-fetch's Headers class as a global.
global.Headers = fetch.Headers;
module.exports = async function (req, res) {
let url = req.body.url;
let waitForDom = req.body.dom;
let waitTimeout = req.body.timeout || 3000;
let browser;
try {
browser = await puppeteer.launch({
args: chrome.args,
executablePath: await chrome.executablePath,
headless: true
});
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on("request", req => {
/**** ************** ****/
/**** CUSTOM HEADERS ****/
/**** ************** ****/
const headers = req.headers();
headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
headers["Content-Type"] = "application/x-www-form-urlencoded";
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11";
headers["Accept-Encoding"] = "gzip,deflate,sdch";
headers["Accept-Language"] = "en-GB,en-US;q=0.8,en;q=0.6";
headers["Accept-Charset"] = "ISO-8859-1,utf-8;q=0.7,*;q=0.3";
if (req.resourceType() == "stylesheet" || req.resourceType() == "font" || req.resourceType() == "image") req.abort();
else req.continue({ headers });
});
await page.goto(url);
try {
await page.waitForSelector(waitForDom, { timeout: waitTimeout });
const content = await page.content();
res.setHeader('content-type', 'text/html');
res.send(content);
} catch (e) {
return undefined;
}
} catch (e) {
res.statusCode = 500;
res.setHeader("Content-Type", "text/html");
res.end("<h1>Server Error</h1><p>Sorry, there was a problem</p>" + e);
} finally {
if (browser) browser.close();
}
};