代码如下,功能是抓取 jandan 段子区的段子吐槽。由于段子的吐槽是分页的,会有个“点击加载更多”的按钮,要判断该按钮是否存在,存在则持续点击。遇到的问题是,要判断按钮是否存在,要用到 document 对象,但 document 对象只存在于page.evaluate()
内;而如果放在page.evaluate()
里判断按钮是否存在,存在则点击,点击要用到page.click(selector)
,而page
对象是又不存在于page.evaluate()
里,该怎么处理?
PS: 案例用了 jandan,如果冒犯到站长或者蛋友,还请包涵,只是测试不做他用。
const puppeteer = require('puppeteer');
const chalk = require('chalk');
(async() => {
const browser = await puppeteer.launch({
executablePath: '/Applications/Chromium.app/Contents/MacOS/Chromium',
headless: true,
slowMo: 200,
ignoreHTTPSErrors: true,
timeout: 10000
});
console.log(chalk.green('服务正常启动'));
try {
const page = await browser.newPage();
page.on('console', msg => {
if (typeof msg === 'object') {
console.dir(msg);
} else {
console.log(chalk.blue(msg));
}
});
// 进入页面
await page.goto('https://jandan.net/duan/page-94#comments');
const commentBtn = '.tucao-btn';
await page.click(commentBtn);
const tucao = '.jandan-tucao';
const tucao_hot = '.tucao-hot';
const tucao_list = '.tucao-list';
const tucao_more = 'div.jandan-tucao-more:not([style])';
await page.waitForSelector(tucao);
const cmts = await page.evaluate( (selector, more) => {
const tucaos = Array.from(document.querySelector(selector).querySelectorAll('.tucao-row'));
return tucaos.map(comment => {
const author = comment.querySelector('.tucao-author').textContent;
const content = comment.querySelector('.tucao-content').textContent.trim();
const oo = comment.querySelector('.tucao-oo').textContent;
const xx = comment.querySelector('.tucao-xx').textContent;
return `${author} oo[${oo}] xx[${xx}]: \n${content}\n`;
});
}, tucao_list, tucao_more);
console.log(cmts.join('\n'));
await browser.close();
console.log(chalk.green('服务正常结束'));
} catch (error) {
console.log(error);
console.log(chalk.red('服务意外终止'));
await browser.close();
} finally {
process.exit(0);
}
})();
1
xuyl OP ```
while ( document.querySelector(tucao_more) ) { page.click(tucao_more); page.waitFor(1000); } ``` 就是这段代码放哪里的问题 |
2
dd0754 2018-04-20 21:13:41 +08:00
await page.goto('https://jandan.net/duan/page-94#comments', {
timeout: 10000, waitUntil: 'domcontentloaded', }); const commentBtn = '.tucao-btn'; let length = await page.evaluate(commentBtn => { let btns = document.querySelectorAll(commentBtn); btns.forEach(el => el.click()); return btns.length; }, commentBtn); console.log(length); |