node使用puppeteer生成pdf与截图

之前写过一篇 vue cli2 使用 wkhtmltopdf 踩坑指南，由于wkhtmltopdf对vue的支持并不友好，而且不支持css3，经过调研最终选择puppeteer，坑少，比较靠谱。

一、准备工作

puppeteer中文文档: https://zhaoqize.github.io/puppeteer-api-zh_CN/#/
node版本必须在10.18.1及以上
新建pdf.js
安装puppeteer：npm install puppeteer（这里用的是15.0.1版本，测试没问题）
需要生成pdf的html页面需要添加打印样式（不添加会导致背景色无法显示等问题）
```
 html {
  -webkit-print-color-adjust: exact;
}
```
cd到pdf.js所在的文件夹执行node pdf.js

补充有关打印样式的css编写（也可以用puppeteer控制生成的边距）：

 /* 媒体查询 */
@media print{
  .noprint{
    display:none;
  }
}
/* 打印页面边距设置 */
@page {
  margin: 2cm 0 0 0;
}
/* 第一页边距设置 */
@page :first {
  margin: 0;
}

二、常用案例

这里直接提供一些常用的生成pdf案例，比较简单，直接复制就能用

1. 通过设置token下载pdf的最简单使用方式

这种方式用于直接访问页面下载pdf，可以给页面所有http请求设置headers，直接执行 node pdf.js 即可生成pdf

pdf.js

 const puppeteer = require('puppeteer')
// Sample token value; it is attached as a header to every request the page makes.
const token = 'kjjkheyJzdWIiOiIxMDAwMDAwMDAwMDAxMjM0'

/**
 * Opens https://baidu.com in a headless browser, sending the SSO token header
 * with every request, and saves the rendered page as test.pdf (A4).
 *
 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
 */
async function printPDF() {
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    await page.setExtraHTTPHeaders({ 'uniedu-sso-token': token })
    await page.goto('https://baidu.com', { waitUntil: 'networkidle0' })
    await page.pdf({ path: 'test.pdf', format: 'A4' })
  } finally {
    // Close the browser even when navigation or printing fails; otherwise the
    // browser process keeps running and the script never exits.
    await browser.close()
  }
}

printPDF().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

2. 通过html字符串生成pdf

pdf.js

 const puppeteer = require('puppeteer')
// NOTE(review): the tags of this template were lost when the article was
// extracted (only the title text and the body text survived). This is a
// minimal reconstruction of the document skeleton - replace it with the real markup.
const html = `
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <title>Document</title>
  </head>
  <body>
    <div>页面Dom</div>
  </body>
</html>
`

/**
 * Renders the in-memory HTML string in a headless browser and saves it as
 * test.pdf (A4).
 *
 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
 */
async function printPDF() {
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    await page.setContent(html, { waitUntil: 'networkidle0' })
    await page.pdf({ path: 'test.pdf', format: 'A4' })
  } finally {
    // Always release the browser, even if rendering fails.
    await browser.close()
  }
}

printPDF().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

3. 简单封装node命令的形式并通过引入html文件生成pdf

首先需要安装 npm install minimist

这里的A3自定义了宽高，puppeteer也有自己默认的A3尺寸，具体详见官方文档page.pdf([options])

生成test.pdf可通过执行该命令 node pdf.js --format=A3 --htmlPath=./index.html --pdfPath=./test.pdf

 const puppeteer = require('puppeteer')
const fs = require('fs')
const args = require('minimist')(process.argv.slice(2))
const format = args['format']
const htmlPath = args['htmlPath']
const pdfPath = args['pdfPath']

// page.pdf() options for each supported --format value.
const pdfParams = {
  // Custom 420mm x 297mm sheet instead of puppeteer's built-in 'A3' format.
  'A3': {
    path: pdfPath,
    width: '420mm',
    height: '297mm',
    margin: {
      right: '0.1cm',
    }
  },
  'A4': {
    path: pdfPath,
    format: 'A4'
  }
}

/**
 * Loads the HTML file given by --htmlPath and prints it to --pdfPath.
 *
 * @param {string} [format='A4'] key of pdfParams selecting the page options
 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
 * @throws {Error} when the format is unknown or a required path argument is missing
 */
async function printPDF(format = 'A4') {
  // An unknown format would otherwise reach page.pdf() as undefined options,
  // which produces no output file and reports no error.
  if (!Object.prototype.hasOwnProperty.call(pdfParams, format)) {
    throw new Error(`Unsupported --format "${format}", expected one of: ${Object.keys(pdfParams).join(', ')}`)
  }
  if (!htmlPath || !pdfPath) {
    throw new Error('Both --htmlPath and --pdfPath are required')
  }
  // Read the input before launching so a bad path never starts a browser.
  const htmlContent = fs.readFileSync(htmlPath, 'utf-8')
  // The sandbox flags are recommended by the article when running on a server.
  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], headless: true })
  try {
    const page = await browser.newPage()
    await page.setContent(htmlContent, { waitUntil: 'networkidle0' })
    await page.pdf(pdfParams[format])
  } finally {
    await browser.close()
  }
}

printPDF(format).catch((err) => {
  console.error(err)
  process.exitCode = 1
})

4. 循环异步批量下载pdf

这里browser只需要打开一次就可以了，只需要每次跳转新页面下载pdf，这样可以不用频繁的开启关闭无头浏览器

 const puppeteer = require('puppeteer')
const fs = require('fs')
// presumably an array of token strings - confirm against the real tokens.json
const tokens = require('./tokens.json')

/**
 * Prints one PDF for one token, reusing an already opened page.
 *
 * @param {object} page  puppeteer page shared by all iterations
 * @param {string} token value sent as the 'uniedu-sso-token' request header
 * @param {number} index zero-based position, used for logging and the file name
 * @returns {Promise<void>}
 */
async function printPDF(page, token, index) {
  console.log(`第${index + 1}份正在打印……`)
  // The header must be set per document: each PDF belongs to a different token.
  await page.setExtraHTTPHeaders({ 'uniedu-sso-token': token })
  await page.goto('https://baidu.com', { waitUntil: 'networkidle0' })
  await page.pdf({ path: `./pdf/node${index + 1}.pdf`, format: 'A4' })
  console.log(`第${index + 1}份已经完成`)
}

/**
 * Launches the browser once and prints the PDFs one after another, so the
 * headless browser is not restarted for every document.
 *
 * @returns {Promise<void>}
 */
async function printAll() {
  console.log(`一共${tokens.length}份，正在打印中……`)
  // page.pdf() does not create missing directories.
  fs.mkdirSync('./pdf', { recursive: true })
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    for (let i = 0; i < tokens.length; i++) {
      await printPDF(page, tokens[i], i)
    }
  } finally {
    await browser.close()
  }
}

printAll().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

如果需要自动生成文件夹归类，可以用node的fs.existsSync和fs.mkdirSync方法，先判断有没有这个文件夹，没有则创建

 if(!fs.existsSync('dirName')) {
  fs.mkdirSync('dirName')
}

案例

users.json

 [
  {
    "accountId": 1000005000000010244,
    "className": "701班",
    "userName": "陈昱含",
    "token": "eyJhbGciOiJSUzI1NiJ9"
  },
  {
    "accountId": 1000005000000010245,
    "className": "701班",
    "userName": "韩艺臻",
    "token": "eyJhbGciOiJSUzI1NiJ9"
  },
  {
    "accountId": 1000005000000010246,
    "className": "702班",
    "userName": "范玥",
    "token": "eyJhbGciOiJSUzI1NiJ9"
  },
  {
    "accountId": 1000005000000010247,
    "className": "702班",
    "userName": "方泽清",
    "token": "eyJhbGciOiJSUzI1NiJ9"
  },
  {
    "accountId": 1000005000000010248,
    "className": "702班",
    "userName": "付明宇",
    "token": "eyJhbGciOiJSUzI1NiJ9"
  }
]

pdfs.js

 const fs = require('fs')
const puppeteer = require('puppeteer')
// NOTE(review): the accountId numbers in users.json are larger than
// Number.MAX_SAFE_INTEGER and lose precision when loaded; they are not used here.
const users = require('./users.json')

// Prints one "学情分析.pdf" per student into ./pdfs/<className>/<userName>/.
;(async () => {
  const pdfPath = './pdfs/'
  // Group the users by class and precompute each output directory.
  const nameArr = [...new Set(users.map(v => v.className))]
  const classes = nameArr.map(className => ({
    className,
    dirName: pdfPath + className,
    students: users.filter(v => v.className === className).map(v => ({ ...v, dirName: `${pdfPath + className}/${v.userName}` }))
  }))
  // One headless browser and one page are reused for every student.
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    let count = 0
    for (const cla of classes) {
      // recursive: also creates ./pdfs/ itself and is a no-op when the directory exists.
      fs.mkdirSync(cla.dirName, { recursive: true })
      for (const stu of cla.students) {
        fs.mkdirSync(stu.dirName, { recursive: true })
        await page.setExtraHTTPHeaders({ 'uniedu-sso-token': stu.token })
        await page.goto('http://localhost:8080/#/report?07=1000100000006089002', { waitUntil: 'networkidle0' })
        await page.pdf({ path: `${stu.dirName}/学情分析.pdf`, format: 'A4' })
        console.log(`第${++count}份 -> ${cla.className} -> ${stu.userName} -> 已完成……`)
      }
    }
  } finally {
    // Close the browser even if one of the reports fails.
    await browser.close()
  }
})().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

5. puppeteer生成截图

这里直接给出最近简单封装的node命令形式的代码作为参考，大部分参数可以参考官方文档

唯一值得说的一个参数是fitContent，这个是我自己加的，可以用于局部的截图，需要html的标签内含有screenshot这个id，说白了就是需要截图的元素用 `<div id="screenshot"></div>` 包裹起来

 const puppeteer = require('puppeteer')
const fs = require('fs')
const args = require('minimist')(process.argv.slice(2))

// minimist returns number-like values as numbers ("--clipX=0" -> 0) and a bare
// flag ("--fullPage") as boolean true. Options are therefore tested for
// presence / truth explicitly, not by truthiness or by comparing with 'true'.
const isSet = (value) => value !== undefined
const isTrue = (value) => value === true || value === 'true'

// Optional clip rectangle assembled from --clipX/--clipY/--clipW/--clipH.
const clip = {}
if (isSet(args.clipX)) clip.x = Number(args.clipX)
if (isSet(args.clipY)) clip.y = Number(args.clipY)
if (isSet(args.clipW)) clip.width = Number(args.clipW)
if (isSet(args.clipH)) clip.height = Number(args.clipH)

// page.screenshot() options; only options given on the command line are set.
const params = {}
if (args.imgPath) params.path = args.imgPath
if (args.type) params.type = args.type
if (isSet(args.quality)) params.quality = Number(args.quality)
if (isSet(args.fullPage)) params.fullPage = isTrue(args.fullPage)
if (isSet(args.omitBackground)) params.omitBackground = isTrue(args.omitBackground)
if (args.encoding) params.encoding = args.encoding
if (Object.keys(clip).length !== 0) params.clip = clip

/**
 * Renders the HTML file given by --htmlPath and takes a screenshot of it.
 * With --fitContent=true the screenshot is clipped to the bounding box of the
 * element whose id is "screenshot".
 *
 * @returns {Promise<void>} resolves once the screenshot is taken and the browser is closed
 * @throws {Error} when --fitContent is requested but #screenshot is missing or has no box
 */
async function printImg() {
  const htmlContent = fs.readFileSync(args.htmlPath, 'utf-8')
  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], headless: true })
  try {
    const page = await browser.newPage()
    await page.setContent(htmlContent, { waitUntil: 'networkidle0' })
    let options = params
    // Only look up #screenshot when it is needed, so pages without that
    // element can still be captured in full.
    if (isTrue(args.fitContent)) {
      const target = await page.$('#screenshot')
      if (!target) {
        throw new Error('--fitContent requires an element with id="screenshot" in the page')
      }
      const box = await target.boundingBox()
      if (!box) {
        throw new Error('#screenshot has no bounding box (element is not visible)')
      }
      options = { ...params, clip: box }
    }
    await page.screenshot(options)
  } finally {
    await browser.close()
  }
}

printImg().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
 
/*
  参数说明：
  htmlPath: html文件路径
  imgPath: 截图保存路径。截图图片类型将从文件扩展名推断出来。如果是相对路径，则从当前路径解析。如果没有指定路径，图片将不会保存到硬盘
  type: 指定截图类型, 可以是 jpeg 或者 png。默认 'png'.
  quality: 图片质量, 可选值 0-100. png 类型不适用。
  fullPage: 如果设置为true，则对完整的页面（需要滚动的部分也包含在内）。默认是false
  clipX: 指定裁剪区域相对于左上角（0， 0）的x坐标
  clipY: 指定裁剪区域相对于左上角（0， 0）的y坐标
  clipW: 指定裁剪区域的宽度
  clipH: 指定裁剪区域的高度
  omitBackground: 隐藏默认的白色背景，背景透明。默认不透明
  encoding: 图像的编码可以是 base64 或 binary。 默认为“二进制”。
  fitContent: 设为true，则只对id="screenshot"包裹的内容区域截图
*/
 
// node 命令示例
// node pdf.js --htmlPath=./index.html --imgPath=aa.png --fitContent=true

6. 调用本地chrome

puppeteer默认会安装一个最新版本的Chromium，也可以调起本地的chrome，这时候需要使用puppeteer-core

这个find_chrome.js在mac上测试没问题，但是windows上不好使，可以直接找到chrome安装路径直接复制过来，注意windows复制的路径是反斜杠要改成斜杠。

安装：npm i puppeteer-core carlo

 const puppeteer = require('puppeteer-core')
// The find_chrome module comes from GoogleChromeLabs' Carlo; it reports where
// Chrome is installed on this machine.
const findChrome = require('./node_modules/carlo/lib/find_chrome.js')

// Launches the locally installed Chrome (with a visible window) and opens Baidu.
;(async () => {
  const { executablePath } = await findChrome({})
  console.log(executablePath)

  const launchOptions = { executablePath, headless: false }
  const browser = await puppeteer.launch(launchOptions)

  const tab = await browser.newPage()
  await tab.goto('https://www.baidu.com/')

  // browser.close() is left disabled, so the browser stays open after the script finishes.
  // await browser.close()
})()

相关阅读:
centos(7.9) minikube(v1.28.0) kaniko 构建镜像
 【教学类】公开课学号挂牌（15*15CM手工纸）
jQuery
【JavaWeb】Servlet系列 --- HttpServlet【底层源码分析】
设计模式之十一：代理模式
 git基础操作命令
 PyTorch入门学习（八）：神经网络-卷积层
 外中断的应用
 17 wordcloud库的使用
 【智能优化算法-MOEA_D】基于MOEA_D求解联合经济排放调度(CEED)问题附matlab代码
原文地址：https://www.cnblogs.com/lwlblog/p/16636857.html

	/* 媒体查询 */
	@media print{
	.noprint{
	display:none;
	}
	}
	/* 打印页面边距设置 */
	@page {
	margin: 2cm 0 0 0;
	}
	/* 第一页边距设置 */
	@page :first {
	margin: 0;
	}

	const puppeteer = require('puppeteer')
	// Sample token value; it is attached as a header to every request the page makes.
	const token = 'kjjkheyJzdWIiOiIxMDAwMDAwMDAwMDAxMjM0'

	/**
	 * Opens https://baidu.com in a headless browser, sending the SSO token header
	 * with every request, and saves the rendered page as test.pdf (A4).
	 *
	 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
	 */
	async function printPDF() {
	  const browser = await puppeteer.launch({ headless: true })
	  try {
	    const page = await browser.newPage()
	    await page.setExtraHTTPHeaders({ 'uniedu-sso-token': token })
	    await page.goto('https://baidu.com', { waitUntil: 'networkidle0' })
	    await page.pdf({ path: 'test.pdf', format: 'A4' })
	  } finally {
	    // Close the browser even when navigation or printing fails; otherwise the
	    // browser process keeps running and the script never exits.
	    await browser.close()
	  }
	}

	printPDF().catch((err) => {
	  console.error(err)
	  process.exitCode = 1
	})

	const puppeteer = require('puppeteer')
	// NOTE(review): the tags of this template were lost when the article was
	// extracted (only the title text and the body text survived). This is a
	// minimal reconstruction of the document skeleton - replace it with the real markup.
	const html = `
	<!DOCTYPE html>
	<html lang="en">
	  <head>
	    <meta charset="UTF-8">
	    <title>Document</title>
	  </head>
	  <body>
	    <div>页面Dom</div>
	  </body>
	</html>
	`

	/**
	 * Renders the in-memory HTML string in a headless browser and saves it as
	 * test.pdf (A4).
	 *
	 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
	 */
	async function printPDF() {
	  const browser = await puppeteer.launch({ headless: true })
	  try {
	    const page = await browser.newPage()
	    await page.setContent(html, { waitUntil: 'networkidle0' })
	    await page.pdf({ path: 'test.pdf', format: 'A4' })
	  } finally {
	    // Always release the browser, even if rendering fails.
	    await browser.close()
	  }
	}

	printPDF().catch((err) => {
	  console.error(err)
	  process.exitCode = 1
	})

	const puppeteer = require('puppeteer')
	const fs = require('fs')
	const args = require('minimist')(process.argv.slice(2))
	const format = args['format']
	const htmlPath = args['htmlPath']
	const pdfPath = args['pdfPath']

	// page.pdf() options for each supported --format value.
	const pdfParams = {
	  // Custom 420mm x 297mm sheet instead of puppeteer's built-in 'A3' format.
	  'A3': {
	    path: pdfPath,
	    width: '420mm',
	    height: '297mm',
	    margin: {
	      right: '0.1cm',
	    }
	  },
	  'A4': {
	    path: pdfPath,
	    format: 'A4'
	  }
	}

	/**
	 * Loads the HTML file given by --htmlPath and prints it to --pdfPath.
	 *
	 * @param {string} [format='A4'] key of pdfParams selecting the page options
	 * @returns {Promise<void>} resolves once the PDF is written and the browser is closed
	 * @throws {Error} when the format is unknown or a required path argument is missing
	 */
	async function printPDF(format = 'A4') {
	  // An unknown format would otherwise reach page.pdf() as undefined options,
	  // which produces no output file and reports no error.
	  if (!Object.prototype.hasOwnProperty.call(pdfParams, format)) {
	    throw new Error(`Unsupported --format "${format}", expected one of: ${Object.keys(pdfParams).join(', ')}`)
	  }
	  if (!htmlPath || !pdfPath) {
	    throw new Error('Both --htmlPath and --pdfPath are required')
	  }
	  // Read the input before launching so a bad path never starts a browser.
	  const htmlContent = fs.readFileSync(htmlPath, 'utf-8')
	  // The sandbox flags are recommended by the article when running on a server.
	  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], headless: true })
	  try {
	    const page = await browser.newPage()
	    await page.setContent(htmlContent, { waitUntil: 'networkidle0' })
	    await page.pdf(pdfParams[format])
	  } finally {
	    await browser.close()
	  }
	}

	printPDF(format).catch((err) => {
	  console.error(err)
	  process.exitCode = 1
	})

	const puppeteer = require('puppeteer')
	const fs = require('fs')
	// presumably an array of token strings - confirm against the real tokens.json
	const tokens = require('./tokens.json')

	/**
	 * Prints one PDF for one token, reusing an already opened page.
	 *
	 * @param {object} page  puppeteer page shared by all iterations
	 * @param {string} token value sent as the 'uniedu-sso-token' request header
	 * @param {number} index zero-based position, used for logging and the file name
	 * @returns {Promise<void>}
	 */
	async function printPDF(page, token, index) {
	  console.log(`第${index + 1}份正在打印……`)
	  // The header must be set per document: each PDF belongs to a different token.
	  await page.setExtraHTTPHeaders({ 'uniedu-sso-token': token })
	  await page.goto('https://baidu.com', { waitUntil: 'networkidle0' })
	  await page.pdf({ path: `./pdf/node${index + 1}.pdf`, format: 'A4' })
	  console.log(`第${index + 1}份已经完成`)
	}

	/**
	 * Launches the browser once and prints the PDFs one after another, so the
	 * headless browser is not restarted for every document.
	 *
	 * @returns {Promise<void>}
	 */
	async function printAll() {
	  console.log(`一共${tokens.length}份，正在打印中……`)
	  // page.pdf() does not create missing directories.
	  fs.mkdirSync('./pdf', { recursive: true })
	  const browser = await puppeteer.launch({ headless: true })
	  try {
	    const page = await browser.newPage()
	    for (let i = 0; i < tokens.length; i++) {
	      await printPDF(page, tokens[i], i)
	    }
	  } finally {
	    await browser.close()
	  }
	}

	printAll().catch((err) => {
	  console.error(err)
	  process.exitCode = 1
	})

	[
	{
	"accountId": 1000005000000010244,
	"className": "701班",
	"userName": "陈昱含",
	"token": "eyJhbGciOiJSUzI1NiJ9"
	},
	{
	"accountId": 1000005000000010245,
	"className": "701班",
	"userName": "韩艺臻",
	"token": "eyJhbGciOiJSUzI1NiJ9"
	},
	{
	"accountId": 1000005000000010246,
	"className": "702班",
	"userName": "范玥",
	"token": "eyJhbGciOiJSUzI1NiJ9"
	},
	{
	"accountId": 1000005000000010247,
	"className": "702班",
	"userName": "方泽清",
	"token": "eyJhbGciOiJSUzI1NiJ9"
	},
	{
	"accountId": 1000005000000010248,
	"className": "702班",
	"userName": "付明宇",
	"token": "eyJhbGciOiJSUzI1NiJ9"
	}
	]

	const fs = require('fs')
	const puppeteer = require('puppeteer')
	// NOTE(review): the accountId numbers in users.json are larger than
	// Number.MAX_SAFE_INTEGER and lose precision when loaded; they are not used here.
	const users = require('./users.json')

	// Prints one "学情分析.pdf" per student into ./pdfs/<className>/<userName>/.
	;(async () => {
	  const pdfPath = './pdfs/'
	  // Group the users by class and precompute each output directory.
	  const nameArr = [...new Set(users.map(v => v.className))]
	  const classes = nameArr.map(className => ({
	    className,
	    dirName: pdfPath + className,
	    students: users.filter(v => v.className === className).map(v => ({ ...v, dirName: `${pdfPath + className}/${v.userName}` }))
	  }))
	  // One headless browser and one page are reused for every student.
	  const browser = await puppeteer.launch({ headless: true })
	  try {
	    const page = await browser.newPage()
	    let count = 0
	    for (const cla of classes) {
	      // recursive: also creates ./pdfs/ itself and is a no-op when the directory exists.
	      fs.mkdirSync(cla.dirName, { recursive: true })
	      for (const stu of cla.students) {
	        fs.mkdirSync(stu.dirName, { recursive: true })
	        await page.setExtraHTTPHeaders({ 'uniedu-sso-token': stu.token })
	        await page.goto('http://localhost:8080/#/report?07=1000100000006089002', { waitUntil: 'networkidle0' })
	        await page.pdf({ path: `${stu.dirName}/学情分析.pdf`, format: 'A4' })
	        console.log(`第${++count}份 -> ${cla.className} -> ${stu.userName} -> 已完成……`)
	      }
	    }
	  } finally {
	    // Close the browser even if one of the reports fails.
	    await browser.close()
	  }
	})().catch((err) => {
	  console.error(err)
	  process.exitCode = 1
	})

	const puppeteer = require('puppeteer-core')
	// The find_chrome module comes from GoogleChromeLabs' Carlo; it reports where
	// Chrome is installed on this machine.
	const findChrome = require('./node_modules/carlo/lib/find_chrome.js')

	// Launches the locally installed Chrome (with a visible window) and opens Baidu.
	;(async () => {
	  const { executablePath } = await findChrome({})
	  console.log(executablePath)

	  const launchOptions = { executablePath, headless: false }
	  const browser = await puppeteer.launch(launchOptions)

	  const tab = await browser.newPage()
	  await tab.goto('https://www.baidu.com/')

	  // browser.close() is left disabled, so the browser stays open after the script finishes.
	  // await browser.close()
	})()

	html {
	-webkit-print-color-adjust: exact;
	}