Python操作Office
# 修改Excel
# 示例代码
# https://blog.csdn.net/sinat_28576553/article/details/81275650
import openpyxl
filepath = "D:\\Desktop\\基础信息(2)(1).xlsx"
#获取 工作簿对象
workbook=openpyxl.load_workbook(filepath)
#与xlrd 模块的区别
#wokrbook=xlrd.open_workbook(""DataSource\Economics.xls)
#获取工作簿 workbook的所有工作表
# shenames=workbook.get_sheet_names() # wb['Sheet'] #通过名称获取工作薄
# print(shenames) #['各省市', '测试表']
#在xlrd模块中为 sheetnames=workbook.sheet_names()
#使用上述语句会发出警告:DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).
#说明 get_sheet_names已经被弃用 可以改用 wb.sheetnames 方法
shenames=workbook.sheetnames
# print(shenames) #['各省市', '测试表']
"""
for sheetName in shenames:
# print(sheetName)
#获得工作簿的表名后,就可以获得表对象
worksheet=workbook.get_sheet_by_name(sheetName) # workbook[shenames[1]]
"""
worksheet=workbook.get_sheet_by_name("单位基本信息表")
print(worksheet)
rows=worksheet.max_row
columns=worksheet.max_column
print(rows,columns)
#设置第一列的宽度
#sheet.column_dimensions['A'].width = 20.0
# #设置第一行的高度
# sheet.row_dimensions[1].height = 25.0
#设置(1,1)的单元格的颜色为8E236B, 填充方式用solid(纯色)
sheet.cell(1,1).fill=PatternFill(fill_type="solid",start_color="8E236B")
#在第一列之前插入一列
worksheet.insert_cols(1) #
worksheet.insert_rows(7)#在第行前面插入一行
# 合并单元格
fanwei = "'A{}:B{}'".format('1','2')
sheet.merge_cells('A1:B2') #这样可以
sheet.merge_cells(fanwei) #这样不行
sheet.merge_cells('A{}:B{}'.format(1,2)) #这样可以?神奇
sheet.merge_cells('A{}:B{}'.format('1','2')) #这样也行
from openpyxl.styles import PatternFill, Border, Side, Font, Alignment
# 字体
font = Font(size=18, bold=True, color="1874CD")
sheet.cell(row=1, column=1).font = font
# 边框
thin = Side(border_style="thin", color="000000")
border = Border(top=thin, left=thin, right=thin, bottom=thin)
sheet.cell(row=2, column=2).border = border
# 对齐方式
sheet.cell(row=2, column=2).alignment = Alignment(horizontal='center', vertical='center')
# 自动换行
worksheet.cell(row=1, column=1, value=value).alignment = Alignment(wrapText=True)
wk_sheet.cell(row=2,column=2,value='大区') #在第二行,第二列下入“大区”数值
wb.close()#关闭
import openpyxl
from openpyxl.styles import Border, Side, Font, Alignment
file = "07决算和报告.xlsx"
filepath = "D:\\Desktop\\技术标准excel\\" + file
topath = "D:\\Desktop\\技术标准excel\\new\\test.xlsx" # + file
#获取 工作簿对象
workbook=openpyxl.load_workbook(filepath)
#获取工作簿 workbook的所有工作表
shenames=workbook.sheetnames
for sheetName in shenames:
#获得工作簿的表名后,就可以获得表对象
worksheet=workbook.get_sheet_by_name(sheetName) # workbook[shenames[1]]
# 插入两行并修改行高
worksheet.insert_rows(2) #在第行前面插入一行
worksheet.insert_rows(2) #在第行前面插入一行
# 边框
thin = Side(border_style="thin", color="000000")
border = Border(top=thin, left=thin, right=thin, bottom=thin)
worksheet.cell(row=2, column=1).border = border
worksheet.cell(row=3, column=1).border = border
# 字体
font = Font(size=16, bold=True, color="000000")
font2 = Font(size=10.5, bold=True, color="000000")
worksheet.cell(row=2, column=1).font = font
worksheet.cell(row=3, column=1).font = font
worksheet.cell(row=4, column=9).font = font2
worksheet.cell(row=4, column=10).font = font2
worksheet.cell(row=4, column=11).font = font2
# 自动换行
worksheet.cell(row=4, column=9).alignment = Alignment(horizontal='center', vertical='center', wrapText=True)
worksheet.cell(row=4, column=10).alignment = Alignment(horizontal='center', vertical='center', wrapText=True)
worksheet.cell(row=4, column=11).alignment = Alignment(horizontal='center', vertical='center', wrapText=True)
worksheet.cell(row=2, column=1).alignment = Alignment(horizontal='center', vertical='center', wrapText=True)
worksheet.cell(row=3, column=1).alignment = Alignment(horizontal='center', vertical='center', wrapText=True)
worksheet.row_dimensions[2].height = 37.2 #设置第行的高度
worksheet.row_dimensions[3].height = 37.2 #设置第行的高度
worksheet.row_dimensions[4].height = 67.1 #设置第行的高度
worksheet.column_dimensions['I'].width = 30.0
worksheet.column_dimensions['J'].width = 30.0
worksheet.column_dimensions['K'].width = 35.0
#worksheet.merge_cells('A1:K1')
worksheet.merge_cells('A2:K2')
worksheet.merge_cells('A3:K3')
worksheet.cell(row=2,column=1,value='请结合系统实现,详细描述本表的使用说明(业务场景、业务环节对表的具体操作、与其他表的配合使用、存储的具体内容):')
worksheet.cell(row=3,column=1,value='请填写是否有其他需要修改的内容:')
worksheet.cell(row=4,column=9,value='是否必填(数据库设置要求)')
worksheet.cell(row=4,column=10,value='结合系统实现,详细描述字段的使用说明')
worksheet.cell(row=4,column=11,value='对字段的修改建议(1是如建议“新增”或“删除”字段,请注明“新增”或“删除”。2是对已有字段长度、值域、备注等信息提出修改建议)')
for row in range(4, worksheet.max_row + 1) :
worksheet.cell(row=row, column=9).border = border
worksheet.cell(row=row, column=10).border = border
worksheet.cell(row=row, column=11).border = border
workbook.save(topath)
workbook.close()#关闭
# 读写Word
前提:
# 导入包
from docx import Document
from docx.shared import Inches #英寸
# 文本
- 文本操作(基础)
# 在末尾添加段落
paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
# 在标识之前添加段落
prior_paragraph = paragraph.insert_paragraph_before('Lorem ipsum')
# 添加标题(默认1级)
document.add_heading('The REAL meaning of the universe')
# 添加2级标题
document.add_heading('The role of dolphins', level=2)
# 添加分页符
document.add_page_break()
- 段落样式
# 添加段落时设置样式
document.add_paragraph('Lorem ipsum dolor sit amet.', style='ListBullet')
# 添加段落后设置样式
paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
paragraph.style = 'List Bullet'
# 通过run的两个属性来设置粗体和斜体
paragraph = document.add_paragraph('Lorem ipsum ')
run = paragraph.add_run('dolor')
run.bold = True # == paragraph.add_run('dolor').bold = True
paragraph.add_run(' sit amet.')
- 字体样式
paragraph = document.add_paragraph('Normal text, ')
paragraph.add_run('text with emphasis.', 'Emphasis')
# 等价于
paragraph = document.add_paragraph('Normal text, ')
run = paragraph.add_run('text with emphasis.')
run.style = 'Emphasis'
# 表格
- 表格操作(基础)
# 添加一个2行2列的表格
table = document.add_table(rows=2, cols=2)
# 获取单元格
cell = table.cell(0, 1)
# 单元格赋值
cell.text = 'parrot, possibly dead'
# 获取一行
row = table.rows[1]
# 添加一行
row = table.add_row()
# 添加样式
table.style = 'LightShading-Accent1'
示例:将数组的数据放入表格
# get table data -------------
items = (
(7, '1024', 'Plush kittens'),
(3, '2042', 'Furbees'),
(1, '1288', 'French Poodle Collars, Deluxe'),
# add table ------------------
table = document.add_table(1, 3)
# populate header row --------
heading_cells = table.rows[0].cells
heading_cells[0].text = 'Qty'
heading_cells[1].text = 'SKU'
heading_cells[2].text = 'Description'
# add a data row for each item
for item in items:
cells = table.add_row().cells
cells[0].text = str(item.qty)
cells[1].text = item.sku
cells[2].text = item.desc
- 读取表格
path = "text\\预算管理一体化地债系统数据库设计(代码集).docx"
document = Document(path) # 读入文件
tables = document.tables # 获取文件中的表格集
table = tables[1] # 获取文件中的第一个表格
# 遍历表格内容
for i in range(0, len(table.rows)): # 从表格第一行开始循环读取表格数据
for j in range(0, len(table.columns)):
print(table.cell(i,j).text, end = '\t')
print('', end = '\n')
# 图片
- 图片操作(基础)
# 添加图片
document.add_picture('image-filename.png')
# 添加图片时设置图片大小
document.add_picture('image-filename.png', width=Inches(1.0))
# 文档
- 文档操作(基础)
# 新建文件
from docx import Document
document = Document()
document.save('test.docx')
# 流式读写???
f = open('foobar.docx', 'rb')
document = Document(f)
f.close()
with open('foobar.docx', 'rb') as f:
source_stream = StringIO(f.read())
document = Document(source_stream)
source_stream.close()
...
target_stream = StringIO()
document.save(target_stream)
- 文本对齐
# 从父级元素继承
from docx.enum.text import WD_ALIGN_PARAGRAPH
document = Document()
paragraph = document.add_paragraph()
paragraph_format = paragraph.paragraph_format
paragraph_format.alignment
# None# indicating alignment is inherited from the style hierarc
# 从对象获取值
paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
paragraph_format.alignment
# CENTER (1)
- 左右(首行)缩进
# 左缩进
from docx.shared import Inches
paragraph = document.add_paragraph()
paragraph_format = paragraph.paragraph_format
paragraph_format.left_indent
# None # indicating indentation is inherited from the style hierarchy
paragraph_format.left_indent = Inches(0.5)
paragraph_format.left_indent
# 457200
paragraph_format.left_indent.inches
# 0.5
# 右缩进
from docx.shared import Pt
paragraph_format.right_indent
# None
paragraph_format.right_indent = Pt(24)
paragraph_format.right_indent
# 304800
paragraph_format.right_indent.pt
# 24.0
# 首行缩进
paragraph_format.first_line_indent
#None
paragraph_format.first_line_indent = Inches(-0.25)
paragraph_format.first_line_indent
# -228600
paragraph_format.first_line_indent.inches
# -0.25
- 制表符(见文档)
- 段间距(见文档)
- 行间距(见文档)
- 分页(见文档)
- 字体格式和颜色(见文档)
from docx import Document
document = Document()
run = document.add_paragraph().add_run()
font = run.font
from docx.shared import Pt
font.name = 'Calibri'
font.size = Pt(12)
- sections(见文档)
- 页眉页脚(见文档)
# 按顺序读取doc文档
import os
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
def iter_block_items(parent):
"""
Yield each paragraph and table child within *parent*, in document order.
Each returned value is an instance of either Table or Paragraph. *parent*
would most commonly be a reference to a main Document object, but
also works for a _Cell object, which itself can contain paragraphs and tables.
"""
if isinstance(parent, Document):
parent_elm = parent.element.body
elif isinstance(parent, _Cell):
parent_elm = parent._tc
else:
raise ValueError("something's not right")
for child in parent_elm.iterchildren():
if isinstance(child, CT_P):
yield Paragraph(child, parent)
elif isinstance(child, CT_Tbl):
yield Table(child, parent)
def read_table(table):
return [[cell.text for cell in row.cells] for row in table.rows]
def read_word(word_path):
doc = docx.Document(word_path)
for block in iter_block_items(doc):
if isinstance(block, Paragraph):
print("text", [block.text])
elif isinstance(block, Table):
print("table", read_table(block))
if __name__ == '__main__':
ROOT_DIR_P = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) # 项目根目录
word_path = os.path.join(ROOT_DIR_P, "data/test_to_word.docx") # pdf文件路径及文件名
# word_path = os.path.join(ROOT_DIR_P, "data/test_to_word2.docx") # pdf文件路径及文件名
read_word(word_path)
# 按模版生成Word文档
# docxtpl
第三方库官网 (opens new window) (遵循 jinja2 模板的语法)
参考模版 python-docx-template (opens new window)
测试脚本
import json
from docxtpl import DocxTemplate
# 指定JSON文件的路径
json_file_path = 'conf/doc_data.json'
res_doc_path = "files/my_word_template_res.docx"
doc = DocxTemplate("conf/my_word_template.docx")
# 读取JSON文件并将其内容解析为字典
with open(json_file_path, 'r', encoding='utf-8') as file:
context = json.load(file)
doc.render(context)
doc.save(res_doc_path)
测试数据
{
"name": "名称",
"rtext": "富文本__\n_&",
"special": "R('转义_ _<>_&')",
"fruits": ["Apple", "Banana", "Cherry"],
"display_paragraph":true,
"Paragraphs" : [
{
"name": "段落 2.1",
"table_name": "table_name1",
"table_fields": ["id", "name", "age", "gender"],
"table_data": [
{"id": "1", "name": "张三", "age": "20", "gender": "男"},
{"id": "2", "name": "李四", "age": "21", "gender": "女"},
{"id": "3", "name": "王五", "age": "22", "gender": "男"}
]
},
{"name":"段落 2.2"}
]
}
测试模版(word)
------------- 循环
{% for fruit in fruits %}
- {{ fruit }}
{% endfor %}
------------- 段落
{% set paragraph %}
This is a new paragraph that we want to insert. It can contain multiple sentences.
{% endset %}
{{ paragraph }}
------------- 判断
{%p if display_paragraph %}
Here is my paragraph
{%p endif %}
------------- 拼接行
1111
{%- if display_paragraph -%}
2222
{%- else -%}
3333
{%- endif -%}
9999
------------- 循环段落(嵌套内容)
{% for Paragraph in Paragraphs %}
{{ Paragraph.name }}
{{ Paragraph.table_name }}
id name age Gender
{%tr for row in Paragraph. table_data %}
{{ row.id }} {{ row.name }} {{ row.age }} {{ row.gender }}
{%tr endfor %}
{% endfor %}
# 按模版生成 Excel 文档
# xlsxtpl
第三方库官网 (opens new window)
xlsxtpl 使用 jinja2 作为其模板引擎,遵循 jinja2 模板的语法。
# 使用 Python + Mermaid 绘图并插入 Word 文档
使用 nodejs 安装 mermaid-cli,通过 Python 代码调用客户端生成图片保存到磁盘,再将磁盘图片插入到 Word 文档
- 安装 mermaid-cli
npm install -g @mermaid-js/mermaid-cli
- 代码示例
import subprocess
from docx import Document
from docx.shared import Inches
import os
# 你的Mermaid代码
mermaid_code = """
graph TD
A[Christmas] -->|Get money| B(Go shopping)
B --> C{Let me think}
C -->|One| D[Laptop]
C -->|Two| E[iPhone]
C -->|Three| F[Car]
"""
# 将Mermaid代码写入临时文件
with open('temp.mmd', 'w') as file:
file.write(mermaid_code)
mmscpath = "D:\\Program Files\\nodejs\\npm_global\\mmdc.cmd"
# 使用Mermaid CLI将Mermaid代码转换为SVG
# subprocess.run(['mmdc', '-i', 'temp.mmd', '-o', 'temp.svg'])
subprocess.run([mmscpath, '-i', 'temp.mmd', '-o', 'temp.png'])
# 创建或加载Word文档
doc = Document()
doc.add_heading('Mermaid Diagram in Word', 0)
# 将图片插入到Word文档中
doc.add_picture('temp.png', width=Inches(4.0))
# 保存Word文档
doc.save('diagram.docx')
# 清理临时文件
os.remove('temp.mmd')
# os.remove('temp.svg')
os.remove('temp.png')
编辑 (opens new window)
上次更新: 2025/01/02, 13:03:48