其他
Python

Python操作Office

# 修改Excel

# 示例代码

# https://blog.csdn.net/sinat_28576553/article/details/81275650
import openpyxl
# Style classes are imported up front so they are in scope before first use.
from openpyxl.styles import PatternFill, Border, Side, Font, Alignment

filepath = "D:\\Desktop\\基础信息(2)(1).xlsx"

# Load the workbook object.
workbook = openpyxl.load_workbook(filepath)
# xlrd equivalent, for comparison:
# workbook = xlrd.open_workbook("DataSource\\Economics.xls")

# List the names of all worksheets in the workbook.
# workbook.get_sheet_names() emits
#   DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).
# so the sheetnames property is used instead
# (the xlrd spelling is workbook.sheet_names()).
shenames = workbook.sheetnames
# print(shenames)  # ['各省市', '测试表']

# Every name can be used to look up its worksheet object:
# for sheetName in shenames:
#     worksheet = workbook[sheetName]

# Look the sheet up by indexing the workbook with its name;
# get_sheet_by_name() is deprecated in favour of this form.
worksheet = workbook["单位基本信息表"]
print(worksheet)

rows = worksheet.max_row
columns = worksheet.max_column

print(rows, columns)

# Set the width of the first column:
# worksheet.column_dimensions['A'].width = 20.0

# Set the height of the first row:
# worksheet.row_dimensions[1].height = 25.0

# Fill cell (1, 1) with the solid colour 8E236B.
worksheet.cell(1, 1).fill = PatternFill(fill_type="solid", start_color="8E236B")

# Insert one column before column 1 and one row before row 7.
worksheet.insert_cols(1)
worksheet.insert_rows(7)

# Merge cells. merge_cells() takes a plain range string such as 'A1:B2'.
# Building it with str.format() works with int or str arguments alike:
#     'A{}:B{}'.format(1, 2)    'A{}:B{}'.format('1', '2')
# The template itself must not contain quote characters: "'A{}:B{}'" yields
# "'A1:B2'" (quotes included), which the original notes record as not working.
fanwei = 'A{}:B{}'.format('1', '2')
worksheet.merge_cells(fanwei)

# Font
font = Font(size=18, bold=True, color="1874CD")
worksheet.cell(row=1, column=1).font = font

# Border
thin = Side(border_style="thin", color="000000")
border = Border(top=thin, left=thin, right=thin, bottom=thin)
worksheet.cell(row=2, column=2).border = border

# Alignment
worksheet.cell(row=2, column=2).alignment = Alignment(horizontal='center', vertical='center')

# Wrap text. The original snippet never defined `value`; a sample text is
# used here so the statement can run.
value = "first line\nsecond line"
worksheet.cell(row=1, column=1, value=value).alignment = Alignment(wrapText=True)

# Write "大区" into row 2, column 2.
# NOTE(review): B2 lies inside the A1:B2 range merged above; openpyxl is
# expected to reject value writes to the non-anchor cells of a merged range.
# Confirm, and either drop the merge or write to a cell outside it.
worksheet.cell(row=2, column=2, value='大区')


# Close the workbook (nothing is saved in this snippet).
workbook.close()

import openpyxl
from openpyxl.styles import Border, Side, Font, Alignment

file = "07决算和报告.xlsx"

filepath = "D:\\Desktop\\技术标准excel\\" + file
topath = "D:\\Desktop\\技术标准excel\\new\\test.xlsx" # + file

# Load the source workbook.
workbook = openpyxl.load_workbook(filepath)

# Names of all worksheets in the workbook.
shenames = workbook.sheetnames

# The style objects are the same for every sheet, so they are built once
# instead of once per sheet / once per cell.
thin = Side(border_style="thin", color="000000")
border = Border(top=thin, left=thin, right=thin, bottom=thin)
font = Font(size=16, bold=True, color="000000")
font2 = Font(size=10.5, bold=True, color="000000")
centered_wrap = Alignment(horizontal='center', vertical='center', wrapText=True)

# Rows 2 and 3 are the two inserted banner rows (their text lives in column A);
# columns I, J, K (9, 10, 11) are the review columns headed in row 4.
banner_rows = (2, 3)
review_columns = (9, 10, 11)

for sheetName in shenames:
    # Index the workbook by name; get_sheet_by_name() is deprecated.
    worksheet = workbook[sheetName]

    # Insert two rows before row 2.
    worksheet.insert_rows(2)
    worksheet.insert_rows(2)

    # Border, font and centred/wrapped alignment for the banner cells.
    for banner_row in banner_rows:
        banner_cell = worksheet.cell(row=banner_row, column=1)
        banner_cell.border = border
        banner_cell.font = font
        banner_cell.alignment = centered_wrap

    # Font and centred/wrapped alignment for the review-column headers.
    for column in review_columns:
        header_cell = worksheet.cell(row=4, column=column)
        header_cell.font = font2
        header_cell.alignment = centered_wrap

    # Row heights.
    worksheet.row_dimensions[2].height = 37.2
    worksheet.row_dimensions[3].height = 37.2
    worksheet.row_dimensions[4].height = 67.1

    # Column widths.
    worksheet.column_dimensions['I'].width = 30.0
    worksheet.column_dimensions['J'].width = 30.0
    worksheet.column_dimensions['K'].width = 35.0

    # Each banner row spans columns A..K.
    #worksheet.merge_cells('A1:K1')
    worksheet.merge_cells('A2:K2')
    worksheet.merge_cells('A3:K3')

    # Banner texts go into the top-left cell of each merged range.
    worksheet.cell(row=2,column=1,value='请结合系统实现，详细描述本表的使用说明（业务场景、业务环节对表的具体操作、与其他表的配合使用、存储的具体内容）：')
    worksheet.cell(row=3,column=1,value='请填写是否有其他需要修改的内容：')
    # Header texts of the three review columns.
    worksheet.cell(row=4,column=9,value='是否必填（数据库设置要求）')
    worksheet.cell(row=4,column=10,value='结合系统实现，详细描述字段的使用说明')
    worksheet.cell(row=4,column=11,value='对字段的修改建议（1是如建议“新增”或“删除”字段，请注明“新增”或“删除”。2是对已有字段长度、值域、备注等信息提出修改建议）')

    # Draw borders around the review columns from the header row down to
    # the last used row of the sheet.
    for row in range(4, worksheet.max_row + 1):
        for column in review_columns:
            worksheet.cell(row=row, column=column).border = border


# Write the result to the target path; the source file is left untouched.
workbook.save(topath)

workbook.close()

# 读写Word

前提：

# Import the package
from docx import Document
from docx.shared import Inches  # inches

# Create the document object that the snippets below refer to as `document`.
document = Document()

# 文本

文本操作（基础）

# Append a paragraph at the end of the document
paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
# Insert a paragraph immediately before the paragraph created above
prior_paragraph = paragraph.insert_paragraph_before('Lorem ipsum')
# Add a heading (level 1 by default)
document.add_heading('The REAL meaning of the universe')
# Add a level-2 heading
document.add_heading('The role of dolphins', level=2)
# Add a page break
document.add_page_break()

段落样式

# Apply a style while adding the paragraph.
# The style is referenced by its name, 'List Bullet', exactly as in the
# assignment further down; the space-less spelling 'ListBullet' is the
# legacy style-id form and is not the style's name.
document.add_paragraph('Lorem ipsum dolor sit amet.', style='List Bullet')

# Apply a style after the paragraph has been added
paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
paragraph.style = 'List Bullet'

# Bold and italic are set through two properties of a run
paragraph = document.add_paragraph('Lorem ipsum ')
run = paragraph.add_run('dolor')
run.bold = True # same as: paragraph.add_run('dolor').bold = True
paragraph.add_run(' sit amet.')

字体样式

# Pass the character style name as the second argument of add_run()
paragraph = document.add_paragraph('Normal text, ')
paragraph.add_run('text with emphasis.', 'Emphasis')

# Equivalent to

paragraph = document.add_paragraph('Normal text, ')
run = paragraph.add_run('text with emphasis.')
run.style = 'Emphasis'

# 表格

表格操作（基础）

# Add a table with 2 rows and 2 columns
table = document.add_table(rows=2, cols=2)
# Get a cell (row index 0, column index 1)
cell = table.cell(0, 1)
# Assign text to the cell
cell.text = 'parrot, possibly dead'
# Get a row
row = table.rows[1]
# Add a row (rebinds `row` to the newly added row)
row = table.add_row()
# Apply a table style
table.style = 'LightShading-Accent1'

示例：将数组的数据放入表格

# get table data -------------
# Each item is a (qty, sku, description) tuple.
items = (
    (7, '1024', 'Plush kittens'),
    (3, '2042', 'Furbees'),
    (1, '1288', 'French Poodle Collars, Deluxe'),
)

# add table ------------------
table = document.add_table(1, 3)
# populate header row --------
heading_cells = table.rows[0].cells
heading_cells[0].text = 'Qty'
heading_cells[1].text = 'SKU'
heading_cells[2].text = 'Description'
# add a data row for each item
# The items are plain tuples, so they are unpacked by position; attribute
# access such as item.qty does not exist on a tuple.
for qty, sku, desc in items:
    cells = table.add_row().cells
    cells[0].text = str(qty)
    cells[1].text = sku
    cells[2].text = desc

读取表格

path = "text\\预算管理一体化地债系统数据库设计(代码集).docx"
document = Document(path)  # open the file
tables = document.tables   # all tables found in the document
# NOTE(review): index 1 selects the SECOND table; the original note said
# "first table" -- use tables[0] if the first one is what is wanted.
table = tables[1]

# Walk the table and print each row on one line, cells separated by tabs
for i in range(0, len(table.rows)):  # start from the first row of the table
    for j in range(0, len(table.columns)):
        print(table.cell(i,j).text, end = '\t')
    print('', end = '\n')

# 图片

图片操作（基础）

# Add a picture
document.add_picture('image-filename.png')
# Add a picture and set its width at the same time
document.add_picture('image-filename.png', width=Inches(1.0))

# 文档

文档操作（基础）

# Create a new document and save it to disk
from docx import Document
document = Document()
document.save('test.docx')

# Reading from / writing to file-like objects (streams)
from io import BytesIO

# Open the document from an already-open binary file object; the `with`
# block closes the file even if Document() raises.
with open('foobar.docx', 'rb') as f:
    document = Document(f)


# Or go through an in-memory buffer. The file is read in binary mode, so the
# buffer has to be a bytes buffer (BytesIO); StringIO only accepts str.
with open('foobar.docx', 'rb') as f:
    source_stream = BytesIO(f.read())
document = Document(source_stream)
source_stream.close()
# ... work with `document` here ...
target_stream = BytesIO()
document.save(target_stream)

文本对齐

# Inherited from the parent element
from docx.enum.text import WD_ALIGN_PARAGRAPH
document = Document()
paragraph = document.add_paragraph()
paragraph_format = paragraph.paragraph_format
paragraph_format.alignment
# None  (indicating alignment is inherited from the style hierarchy)

# Set a value on the object and read it back
paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
paragraph_format.alignment
# CENTER (1)

左右（首行）缩进

# Left indent
from docx.shared import Inches
paragraph = document.add_paragraph()
paragraph_format = paragraph.paragraph_format
paragraph_format.left_indent
# None # indicating indentation is inherited from the style hierarchy
paragraph_format.left_indent = Inches(0.5)
paragraph_format.left_indent
# 457200
paragraph_format.left_indent.inches
# 0.5

# Right indent
from docx.shared import Pt
paragraph_format.right_indent
# None
paragraph_format.right_indent = Pt(24)
paragraph_format.right_indent
# 304800
paragraph_format.right_indent.pt
# 24.0


# First-line indent (the negative value below is shown as -0.25 inches)
paragraph_format.first_line_indent
# None
paragraph_format.first_line_indent = Inches(-0.25)
paragraph_format.first_line_indent
# -228600
paragraph_format.first_line_indent.inches
# -0.25

制表符（见文档）
段间距（见文档）
行间距（见文档）
分页（见文档）
字体格式和颜色（见文档）

# Create a document, add an empty run and take that run's font object
from docx import Document
document = Document()
run = document.add_paragraph().add_run()
font = run.font

# Set the typeface name and the size on the font object
from docx.shared import Pt
font.name = 'Calibri'
font.size = Pt(12)

sections（见文档）
页眉页脚（见文档）

# 按顺序读取 docx 文档

import os
import docx

from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph


def iter_block_items(parent):
    """
    Generate the paragraphs and tables directly inside *parent*, preserving
    the order in which they occur.

    *parent* is either a Document (its body element is walked) or a _Cell
    (its ``_tc`` element is walked). Each yielded object is a Paragraph or a
    Table constructed with *parent* as its parent; child elements of any
    other kind are skipped.

    Raises ValueError when *parent* is neither a Document nor a _Cell.
    """
    if isinstance(parent, Document):
        container = parent.element.body
    elif isinstance(parent, _Cell):
        container = parent._tc
    else:
        raise ValueError("something's not right")

    # (element class, wrapper class) pairs, checked in this order.
    wrappers = ((CT_P, Paragraph), (CT_Tbl, Table))
    for element in container.iterchildren():
        for element_type, wrapper in wrappers:
            if isinstance(element, element_type):
                yield wrapper(element, parent)
                break


def read_table(table):
    """Return the text of *table* as a list of rows, each a list of cell texts."""
    grid = []
    for row in table.rows:
        grid.append([cell.text for cell in row.cells])
    return grid


def read_word(word_path):
    """
    Open the Word document at *word_path* and print its blocks in order.

    A paragraph is printed as ``text`` followed by a one-element list holding
    its text; a table is printed as ``table`` followed by the nested list
    returned by read_table().
    """
    for item in iter_block_items(docx.Document(word_path)):
        if isinstance(item, Table):
            print("table", read_table(item))
        elif isinstance(item, Paragraph):
            print("text", [item.text])


if __name__ == '__main__':
    # Project root = parent of the directory that contains this script.
    # __file__ is made absolute first: with a relative __file__ such as
    # "script.py" the inner dirname() would be "" and the result would be the
    # current working directory rather than its parent.
    ROOT_DIR_P = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Path of the Word (.docx) file to read; the components are passed
    # separately so os.path.join() supplies the platform's separator.
    word_path = os.path.join(ROOT_DIR_P, "data", "test_to_word.docx")
    # word_path = os.path.join(ROOT_DIR_P, "data", "test_to_word2.docx")
    read_word(word_path)

# 按模版生成Word文档

# docxtpl

第三方库官网（遵循 jinja2 模板的语法）
参考模版：python-docx-template

测试脚本

import json
from docxtpl import DocxTemplate

# Locations of the JSON data, the rendered output and the Word template
json_file_path = 'conf/doc_data.json'
res_doc_path = "files/my_word_template_res.docx"
doc = DocxTemplate("conf/my_word_template.docx")

# Read the JSON file and parse its content into the rendering context
with open(json_file_path, 'r', encoding='utf-8') as fp:
    context = json.loads(fp.read())
# Fill the template with the context, then write the result
doc.render(context)
doc.save(res_doc_path)

测试数据

{
  "name": "名称",
  "rtext": "富文本__\n_&",
  "special": "R('转义_ _<>_&')",
  "fruits": ["Apple", "Banana", "Cherry"],
  "display_paragraph":true,
  "Paragraphs" : [
    {
      "name": "段落 2.1",
      "table_name": "table_name1",
      "table_fields": ["id", "name", "age", "gender"],
      "table_data": [
        {"id": "1", "name": "张三", "age": "20", "gender": "男"},
        {"id": "2", "name": "李四", "age": "21", "gender": "女"},
        {"id": "3", "name": "王五", "age": "22", "gender": "男"}
      ]
    },
    {"name":"段落 2.2"}
  ]
}

测试模版（word）

------------- 循环
{% for fruit in fruits %}
- {{ fruit }}
{% endfor %}


------------- 段落
{% set paragraph %}
This is a new paragraph that we want to insert. It can contain multiple sentences.
{% endset %}

{{ paragraph }}

------------- 判断
{%p if display_paragraph %}
Here is my paragraph
{%p endif %}


------------- 拼接行
1111
{%- if display_paragraph -%}
2222
{%- else -%}
3333
{%- endif -%}
9999


------------- 循环段落（嵌套内容）
{% for Paragraph in Paragraphs %}
{{ Paragraph.name }}
{{ Paragraph.table_name }}

id  name    age Gender
{%tr for row in Paragraph.table_data %}
{{ row.id }}    {{ row.name }}  {{ row.age }}   {{ row.gender }}
{%tr endfor %}

{% endfor %}

# 按模版生成 Excel 文档

# xlsxtpl

第三方库官网
xlsxtpl 使用 jinja2 作为其模板引擎，遵循 jinja2 模板的语法。

# 使用 Python + Mermaid 绘图并插入 Word 文档

使用 nodejs 安装 mermaid-cli，通过 Python 代码调用客户端生成图片保存到磁盘，再将磁盘图片插入到 Word 文档

安装 mermaid-cli：`npm install -g @mermaid-js/mermaid-cli`
代码示例

import subprocess
from docx import Document
from docx.shared import Inches
import os

# Mermaid source of the diagram
mermaid_code = """
graph TD
    A[Christmas] -->|Get money| B(Go shopping)
    B --> C{Let me think}
    C -->|One| D[Laptop]
    C -->|Two| E[iPhone]
    C -->|Three| F[Car]
"""

mmscpath = "D:\\Program Files\\nodejs\\npm_global\\mmdc.cmd"

try:
    # Write the Mermaid source to a temporary file. The encoding is given
    # explicitly so the file content does not depend on the platform default.
    with open('temp.mmd', 'w', encoding='utf-8') as file:
        file.write(mermaid_code)

    # Render the diagram with the Mermaid CLI. check=True raises
    # CalledProcessError when the command fails, instead of continuing and
    # failing later on a missing temp.png.
    # subprocess.run(['mmdc', '-i', 'temp.mmd', '-o', 'temp.svg'])
    subprocess.run([mmscpath, '-i', 'temp.mmd', '-o', 'temp.png'], check=True)

    # Create the Word document
    doc = Document()
    doc.add_heading('Mermaid Diagram in Word', 0)
    # Insert the rendered picture into the document
    doc.add_picture('temp.png', width=Inches(4.0))
    # Save the Word document
    doc.save('diagram.docx')
finally:
    # Remove the temporary files even when one of the steps above failed;
    # a file that was never created is simply skipped.
    # ('temp.svg' would be added here if the SVG variant above is used.)
    for temp_path in ('temp.mmd', 'temp.png'):
        if os.path.exists(temp_path):
            os.remove(temp_path)

编辑

上次更新: 2025/01/02, 13:03:48

← Python简单语法学习 Python类库学习→