python处理excel数据

python处理excel数据

支持xlsx文件,不支持xls文件。

安装openpyxl库:

1
pip3 install openpyxl -i https://pypi.douban.com/simple/

创建excel文档:

1
2
3
4
5
6
from openpyxl import Workbook

wb = Workbook()
wb.create_sheet('MySheet1')
wb.create_sheet('MySheet2')
wb.save('new.xlsx')

创建和保存workbook

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from openpyxl import Workbook

# 创建一个excel文档
wb = Workbook()

# 获取默认的sheet
ws = wb.active
print(ws.title)
ws.title = '我的标题'

# 追加新的sheet,设置位置
ws1 = wb.create_sheet('新的sheet1')
ws2 = wb.create_sheet('新的sheet2', 0)
ws3 = wb.create_sheet('新的sheet3', -1)

# 设置sheet颜色
ws.sheet_properties.tabColor = '1072BA'
ws3.sheet_properties.tabColor = 'FFDD00'
wb.save('first.xlsx')

读取excel文档

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import openpyxl

# 读取excel
wb = openpyxl.load_workbook('first.xlsx')
print(type(wb))

# 获取工作表
print('获取所有工作表的名字')
print(wb.sheetnames, '\n')
for sheet in wb:
print(sheet.title)

# 获取指定工作表
sheet = wb['新的sheet3']
print(type(sheet))
print(sheet.title)
print(sheet.sheet_properties.tabColor)

wb.close()

load_workbook其它参数:

  • read_only:false,如果是true,文档不可写。

  • keep_vba:keep_vba,是否保留vba内容。

  • data_only:false,是否保留单元格公式。

  • keep_links:true,指定单元格的外部连接是否保存。

设置新的sheet2工作表中d3=20,e3=30,e6=sum(d3+e3)。

获取数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import openpyxl

# 读取excel
wb = openpyxl.load_workbook('first.xlsx', data_only=True)
print(type(wb))

# 获取工作表
print('获取所有工作表的名字')
print(wb.sheetnames, '\n')
for sheet in wb:
print(sheet.title)

# 获取指定工作表
sheet = wb['新的sheet2']
print(type(sheet))
print(sheet.title)
print(sheet.sheet_properties.tabColor)

# 获得单元格
print(sheet['E6'].value)
wb.close()

创建、获取、修改、复制和删除excel

如果title重复,会自动添加编号:

1
2
3
4
5
6
7
8
9
10
import openpyxl

# 读取excel
wb = openpyxl.load_workbook('first.xlsx')

# 创建工作表
ws1 = wb.create_chartsheet('hello word')

wb.save('first.xlsx')
wb.close()

其它操作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import openpyxl

# 读取excel
wb = openpyxl.load_workbook('first.xlsx')

# 创建工作表
ws1 = wb.create_sheet('hello word')

# 修改工作表
ws1.sheet_properties.tabColor = 'FF0000'
ws1.title = 'abcdefg'

# 获得工作表索引
print('所有的工作表:', wb.sheetnames)
index = wb.index(ws1)
print('index:', index)

# 复制工作表
wsc = wb.copy_worksheet(ws1)
wsc.title = '复制的工作表'
wsc.sheet_properties.tabColor = '0000BB'

# 删除工作表,两种方法
wb.remove(wsc)
del wb['新的sheet2']

wb.save('first.xlsx')
wb.close()

获取和设置一个单元格的数据

重新创建first.xlsx。

新的sheet2设置c3=1234,f8=hello word,d11=20

获取数据,单元格没有值返回none:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import openpyxl

# 读取excel
wb = openpyxl.load_workbook('first.xlsx')

# 获取所有工作表
print(wb.sheetnames, '\n')

sheet = wb['新的sheet2']
c3 = sheet['C3']
print(type(c3))

# 指定位置获取单元格数据
print(f'C3:{c3.value}')
print(f'D11:{sheet["D11"].value}')
print(f'F7:{sheet["F7"].value}')

# 插入数据
sheet['C7'].value = '你好'
sheet['B10'].value = '=SUM(C3,D11)'

# 行列获取单元格数据和修改数据
print(sheet.cell(row=7, column=3))
print(sheet.cell(row=7, column=3, value=200).value)

# 循环获取1-100行,1-100列的数据
# for x in range(1, 101):
# for y in range(1, 101):
# print(sheet.cell(row=x, column=y).value)

# 获取最大行最大列
print(sheet.max_column)
print(sheet.max_row)
for x in range(1, sheet.max_row):
for y in range(1, sheet.max_column):
cell = sheet.cell(row=x, column=y)
if cell.value != None:
print(sheet.cell(row=x, column=y).value)

wb.save('first.xlsx')
wb.close()

访问多个单元格

重新创建first.xlsx。

新的sheet2设置c5=aa,c6=xx,d5=bb,d6=dd,e5=cc,e6=pp,

获取数据,单元格没有值返回none:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import openpyxl

# 读取excel,获取工作表
wb = openpyxl.load_workbook('first.xlsx')
sheet = wb['新的sheet2']

# 获取c5~e6所有数据
print('获取子表格')
cell_range = sheet['C5':'E6']
print(type(cell_range))
print(cell_range)

print('输出子表格的cell')
for rows in cell_range:
for cell in rows:
print(cell)

# 获取特定的cell集合
col = sheet['C']
print(col)
print('max_row', sheet.max_row)

row = sheet['5']
print(row)
print('max_col', sheet.max_column)

# 获取多列多行
cols = sheet['C':'E']
print(cols)
print(len(cols))

rows = sheet['3:6']
print(rows)

# 先行后列插入,从第1行开始,插入3列,插入到第2行
i = 1
for rows in sheet.iter_rows(min_row=1, max_col=3, max_row=2):
for cell in rows:
print(cell)
cell.value = i
i += 1

# 先列后行插入
i = 1
for cols in sheet.iter_rows(min_row=3, max_col=3, max_row=4):
for cell in cols:
print(cell)
cell.value = i
i += 1

wb.save('first.xlsx')
wb.close

插入和删除行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import openpyxl

wb = openpyxl.load_workbook('table1.xlsx')

# 第1行前插入1行
ws = wb.active
ws.insert_rows(1)

# 第1行前插入5行,方法1
for i in range(5):
ws.insert_rows(1)
# 第1行前插入5行,方法2
ws.insert_rows(1, 5)

# 在25行插入9行
ws.insert_rows(25, 9)

# 删除28和29行
ws.delete_rows(28, 2)

wb.save('first.xlsx')
wb.close()

插入和删除列

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import openpyxl

wb = openpyxl.load_workbook('table1.xlsx')

# 第1列前插入1列
ws = wb.active
ws.insert_cols(1)

# 第1列前插入5列
ws.insert_cols(1, 5)

# 在第3列插入5列
ws.insert_cols(3, 5)

# 删除第3列开始的5列
ws.delete_cols(3, 5)

wb.save('first.xlsx')
wb.close()

改变行高和列宽

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import openpyxl

wb = openpyxl.load_workbook('table1.xlsx')
ws = wb.active

# 改变第10行行高为10
print(f'height:{ws.row_dimensions[10].height}')
ws.row_dimensions[10].height = 10

# 改变第12-14行行高为40
for i in range(12, 15):
ws.row_dimensions[i].height = 40

# 改变第1列宽为50
print(ws.column_dimensions['A'].width)
ws.column_dimensions['A'].width = 50

# 改变多行列宽为50
for col in 'BCE':
print(col)
ws.column_dimensions[col].width = 50

wb.save('first.xlsx')
wb.close()

移动范围

重新创建first.xlsx。

新的sheet2设置a2=1,a3=5,a4=9,b2=2,b3=3,b4=10,c2=3,c3=7,c4=11,d2=4,d3=8,d4=12

1
2
3
4
5
6
7
8
9
10
11
12
13
import openpyxl

wb = openpyxl.load_workbook('first.xlsx')
ws = wb.active

# c3-d4单元格移动,右移2下移1
ws.move_range("C3:D4", 1, 2)

# 移回去
ws.move_range("E4:F5", -1, -2)

wb.save('first.xlsx')
wb.close()

设置单元格文字颜色和背景颜色

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import openpyxl
from openpyxl.styles import *

wb = openpyxl.load_workbook('table1.xlsx')
ws = wb.active

# 设置b13文字颜色
redFont = Font(name='Arial', color='FF0000')
ws.cell(13, 2).font = redFont

# 设置b13背景颜色
blueFill = PatternFill('solid', fgColor='0000FF')
ws.cell(13, 2).fill = blueFill

wb.save('first.xlsx')
wb.close()

设置range的文字颜色和背景颜色

批量修改单元格:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import openpyxl
from openpyxl.styles import *

wb = openpyxl.load_workbook('table1.xlsx')
ws = wb.active

# 设置a22-c24
blueFont = Font(name='Arial', color='0000FF')
redFill = PatternFill('solid', fgColor='FF0000')
for col in range(1, 4):
for row in range(22, 25):
ws.cell(row, col).font = blueFont
ws.cell(row, col).fill = redFill

wb.save('first.xlsx')
wb.close()

格式化数字与日期

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from openpyxl import Workbook
import datetime

wb = Workbook()
ws = wb.active

ws.title = '格式化数字'
ws.column_dimensions['A'].width = 50

# 保留两位小数
ws['A2'].value = '124.5685656556'
ws['A2'].number_format = '0.00'

# 格式化日期,只保留年月日,方法1,字符串格式,居左
ws.column_dimensions['C'].width = 50
ws['C2'].value = datetime.datetime(2020, 6, 1).strftime('%m%d%Y')

# 格式化日期,只保留年月日,方法2,日期格式,居右
ws['C3'].value = datetime.datetime(2020, 6, 1)
ws['C3'].number_format = 'yyyy年m月d日'

wb.save('format.xlsx')

使用公式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from openpyxl import Workbook
import datetime

wb = Workbook()
ws = wb.active

ws.title = '使用公式'
ws['B2'] = '=sum(1,2,3,4,5,6,7,8,9,10)'
ws['B3'] = '=sum(3.14/8)'

ws['A1'] = 10
ws['A2'] = 20
ws['A3'] = 30
ws['C1'] = '=sum(A1:A3)'

wb.save('format.xlsx')

合并单元格和取消合并单元格

合并a1-c2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment

wb = Workbook()
ws = wb.active

ws.title = '合并单元格'
ws.merge_cells('A1:C2')
ws.cell(1, 1).value = '合并后的单元格'

# 居中,粗体
font = Font(name='宋体', bold=True, italic=True)
alignment = Alignment(horizontal='center', vertical='center')
ws['A1'].font = font
ws['A1'].alignment = alignment

# 取消合并
ws.unmerge_cells('A1:C2')

wb.save('cells.xlsx')

向excel文档插入图像

安装pillow库:

1
pip3 install Pillow
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from openpyxl import Workbook
from openpyxl.drawing.image import Image

wb = Workbook()
ws = wb.active

ws.title = '插入图像'

column_width = 12
row_height = 80

ws.column_dimensions['B'].width = column_width
ws.row_dimensions[3].height = row_height

image = Image('new.png')
new_size = (90, 90)
image.width, image.height = new_size
ws.add_image(image, 'B3')

wb.save('image.xlsx')