使用 python 处理 Excel(xls,xlsx)

参考资料

推荐 openpyxl

https://openpyxl.readthedocs.io/en/default/tutorial.html#loading-from-a-file
http://xlrd.readthedocs.io/en/latest/api.html?highlight=save#module-xlrd.xldate
Python-Excel 模块哪家强?
https://zhuanlan.zhihu.com/p/23998083

参考代码

参考
比较Excel中某一列的字段的各个值是否在白名单中存在,如果存在会在另一列中填1,否则填0。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/11/14 21:13
# @Author : BLKStone
# @Site : http://blkstone.github.io
# @File : excel_dereplication
# @Software: PyCharm Community Edition
import xlrd
from xlutils.copy import copy
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
def read_whitelist():
whitelist = {}
with open('data/white_organizations.txt','r') as f:
for l in f.readlines():
whitelist[l.strip()]=1
f.close()
show_whitelist(whitelist)
return whitelist
def show_whitelist(whitelist):
print '-------白名单开始-------'
for k in whitelist.keys():
print k
print '-------白名单结束-------'
def excel_read_test():
data_rb = xlrd.open_workbook('data\\test.xlsx')
data_wb = copy(data_rb)
table = data_rb.sheets()[0]
# show_organization(table)
nrows = table.nrows
ncols = table.ncols
sheet_wb = data_wb.get_sheet(0)
whitelist = read_whitelist()
for i in range(1,nrows):
org_name = str(table.cell(i,6).value).strip()
if whitelist.has_key(org_name):
sheet_wb.write(i,8,1)
else:
sheet_wb.write(i,8,0)
data_wb.save("data\\result.xls")
# read demo
def show_organization(table):
nrows = table.nrows
ncols = table.ncols
for i in range(1,nrows):
# print table.cell(i,6).value
# print type(table.cell(i,6).value)
print str(table.cell(i,6).value).strip()
# write demo
def write_cell(table):
data_rb = xlrd.open_workbook('data\\test.xlsx')
data_wb = copy(data_rb)
sheet_wb = data_wb.get_sheet(0)
sheet_wb.write(4,4,1)
print '写入完成'
data_wb.save("data\\result.xls")
def mulu():
with open('data/mulu_white.txt','w') as fw:
with open('data/mulu.txt','r') as f:
for l in f.readlines():
print l.split('\t')[1].strip()
fw.write(l.split('\t')[1].strip()+'\n')
if __name__ == '__main__':
excel_read_test()
# read_whitelist()
# mulu()