-
Notifications
You must be signed in to change notification settings - Fork 9
/
XlsxReader.py
57 lines (50 loc) · 1.88 KB
/
XlsxReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import zipfile
from xml.etree.ElementTree import iterparse
def _sharedItemText(si):
    """Return the plain text held by one shared-string ``<si>`` element."""
    parts = []
    for child in si:
        if child.tag.endswith('}t'):
            # Plain string: <si><t>text</t></si>
            parts.append(child.text or '')
        elif child.tag.endswith('}r'):
            # Rich text: <si><r><t>Al</t></r><r><t>ice</t></r></si>
            # Each formatted run carries its own <t>; the value is their join.
            parts.extend(t.text or '' for t in child if t.tag.endswith('}t'))
        # Other children (e.g. phonetic <rPh> runs) are not part of the value.
    return ''.join(parts)


def _readSharedStrings(archive):
    """Return the shared-string table of an open xlsx archive as a list.

    There is exactly one list entry per ``<si>`` element, so the position of
    an entry matches the index stored in ``<c t="s"><v>index</v></c>`` cells.
    An archive without ``xl/sharedStrings.xml`` yields an empty list.
    """
    try:
        member = archive.open('xl/sharedStrings.xml')
    except KeyError:
        # ZipFile.open raises KeyError for a missing member; a workbook
        # without text cells does not need the part.
        return []
    with member:
        return [_sharedItemText(el) for _event, el in iterparse(member)
                if el.tag.endswith('}si')]


def readXlsx(fileName, **args):
    """Read one worksheet of an .xlsx file into a list of row dictionaries.

    Args:
        fileName: Path of the .xlsx file, or a file-like object accepted by
            ``zipfile.ZipFile``.
        **args: Optional keyword settings:
            sheet: Number N of the archive part ``xl/worksheets/sheetN.xml``
                to read. Defaults to 1.
            header: When true, cells are keyed by the text found in the same
                column of the first row; otherwise (and for columns that have
                no first-row cell) they are keyed by the column letters,
                e.g. ``'A'`` or ``'AZ'``. Defaults to False.

    Returns:
        A list with one dict per ``<row>`` element, in document order. Empty
        cells are omitted from their row dict. Cell values are strings.

        NOTE: the first row is always consumed as the header candidate, so
        ``rows[0]`` is always an empty dict and data starts at ``rows[1]``,
        regardless of the ``header`` setting.

    Raises:
        KeyError: If the requested worksheet part is not in the archive.
    """
    sheet = args.get("sheet", 1)
    useHeader = bool(args.get("header", False))

    rows = []
    row = {}
    header = {}
    value = ''

    # The context managers guarantee the archive and the member stream are
    # closed even if the XML is malformed and parsing raises.
    with zipfile.ZipFile(fileName) as z:
        strings = _readSharedStrings(z)
        with z.open('xl/worksheets/sheet%d.xml' % (sheet)) as sheetXml:
            # iterparse reports elements as they close, so a cell's <v>
            # child is always seen before the enclosing <c>.
            for _event, el in iterparse(sheetXml):
                if el.tag.endswith('}v'):  # <v>84</v>
                    # Raw value, or an index into the shared strings.
                    value = el.text or ''
                elif el.tag.endswith('}c'):  # <c r="A3" t="s"><v>84</v></c>
                    # If the value is a shared string, use it as an index.
                    if el.attrib.get('t') == 's' and value != '':
                        value = strings[int(value)]
                    # Drop the trailing row number so only the column
                    # letter(s) remain: AZ22 -> AZ
                    letter = el.attrib['r'].rstrip('0123456789')
                    if not rows:
                        # First row: remember its cells as the header names
                        # that COULD be used for the following rows.
                        header[letter] = value
                    elif value != '':
                        # With a header row, key the cell by the first
                        # row's name for this column when there is one.
                        if useHeader and letter in header:
                            row[header[letter]] = value
                        else:
                            row[letter] = value
                    # Reset so a following cell without <v> reads as empty.
                    value = ''
                elif el.tag.endswith('}row'):
                    rows.append(row)
                    row = {}
    return rows
if __name__ == '__main__':
    # Manual smoke test. Index 0 is the (empty) entry produced for the header
    # row, so index 1 is the first data row of the sheet.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(readXlsx('./akshaya-patra-data/lon and log details-xls.xlsx', sheet=1, header=True)[1])