I’m trying to automate the process of having to move specific data, such as revenue, from downloaded financial statements to my valuation template. I feel like this should be doable and while I’m not that experienced with Python I tried it out with the aid of chatgpt.
According to the error message I get, it seems as if the packages can’t read the Excel files due to style issues from my understanding.
Cell In [19], line 15
12 return None # If item not found
14 # Load Twilio financial document with data_only=True
---> 15 twilio_wb = pd.read_excel('C:/Users/coldp/OneDrive/Documents/Twilio Inc NYSE TWLO Financials.xlsx')
16 twilio_ws = twilio_wb.active
18 # Load DCF template
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:457, in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
455 if not isinstance(io, ExcelFile):
456 should_close = True
--> 457 io = ExcelFile(io, storage_options=storage_options, engine=engine)
458 elif engine and engine != io.engine:
459 raise ValueError(
460 "Engine should not be specified when passing "
461 "an ExcelFile - ExcelFile already has the engine set"
462 )
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:1419, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
1416 self.engine = engine
1417 self.storage_options = storage_options
-> 1419 self._reader = self._engines[engine](self._io, storage_options=storage_options)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:525, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
514 """
515 Reader using openpyxl engine.
516
(...)
522 passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
523 """
524 import_optional_dependency("openpyxl")
--> 525 super().__init__(filepath_or_buffer, storage_options=storage_options)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:518, in BaseExcelReader.__init__(self, filepath_or_buffer, storage_options)
516 self.handles.handle.seek(0)
517 try:
--> 518 self.book = self.load_workbook(self.handles.handle)
519 except Exception:
520 self.close()
File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:536, in OpenpyxlReader.load_workbook(self, filepath_or_buffer)
533 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
534 from openpyxl import load_workbook
--> 536 return load_workbook(
537 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
538 )
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:317, in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
290 """Open the given filename and return the workbook
291
292 :param filename: the path to open or a file-like object
(...)
313
314 """
315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
--> 317 reader.read()
318 return reader.wb
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:281, in ExcelReader.read(self)
279 self.read_properties()
280 self.read_theme()
--> 281 apply_stylesheet(self.archive, self.wb)
282 self.read_worksheets()
283 self.parser.assign_names()
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:198, in apply_stylesheet(archive, wb)
195 return wb
197 node = fromstring(src)
--> 198 stylesheet = Stylesheet.from_tree(node)
200 if stylesheet.cell_styles:
202 wb._borders = IndexedList(stylesheet.borders)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:103, in Stylesheet.from_tree(cls, node)
101 for k in attrs:
102 del node.attrib[k]
--> 103 return super(Stylesheet, cls).from_tree(node)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\descriptors\serialisable.py:103, in Serialisable.from_tree(cls, node)
100 else:
101 attrib[tag] = obj
--> 103 return cls(**attrib)
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:94, in Stylesheet.__init__(self, numFmts, fonts, fills, borders, cellStyleXfs, cellXfs, cellStyles, dxfs, tableStyles, colors, extLst)
92 self.protections = self.cellXfs.prots
93 self._normalise_numbers()
---> 94 self.named_styles = self._merge_named_styles()
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:114, in Stylesheet._merge_named_styles(self)
111 named_styles = self.cellStyles.names
113 for style in named_styles:
--> 114 self._expand_named_style(style)
116 return named_styles
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:124, in Stylesheet._expand_named_style(self, named_style)
119 def _expand_named_style(self, named_style):
120 """
121 Bind format definitions for a named style from the associated style
122 record
123 """
--> 124 xf = self.cellStyleXfs[named_style.xfId]
125 named_style.font = self.fonts[xf.fontId]
126 named_style.fill = self.fills[xf.fillId]
File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\cell_style.py:185, in CellStyleList.__getitem__(self, idx)
184 def __getitem__(self, idx):
--> 185 return self.xf[idx]
IndexError: list index out of range
I made sure the excel files had no styles, weird images and so on but even so, the problem persists. I tried with both openpyxl and pandas but it still can’t read the files.
There could however still be issues with the excel file that I’m not aware of, I’ve been screening it to the best of my abilities.