Problem reading an Excel file using Pandas and Openpyxl

I’m trying to automate the process of having to move specific data, such as revenue, from downloaded financial statements to my valuation template. I feel like this should be doable and while I’m not that experienced with Python I tried it out with the aid of chatgpt.

According to the error message I get, it seems as if the packages can’t read the Excel files due to style issues from my understanding.

Cell In [19], line 15
     12     return None  # If item not found
     14 # Load Twilio financial document with data_only=True
---> 15 twilio_wb = pd.read_excel('C:/Users/coldp/OneDrive/Documents/Twilio Inc NYSE TWLO Financials.xlsx')
     16 twilio_ws = twilio_wb.active
     18 # Load DCF template

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:457, in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
    455 if not isinstance(io, ExcelFile):
    456     should_close = True
--> 457     io = ExcelFile(io, storage_options=storage_options, engine=engine)
    458 elif engine and engine != io.engine:
    459     raise ValueError(
    460         "Engine should not be specified when passing "
    461         "an ExcelFile - ExcelFile already has the engine set"
    462     )

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:1419, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
   1416 self.engine = engine
   1417 self.storage_options = storage_options
-> 1419 self._reader = self._engines[engine](self._io, storage_options=storage_options)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:525, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
    514 """
    515 Reader using openpyxl engine.
    516 
   (...)
    522     passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
    523 """
    524 import_optional_dependency("openpyxl")
--> 525 super().__init__(filepath_or_buffer, storage_options=storage_options)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:518, in BaseExcelReader.__init__(self, filepath_or_buffer, storage_options)
    516 self.handles.handle.seek(0)
    517 try:
--> 518     self.book = self.load_workbook(self.handles.handle)
    519 except Exception:
    520     self.close()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:536, in OpenpyxlReader.load_workbook(self, filepath_or_buffer)
    533 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
    534     from openpyxl import load_workbook
--> 536     return load_workbook(
    537         filepath_or_buffer, read_only=True, data_only=True, keep_links=False
    538     )

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:317, in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
    290 """Open the given filename and return the workbook
    291 
    292 :param filename: the path to open or a file-like object
   (...)
    313 
    314 """
    315 reader = ExcelReader(filename, read_only, keep_vba,
    316                     data_only, keep_links)
--> 317 reader.read()
    318 return reader.wb

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:281, in ExcelReader.read(self)
    279 self.read_properties()
    280 self.read_theme()
--> 281 apply_stylesheet(self.archive, self.wb)
    282 self.read_worksheets()
    283 self.parser.assign_names()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:198, in apply_stylesheet(archive, wb)
    195     return wb
    197 node = fromstring(src)
--> 198 stylesheet = Stylesheet.from_tree(node)
    200 if stylesheet.cell_styles:
    202     wb._borders = IndexedList(stylesheet.borders)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:103, in Stylesheet.from_tree(cls, node)
    101 for k in attrs:
    102     del node.attrib[k]
--> 103 return super(Stylesheet, cls).from_tree(node)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\descriptors\serialisable.py:103, in Serialisable.from_tree(cls, node)
    100     else:
    101         attrib[tag] = obj
--> 103 return cls(**attrib)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:94, in Stylesheet.__init__(self, numFmts, fonts, fills, borders, cellStyleXfs, cellXfs, cellStyles, dxfs, tableStyles, colors, extLst)
     92 self.protections = self.cellXfs.prots
     93 self._normalise_numbers()
---> 94 self.named_styles = self._merge_named_styles()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:114, in Stylesheet._merge_named_styles(self)
    111 named_styles = self.cellStyles.names
    113 for style in named_styles:
--> 114     self._expand_named_style(style)
    116 return named_styles

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:124, in Stylesheet._expand_named_style(self, named_style)
    119 def _expand_named_style(self, named_style):
    120     """
    121     Bind format definitions for a named style from the associated style
    122     record
    123     """
--> 124     xf = self.cellStyleXfs[named_style.xfId]
    125     named_style.font = self.fonts[xf.fontId]
    126     named_style.fill = self.fills[xf.fillId]

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\cell_style.py:185, in CellStyleList.__getitem__(self, idx)
    184 def __getitem__(self, idx):
--> 185     return self.xf[idx]

    IndexError: list index out of range

I made sure the excel files had no styles, weird images and so on but even so, the problem persists. I tried with both openpyxl and pandas but it still can’t read the files.

There could however still be issues with the excel file that I’m not aware of, I’ve been screening it to the best of my abilities.

Leave a Comment