8000 Excelfancy by jassinm · Pull Request #2370 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Excelfancy #2370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 29, 2012
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
adding na_repl, cols argument to excel formatter
  • Loading branch information
locojaydev committed Nov 22, 2012
commit afde3f21f088afa784d0d6b05c0703f6db59e13b
85 changes: 59 additions & 26 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
import pandas.lib as lib

import numpy as np

import itertools

from collections import namedtuple

docstring_to_string = """
Parameters
Expand Down Expand Up @@ -686,8 +686,23 @@ def grouper(x):
return result


ExcelCell = namedtuple("ExcelCell",
'row, col, val, style, mergestart, mergeend')
#from collections import namedtuple
# ExcelCell = namedtuple("ExcelCell",
# 'row, col, val, style, mergestart, mergeend')

class ExcelCell:
__fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
__slots__ = __fields__

def __init__(self, row, col, val,
style=None, mergestart=None, mergeend=None):
self.row = row
self.col = col
self.val = val
self.style = style
self.mergestart = mergestart
self.mergeend = mergeend


header_style = {"font": {"bold": True},
"borders": {"top": "thin",
Expand All @@ -698,47 +713,65 @@ def grouper(x):


class ExcelFormatter(object):
"""
Class for formatting a DataFrame to a list of ExcelCells,

Parameters
----------
df : dataframe
na_rep: na representation
index : boolean, default True
output row names (index)
cols : sequence, optional
Columns to write
"""

def __init__(self, df):
def __init__(self, df, na_rep='', cols=None):
self.df = df
self.rowcounter = 0
self.na_rep = na_rep
self.columns = cols
if cols is None:
self.columns = df.columns

def _format_value(self, val):
if lib.checknull(val):
val = self.na_rep
return val

def _format_header_mi(self):
levels = self.df.columns.format(sparsify=True, adjoin=False,
levels = self.columns.format(sparsify=True, adjoin=False,
names=False)
level_lenghts = fmt._get_level_lengths(levels)
level_lenghts = _get_level_lengths(levels)
for lnum, (records, values) in enumerate(zip(level_lenghts,
levels)):
name = self.df.columns.names[lnum]
yield ExcelCell(lnum, 0, name, header_style, None, None)
name = self.columns.names[lnum]
yield ExcelCell(lnum, 0, name, header_style)
for i in records:
if records[i] > 1:
yield ExcelCell(lnum, i + 1, values[i],
header_style, lnum, i + records[i])
else:
yield ExcelCell(lnum, i + 1, values[i],
header_style, None, None)
yield ExcelCell(lnum, i + 1, values[i], header_style)

self.rowcounter = lnum

def _format_header_regular(self):
for colindex, colname in enumerate(self.df.columns):
yield ExcelCell(self.rowcounter, colindex,
colname, header_style, None, None)
for colindex, colname in enumerate(self.columns):
yield ExcelCell(self.rowcounter, colindex, colname, header_style)

def _format_header(self):
if isinstance(self.df.columns, MultiIndex):
if isinstance(self.columns, MultiIndex):
gen = self._format_header_mi()
else:
gen = self._format_header_regular()

gen2 = ()
if self.df.index.names:
row = [x if x is not None else ''
for x in self.df.index.names] + [''] * len(self.df.columns)
for x in self.df.index.names] + [''] * len(self.columns)
if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
gen2 = (ExcelCell(self.rowcounter, colindex, val,
header_style, None, None)
gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
for colindex, val in enumerate(row))
self.rowcounter += 1
return itertools.chain(gen, gen2)
Expand All @@ -752,26 +785,26 @@ def _format_body(self):

def _format_regular_rows(self):
self.rowcounter += 1
for colidx, colname in enumerate(self.df.columns):
for colidx, colname in enumerate(self.columns):
series = self.df[colname]
for i, val in enumerate(series):
yield ExcelCell(self.rowcounter + i, colidx,
val, None, None, None)
yield ExcelCell(self.rowcounter + i, colidx, val)

def _format_hierarchical_rows(self):
self.rowcounter += 1
for idx, idxval in enumerate(self.df.index):
yield ExcelCell(self.rowcounter + idx, 0,
idxval, header_style, None, None)
yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)

for colidx, colname in enumerate(self.df.columns):
for colidx, colname in enumerate(self.columns):
series = self.df[colname]
for i, val in enumerate(series):
yield ExcelCell(self.rowcounter + i,
colidx + 1, val, None, None, None)
yield ExcelCell(self.rowcounter + i, colidx + 1, val)

def get_formatted_cells(self):
return itertools.chain(self._format_header(), self._format_body())
for cell in itertools.chain(self._format_header(),
self._format_body()):
cell.val = self._format_value(cell.val)
yield cell

#----------------------------------------------------------------------
# Array formatters
Expand Down
17 changes: 7 additions & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,7 @@ def to_panel(self):

to_wide = deprecate('to_wide', to_panel)

def _helper_csvexcel(self, writer, na_rep=None, cols=None,
def _helper_csv(self, writer, na_rep=None, cols=None,
header=True, index=True,
index_label=None, float_format=None):
if cols is None:
Expand Down Expand Up @@ -1315,7 +1315,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_for EDBE mat=None,
else:
csvout = csv.writer(f, lineterminator='\n', delimiter=sep,
quoting=quoting)
self._helper_csvexcel(csvout, na_rep=na_rep,
self._helper_csv(csvout, na_rep=na_rep,
float_format=float_format, cols=cols,
header=header, index=index,
index_label=index_label)
Expand Down Expand Up @@ -1368,16 +1368,13 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
from pandas.io.parsers import ExcelWriter
need_save = False
if isinstance(excel_writer, basestring):
excel_writer = ExcelWriter(excel_writer)
excel_writer = ExcelWriter(excel_writer, na_rep=na_rep)
need_save = True
# excel_writer.cur_sheet = sheet_name
# self._helper_csvexcel(excel_writer, na_rep=na_rep,
# float_format=float_format, cols=cols,
# header=header, index=index,
# index_label=index_label)
formatter = fmt.ExcelFormatter(self)

formatter = fmt.ExcelFormatter(self, na_rep=na_rep, cols=cols)
formatted_cells = formatter.get_formatted_cells()
excel_writer.write_cells(formatted_cells, sheet_name)
excel_writer.write_cells(formatted_cells, sheet_name,
startrow=startrow, startcol=startcol)
if need_save:
excel_writer.save()

Expand Down
5 changes: 4 additions & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1959,8 +1959,10 @@ def _conv_value(val):
#convert value for excel dump
if isinstance(val, np.int64):
val = int(val)
if isinstance(val, np.bool8):
elif isinstance(val, np.bool8):
val = bool(val)
elif isinstance(val, lib.Timestamp):
val = val._repr_base

return val

Expand Down Expand Up @@ -2021,6 +2023,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
self._writecells_xls(cells, sheet_name, startrow, startcol)

def _writecells_xlsx(self, cells, sheet_name, startrow, startcol):

from openpyxl.cell import get_column_letter

if sheet_name in self.sheets:
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/parse_helper.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <errno.h>
#include <float.h>

double PANDAS_INLINE xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing);
double xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing);

int to_double(char *item, double *p_value, char sci, char decimal)
{
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ int P_INLINE end_field(parser_t *self) {
return 0;
}

int P_INLINE end_line(parser_t *self) {
int end_line(parser_t *self) {
int fields;
khiter_t k; /* for hash set detection */
int ex_fields = -1;
Expand Down
0