Closed
Description
From the mailing list. This is caused by the change in how None is handled in PyObjectHashTable.
import datetime
import numpy
import pandas
def simple_test2():
    """Print a small frame and its groupby-sum where one group key is None.

    Builds a three-row DataFrame whose ``grouper`` column holds two strings
    and one ``None``, adds a ``weights`` column (each ``value`` divided by
    the column total), sums the frame grouped by ``grouper``, and prints
    both the frame and the grouped result so the output can be compared
    between pandas versions.

    Returns nothing; the only effect is the printed output.
    """
    data = [
        [1, 'string1', 1.0],
        [2, 'string2', 2.0],
        [3, None, 3.0],
    ]
    df = pandas.DataFrame({
        'key': [x[0] for x in data],
        'grouper': [x[1] for x in data],
        'value': [x[2] for x in data],
    })
    df['weights'] = df['value'] / df['value'].sum()
    gb = df.groupby('grouper').aggregate(numpy.sum)

    # Single-argument print calls produce the same output whether print is
    # the Python 2 statement or the Python 3 function.
    print('')
    print(df)
    print('')
    print(gb)


if __name__ == '__main__':
    simple_test2()
0.7rc1 yields:
grouper key value weights
0 string1 1 1 0.1667
1 string2 2 2 0.3333
2 NaN 3 3 0.5000
key value weights
grouper
None 3 3 0.5000
string1 1 1 0.1667
string2 2 2 0.3333
0.7 final yields:
grouper key value weights
0 string1 1 1 0.166667
1 string2 2 2 0.333333
2 None 3 3 0.500000
key value weights
grouper
string1 1 1 0.166667
string2 2 2 0.333333
The 0.7 final groupby output is missing one line: the row for the None group that 0.7rc1 printed is no longer there.