Date bin shift by alexcjohnson · Pull Request #1201 · plotly/plotly.js · GitHub
[go: up one dir, main page]

Skip to content

Date bin shift #1201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 28, 2016
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor numeric bin shift for consistency, and lint
  • Loading branch information
alexcjohnson committed Nov 26, 2016
commit 1b9b1229974963cd4882c6b4214f855d713e7d18
143 changes: 73 additions & 70 deletions src/plots/cartesian/axes.js
Original file line number Diff line number Diff line change
Expand Up @@ -486,22 +486,19 @@ axes.expand = function(ax, data, options) {
};

axes.autoBin = function(data, ax, nbins, is2d) {
var datamin = Lib.aggNums(Math.min, null, data),
datamax = Lib.aggNums(Math.max, null, data),
blankcount = 0,
datacount,
i;
var dataMin = Lib.aggNums(Math.min, null, data),
dataMax = Lib.aggNums(Math.max, null, data);

if(ax.type === 'category') {
return {
start: datamin - 0.5,
end: datamax + 0.5,
start: dataMin - 0.5,
end: dataMax + 0.5,
size: 1
};
}

var size0;
if(nbins) size0 = ((datamax - datamin) / nbins);
if(nbins) size0 = ((dataMax - dataMin) / nbins);
else {
// totally auto: scale off std deviation so the highest bin is
// somewhat taller than the total number of bins, but don't let
Expand All @@ -518,100 +515,106 @@ axes.autoBin = function(data, ax, nbins, is2d) {
}

// piggyback off autotick code to make "nice" bin sizes
var dummyax;
var dummyAx;
if(ax.type === 'log') {
dummyax = {
dummyAx = {
type: 'linear',
range: [datamin, datamax],
range: [dataMin, dataMax],
r2l: Number
};
}
else {
dummyax = {
dummyAx = {
type: ax.type,
// conversion below would be ax.c2r but that's only different from l2r
// for log, and this is the only place (so far?) we would want c2r.
range: [datamin, datamax].map(ax.l2r),
range: [dataMin, dataMax].map(ax.l2r),
r2l: ax.r2l
};
}

axes.autoTicks(dummyax, size0);
var binstart = axes.tickIncrement(
axes.tickFirst(dummyax), dummyax.dtick, 'reverse'),
binend;

function nearEdge(v) {
// is a value within 1% of a bin edge?
return (1 + (v - binstart) * 100 / dummyax.dtick) % 100 < 2;
}
axes.autoTicks(dummyAx, size0);
var binStart = axes.tickIncrement(
axes.tickFirst(dummyAx), dummyAx.dtick, 'reverse'),
binEnd;

// check for too many data points right at the edges of bins
// (>30% within 1% of bin edges, with <10% near the bin centers)
// or all data points integral, and offset the bins accordingly
if(typeof dummyax.dtick === 'number') {
var edgecount = 0,
midcount = 0,
intcount = 0;

for(i = 0; i < data.length; i++) {
if(data[i] % 1 === 0) intcount++;
else if(!isNumeric(data[i])) blankcount++;

if(nearEdge(data[i])) edgecount++;
if(nearEdge(data[i] + dummyax.dtick / 2)) midcount++;
}
datacount = data.length - blankcount;

if(intcount === datacount && ax.type !== 'date') {
// all integers: if bin size is <1, it's because
// that was specifically requested (large nbins)
// so respect that... but center the bins containing
// integers on those integers
if(dummyax.dtick < 1) {
binstart = datamin - 0.5 * dummyax.dtick;
}
// otherwise start half an integer down regardless of
// the bin size, just enough to clear up endpoint
// ambiguity about which integers are in which bins.
else binstart -= 0.5;
}
else if(midcount < datacount * 0.1) {
if(edgecount > datacount * 0.3 ||
nearEdge(datamin) || nearEdge(datamax)) {
// lots of points at the edge, not many in the middle
// shift half a bin
var binshift = dummyax.dtick / 2;
binstart += (binstart + binshift < datamin) ? binshift : -binshift;
}
}
if(typeof dummyAx.dtick === 'number') {
binStart = autoShiftNumericBins(binStart, data, dummyAx, dataMin, dataMax);

var bincount = 1 + Math.floor((datamax - binstart) / dummyax.dtick);
binend = binstart + bincount * dummyax.dtick;
var bincount = 1 + Math.floor((dataMax - binStart) / dummyAx.dtick);
binEnd = binStart + bincount * dummyAx.dtick;
}
else {
// month ticks - should be the only nonlinear kind we have
// at this point.
if(dummyax.dtick.charAt(0) === 'M') {
binstart = autoShiftMonthBins(binstart, data, dummyax.dtick, datamin);
// month ticks - should be the only nonlinear kind we have at this point.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... after traces/histogram/clean_bins.js that is, correct?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only relevant to auto-binning, where nonlinear ticks are dtick values... dtick only has special meaning on date axes (where 'M<n>' is n-months) and on log axes, but when you display a histogram on log axes we still bin on a linear scale.

if(dummyAx.dtick.charAt(0) === 'M') {
binStart = autoShiftMonthBins(binStart, data, dummyAx.dtick, dataMin);
}

// calculate the endpoint for nonlinear ticks - you have to
// just increment until you're done
binend = binstart;
while(binend <= datamax) {
binend = axes.tickIncrement(binend, dummyax.dtick);
binEnd = binStart;
while(binEnd <= dataMax) {
binEnd = axes.tickIncrement(binEnd, dummyAx.dtick);
}
}

return {
start: ax.c2r(binstart),
end: ax.c2r(binend),
size: dummyax.dtick
start: ax.c2r(binStart),
end: ax.c2r(binEnd),
size: dummyAx.dtick
};
};


/**
 * Adjust the start of a set of uniform numeric bins so that data values
 * do not sit ambiguously on bin edges.
 *
 * Two situations are handled:
 *  - every non-blank value is a whole number (and the axis is not a date
 *    axis): bins are offset so the integers fall inside bins rather than
 *    on their edges;
 *  - many values lie within 1% of a bin edge while few lie near bin
 *    centers: bins are shifted by half a bin.
 * Otherwise binStart is returned unchanged.
 *
 * Relies on the file-scope `isNumeric` helper to recognize blank entries.
 *
 * @param {number} binStart - proposed left edge of the first bin
 * @param {Array} data - values being binned; entries that are not whole
 *     numbers and fail `isNumeric` are counted as blanks
 * @param {object} ax - axis-like object; only `dtick` (the numeric bin
 *     size) and `type` are read
 * @param {number} dataMin - smallest data value
 * @param {number} dataMax - largest data value
 * @returns {number} the (possibly shifted) left edge of the first bin
 */
function autoShiftNumericBins(binStart, data, ax, dataMin, dataMax) {
    var binSize = ax.dtick;
    var edgeCount = 0;
    var midCount = 0;
    var intCount = 0;
    var blankCount = 0;

    function nearEdge(v) {
        // is a value within 1% of a bin edge?
        // (measured against the binStart passed in: every call happens
        // before binStart is reassigned below)
        return (1 + (v - binStart) * 100 / binSize) % 100 < 2;
    }

    for(var i = 0; i < data.length; i++) {
        var v = data[i];

        if(v % 1 === 0) intCount++;
        else if(!isNumeric(v)) blankCount++;

        if(nearEdge(v)) edgeCount++;
        // a value half a bin away from an edge is at a bin center
        if(nearEdge(v + binSize / 2)) midCount++;
    }
    var dataCount = data.length - blankCount;

    if(intCount === dataCount && ax.type !== 'date') {
        if(binSize < 1) {
            // all integers: if bin size is <1, it's because
            // that was specifically requested (large nbins)
            // so respect that... but center the bins containing
            // integers on those integers
            binStart = dataMin - 0.5 * binSize;
        }
        else {
            // otherwise start half an integer down regardless of
            // the bin size, just enough to clear up endpoint
            // ambiguity about which integers are in which bins.
            binStart -= 0.5;

            // ...but if that leaves the whole first bin below the data,
            // move up one bin so we don't emit an empty leading bin
            if(binStart + binSize < dataMin) binStart += binSize;
        }
    }
    else if(midCount < dataCount * 0.1) {
        if(edgeCount > dataCount * 0.3 ||
                nearEdge(dataMin) || nearEdge(dataMax)) {
            // lots of points at the edge, not many in the middle
            // shift half a bin - upward if the first bin still reaches
            // the smallest value afterwards, downward otherwise
            var binShift = binSize / 2;
            binStart += (binStart + binShift < dataMin) ? binShift : -binShift;
        }
    }
    return binStart;
}


function autoShiftMonthBins(binStart, data, dtick, dataMin) {
var exactYears = 0,
exactMonths = 0,
Expand Down
0