8000 Merge pull request #588 from datastax/java764 · kecmu/java-driver@3f5e0fe · GitHub
[go: up one dir, main page]

Skip to content

Commit 3f5e0fe

Browse files
committed
Merge pull request apache#588 from datastax/java764
JAVA-764: LWT / CAS Consistency bug on Retry Policy.
2 parents d754952 + e3307d9 commit 3f5e0fe

File tree

13 files changed

+234
-144
lines changed

13 files changed

+234
-144
lines changed

changelog/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
- [improvement] JAVA-923: Position idempotent flag on object mapper queries.
3333
- [new feature] JAVA-1019: SchemaBuilder support for CREATE/ALTER/DROP KEYSPACE.
3434
- [bug] JAVA-1070: The Mapper should not prepare queries synchronously.
35+
- [new feature] JAVA-982: Introduce new method ConsistencyLevel.isSerial().
36+
- [bug] JAVA-764: Retry with the normal consistency level (not the serial one) when a write times out on the Paxos phase.
3537

3638
Merged from 2.0 branch:
3739

driver-core/src/main/java/com/datastax/driver/core/ConsistencyLevel.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,30 @@ static ConsistencyLevel fromCode(int code) {
5858
}
5959

6060
/**
61-
* Whether or not the the consistency level applies to the local data-center only.
61+
* Whether or not this consistency level applies to the local data-center only.
6262
*
6363
* @return whether this consistency level is {@code LOCAL_ONE} or {@code LOCAL_QUORUM}.
6464
*/
6565
public boolean isDCLocal() {
6666
return this == LOCAL_ONE || this == LOCAL_QUORUM;
6767
}
68+
69+
/**
70+
* Whether or not this consistency level is serial, that is,
71+
* applies only to the "paxos" phase of a
72+
* <a href="https://docs.datastax.com/en/cassandra/2.1/cassandra/dml/dml_ltwt_transaction_c.html">Lightweight transaction</a>.
73+
* <p/>
74+
* Serial consistency levels are only meaningful when executing conditional updates ({@code INSERT}, {@code UPDATE}
75+
* or {@code DELETE} statements with an {@code IF} condition).
76+
* <p/>
77+
* Two consistency levels belong to this category: {@link #SERIAL} and {@link #LOCAL_SERIAL}.
78+
*
79+
* @return whether this consistency level is {@link #SERIAL} or {@link #LOCAL_SERIAL}.
80+
* @see Statement#setSerialConsistencyLevel(ConsistencyLevel)
81+
* @see <a href="https://docs.datastax.com/en/cassandra/2.1/cassandra/dml/dml_ltwt_transaction_c.html">Lightweight transactions</a>
82+
*/
83+
public boolean isSerial() {
84+
return this == SERIAL || this == LOCAL_SERIAL;
85+
}
86+
6887
}

driver-core/src/main/java/com/datastax/driver/core/DefaultPreparedStatement.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public ConsistencyLevel getConsistencyLevel() {
141141

142142
@Override
143143
public PreparedStatement setSerialConsistencyLevel(ConsistencyLevel serialConsistency) {
144-
if (serialConsistency != ConsistencyLevel.SERIAL && serialConsistency != ConsistencyLevel.LOCAL_SERIAL)
144+
if (!serialConsistency.isSerial())
145145
throw new IllegalArgumentException();
146146
this.serialConsistency = serialConsistency;
147147
return this;

driver-core/src/main/java/com/datastax/driver/core/RequestHandler.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,8 @@ private void processRetryDecision(RetryPolicy.RetryDecision retryDecision, Conne
382382

383383
private void retry(final boolean retryCurrent, ConsistencyLevel newConsistencyLevel) {
384384
final Host h = current;
385-
this.retryConsistencyLevel = newConsistencyLevel;
385+
if (newConsistencyLevel != null)
386+
this.retryConsistencyLevel = newConsistencyLevel;
386387

387388
// We should not retry on the current thread as this will be an IO thread.
388389
manager.executor().execute(new Runnable() {

driver-core/src/main/java/com/datastax/driver/core/Statement.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ public ConsistencyLevel getConsistencyLevel() {
8282
/**
8383
* Sets the serial consistency level for the query.
8484
* <p/>
85-
* The serial consistency level is only used by conditional updates (so INSERT, UPDATE
86-
* and DELETE with an IF condition). For those, the serial consistency level defines
85+
* The serial consistency level is only used by conditional updates ({@code INSERT}, {@code UPDATE}
86+
* or {@code DELETE} statements with an {@code IF} condition).
87+
* For those, the serial consistency level defines
8788
* the consistency level of the serial phase (or "paxos" phase) while the
8889
* normal consistency level defines the consistency for the "learn" phase, i.e. what
8990
* type of reads will be guaranteed to see the update right away. For instance, if
@@ -106,19 +107,19 @@ public ConsistencyLevel getConsistencyLevel() {
106107
* {@code ConsistencyLevel.SERIAL} or {@code ConsistencyLevel.LOCAL_SERIAL}.
107108
*/
108109
public Statement setSerialConsistencyLevel(ConsistencyLevel serialConsistency) {
109-
if (serialConsistency != ConsistencyLevel.SERIAL && serialConsistency != ConsistencyLevel.LOCAL_SERIAL)
110-
throw new IllegalArgumentException();
110+
if (!serialConsistency.isSerial())
111+
throw new IllegalArgumentException("Supplied consistency level is not serial: " + serialConsistency);
111112
this.serialConsistency = serialConsistency;
112113
return this;
113114
}
114115

115116
/**
116117
* The serial consistency level for this query.
117118
* <p/>
118-
* See {@link #setSerialConsistencyLevel} for more detail on the serial consistency level.
119+
* See {@link #setSerialConsistencyLevel(ConsistencyLevel)} for more detail on the serial consistency level.
119120
*
120-
* @return the consistency level for this query, or {@code null} if no serial
121-
* consistency level has been specified (through {@code setSerialConsistencyLevel}).
121+
* @return the serial consistency level for this query, or {@code null} if no serial
122+
* consistency level has been specified (through {@link #setSerialConsistencyLevel(ConsistencyLevel)}).
122123
* In the latter case, the default serial consistency level will be used.
123124
*/
124125
public ConsistencyLevel getSerialConsistencyLevel() {
@@ -446,4 +447,4 @@ boolean isIdempotentWithDefault(QueryOptions queryOptions) {
446447
else
447448
return queryOptions.getDefaultIdempotence();
448449
}
449-
}
450+
}

driver-core/src/main/java/com/datastax/driver/core/policies/DefaultRetryPolicy.java

Lines changed: 16 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ private DefaultRetryPolicy() {
4242
}
4343

4444
/**
45-
* Defines whether to retry and at which consistency level on a read timeout.
45+
* {@inheritDoc}
4646
* <p/>
47-
* This method triggers a maximum of one retry, and only if enough
47+
* This implementation triggers a maximum of one retry, and only if enough
4848
* replicas had responded to the read request but data was not retrieved
4949
* amongst those. Indeed, that case usually means that enough replica
5050
* are alive to satisfy the consistency but the coordinator picked a
@@ -53,15 +53,6 @@ private DefaultRetryPolicy() {
5353
* timeout the dead replica will likely have been detected as dead and
5454
* the retry has a high chance of success.
5555
*
56-
* @param statement the original query that timed out.
57-
* @param cl the original consistency level of the read that timed out.
58-
* @param requiredResponses the number of responses that were required to
59-
* achieve the requested consistency level.
60-
* @param receivedResponses the number of responses that had been received
61-
* by the time the timeout exception was raised.
62-
* @param dataRetrieved whether actual data (by opposition to data checksum)
63-
* was present in the received responses.
64-
* @param nbRetry the number of retries already performed for this operation.
6556
* @return {@code RetryDecision.retry(cl)} if no retry attempt has yet been tried and
6657
* {@code receivedResponses >= requiredResponses && !dataRetrieved}, {@code RetryDecision.rethrow()} otherwise.
6758
*/
@@ -74,9 +65,9 @@ public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int
7465
}
7566

7667
/**
77-
* Defines whether to retry and at which consistency level on a write timeout.
68+
* {@inheritDoc}
7869
* <p/>
79-
* This method triggers a maximum of one retry, and only in the case of
70+
* This implementation triggers a maximum of one retry, and only in the case of
8071
* a {@code WriteType.BATCH_LOG} write. The reasoning for the retry in
8172
* that case is that write to the distributed batch log is tried by the
8273
* coordinator of the write against a small subset of all the nodes alive
@@ -86,14 +77,6 @@ public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int
8677
* nodes will likely have been detected as dead and the retry has thus a
8778
* high chance of success.
8879
*
89-
* @param statement the original query that timed out.
90-
* @param cl the original consistency level of the write that timed out.
91-
* @param writeType the type of the write that timed out.
92-
* @param requiredAcks the number of acknowledgments that were required to
93-
* achieve the requested consistency level.
94-
* @param receivedAcks the number of acknowledgments that had been received
95-
* by the time the timeout exception was raised.
96-
* @param nbRetry the number of retry already performed for this operation.
9780
* @return {@code RetryDecision.retry(cl)} if no retry attempt has yet been tried and
9881
* {@code writeType == WriteType.BATCH_LOG}, {@code RetryDecision.rethrow()} otherwise.
9982
*/
@@ -103,37 +86,27 @@ public RetryDecision onWriteTimeout(Statement statement, ConsistencyLevel cl, Wr
10386
return RetryDecision.rethrow();
10487

10588
// If the batch log write failed, retry the operation as this might just be we were unlucky at picking candidates
89+
// JAVA-764: testing the write type automatically filters out serial consistency levels as these have always WriteType.CAS.
10690
return writeType == WriteType.BATCH_LOG ? RetryDecision.retry(cl) : RetryDecision.rethrow();
10791
}
10892

10993
/**
110-
* Defines whether to retry and at which consistency level on an
111-
* unavailable exception.
94+
* {@inheritDoc}
11295
* <p/>
113-
* This method triggers a retry iff no retry has been executed before
114-
* (nbRetry == 0), with
115-
* {@link RetryPolicy.RetryDecision#tryNextHost(ConsistencyLevel) RetryDecision.tryNextHost(cl)},
116-
* otherwise it throws an exception. The retry will be processed on the next host
117-
* in the query plan according to the current Load Balancing Policy.
118-
* Where retrying on the same host in the event of an Unavailable exception
119-
* has almost no chance of success, if the first replica tried happens to
120-
* be "network" isolated from all the other nodes but can still answer to
121-
* the client, it makes sense to retry the query on another node.
122-
*
123-
* @param statement the original query for which the consistency level cannot
124-
* be achieved.
125-
* @param cl the original consistency level for the operation.
126-
* @param requiredReplica the number of replica that should have been
127-
* (known) alive for the operation to be attempted.
128-
* @param aliveReplica the number of replica that were know to be alive by
129-
* the coordinator of the operation.
130-
* @param nbRetry the number of retry already performed for this operation.
131-
* @return {@code RetryDecision.rethrow()}.
96+
* This implementation does the following:
97+
* <ul>
98+
* <li>if this is the first retry ({@code nbRetry == 0}), it triggers a retry on the next host in the query plan
99+
* with the same consistency level ({@link RetryPolicy.RetryDecision#tryNextHost(ConsistencyLevel) RetryDecision#tryNextHost(null)}.
100+
* The rationale is that the first coordinator might have been network-isolated from all other nodes (thinking
101+
* they're down), but still able to communicate with the client; in that case, retrying on the same host has almost
102+
* no chance of success, but moving to the next host might solve the issue.</li>
103+
* <li>otherwise, the exception is rethrow.</li>
104+
* </ul>
132105
*/
133106
@Override
134107
public RetryDecision onUnavailable(Statement statement, ConsistencyLevel cl, int requiredReplica, int aliveReplica, int nbRetry) {
135108
return (nbRetry == 0)
136-
? RetryDecision.tryNextHost(cl)
109+
? RetryDecision.tryNextHost(null)
137110
: RetryDecision.rethrow();
138111
}
139112

driver-core/src/main/java/com/datastax/driver/core/policies/DowngradingConsistencyRetryPolicy.java

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -85,25 +85,14 @@ else if (knownOk == 1)
8585
}
8686

8787
/**
88-
* Defines whether to retry and at which consistency level on a read timeout.
88+
* {@inheritDoc}
8989
* <p/>
90-
* This method triggers a maximum of one retry. If less replica
90+
* This implementation triggers a maximum of one retry. If less replica
9191
* responded than required by the consistency level (but at least one
9292
* replica did respond), the operation is retried at a lower
9393
* consistency level. If enough replica responded but data was not
9494
* retrieve, the operation is retried with the initial consistency
9595
* level. Otherwise, an exception is thrown.
96-
*
97-
* @param statement the original query that timed out.
98-
* @param cl the original consistency level of the read that timed out.
99-
* @param requiredResponses the number of responses that were required to
100-
* achieve the requested consistency level.
101-
* @param receivedResponses the number of responses that had been received
102-
* by the time the timeout exception was raised.
103-
* @param dataRetrieved whether actual data (by opposition to data checksum)
104-
* was present in the received responses.
105-
* @param nbRetry the number of retry already performed for this operation.
106-
* @return a RetryDecision as defined above.
10796
*/
10897
@Override
10998
public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int requiredResponses, int receivedResponses, boolean dataRetrieved, int nbRetry) {
@@ -114,7 +103,7 @@ public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int
114103
// normal consistency levels on the committing phase. So the main use case for CAS reads is probably for
115104
// when you've timed out on a CAS write and want to make sure what happened. Downgrading in that case
116105
// would be always wrong so we just special case to rethrow.
117-
if (cl == ConsistencyLevel.SERIAL || cl == ConsistencyLevel.LOCAL_SERIAL)
106+
if (cl.isSerial())
118107
return RetryDecision.rethrow();
119108

120109
if (receivedResponses < requiredResponses) {
@@ -126,9 +115,9 @@ public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int
126115
}
127116

128117
/**
129-
* Defines whether to retry and at which consistency level on a write timeout.
118+
* {@inheritDoc}
130119
* <p/>
131-
* This method triggers a maximum of one retry. If {@code writeType ==
120+
* This implementation triggers a maximum of one retry. If {@code writeType ==
132121
* WriteType.BATCH_LOG}, the write is retried with the initial
133122
* consistency level. If {@code writeType == WriteType.UNLOGGED_BATCH}
134123
* and at least one replica acknowledged, the write is retried with a
@@ -137,16 +126,6 @@ public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, int
137126
* all, even if {@code receivedAcks > 0}). For other write types ({@code WriteType.SIMPLE}
138127
* and {@code WriteType.BATCH}), if we know the write has been persisted on at
139128
* least one replica, we ignore the exception. Otherwise, an exception is thrown.
140-
*
141-
* @param statement the original query that timed out.
142-
* @param cl the original consistency level of the write that timed out.
143-
* @param writeType the type of the write that timed out.
144-
* @param requiredAcks the number of acknowledgments that were required to
145-
* achieve the requested consistency level.
146-
* @param receivedAcks the number of acknowledgments that had been received
147-
* by the time the timeout exception was raised.
148-
* @param nbRetry the number of retry already performed for this operation.
149-
* @return a RetryDecision as defined above.
150129
*/
151130
@Override
152131
public RetryDecision onWriteTimeout(Statement statement, ConsistencyLevel cl, WriteType writeType, int requiredAcks, int receivedAcks, int nbRetry) {
@@ -170,28 +149,22 @@ public RetryDecision onWriteTimeout(Statement statement, ConsistencyLevel cl, Wr
170149
}
171150

172151
/**
173-
* Defines whether to retry and at which consistency level on an
174-
* unavailable exception.
152+
* {@inheritDoc}
175153
* <p/>
176-
* This method triggers a maximum of one retry. If at least one replica
154+
* This implementation triggers a maximum of one retry. If at least one replica
177155
* is know to be alive, the operation is retried at a lower consistency
178156
* level.
179-
*
180-
* @param statement the original query for which the consistency level cannot
181-
* be achieved.
182-
* @param cl the original consistency level for the operation.
183-
* @param requiredReplica the number of replica that should have been
184-
* (known) alive for the operation to be attempted.
185-
* @param aliveReplica the number of replica that were know to be alive by
186-
* the coordinator of the operation.
187-
* @param nbRetry the number of retry already performed for this operation.
188-
* @return a RetryDecision as defined above.
189157
*/
190158
@Override
191159
public RetryDecision onUnavailable(Statement statement, ConsistencyLevel cl, int requiredReplica, int aliveReplica, int nbRetry) {
192160
if (nbRetry != 0)
193161
return RetryDecision.rethrow();
194162

163+
// JAVA-764: if the requested consistency level is serial, it means that the operation failed at the paxos phase of a LWT.
164+
// Retry on the next host, on the assumption that the initial coordinator could be network-isolated.
165+
if (cl.isSerial())
166+
return RetryDecision.tryNextHost(null);
167+
195168
// Tries the biggest CL that is expected to work
196169
return maxLikelyToWorkCL(aliveReplica);
197170
}

driver-core/src/main/java/com/datastax/driver/core/policies/ExtendedRetryPolicy.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ public interface ExtendedRetryPolicy extends RetryPolicy {
4949
* is known to be idempotent.
5050
*
5151
* @param statement the original query that failed.
52-
* @param cl the original consistency level for the operation.
52+
* @param cl the requested consistency level for the operation.
53+
* Note that this is not necessarily the achieved consistency level (if any),
54+
* and it is never a {@link ConsistencyLevel#isSerial() serial} one.
5355
* @param e the exception that caused this request to fail.
5456
* @param nbRetry the number of retries already performed for this operation.
5557
* @return the retry decision. If {@code RetryDecision.RETHROW} is returned,

0 commit comments

Comments
 (0)
0