diff --git a/biojava-aa-prop/pom.xml b/biojava-aa-prop/pom.xml index 1a449c7ef8..72d5044e69 100644 --- a/biojava-aa-prop/pom.xml +++ b/biojava-aa-prop/pom.xml @@ -2,7 +2,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-aa-prop @@ -70,12 +70,12 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 org.biojava biojava-structure - 4.2.7 + 4.2.8 diff --git a/biojava-alignment/pom.xml b/biojava-alignment/pom.xml index 7dfe24c010..d1ebe860f9 100644 --- a/biojava-alignment/pom.xml +++ b/biojava-alignment/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-alignment biojava-alignment @@ -46,7 +46,7 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 compile @@ -74,7 +74,7 @@ org.biojava biojava-phylo - 4.2.7 + 4.2.8 diff --git a/biojava-core/pom.xml b/biojava-core/pom.xml index 90e465d2dc..20b75fcb33 100644 --- a/biojava-core/pom.xml +++ b/biojava-core/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-core diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java b/biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java index ede117fc88..b369c50fa9 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java @@ -31,6 +31,7 @@ import org.biojava.nbio.core.sequence.location.template.Location; import org.biojava.nbio.core.sequence.location.template.Point; import org.biojava.nbio.core.sequence.template.*; +import org.biojava.nbio.core.util.Equals; import java.io.Serializable; import java.util.ArrayList; @@ -59,7 +60,10 @@ public class SimpleAlignedSequence, C extends Compound> im // cached (lazily initialized) private int numGaps = -1; - private int[] alignmentFromSequence, sequenceFromAlignment; + private int numGapPositions = -1; + + private int[] alignmentFromSequence; + private int[] sequenceFromAlignment; /** * Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment. @@ -131,25 +135,35 @@ public void clearCache() { sequenceFromAlignment = null; } - @Override - public int getAlignmentIndexAt(int sequenceIndex) { - if (alignmentFromSequence == null) { - alignmentFromSequence = new int[original.getLength()]; - int s = 1, a = 1; - for (int i = 0; i < numBefore; i++, s++) { - alignmentFromSequence[s - 1] = a; - } - for (; s <= alignmentFromSequence.length && a <= length; s++, a++) { - while (a <= length && isGap(a)) { - a++; - } - alignmentFromSequence[s - 1] = a; - } - a--; - for (int i = 0; i < numAfter; i++, s++) { - alignmentFromSequence[s - 1] = a; + private void setAlignmentFromSequence() { + alignmentFromSequence = new int[original.getLength()]; + int s = 1, a = 1; + for (int i = 0; i < numBefore; i++, s++) { + alignmentFromSequence[s - 1] = a; + } + for (; s <= alignmentFromSequence.length && a <= length; s++, a++) { + while (a <= length && isGap(a)) { + a++; } + alignmentFromSequence[s - 1] = a; + } + a--; + for (int i = 0; i < numAfter; i++, s++) { + alignmentFromSequence[s - 1] = a; } + } + + @Override + public int[] getAlignmentFromSequence() { + if (alignmentFromSequence == null) + setAlignmentFromSequence(); + return alignmentFromSequence; + } + + @Override + public int getAlignmentIndexAt(int sequenceIndex) { + if (alignmentFromSequence == null) + setAlignmentFromSequence(); return alignmentFromSequence[sequenceIndex - 1]; } @@ -167,6 +181,7 @@ public Location getLocationInAlignment() { public int getNumGaps() { if (numGaps == -1) { numGaps = 0; + numGapPositions = 0; C cGap = getCompoundSet().getCompoundForString(gap); boolean inGap = false; for (C compound : getAsList()) { @@ -175,6 +190,7 @@ public int getNumGaps() { numGaps++; inGap = true; } + numGapPositions++; } else { inGap = false; } @@ -194,21 +210,31 @@ public int getOverlapCount() { return 1; } - @Override - public int getSequenceIndexAt(int alignmentIndex) { - if (sequenceFromAlignment == null) { - sequenceFromAlignment = new int[length]; - int a = 1, s = numBefore + 1; - for (int i = 0; i < getStart().getPosition(); i++, a++) { - sequenceFromAlignment[a - 1] = s; - } - for (; a <= length; a++) { - if (!isGap(a)) { - s++; - } - sequenceFromAlignment[a - 1] = s; + private void setSequenceFromAlignment() { + sequenceFromAlignment = new int[length]; + int a = 1, s = numBefore + 1; + for (int i = 0; i < getStart().getPosition(); i++, a++) { + sequenceFromAlignment[a - 1] = s; + } + for (; a <= length; a++) { + if (!isGap(a)) { + s++; } + sequenceFromAlignment[a - 1] = s; } + } + + @Override + public int[] getSequenceFromAlignment() { + if (sequenceFromAlignment == null) + setSequenceFromAlignment(); + return sequenceFromAlignment; + } + + @Override + public int getSequenceIndexAt(int alignmentIndex) { + if (sequenceFromAlignment == null) + setSequenceFromAlignment(); return sequenceFromAlignment[alignmentIndex - 1]; } @@ -266,6 +292,30 @@ public List getAsList() { return compounds; } + @Override + public boolean equals(Object o){ + + if(! Equals.classEqual(this, o)) { + return false; + } + + Sequence other = (Sequence)o; + if ( original.getAsList().size() != other.getAsList().size()) + return false; + + for ( int i = 0 ; i< original.getAsList().size() ; i++){ + if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i))) + return false; + } + return true; + } + + @Override + public int hashCode(){ + String s = getSequenceAsString(); + return s.hashCode(); + } + @Override public C getCompoundAt(int alignmentIndex) { return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ? @@ -382,4 +432,18 @@ private void setLocation(List steps) { public SequenceView getInverse() { throw new UnsupportedOperationException("Not supported yet."); } + + @Override + public int getNumGapPositions() { + if (numGapPositions == -1) + getNumGaps(); + return numGapPositions; + } + + @Override + public double getCoverage() { + + double coverage = getLength() - getNumGapPositions(); + return coverage / getOriginalSequence().getLength(); + } } diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java b/biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java index 5ddcde13a1..76a1fe1c8b 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java @@ -47,6 +47,18 @@ enum Step { COMPOUND, GAP } */ void clearCache(); + /** Returns the alignment. + * + * @return the alignment + */ + int[] getAlignmentFromSequence(); + + /** Returns the sequence positions at each alignment index + * + * @return array of the sequence positions + */ + int[] getSequenceFromAlignment(); + /** * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}. * Both indices are 1-indexed and inclusive. @@ -130,4 +142,20 @@ enum Step { COMPOUND, GAP } */ boolean isGap(int alignmentIndex); + /** + * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location} + * information or from gap {@link Compound}s, which may not necessarily result in the same number. + * + * @return number of gap positions in the sequence + */ + int getNumGapPositions(); + + /** + * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence. + * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength(). + * + * @return coverage of the original sequence by the aligned sequence + */ + double getCoverage(); + } diff --git a/biojava-genome/pom.xml b/biojava-genome/pom.xml index c82fade42d..452b909abd 100644 --- a/biojava-genome/pom.xml +++ b/biojava-genome/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-genome @@ -85,13 +85,13 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 compile org.biojava biojava-alignment - 4.2.7 + 4.2.8 compile diff --git a/biojava-integrationtest/pom.xml b/biojava-integrationtest/pom.xml index c52783d2a1..7fbc1deff2 100644 --- a/biojava-integrationtest/pom.xml +++ b/biojava-integrationtest/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-integrationtest jar @@ -32,7 +32,7 @@ org.biojava biojava-structure - 4.2.7 + 4.2.8 diff --git a/biojava-modfinder/pom.xml b/biojava-modfinder/pom.xml index 0ca1247f39..fded08c306 100644 --- a/biojava-modfinder/pom.xml +++ b/biojava-modfinder/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-modfinder biojava-modfinder @@ -31,7 +31,7 @@ org.biojava biojava-structure - 4.2.7 + 4.2.8 jar compile diff --git a/biojava-ontology/pom.xml b/biojava-ontology/pom.xml index 03b1c00b76..cf6af99b55 100644 --- a/biojava-ontology/pom.xml +++ b/biojava-ontology/pom.xml @@ -4,7 +4,7 @@ org.biojava biojava - 4.2.7 + 4.2.8 biojava-ontology diff --git a/biojava-phylo/pom.xml b/biojava-phylo/pom.xml index d374a993fd..8f4fae1576 100644 --- a/biojava-phylo/pom.xml +++ b/biojava-phylo/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-phylo @@ -44,7 +44,7 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 compile diff --git a/biojava-protein-disorder/pom.xml b/biojava-protein-disorder/pom.xml index 73c642d3e1..cdbd04df3b 100644 --- a/biojava-protein-disorder/pom.xml +++ b/biojava-protein-disorder/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-protein-disorder jar @@ -63,7 +63,7 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 diff --git a/biojava-sequencing/pom.xml b/biojava-sequencing/pom.xml index 0b4d66e069..2e5f4af250 100644 --- a/biojava-sequencing/pom.xml +++ b/biojava-sequencing/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-sequencing @@ -47,7 +47,7 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 compile diff --git a/biojava-structure-gui/pom.xml b/biojava-structure-gui/pom.xml index cfd684d103..d9ea4db9f5 100644 --- a/biojava-structure-gui/pom.xml +++ b/biojava-structure-gui/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 4.0.0 biojava-structure-gui @@ -25,13 +25,13 @@ org.biojava biojava-structure - 4.2.7 + 4.2.8 compile org.biojava biojava-core - 4.2.7 + 4.2.8 compile diff --git a/biojava-structure/pom.xml b/biojava-structure/pom.xml index db1c41a4f1..0577749f78 100644 --- a/biojava-structure/pom.xml +++ b/biojava-structure/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-structure biojava-structure @@ -22,13 +22,13 @@ org.biojava biojava-alignment - 4.2.7 + 4.2.8 compile org.biojava biojava-core - 4.2.7 + 4.2.8 compile diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java index d41e116971..fac2a86cbb 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java @@ -67,7 +67,7 @@ public class DownloadChemCompProvider implements ChemCompProvider { public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp"; - public static final String SERVER_LOCATION = "http://www.rcsb.org/pdb/files/ligand/"; + public static final String SERVER_LOCATION = "http://files.rcsb.org/ligands/download/"; private static File path; diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/secstruc/DSSPParser.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/secstruc/DSSPParser.java index aa5de5a35a..eb762b9f25 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/secstruc/DSSPParser.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/secstruc/DSSPParser.java @@ -31,6 +31,7 @@ import java.net.URL; import java.util.ArrayList; import java.util.List; +import java.util.zip.GZIPInputStream; import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.ResidueNumber; @@ -95,8 +96,11 @@ public static List fetch(String pdb, Structure structure, boolean assign) throws IOException, StructureException { - InputStream in = new URL("http://www.rcsb.org/pdb/files/"+ - pdb+".dssp").openStream(); + URL url = new URL("http://files.rcsb.org/dssp/" + + pdb.toLowerCase().substring(1, 3) + "/" + + pdb.toLowerCase() + "/" + + pdb + ".dssp.gz"); + InputStream in = new GZIPInputStream(url.openStream()); Reader read = new InputStreamReader(in); BufferedReader reader = new BufferedReader(read); return generalParse(reader, structure, assign); diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/Test2JA5.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/Test2JA5.java index 2dbdda4c93..4835800e70 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/Test2JA5.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/Test2JA5.java @@ -20,9 +20,9 @@ */ package org.biojava.nbio.structure; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; import java.io.IOException; @@ -55,7 +55,7 @@ public void test2JA5() throws IOException, StructureException { // assertTrue(StructureTools.getNrAtoms(s1) == 0); // SeqRes contains 15 chains, but since we cannot align Chain N to AtomGroups => 14. - assertTrue(s1.getChains().size() == 14); + assertEquals(14, s1.getChains().size()); Chain nChain = null; try { @@ -84,8 +84,8 @@ public void test2JA5noHeader() throws IOException, StructureException { // This is not applicable anymore, we need to parse atoms to have chains to match. assertTrue(StructureTools.getNrAtoms(s1) == 0); - // All 15 seqres chains will be store. - assertTrue(s1.getChains().size() == 15); + // 2ja5 has been remediated on March 2017, now it has 14 chains in seqres matching the 14 chains in atoms (chain N has been removed) + assertEquals(14, s1.getChains().size()); Chain nChain = null; try { @@ -93,6 +93,6 @@ public void test2JA5noHeader() throws IOException, StructureException { } catch (StructureException e){ // this is expected here, since there is no chain N } - assertNotNull(nChain); + assertNull(nChain); } } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestURLIdentifier.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestURLIdentifier.java index 141cb9e034..3d55a3e286 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestURLIdentifier.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestURLIdentifier.java @@ -101,7 +101,7 @@ public void testURLParameters() throws StructureException, IOException { assertEquals("wrong length for chainId=A", 94, StructureTools.getRepresentativeAtomArray(reduced).length); try { - url = new URL("http://www.rcsb.org/pdb/files/1B8G.pdb.gz"); + url = new URL("http://files.rcsb.org/download/1B8G.pdb.gz"); id = new URLIdentifier(url); full = id.loadStructure(cache); diff --git a/biojava-survival/pom.xml b/biojava-survival/pom.xml index e6c0294b34..784fe909ab 100644 --- a/biojava-survival/pom.xml +++ b/biojava-survival/pom.xml @@ -4,7 +4,7 @@ org.biojava biojava - 4.2.7 + 4.2.8 biojava-survival diff --git a/biojava-ws/pom.xml b/biojava-ws/pom.xml index 0f2e83e414..fa83379487 100644 --- a/biojava-ws/pom.xml +++ b/biojava-ws/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.7 + 4.2.8 biojava-ws biojava-ws @@ -19,7 +19,7 @@ org.biojava biojava-core - 4.2.7 + 4.2.8 compile diff --git a/pom.xml b/pom.xml index 6513c4cb61..8085813a81 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ org.biojava biojava pom - 4.2.7 + 4.2.8 biojava BioJava is an open-source project dedicated to providing a Java framework for processing biological data. It provides analytical and statistical routines, parsers for common file formats and allows the @@ -44,7 +44,7 @@ scm:git:git@github.com:biojava/biojava.git https://github.com/biojava/biojava - biojava-4.2.7 + biojava-4.2.8 + --allow-script-in-comments 256m