diff --git a/.evergreen/compile.sh b/.evergreen/compile.sh
new file mode 100755
index 00000000..279dffed
--- /dev/null
+++ b/.evergreen/compile.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -o xtrace   # Write all commands first to stderr
+set -o errexit  # Exit the script with error if any of the commands fail
+
+############################################
+#            Main Program                  #
+############################################
+
+# We always compile with the latest version of java
+export JAVA_HOME="/opt/java/jdk8"
+./gradlew -version
+./gradlew -PxmlReports.enabled=true --info -x test clean check jar testClasses javadoc
diff --git a/.evergreen/config.yml b/.evergreen/config.yml
new file mode 100644
index 00000000..6e6945cf
--- /dev/null
+++ b/.evergreen/config.yml
@@ -0,0 +1,417 @@
+########################################
+# Evergreen Template for MongoDB Drivers
+########################################
+
+# When a task that used to pass starts to fail,
+# go through all versions that may have been skipped to detect
+# when the task started failing.
+stepback: true
+
+# Mark a failure as a system/bootstrap failure (purple box) rather than a task
+# failure by default.
+# Actual testing tasks are marked with `type: test`
+command_type: system
+
+# Protect ourselves against a rogue test case, or curl gone wild, that runs forever.
+# 24 minutes is the longest we'll ever run.
+exec_timeout_secs: 1440 # 24 minutes is the longest we'll ever run
+
+# What to do when evergreen hits the timeout (`post:` tasks are run automatically)
+timeout:
+  - command: shell.exec
+    params:
+      script: |
+        ls -la
+
+functions:
+  "fetch source":
+    # Executes git clone and applies the submitted patch, if any
+    - command: git.get_project
+      params:
+        directory: "src"
+    # Applies the submitted patch, if any
+    # Deprecated. Should be removed. But still needed for certain agents (ZAP)
+    - command: git.apply_patch
+    # Make an evergreen expansion file with dynamic values
+    - command: shell.exec
+      params:
+        working_dir: "src"
+        script: |
+          # Get the current unique version of this checkout
+          if [ "${is_patch}" = "true" ]; then
+            CURRENT_VERSION=$(git describe)-patch-${version_id}
+          else
+            CURRENT_VERSION=latest
+          fi
+
+          export DRIVERS_TOOLS="$(pwd)/../drivers-tools"
+
+          # Python has cygwin path problems on Windows. Detect prospective mongo-orchestration home directory
+          if [ "Windows_NT" == "$OS" ]; then # Magic variable in cygwin
+            export DRIVERS_TOOLS=$(cygpath -m $DRIVERS_TOOLS)
+          fi
+
+          export MONGO_ORCHESTRATION_HOME="$DRIVERS_TOOLS/.evergreen/orchestration"
+          export MONGODB_BINARIES="$DRIVERS_TOOLS/mongodb/bin"
+          export UPLOAD_BUCKET="${project}"
+          export PROJECT_DIRECTORY="$(pwd)"
+
+          cat <<EOT > expansion.yml
+          CURRENT_VERSION: "$CURRENT_VERSION"
+          DRIVERS_TOOLS: "$DRIVERS_TOOLS"
+          MONGO_ORCHESTRATION_HOME: "$MONGO_ORCHESTRATION_HOME"
+          MONGODB_BINARIES: "$MONGODB_BINARIES"
+          UPLOAD_BUCKET: "$UPLOAD_BUCKET"
+          PROJECT_DIRECTORY: "$PROJECT_DIRECTORY"
+          PREPARE_SHELL: |
+            set -o errexit
+            set -o xtrace
+            export DRIVERS_TOOLS="$DRIVERS_TOOLS"
+            export MONGO_ORCHESTRATION_HOME="$MONGO_ORCHESTRATION_HOME"
+            export MONGODB_BINARIES="$MONGODB_BINARIES"
+            export UPLOAD_BUCKET="$UPLOAD_BUCKET"
+            export PROJECT_DIRECTORY="$PROJECT_DIRECTORY"
+
+            export TMPDIR="$MONGO_ORCHESTRATION_HOME/db"
+            export PATH="$MONGODB_BINARIES:$PATH"
+            export PROJECT="${project}"
+          EOT
+          # See what we've done
+          cat expansion.yml
+
+    # Load the expansion file to make an evergreen variable with the current unique version
+    - command: expansions.update
+      params:
+        file: src/expansion.yml
+
+  "prepare resources":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          rm -rf $DRIVERS_TOOLS
+          if [ "${project}" = "drivers-tools" ]; then
+            # If this was a patch build, doing a fresh clone would not actually test the patch
+            cp -R ${PROJECT_DIRECTORY}/ $DRIVERS_TOOLS
+          else
+            git clone git://github.com/mongodb-labs/drivers-evergreen-tools.git $DRIVERS_TOOLS
+          fi
+          echo "{ \"releases\": { \"default\": \"$MONGODB_BINARIES\" }}" > $MONGO_ORCHESTRATION_HOME/orchestration.config
+
+  # Upload build artifacts that other tasks may depend on
+  # Note this URL needs to be totally unique, while predictable for the next task
+  # so it can automatically download the artifacts
+  "upload build":
+    # Compress and upload the entire build directory
+    - command: archive.targz_pack
+      params:
+        target: "${build_id}.tar.gz"
+        source_dir: ${PROJECT_DIRECTORY}/
+        include:
+          - "./**"
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${build_id}.tar.gz
+        remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${task_name}/${build_id}.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: ${content_type|application/x-gzip}
+
+  "exec script":
+    - command: shell.exec
+      type: test
+      params:
+        working_dir: "src"
+        script: |
+          ${PREPARE_SHELL}
+          ${PROJECT_DIRECTORY}/${file}
+
+  "upload mo artifacts":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          find $MONGO_ORCHESTRATION_HOME -name \*.log | xargs tar czf ${PROJECT_DIRECTORY}/mongodb-logs.tar.gz
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${PROJECT_DIRECTORY}/mongodb-logs.tar.gz
+        remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-mongodb-logs.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: ${content_type|application/x-gzip}
+        display_name: "mongodb-logs.tar.gz"
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${DRIVERS_TOOLS}/.evergreen/orchestration/server.log
+        remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-orchestration.log
+        bucket: mciuploads
+        permissions: public-read
+        content_type: ${content_type|text/plain}
+        display_name: "orchestration.log"
+
+  "upload working dir":
+    - command: archive.targz_pack
+      params:
+        target: "working-dir.tar.gz"
+        source_dir: ${PROJECT_DIRECTORY}/
+        include:
+          - "./**"
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: working-dir.tar.gz
+        remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/artifacts/${task_id}-${execution}-working-dir.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: ${content_type|application/x-gzip}
+        display_name: "working-dir.tar.gz"
+    - command: archive.targz_pack
+      params:
+        target: "drivers-dir.tar.gz"
+        source_dir: ${DRIVERS_TOOLS}
+        include:
+          - "./**"
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: drivers-dir.tar.gz
+        remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/artifacts/${task_id}-${execution}-drivers-dir.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: ${content_type|application/x-gzip}
+        display_name: "drivers-dir.tar.gz"
+
+  "upload test results":
+    - command: attach.xunit_results
+      params:
+        file: ./src/*/build/test-results/TEST-*.xml
+
+  "bootstrap mongo-orchestration":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          MONGODB_VERSION=${VERSION} TOPOLOGY=${TOPOLOGY} AUTH=${AUTH} SSL=${SSL} sh ${DRIVERS_TOOLS}/.evergreen/run-orchestration.sh
+    # run-orchestration generates an expansion file with the MONGODB_URI for the cluster
+    - command: expansions.update
+      params:
+        file: mo-expansion.yml
+
+  "run tests":
+    - command: shell.exec
+      type: test
+      params:
+        working_dir: "src"
+        script: |
+          ${PREPARE_SHELL}
+          HADOOP_VERSION=${HADOOP_VERSION} MONGODB_BINARIES=${MONGODB_BINARIES} AUTH=${AUTH} JDK=${JDK} .evergreen/run-tests.sh
+
+  "cleanup":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          cd "$MONGO_ORCHESTRATION_HOME"
+          # source the mongo-orchestration virtualenv if it exists
+          if [ -f venv/bin/activate ]; then
+            . venv/bin/activate
+          elif [ -f venv/Scripts/activate ]; then
+            . venv/Scripts/activate
+          fi
+          mongo-orchestration stop
+          cd -
+          rm -rf $DRIVERS_TOOLS || true
+
+  "fix absolute paths":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          for filename in $(find ${DRIVERS_TOOLS} -name \*.json); do
+            perl -p -i -e "s|ABSOLUTE_PATH_REPLACEMENT_TOKEN|${DRIVERS_TOOLS}|g" $filename
+          done
+
+  "windows fix":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do
+            cat $i | tr -d '\r' > $i.new
+            mv $i.new $i
+          done
+
+  "make files executable":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do
+            chmod +x $i
+          done
+
+  "init test-results":
+    - command: shell.exec
+      params:
+        script: |
+          ${PREPARE_SHELL}
+          echo '{"results": [{ "status": "FAIL", "test_file": "Build", "log_raw": "No test-results.json found was created" } ]}' > ${PROJECT_DIRECTORY}/test-results.json
+
+  "install dependencies":
+    - command: shell.exec
+      type: test
+      params:
+        working_dir: "src"
+        script: |
+          ${PREPARE_SHELL}
+          file="${PROJECT_DIRECTORY}/.evergreen/install-dependencies.sh"
+          [ -f ${file} ] && sh ${file} || echo "${file} not available, skipping"
+
+pre:
+  - func: "fetch source"
+  - func: "prepare resources"
+  - func: "windows fix"
+  - func: "fix absolute paths"
+  - func: "init test-results"
+  - func: "make files executable"
+  - func: "install dependencies"
+
+post:
+  # Removed, causing timeouts
+  # - func: "upload working dir"
+  - func: "upload mo artifacts"
+  - func: "upload test results"
+  - func: "cleanup"
+
+tasks:
+  # Compile / check build variant
+  - name: static-analysis
+    commands:
+      - func: "exec script"
+        vars:
+          file: ".evergreen/compile.sh"
+      - func: "upload build"
+
+  - name: "test"
+    depends_on:
+      - variant: "static-checks"
+        name: "static-analysis"
+    commands:
+      - func: "bootstrap mongo-orchestration"
+      - func: "run tests"
+
+axes:
+  - id: hadoop-version
+    display_name: Hadoop Version
+    values:
+      - id: "1.2.1"
+        display_name: "1.2"
+        variables:
+          HADOOP_VERSION: "1.2.1"
+      - id: "2.7.2"
+        display_name: "2.7"
+        variables:
+          HADOOP_VERSION: "2.7.2"
+  - id: version
+    display_name: MongoDB Version
+    values:
+      - id: "3.4"
+        display_name: "3.4"
+        variables:
+          VERSION: "3.4"
+      - id: "3.2"
+        display_name: "3.2"
+        variables:
+          VERSION: "3.2"
+      - id: "3.0"
+        display_name: "3.0"
+        variables:
+          VERSION: "3.0"
+      - id: "2.6"
+        display_name: "2.6"
+        variables:
+          VERSION: "2.6"
+      - id: "2.4"
+        display_name: "2.4"
+        variables:
+          VERSION: "2.4"
+  - id: os
+    display_name: OS
+    values:
+      - id: "rhel62"
+        display_name: "RHEL 6.2"
+        run_on: rhel62-test
+  - id: topology
+    display_name: Topology
+    values:
+      - id: "standalone"
+        display_name: Standalone
+        variables:
+          TOPOLOGY: "server"
+      - id: "replicaset"
+        display_name: Replica Set
+        variables:
+          TOPOLOGY: "replica_set"
+      - id: "sharded-cluster"
+        display_name: Sharded Cluster
+        variables:
+          TOPOLOGY: "sharded_cluster"
+  - id: auth
+    display_name: Authentication
+    values:
+      - id: "noauth"
+        display_name: NoAuth
+        variables:
+          AUTH: "noauth"
+  - id: ssl
+    display_name: SSL
+    values:
+      - id: "nossl"
+        display_name: NoSSL
+        variables:
+          SSL: "nossl"
+  - id: jdk
+    display_name: JDK
+    values:
+      - id: "jdk8"
+        display_name: JDK8
+        variables:
+          JDK: "jdk8"
+      - id: "jdk7"
+        display_name: JDK7
+        variables:
+          JDK: "jdk7"
+
+buildvariants:
+
+- name: static-checks
+  display_name: "Static Checks"
+  run_on:
+    - rhel62-test
+  tasks:
"static-analysis" + +- matrix_name: "tests" + matrix_spec: { auth: "*", ssl: "*", jdk: "jdk7", version: "*", topology: "*", os: "*", hadoop-version: "*" } + display_name: "Hadoop ${hadoop-version} MongoDB ${version} ${jdk} ${topology} ${os} " + tags: ["tests-variant"] + tasks: + - name: "test" + +- matrix_name: "tests-jdk8" + matrix_spec: { auth: "*", ssl: "*", jdk: "jdk8", version: "3.4", topology: "standalone", os: "rhel62", hadoop-version: "*" } + display_name: "Hadoop ${hadoop-version} MongoDB ${version} ${jdk} ${topology} ${os} " + tags: ["tests-variant"] + tasks: + - name: "test" diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh new file mode 100755 index 00000000..1788eb06 --- /dev/null +++ b/.evergreen/run-tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -o xtrace # Write all commands first to stderr +set -o errexit # Exit the script with error if any of the commands fail + +# Supported/used environment variables: +# MONGODB_BINARIES The location of the MongoDB binaries, e.g. /usr/local/bin +# HADOOP_VERSION Sets the version of Hadoop to be used. +# AUTH Set to enable authentication. Values are: "auth" / "noauth" (default) +# JDK Set the version of java to be used. Java versions can be set from the java toolchain /opt/java +# "jdk5", "jdk6", "jdk7", "jdk8" + +MONGODB_BINARIES=${MONGODB_BINARIES:-} +AUTH=${AUTH:-noauth} +JDK=${JDK:-jdk} +PROJECT_DIRECTORY=${PROJECT_DIRECTORY:-} + +export HADOOP_VERSION=${HADOOP_VERSION:-2.7.2} +export HADOOP_PREFIX=$PROJECT_DIRECTORY/hadoop-binaries/hadoop-$HADOOP_VERSION +export HADOOP_HOME=$HADOOP_PREFIX +export HADOOP_USER_CLASSPATH_FIRST=true +export HIVE_HOME=$PROJECT_DIRECTORY/hadoop-binaries/apache-hive-1.2.1-bin + +export JAVA_HOME="/opt/java/${JDK}" + +./gradlew -version +./gradlew -Dmongodb_bin_dir=${MONGODB_BINARIES} -Dmongodb_option=${AUTH} -DHADOOP_VERSION=${HADOOP_VERSION} --stacktrace jar testsJar test cleanHadoop \ No newline at end of file diff --git a/History.md b/History.md index eb29a0e6..a948f311 100644 --- a/History.md +++ b/History.md @@ -1,3 +1,23 @@ +2.0.2 / 27th of January, 2017 +============================= + +This is a patch-level release that fixes two issues: + +* Allow "skip" to be set on MongoInputSplit (HADOOP-304) +* Correctly handle renaming nested fields in Hive (HADOOP-303) + +Thanks to mkrstic for the patch for HADOOP-304! + +For complete details on the issues resolved in 2.0.2, consult the release notes +on Jira: https://jira.mongodb.org/browse/HADOOP/fixforversion/17932 + +2.0.1 / 30th of August, 2016 +============================ + +This is a patch-level release that adds the noTimeout option to the cursor +used by MongoPaginatingSplitter. More details of the issue can be found +on the ticket's Jira page: https://jira.mongodb.org/browse/HADOOP-295 + 2.0.0 / 15th of August, 2016 ============================ diff --git a/README.md b/README.md index 7506d5f1..e205bc17 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,11 @@ -#MongoDB Connector for Hadoop +# End of Life Notice +The MongoDB Connector for Hadoop is now officially end-of-life (EOL). No further development, bugfixes, enhancements, documentation changes or maintenance will be provided by this project and pull requests will no longer be accepted. -##Purpose +--- + +# MongoDB Connector for Hadoop + +## Purpose The MongoDB Connector for Hadoop is a library which allows MongoDB (or backup files in its data format, BSON) to be used as an input source, or output destination, for Hadoop MapReduce tasks. 
 * [**Pig**][pig-usage]
diff --git a/build.gradle b/build.gradle
index f69580a2..de0b2e32 100644
--- a/build.gradle
+++ b/build.gradle
@@ -20,12 +20,8 @@ ext.hiveHome = System.getenv("HIVE_HOME") ?: "${hadoopBinaries}/apache-hive-${hi
 ext.pigHome = System.getenv("PIG_HOME") ?: "${hadoopBinaries}/pig-${pigVersion}".toString()
 ext.dataHome = "${hadoopBinaries}/examples/data".toString()
 ext.docsHome = "${rootDir}/docs"
-ext.mongoimport = new File('/mnt/jenkins/mongodb/32/32-release/bin/mongoimport').exists() ?
-                  '/mnt/jenkins/mongodb/32/32-release/bin/mongoimport' :
-                  '/usr/local/bin/mongoimport';
-ext.mongorestore = new File('/mnt/jenkins/mongodb/32/32-release/bin/mongorestore').exists() ?
-                  '/mnt/jenkins/mongodb/32/32-release/bin/mongorestore' :
-                  '/usr/local/bin/mongorestore';
+ext.mongoimport = '/usr/local/bin/mongoimport';
+ext.mongorestore = '/usr/local/bin/mongorestore';
 ext.mongoURI = System.getenv("MONGO_INPUT_URI")
 ext.mongoAuthURI = System.getenv("MONGO_AUTH_URI")
 ext.mongoUsername = System.getenv("MONGO_USER")
@@ -62,7 +58,7 @@ if(project.hasProperty("clusterVersion")) {
 }
 
 allprojects {
-    version = '2.0.0'
+    version = '2.0.2'
     group = 'org.mongodb.mongo-hadoop'
 }
diff --git a/core/src/main/java/com/mongodb/hadoop/input/MongoInputSplit.java b/core/src/main/java/com/mongodb/hadoop/input/MongoInputSplit.java
index 5d84b9d4..9582e17e 100644
--- a/core/src/main/java/com/mongodb/hadoop/input/MongoInputSplit.java
+++ b/core/src/main/java/com/mongodb/hadoop/input/MongoInputSplit.java
@@ -54,6 +54,7 @@ public class MongoInputSplit extends InputSplit implements Writable, org.apache.hadoop.mapred.InputSplit {
     protected DBObject min;
     protected DBObject max;
     protected Integer limit;
+    protected Integer skip;
     protected boolean notimeout = false;
 
     protected transient DBCursor cursor;
@@ -75,6 +76,7 @@ public MongoInputSplit(final MongoInputSplit other) {
         setQuery(other.getQuery());
         setSort(other.getSort());
         setLimit(other.getLimit());
+        setSkip(other.getSkip());
     }
 
     public MongoInputSplit(final Configuration conf) {
@@ -82,10 +84,13 @@
         setAuthURI(MongoConfigUtil.getAuthURI(conf));
         setInputURI(MongoConfigUtil.getInputURI(conf));
         setKeyField(MongoConfigUtil.getInputKey(conf));
+        setMax(MongoConfigUtil.getMaxSplitKey(conf));
+        setMin(MongoConfigUtil.getMinSplitKey(conf));
         setNoTimeout(MongoConfigUtil.isNoTimeout(conf));
         setQuery(MongoConfigUtil.getQuery(conf));
         setSort(MongoConfigUtil.getSort(conf));
         setLimit(MongoConfigUtil.getLimit(conf));
+        setSkip(MongoConfigUtil.getSkip(conf));
     }
 
     public void setInputURI(final MongoClientURI inputURI) {
@@ -194,6 +199,14 @@ public void setLimit(final Integer limit) {
         this.limit = limit;
     }
 
+    public Integer getSkip() {
+        return skip;
+    }
+
+    public void setSkip(final Integer skip) {
+        this.skip = skip;
+    }
+
     @Override
     public void write(final DataOutput out) throws IOException {
         BSONObject spec = BasicDBObjectBuilder.start()
@@ -207,6 +220,7 @@
             .add("max", getMax())
             .add("notimeout", getNoTimeout())
             .add("limit", limit)
+            .add("skip", skip)
             .get();
         byte[] buf = _bsonEncoder.encode(spec);
         out.write(buf);
@@ -250,6 +264,8 @@ public void readFields(final DataInput in) throws IOException {
 
         setLimit((Integer) spec.get("limit"));
 
+        setSkip((Integer) spec.get("skip"));
+
         setNoTimeout((Boolean) spec.get("notimeout"));
     }
 
@@ -272,6 +288,9 @@ public DBCursor getCursor() {
             if (this.max != null) {
                 this.cursor.addSpecial("$max", this.max);
             }
+            if (skip != null) {
+                cursor = cursor.skip(skip);
+            }
             if (limit != null) {
                 cursor = cursor.limit(limit);
             }
@@ -295,6 +314,7 @@ public String toString() {
                + ", sort=" + this.sort
                + ", fields=" + this.fields
                + ", limit=" + this.limit
+               + ", skip=" + this.skip
                + ", notimeout=" + this.notimeout + '}';
     }
 
@@ -308,6 +328,7 @@ public int hashCode() {
         result = 31 * result + (this.sort != null ? this.sort.hashCode() : 0);
         result = 31 * result + (this.notimeout ? 1 : 0);
         result = 31 * result + (this.limit != null ? this.limit.hashCode() : 0);
+        result = 31 * result + (this.skip != null ? this.skip.hashCode() : 0);
         return result;
     }
 
@@ -346,6 +367,10 @@ public boolean equals(final Object o) {
             || !limit.equals(that.getLimit())) {
             return false;
         }
+        if (skip == null && that.getSkip() != null
+            || !skip.equals(that.getSkip())) {
+            return false;
+        }
 
         return true;
     }
diff --git a/core/src/main/java/com/mongodb/hadoop/splitter/MongoPaginatingSplitter.java b/core/src/main/java/com/mongodb/hadoop/splitter/MongoPaginatingSplitter.java
index 4725f0e4..895d8331 100644
--- a/core/src/main/java/com/mongodb/hadoop/splitter/MongoPaginatingSplitter.java
+++ b/core/src/main/java/com/mongodb/hadoop/splitter/MongoPaginatingSplitter.java
@@ -2,6 +2,7 @@
 import com.mongodb.BasicDBObject;
 import com.mongodb.BasicDBObjectBuilder;
+import com.mongodb.Bytes;
 import com.mongodb.DBCollection;
 import com.mongodb.DBCursor;
 import com.mongodb.DBObject;
@@ -87,7 +88,8 @@ public List<InputSplit> calculateSplits() throws SplitFailedException {
             }
             cursor = inputCollection.find(rangeObj, splitKeyProjection);
         }
-        cursor = cursor.sort(splitKeyObj).skip(minDocs).limit(1);
+        cursor = cursor.sort(splitKeyObj).skip(minDocs).limit(1)
+                       .setOptions(Bytes.QUERYOPTION_NOTIMEOUT);
 
         if (cursor.hasNext()) {
             maxBound = cursor.next().get(splitKey);
diff --git a/core/src/test/java/com/mongodb/hadoop/io/MongoInputSplitTest.java b/core/src/test/java/com/mongodb/hadoop/io/MongoInputSplitTest.java
new file mode 100644
index 00000000..3fa10faf
--- /dev/null
+++ b/core/src/test/java/com/mongodb/hadoop/io/MongoInputSplitTest.java
@@ -0,0 +1,43 @@
+package com.mongodb.hadoop.io;
+
+import com.mongodb.hadoop.input.MongoInputSplit;
+import com.mongodb.hadoop.util.MongoConfigUtil;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+
+public class MongoInputSplitTest {
+
+    @Test
+    public void testConstructor() {
+        Configuration conf = new Configuration();
+        MongoConfigUtil.setFields(conf, "{\"field\": 1}");
+        MongoConfigUtil.setAuthURI(conf, "mongodb://auth");
+        MongoConfigUtil.setInputURI(conf, "mongodb://input");
+        MongoConfigUtil.setInputKey(conf, "field");
+        MongoConfigUtil.setMaxSplitKey(conf, "{\"field\": 1e9}");
+        MongoConfigUtil.setMinSplitKey(conf, "{\"field\": -1e9}");
+        MongoConfigUtil.setNoTimeout(conf, true);
+        MongoConfigUtil.setQuery(conf, "{\"foo\": 42}");
+        MongoConfigUtil.setSort(conf, "{\"foo\": -1}");
+        MongoConfigUtil.setSkip(conf, 10);
+
+        MongoInputSplit mis = new MongoInputSplit(conf);
+
+        assertEquals(MongoConfigUtil.getFields(conf), mis.getFields());
+        assertEquals(MongoConfigUtil.getAuthURI(conf), mis.getAuthURI());
+        assertEquals(MongoConfigUtil.getInputURI(conf), mis.getInputURI());
+        assertEquals(MongoConfigUtil.getInputKey(conf), mis.getKeyField());
+        assertEquals(MongoConfigUtil.getMaxSplitKey(conf), mis.getMax());
+        assertEquals(MongoConfigUtil.getMinSplitKey(conf), mis.getMin());
+        assertEquals(MongoConfigUtil.isNoTimeout(conf), mis.getNoTimeout());
+        assertEquals(MongoConfigUtil.getQuery(conf), mis.getQuery());
+        assertEquals(MongoConfigUtil.getSort(conf), mis.getSort());
+        assertEquals(MongoConfigUtil.getLimit(conf), (int) mis.getLimit());
+        assertEquals(MongoConfigUtil.getSkip(conf), (int) mis.getSkip());
+
+        MongoInputSplit mis2 = new MongoInputSplit(mis);
+        assertEquals(mis, mis2);
+    }
+}
diff --git a/core/src/test/java/com/mongodb/hadoop/testutils/BaseHadoopTest.java b/core/src/test/java/com/mongodb/hadoop/testutils/BaseHadoopTest.java
index 0a3ad3af..9d5ada7c 100644
--- a/core/src/test/java/com/mongodb/hadoop/testutils/BaseHadoopTest.java
+++ b/core/src/test/java/com/mongodb/hadoop/testutils/BaseHadoopTest.java
@@ -30,17 +30,18 @@ public abstract class BaseHadoopTest {
     public static final String HADOOP_HOME;
     public static final String PROJECT_VERSION = loadProperty("project"
-                                                              + ".version", "2.0.0");
+                                                              + ".version", "2.0.2");
     public static final String HADOOP_VERSION = loadProperty("hadoop.version", "2.7.2");
 
-//    public static final String HIVE_HOME;
     public static final File PROJECT_HOME;
     public static final String HADOOP_BINARIES;
     public static final String EXAMPLE_DATA_HOME;
 
     private static final boolean TEST_IN_VM = Boolean.valueOf(System.getProperty("mongo.hadoop.testInVM", "false"));
 
-    private static final String MONGO_IMPORT;
+    private static final String MONGO_IMPORT = System.getProperty("mongodb_bin_dir") == null
+                                                   ? "/usr/local/bin/mongoimport"
+                                                   : System.getProperty("mongodb_bin_dir") + "/mongoimport";
 
     private MongoClient client;
 
@@ -52,19 +53,6 @@
             }
             PROJECT_HOME = current;
 
-            String property = System.getProperty("mongodb_server");
-            String serverType = property != null ? property.replaceAll("-release", "") : "UNKNOWN";
-            if (serverType.equals("27-nightly")) {
-                serverType = "master-nightly";
-                property = serverType + "-release";
-            }
-            final String path = format("/mnt/jenkins/mongodb/%s/%s/bin/mongoimport", serverType, property);
-            MONGO_IMPORT = new File(path).exists() ? path : "/usr/local/bin/mongoimport";
-            if (!new File(MONGO_IMPORT).exists()) {
-                throw new RuntimeException(format("Can not locate mongoimport. Tried looking in '%s' and '%s' assuming a server "
-                                                  + "type of '%s'", path, "/usr/local/bin/mongoimport", property));
-            }
-
             final File gradleProps = new File(PROJECT_HOME, ".gradle.properties");
             if (gradleProps.exists()) {
                 System.getProperties().load(new FileInputStream(gradleProps));
@@ -73,9 +61,7 @@
             EXAMPLE_DATA_HOME = new File(HADOOP_BINARIES, "examples/data").getCanonicalPath();
             HADOOP_HOME = new File(HADOOP_BINARIES, format("hadoop-%s", HADOOP_VERSION)).getCanonicalPath();
 
-//            HIVE_HOME = new File(System.getProperty("hive_home")).getCanonicalPath();
             LOG.info("HADOOP_HOME = " + HADOOP_HOME);
-//            LOG.info("HIVE_HOME = " + HIVE_HOME);
 
         } catch (final IOException e) {
             throw new RuntimeException(e.getMessage(), e);
@@ -163,9 +149,6 @@ public void mongoImport(final String collection, final File file) {
             if (isAuthEnabled()) {
                 final List list = new ArrayList(asList("-u", "bob", "-p", "pwd123"));
-                if (!System.getProperty("mongodb_server", "").equals("22-release")) {
-                    list.addAll(asList("--authenticationDatabase", "admin"));
-                }
                 command.addAll(list);
             }
             final StringBuilder output = new StringBuilder();
diff --git a/gradle/hadoop.gradle b/gradle/hadoop.gradle
index 8c477079..2f56f284 100644
--- a/gradle/hadoop.gradle
+++ b/gradle/hadoop.gradle
@@ -33,7 +33,7 @@ def execute(command, args = [], outStream = null, errStream = null, background =
 }
 
 def stopService(signal, service, name) {
-    for (String process : execute('jps')) {
+    for (String process : execute(System.getenv("JAVA_HOME") + '/bin/jps')) {
         if (process.endsWith(service)) {
             println("Shutting down ${name}")
             if (Os.isFamily(Os.FAMILY_WINDOWS)) {
diff --git a/hive/src/main/java/com/mongodb/hadoop/hive/BSONSerDe.java b/hive/src/main/java/com/mongodb/hadoop/hive/BSONSerDe.java
index 133e66e4..00573ed0 100644
--- a/hive/src/main/java/com/mongodb/hadoop/hive/BSONSerDe.java
+++ b/hive/src/main/java/com/mongodb/hadoop/hive/BSONSerDe.java
@@ -304,7 +304,7 @@ private Object deserializeStruct(final Object value, final StructTypeInfo valueT
 
         List<Object> struct = new ArrayList<Object>(structNames.size());
         for (int i = 0; i < structNames.size(); i++) {
-            String fieldName = structNames.get(i);
+            String fieldName = structNames.get(i).toLowerCase();
 
             // hiveMapping -> prefixed by parent struct names.
             // For example, in {"wife":{"name":{"first":"Sydney"}}},
diff --git a/hive/src/main/java/com/mongodb/hadoop/hive/input/HiveMongoInputFormat.java b/hive/src/main/java/com/mongodb/hadoop/hive/input/HiveMongoInputFormat.java
index f07e2eda..eb003498 100644
--- a/hive/src/main/java/com/mongodb/hadoop/hive/input/HiveMongoInputFormat.java
+++ b/hive/src/main/java/com/mongodb/hadoop/hive/input/HiveMongoInputFormat.java
@@ -228,6 +228,12 @@ private String resolveMongoName(
         }
         String mapped = colNameMapping.get(colName);
         if (null == mapped) {
+            // Check to see if the column name is a prefix for a name in the column name mapping.
+            for (Map.Entry<String, String> entry : colNameMapping.entrySet()) {
+                if (entry.getKey().startsWith(colName + ".")) {
+                    return entry.getValue().split("\\.")[0];
+                }
+            }
             return colName;
         }
         return mapped;
diff --git a/hive/src/test/java/com/mongodb/hadoop/hive/HiveMappingTest.java b/hive/src/test/java/com/mongodb/hadoop/hive/HiveMappingTest.java
index 8d70ab90..f8e406bf 100644
--- a/hive/src/test/java/com/mongodb/hadoop/hive/HiveMappingTest.java
+++ b/hive/src/test/java/com/mongodb/hadoop/hive/HiveMappingTest.java
@@ -6,12 +6,14 @@
 import com.mongodb.MongoClientURI;
 import com.mongodb.hadoop.util.MongoClientURIBuilder;
 import com.mongodb.hadoop.util.MongoConfigUtil;
+import com.mongodb.util.JSON;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.junit.Test;
 
 import java.sql.SQLException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -27,6 +29,50 @@ public class HiveMappingTest extends HiveTest {
     private static final Log LOG = LogFactory.getLog(HiveMappingTest.class);
 
+    @Test
+    public void nestedColumns() throws SQLException {
+        DBCollection collection = getCollection("hive_addresses");
+        collection.drop();
+        dropTable("hive_addresses");
+
+        collection.insert(user(1, "Jim", "Beam", "Clermont", "KY"));
+        collection.insert(user(2, "Don", "Draper", "New York", "NY"));
+        collection.insert(user(3, "John", "Elway", "Denver", "CO"));
+
+        MongoClientURI uri = authCheck(
+            new MongoClientURIBuilder()
+                .collection("mongo_hadoop", collection.getName())
+        ).build();
+
+        Map<String, String> map = new HashMap<String, String>() {
+            {
+                put("id", "_id");
+                put("firstName", "firstName");
+                put("lastName", "lastName");
+                put("place.municipality", "address.city");
+                put("place.region", "address.state");
+            }
+        };
+
+        execute(
+            format(
+                "CREATE TABLE hive_addresses "
+                    + "(id INT, firstName STRING, lastName STRING, "
+                    + "place STRUCT<municipality:STRING, region:STRING>)\n"
+                    + "STORED BY '%s'\n"
+                    + "WITH SERDEPROPERTIES('mongo.columns.mapping'='%s')\n"
+                    + "TBLPROPERTIES ('mongo.uri'='%s')",
+                MongoStorageHandler.class.getName(),
+                JSON.serialize(map),
+                uri
+            ));
+        // Alias inner fields to avoid retrieving entire struct as a String.
+        Results execute = query("SELECT place.municipality AS city, place.region AS state, firstname from hive_addresses");
+        assertEquals("KY", execute.getRow(0).get("state"));
+        assertEquals("Don", execute.getRow(1).get("firstname"));
+        assertEquals("Denver", execute.getRow(2).get("city"));
+    }
+
     @Test
     public void nestedObjects() throws SQLException {
         DBCollection collection = getCollection("hive_addresses");
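
For reference, the new skip option from HADOOP-304 shown above is driven entirely through MongoConfigUtil, mirroring the existing limit handling; a minimal sketch of how a job might use it, assuming only the MongoConfigUtil setters and MongoInputSplit methods that appear in this patch (the URI and query values are made up for illustration):

    // Sketch only, not part of the patch: exercising the skip support added in 2.0.2.
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/mongo_hadoop.example"); // hypothetical URI
    MongoConfigUtil.setQuery(conf, "{\"status\": \"A\"}");                               // hypothetical filter
    MongoConfigUtil.setSkip(conf, 100);  // skip the first 100 matching documents

    MongoInputSplit split = new MongoInputSplit(conf);
    DBCursor cursor = split.getCursor(); // getCursor() now applies skip (and limit, if set) to the cursor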