diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 1a42c533fb2ee..0443abdd818a4 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -77,7 +77,7 @@ jobs:
       TEST_DOCSTRINGS: 'true'
       # Tests that require large downloads over the networks are skipped in CI.
       # Here we make sure, that they are still run on a regular basis.
-      SKLEARN_SKIP_NETWORK_TESTS: '0'
+      SKLEARN_RUN_NETWORK_TESTS: 'true'
 
   # Will run all the time regardless of linting outcome.
 - template: build_tools/azure/posix.yml
diff --git a/build_tools/azure/posix-32.yml b/build_tools/azure/posix-32.yml
index 5e4689a2505e5..d0ea1f241f00a 100644
--- a/build_tools/azure/posix-32.yml
+++ b/build_tools/azure/posix-32.yml
@@ -16,7 +16,7 @@ jobs:
     JUNITXML: 'test-data.xml'
     OMP_NUM_THREADS: '2'
     OPENBLAS_NUM_THREADS: '2'
-    SKLEARN_SKIP_NETWORK_TESTS: '1'
+    SKLEARN_RUN_NETWORK_TESTS: 'false'
     NUMPY_VERSION: 'latest'
     SCIPY_VERSION: 'latest'
     CYTHON_VERSION: 'latest'
@@ -62,7 +62,7 @@ jobs:
         -e THREADPOOLCTL_VERSION=$THREADPOOLCTL_VERSION
         -e OMP_NUM_THREADS=$OMP_NUM_THREADS
         -e OPENBLAS_NUM_THREADS=$OPENBLAS_NUM_THREADS
-        -e SKLEARN_SKIP_NETWORK_TESTS=$SKLEARN_SKIP_NETWORK_TESTS
+        -e SKLEARN_RUN_NETWORK_TESTS=$SKLEARN_RUN_NETWORK_TESTS
         i386/ubuntu:18.04
         sleep 1000000
       displayName: 'Start container'
diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml
index ae5726aab0b65..ebe91a15041ec 100644
--- a/build_tools/azure/posix.yml
+++ b/build_tools/azure/posix.yml
@@ -17,7 +17,7 @@ jobs:
    JUNITXML: 'test-data.xml'
     OMP_NUM_THREADS: '2'
     OPENBLAS_NUM_THREADS: '2'
-    SKLEARN_SKIP_NETWORK_TESTS: '1'
+    SKLEARN_RUN_NETWORK_TESTS: 'false'
     CCACHE_DIR: $(Pipeline.Workspace)/ccache
     CCACHE_COMPRESS: '1'
     NUMPY_VERSION: 'latest'
diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh
index b08cda50cfd60..adcc0d18710e2 100755
--- a/build_tools/azure/test_script.sh
+++ b/build_tools/azure/test_script.sh
@@ -23,6 +23,7 @@ pip list
 
 TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML"
 
+
 if [[ "$COVERAGE" == "true" ]]; then
     # Note: --cov-report= is used to disable to long text output report in the
     # CI logs. The coverage data is consolidated by codecov to get an online
@@ -47,5 +48,11 @@ cp setup.cfg $TEST_DIR
 cd $TEST_DIR
 
 set -x
-$TEST_CMD --pyargs sklearn
+if [[ "$SKLEARN_RUN_NETWORK_TESTS" == "true" ]]; then
+    # Tests that require large downloads over the networks are skipped in CI.
+    # Here we make sure, that they are still run on a regular basis.
+    $TEST_CMD --pyargs sklearn -m 'not skipnetwork'
+else
+    $TEST_CMD --pyargs sklearn
+fi
 set +x
diff --git a/build_tools/azure/windows.yml b/build_tools/azure/windows.yml
index 8a5edd4b93019..dcecb2ab2c596 100644
--- a/build_tools/azure/windows.yml
+++ b/build_tools/azure/windows.yml
@@ -15,7 +15,7 @@ jobs:
   variables:
     VIRTUALENV: 'testvenv'
     JUNITXML: 'test-data.xml'
-    SKLEARN_SKIP_NETWORK_TESTS: '1'
+    SKLEARN_RUN_NETWORK_TESTS: 'false'
     PYTEST_VERSION: '5.2.1'
     PYTEST_XDIST: 'true'
     TMP_FOLDER: '$(Agent.WorkFolder)\tmp_folder'
diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh
index cb5a3dbfeed33..bccf5fde320e5 100755
--- a/build_tools/travis/test_script.sh
+++ b/build_tools/travis/test_script.sh
@@ -17,6 +17,12 @@ python -c "import platform; print(f'{platform.machine()}')"
 
 TEST_CMD="pytest --showlocals --durations=20 --pyargs"
 
+# Tests that require large downloads over the networks are skipped in CI.
+# Here we make sure, that they are still run on a regular basis.
+if [[ "$SKLEARN_RUN_NETWORK_TESTS" == "true" ]]; then
+    TEST_CMD="$TEST_CMD -m 'not skipnetwork'"
+fi
+
 # Run the tests on the installed version
 mkdir -p $TEST_DIR
diff --git a/conftest.py b/conftest.py
index 5c48de4ac36a3..0f2c50d5e86e4 100644
--- a/conftest.py
+++ b/conftest.py
@@ -14,6 +14,14 @@
 from sklearn.utils import _IS_32BIT
 from sklearn.externals import _pilutil
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.datasets import fetch_20newsgroups_vectorized
+from sklearn.datasets import fetch_california_housing
+from sklearn.datasets import fetch_covtype
+from sklearn.datasets import fetch_kddcup99
+from sklearn.datasets import fetch_olivetti_faces
+from sklearn.datasets import fetch_rcv1
+
 from sklearn._min_dependencies import PYTEST_MIN_VERSION
 from sklearn.utils.fixes import np_version, parse_version
@@ -24,9 +32,36 @@
                       .format(PYTEST_MIN_VERSION))
 
 
-def pytest_addoption(parser):
-    parser.addoption("--skip-network", action="store_true", default=False,
-                     help="skip network tests")
+dataset_fetchers = {
+    'fetch_20newsgroups_fxt': fetch_20newsgroups,
+    'fetch_20newsgroups_vectorized_fxt': fetch_20newsgroups_vectorized,
+    'fetch_california_housing_fxt': fetch_california_housing,
+    'fetch_covtype_fxt': fetch_covtype,
+    'fetch_kddcup99_fxt': fetch_kddcup99,
+    'fetch_olivetti_faces_fxt': fetch_olivetti_faces,
+    'fetch_rcv1_fxt': fetch_rcv1,
+}
+
+
+# fetching a dataset with this fixture will never download if missing
+def _fetch_fixture(f):
+    def wrapped(*args, **kwargs):
+        kwargs['download_if_missing'] = False
+        try:
+            return f(*args, **kwargs)
+        except IOError:
+            pytest.skip("test requires -m 'not skipnetwork' to run")
+    return pytest.fixture(lambda: wrapped)
+
+
+fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups)
+fetch_20newsgroups_vectorized_fxt = \
+    _fetch_fixture(fetch_20newsgroups_vectorized)
+fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing)
+fetch_covtype_fxt = _fetch_fixture(fetch_covtype)
+fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99)
+fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces)
+fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1)
 
 
 def pytest_collection_modifyitems(config, items):
@@ -50,14 +85,32 @@ def pytest_collection_modifyitems(config, items):
         )
         item.add_marker(marker)
 
-    # Skip tests which require internet if the flag is provided
-    if (config.getoption("--skip-network")
-            or int(os.environ.get("SKLEARN_SKIP_NETWORK_TESTS", "0"))):
-        skip_network = pytest.mark.skip(
-            reason="test requires internet connectivity")
-        for item in items:
-            if "network" in item.keywords:
-                item.add_marker(skip_network)
+    run_network_tests = 'not skipnetwork' in config.getoption("markexpr")
+    skip_network = pytest.mark.skip(
+        reason="test requires -m 'not skipnetwork' to run")
+
+    # download datasets during collection to avoid thread unsafe behavior
+    # when running pytest in parallel with pytest-xdist
+    dataset_features_set = set(dataset_fetchers)
+    datasets_to_download = set()
+
+    for item in items:
+        item_keywords = set(item.keywords)
+        dataset_to_fetch = item_keywords & dataset_features_set
+        if not dataset_to_fetch:
+            continue
+
+        if run_network_tests:
+            datasets_to_download |= dataset_to_fetch
+        else:
+            # network tests are skipped
+            item.add_marker(skip_network)
+
+    # download datasets that are needed to avoid thread unsafe behavior
+    # by pytest-xdist
+    if run_network_tests:
+        for name in datasets_to_download:
+            dataset_fetchers[name]()
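A minimal sketch of how a test module would consume one of these ``*_fxt``
fixtures (the test name, dataset, and assertion are illustrative, not part of
the patch)::

    import pytest

    @pytest.mark.network
    def test_rcv1_has_samples(fetch_rcv1_fxt):
        # The fixture forwards to fetch_rcv1(download_if_missing=False) and
        # skips the test when the dataset is not already cached locally.
        rcv1 = fetch_rcv1_fxt()
        assert rcv1.data.shape[0] > 0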
 
     # numpy changed the str/repr formatting of numpy arrays in 1.14. We want to
     # run doctests only for numpy >= 1.14.
@@ -92,6 +145,12 @@ def pytest_collection_modifyitems(config, items):
             item.add_marker(skip_marker)
 
 
+def pytest_runtest_setup(item):
+    run_network_tests = 'not skipnetwork' in item.config.getoption("markexpr")
+    if "network" in item.keywords and not run_network_tests:
+        pytest.skip("test requires -m 'not skipnetwork' to run")
+
+
 def pytest_configure(config):
     import sys
     sys._is_pytest_session = True
diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst
index d47ac6f614183..e943ffef80688 100644
--- a/doc/computing/computational_performance.rst
+++ b/doc/computing/computational_performance.rst
@@ -368,3 +368,203 @@ Links
 - :ref:`scikit-learn developer performance documentation `
 - `Scipy sparse matrix formats documentation `_
+
+.. _parallelism:
+
+Parallelism
+-----------
+
+Some scikit-learn estimators and utilities can parallelize costly operations
+using multiple CPU cores, thanks to the following components:
+
+- via the `joblib `_ library. In this case the number of threads or
+  processes can be controlled with the ``n_jobs`` parameter.
+- via OpenMP, used in C or Cython code.
+
+In addition, some of the numpy routines that are used internally by
+scikit-learn may also be parallelized if numpy is installed with specific
+numerical libraries such as MKL, OpenBLAS, or BLIS.
+
+We describe these 3 scenarios in the following subsections.
+
+Joblib-based parallelism
+........................
+
+When the underlying implementation uses joblib, the number of workers
+(threads or processes) that are spawned in parallel can be controlled via the
+``n_jobs`` parameter.
+
+.. note::
+
+    Where (and how) parallelization happens in the estimators is currently
+    poorly documented. Please help us by improving our docs and tackle `issue
+    14228 `_!
+
+Joblib is able to support both multi-processing and multi-threading. Whether
+joblib chooses to spawn a thread or a process depends on the **backend**
+that it's using.
+
+Scikit-learn generally relies on the ``loky`` backend, which is joblib's
+default backend. Loky is a multi-processing backend. When doing
+multi-processing, in order to avoid duplicating the memory in each process
+(which isn't reasonable with big datasets), joblib will create a `memmap `_
+that all processes can share, when the data is bigger than 1MB.
+
+In some specific cases (when the code that is run in parallel releases the
+GIL), scikit-learn will indicate to ``joblib`` that a multi-threading
+backend is preferable.
+
+As a user, you may control the backend that joblib will use (regardless of
+what scikit-learn recommends) by using a context manager::
+
+    from joblib import parallel_backend
+
+    with parallel_backend('threading', n_jobs=2):
+        # Your scikit-learn code here
+
+Please refer to the `joblib's docs `_ for more details.
+
+In practice, whether parallelism is helpful at improving runtime depends on
+many factors. It is usually a good idea to experiment rather than assuming
+that increasing the number of workers is always a good thing. In some cases
+it can be highly detrimental to performance to run multiple copies of some
+estimators or functions in parallel (see oversubscription below).
+
+OpenMP-based parallelism
+........................
+
+OpenMP is used to parallelize code written in Cython or C, relying on
+multi-threading exclusively. By default (and unless joblib is trying to
+avoid oversubscription), the implementation will use as many threads as
+possible.
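As an illustrative aside (assuming ``threadpoolctl`` is installed, which the
CI jobs above already pin via ``THREADPOOLCTL_VERSION``), the native thread
pools used by this OpenMP-based code can be inspected and capped at runtime::

    from threadpoolctl import threadpool_info, threadpool_limits

    # List the native thread pools (OpenMP, OpenBLAS, MKL, ...) loaded in
    # this process, with the number of threads each one would use.
    for pool in threadpool_info():
        print(pool["user_api"], pool["num_threads"])

    # Temporarily cap OpenMP-parallelized code to 2 threads.
    with threadpool_limits(limits=2, user_api="openmp"):
        pass  # fit scikit-learn estimators here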
+
+You can control the exact number of threads that are used via the
+``OMP_NUM_THREADS`` environment variable::
+
+    OMP_NUM_THREADS=4 python my_script.py
+
+Parallel Numpy routines from numerical libraries
+................................................
+
+Scikit-learn relies heavily on NumPy and SciPy, which internally call
+multi-threaded linear algebra routines implemented in libraries such as MKL,
+OpenBLAS or BLIS.
+
+The number of threads used by the OpenBLAS, MKL or BLIS libraries can be set
+via the ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, and
+``BLIS_NUM_THREADS`` environment variables.
+
+Please note that scikit-learn has no direct control over these
+implementations. Scikit-learn solely relies on Numpy and Scipy.
+
+.. note::
+
+    At the time of writing (2019), NumPy and SciPy packages distributed on
+    pypi.org (used by ``pip``) and on the conda-forge channel are linked
+    with OpenBLAS, while conda packages shipped on the "defaults" channel
+    from anaconda.org are linked by default with MKL.
+
+
+Oversubscription: spawning too many threads
+...........................................
+
+It is generally recommended to avoid using significantly more processes or
+threads than the number of CPUs on a machine. Over-subscription happens when
+a program is running too many threads at the same time.
+
+Suppose you have a machine with 8 CPUs. Consider a case where you're running
+a :class:`~GridSearchCV` (parallelized with joblib) with ``n_jobs=8`` over
+a :class:`~HistGradientBoostingClassifier` (parallelized with OpenMP). Each
+instance of :class:`~HistGradientBoostingClassifier` will spawn 8 threads
+(since you have 8 CPUs). That's a total of ``8 * 8 = 64`` threads, which
+leads to oversubscription of physical CPU resources and to scheduling
+overhead.
+
+Oversubscription can arise in the exact same fashion with parallelized
+routines from MKL, OpenBLAS or BLIS that are nested in joblib calls.
+
+Starting from ``joblib >= 0.14``, when the ``loky`` backend is used (which
+is the default), joblib will tell its child **processes** to limit the
+number of threads they can use, so as to avoid oversubscription. In practice
+the heuristic that joblib uses is to tell the processes to use
+``max_threads = n_cpus // n_jobs``, via their corresponding environment
+variable. Back to our example from above, since the joblib backend of
+:class:`~GridSearchCV` is ``loky``, each process will only be able to use
+1 thread instead of 8, thus mitigating the oversubscription issue.
+
+Note that:
+
+- Manually setting one of the environment variables (``OMP_NUM_THREADS``,
+  ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, or ``BLIS_NUM_THREADS``)
+  will take precedence over what joblib tries to do. The total number of
+  threads will be ``n_jobs * _NUM_THREADS``. Note that setting this
+  limit will also impact your computations in the main process, which will
+  only use ``_NUM_THREADS``. Joblib exposes a context manager for
+  finer control over the number of threads in its workers (see joblib docs
+  linked below).
+- Joblib is currently unable to avoid oversubscription in a
+  multi-threading context. It can only do so with the ``loky`` backend
+  (which spawns processes).
+
+You will find additional details about joblib mitigation of oversubscription
+in the `joblib documentation `_.
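A hypothetical sketch of the nested-parallelism scenario described above (the
data and parameter grid are invented); with ``joblib >= 0.14`` and the default
``loky`` backend, each of the 8 workers is limited to roughly ``n_cpus // 8``
OpenMP threads::

    # needed on scikit-learn versions where HistGradientBoosting* is experimental
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=1000, random_state=0)

    # joblib spawns 8 loky processes; each one runs an OpenMP-parallelized fit.
    search = GridSearchCV(
        HistGradientBoostingClassifier(random_state=0),
        param_grid={"max_leaf_nodes": [15, 31, 63]},
        n_jobs=8,
    )
    search.fit(X, y)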
+
+
+Configuration switches
+----------------------
+
+Python runtime
+..............
+
+:func:`sklearn.set_config` controls the following behaviors:
+
+:assume_finite:
+
+    used to skip validation, which enables faster computations but may
+    lead to segmentation faults if the data contains NaNs.
+
+:working_memory:
+
+    the optimal size of temporary arrays used by some algorithms.
+
+.. _environment_variable:
+
+Environment variables
+......................
+
+These environment variables should be set before importing scikit-learn.
+
+:SKLEARN_SITE_JOBLIB:
+
+    When this environment variable is set to a non-zero value,
+    scikit-learn uses the site joblib rather than its vendored version.
+    Consequently, joblib must be installed for scikit-learn to run.
+    Note that using the site joblib is at your own risk: the versions of
+    scikit-learn and joblib need to be compatible. Currently, joblib 0.11+
+    is supported. In addition, dumps from joblib.Memory might be incompatible,
+    and you might lose some caches and have to redownload some datasets.
+
+    .. deprecated:: 0.21
+
+       As of version 0.21 this parameter has no effect, vendored joblib was
+       removed and site joblib is always used.
+
+:SKLEARN_ASSUME_FINITE:
+
+    Sets the default value for the `assume_finite` argument of
+    :func:`sklearn.set_config`.
+
+:SKLEARN_WORKING_MEMORY:
+
+    Sets the default value for the `working_memory` argument of
+    :func:`sklearn.set_config`.
+
+:SKLEARN_SEED:
+
+    Sets the seed of the global random generator when running the tests,
+    for reproducibility.
diff --git a/doc/conftest.py b/doc/conftest.py
index 4496bb74152ac..3f563187c8465 100644
--- a/doc/conftest.py
+++ b/doc/conftest.py
@@ -5,7 +5,6 @@
 from sklearn.utils import IS_PYPY
 from sklearn.utils._testing import SkipTest
-from sklearn.utils._testing import check_skip_network
 from sklearn.datasets import get_data_home
 from sklearn.datasets._base import _pkl_filepath
 from sklearn.datasets._twenty_newsgroups import CACHE_NAME
@@ -18,7 +17,6 @@ def setup_labeled_faces():
 
 
 def setup_rcv1():
-    check_skip_network()
     # skip the test in rcv1.rst if the dataset is not already loaded
     rcv1_dir = join(get_data_home(), "RCV1")
     if not exists(rcv1_dir):
@@ -35,7 +33,6 @@ def setup_twenty_newsgroups():
 def setup_working_with_text_data():
     if IS_PYPY and os.environ.get('CI', None):
         raise SkipTest('Skipping too slow test with PyPy on CI')
-    check_skip_network()
     cache_path = _pkl_filepath(get_data_home(), CACHE_NAME)
     if not exists(cache_path):
         raise SkipTest("Skipping dataset loading doctests")
diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst
index 78c1175620c4f..674780d46d780 100644
--- a/doc/developers/contributing.rst
+++ b/doc/developers/contributing.rst
@@ -493,6 +493,11 @@ a benchmark script and profiling output (see :ref:`monitoring_performances`).
 Also check out the :ref:`performance-howto` guide for more details on
 profiling and Cython optimizations.
 
+* Tests that use the network, such as those for the dataset functions prefixed
+  with `fetch_*`, are off by default. These tests can be run with::
+
+      $ pytest sklearn -m 'not skipnetwork'
+
 .. note::
 
    The current state of the scikit-learn code base is not compliant with
diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py
index 4612cd5deb4bc..8f34f7c46cb21 100644
--- a/sklearn/datasets/tests/conftest.py
+++ b/sklearn/datasets/tests/conftest.py
@@ -1,67 +1,5 @@
-""" Network tests are only run, if data is already locally available,
-or if download is specifically requested by environment variable."""
 import builtins
-from functools import wraps
-from os import environ
 import pytest
 
-from sklearn.datasets import fetch_20newsgroups
-from sklearn.datasets import fetch_20newsgroups_vectorized
-from sklearn.datasets import fetch_california_housing
-from sklearn.datasets import fetch_covtype
-from sklearn.datasets import fetch_kddcup99
-from sklearn.datasets import fetch_olivetti_faces
-from sklearn.datasets import fetch_rcv1
-
-
-def _wrapped_fetch(f, dataset_name):
-    """ Fetch dataset (download if missing and requested by environment) """
-    download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'
-
-    @wraps(f)
-    def wrapped(*args, **kwargs):
-        kwargs['download_if_missing'] = download_if_missing
-        try:
-            return f(*args, **kwargs)
-        except IOError:
-            pytest.skip("Download {} to run this test".format(dataset_name))
-    return wrapped
-
-
-@pytest.fixture
-def fetch_20newsgroups_fxt():
-    return _wrapped_fetch(fetch_20newsgroups, dataset_name='20newsgroups')
-
-
-@pytest.fixture
-def fetch_20newsgroups_vectorized_fxt():
-    return _wrapped_fetch(fetch_20newsgroups_vectorized,
-                          dataset_name='20newsgroups_vectorized')
-
-
-@pytest.fixture
-def fetch_california_housing_fxt():
-    return _wrapped_fetch(fetch_california_housing,
-                          dataset_name='california_housing')
-
-
-@pytest.fixture
-def fetch_covtype_fxt():
-    return _wrapped_fetch(fetch_covtype, dataset_name='covtype')
-
-
-@pytest.fixture
-def fetch_kddcup99_fxt():
-    return _wrapped_fetch(fetch_kddcup99, dataset_name='kddcup99')
-
-
-@pytest.fixture
-def fetch_olivetti_faces_fxt():
-    return _wrapped_fetch(fetch_olivetti_faces, dataset_name='olivetti_faces')
-
-
-@pytest.fixture
-def fetch_rcv1_fxt():
-    return _wrapped_fetch(fetch_rcv1, dataset_name='rcv1')
 
 
 @pytest.fixture
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 256b79db4865c..289603c7d609b 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -13,8 +13,7 @@
 from sklearn import datasets
 from sklearn.base import clone
-from sklearn.datasets import (make_classification, fetch_california_housing,
-                              make_regression)
+from sklearn.datasets import make_classification, make_regression
 from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.ensemble._gradient_boosting import predict_stages
@@ -345,8 +344,7 @@ def test_max_feature_regression():
     assert deviance < 0.5, "GB failed with deviance %.4f" % deviance
 
 
-@pytest.mark.network
-def test_feature_importance_regression():
+def test_feature_importance_regression(fetch_california_housing_fxt):
     """Test that Gini importance is calculated correctly.
 
     This test follows the example from [1]_ (pg. 373).
 
     .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
        of statistical learning. New York: Springer series in statistics.
""" - california = fetch_california_housing() + california = fetch_california_housing_fxt() X, y = california.data, california.target X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 779e7b6574e3e..1bef09b86e1df 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -472,11 +472,6 @@ def set_random_state(estimator, random_state=0): pass -def check_skip_network(): - if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 0)): - raise SkipTest("Text tutorial requires large dataset download") - - def _delete_folder(folder_path, warn=False): """Utility function to cleanup a temporary folder if still existing.