8000 [WIP]: Tweak Int64Index.reindex() performance by kozlov-alexey · Pull Request #984 · IntelPython/sdc · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

[WIP]: Tweak Int64Index.reindex() performance #984

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fallback to STL impl and more TO-DOs added
  • Loading branch information
kozlov-alexey committed Aug 17, 2021
commit 540e288dbe99c180c91c0e0947e89903373921fe
1 change: 1 addition & 0 deletions sdc/extensions/indexes/int64_index_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ def pd_int64_index_reindex_overload(self, target, method=None, level=None, limit
raise TypingError('{} Not allowed for non comparable indexes. \
Given: self={}, target={}'.format(_func_name, self, target))

# FIXME: handle case when target is not numpy array!
def pd_int64_index_reindex_impl(self, target, method=None, level=None, limit=None, tolerance=None):
# for the Int64Index case index.data can be passed to a native function that can build the map
# and fill the resulting indexer more efficiently than generic implementation
Expand Down
13 changes: 11 additions & 2 deletions sdc/extensions/sdc_hashmap_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,14 @@ def codegen(context, builder, sig, args):
res_ctinfo = context.make_helper(builder, res_type, res_val)
lir_key_type = context.get_value_type(types.int64)

size_val = context.compile_internal(
data_size_val = context.compile_internal(
builder,
lambda arr: len(arr),
types.int64(index_data_type),
[data_val]
)

searched_size_val = context.compile_internal(
builder,
lambda arr: len(arr),
types.int64(searched_type),
Expand All @@ -1164,14 +1171,16 @@ def codegen(context, builder, sig, args):
[lir_key_type.as_pointer(),
lir_key_type.as_pointer(),
lir.IntType(64),
lir.IntType(64),
lir_key_type.as_pointer(),])
fn_hashmap_fill_indexer = builder.module.get_or_insert_function(
fnty, name=f"native_map_and_fill_indexer_int64")

res = builder.call(fn_hashmap_fill_indexer,
[data_ctinfo.data,
searched_ctinfo.data,
size_val,
data_size_val,
searched_size_val,
res_ctinfo.data])
return context.cast(builder, res, types.uint8, types.bool_)

Expand Down
67 changes: 39 additions & 28 deletions sdc/native/conc_dict_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@
// *****************************************************************************

#include <Python.h>
#include <unordered_map>
#include "hashmap.hpp"
#include <chrono>
#include <iostream>

class TrivialTBBHashCompare {
class TrivialInt64TBBHashCompare {
public:
static size_t hash(const int64_t& val) {
return (size_t)val;
Expand All @@ -39,7 +38,17 @@ class TrivialTBBHashCompare {
}
};

using namespace std::chrono;
// Stateless hash functor for int64_t keys: the hash of a key is the key
// itself converted to size_t, so no mixing of the bits is performed.
struct TrivialInt64Hash {
    // The functor holds no data; its special members are explicitly defaulted.
    TrivialInt64Hash() = default;
    TrivialInt64Hash(const TrivialInt64Hash&) = default;
    ~TrivialInt64Hash() = default;

    // Returns `val` converted to size_t (negative values wrap modulo 2^N).
    size_t operator()(const int64_t& val) const {
        const size_t hashed = static_cast<size_t>(val);
        return hashed;
    }
};

using namespace std;

#define declare_hashmap_create(key_type, val_type, suffix) \
void hashmap_create_##suffix(NRT_MemInfo** meminfo, \
Expand Down Expand Up @@ -228,37 +237,31 @@ void set_number_of_threads(uint64_t threads)
utils::tbb_control::set_threads_num(threads);
}

uint8_t native_map_and_fill_indexer_int64(int64_t* data, int64_t* searched, int64_t size, int64_t* res)
uint8_t native_map_and_fill_indexer_int64(int64_t* data, int64_t* searched, int64_t dsize, int64_t ssize, int64_t* res)
{
auto t1 = high_resolution_clock::now();
auto my_map_ptr = new tbb::concurrent_hash_map<int64_t, int64_t, TrivialTBBHashCompare>(2*size, TrivialTBBHashCompare());
auto& my_map = *my_map_ptr;

#if SUPPORTED_TBB_VERSION
// FIXME: we need to store the allocated map somewhere and re-use it later
// here it's allocated on the heap (but not freed) to avoid calling the dtor (similar to pandas, which caches the map once it is built)
auto ptr_my_map = new tbb::concurrent_hash_map<int64_t, int64_t, TrivialInt64TBBHashCompare>(2*dsize, TrivialInt64TBBHashCompare());
utils::tbb_control::get_arena().execute([&]() {
tbb::parallel_for(tbb::blocked_range<size_t>(0, size),
tbb::parallel_for(tbb::blocked_range<size_t>(0, dsize),
[&](const tbb::blocked_range<size_t>& r) {
for(size_t i=r.begin(); i!=r.end(); ++i) {
my_map.emplace(data[i], i);
ptr_my_map->emplace(data[i], i);
}
}
);
});

if (my_map.size() < size)
if (ptr_my_map->size() < dsize)
return 0;

auto t2 = high_resolution_clock::now();
duration<double, std::ratio<1, 1>> ms_double = t2 - t1;
auto ms_int = duration_cast<milliseconds>(t2 - t1);
std::cout << "native (TBB) building map: " << ms_int.count() << " ms, (" << ms_double.count() << " sec)" << std::endl;

auto it_map_end = my_map.end();
utils::tbb_control::get_arena().execute([&]() {
tbb::parallel_for(tbb::blocked_range<size_t>(0, size),
tbb::parallel_for(tbb::blocked_range<size_t>(0, ssize),
[&](const tbb::blocked_range<size_t>& r) {
for(size_t i=r.begin(); i!=r.end(); ++i) {
auto it_pair = my_map.equal_range(searched[i]);
if (it_pair.first != my_map.end()) {
auto it_pair = ptr_my_map->equal_range(searched[i]);
if (it_pair.first != ptr_my_map->end()) {
res[i] = it_pair.first->second;
} else {
res[i] = -1;
Expand All @@ -268,15 +271,23 @@ uint8_t native_map_and_fill_indexer_int64(int64_t* data, int64_t* searched, int6
);
});

auto t3 = high_resolution_clock::now();
ms_double = t3 - t2;
ms_int = duration_cast<milliseconds>(t3 - t2);
std::cout << "native (TBB) filling indexer: " << ms_int.count() << " ms, (" << ms_double.count() << " sec)" << std::endl;
ms_double = t3 - t1;
ms_int = duration_cast<milliseconds>(t3 - t1);
std::cout << "total time: " << ms_int.count() << " ms, (" << ms_double.count() << " sec)" << std::endl;
return 1;
#else
auto ptr_my_map = new std::unordered_map<int64_t, int64_t, TrivialInt64Hash>(2*dsize, TrivialInt64Hash());
for(size_t i=0; i<dsize; ++i) {
ptr_my_map->emplace(data[i], i);
}

if (ptr_my_map->size() < dsize)
return 0;

for(size_t i=0; i<ssize; ++i) {
auto it = ptr_my_map->find(searched[i]);
res[i] = (it != ptr_my_map->end()) ? it->second : -1;
}

return 1;
#endif
}


Expand Down
0