Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump hictk and use Arrow to construct DFs of pixels #56

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ project(
include(FetchContent)
FetchContent_Declare(
hictk
URL "${CMAKE_CURRENT_SOURCE_DIR}/external/hictk-0.0.12.tar.xz"
URL_HASH "SHA256=9545d948f6feee5f6db0db3648d118483cc54c1f71273b937541b97f67268073"
URL "${CMAKE_CURRENT_SOURCE_DIR}/external/hictk-v1.0.0.tar.xz"
URL_HASH "SHA256=4986317845b8b62183c16ad258e4ad5b5a9f68289fe89f538a9481f363e9b899"
EXCLUDE_FROM_ALL
SYSTEM)

Expand All @@ -43,6 +43,7 @@ set(HICTK_ENABLE_TESTING OFF)
set(HICTK_BUILD_EXAMPLES OFF)
set(HICTK_BUILD_BENCHMARKS OFF)
set(HICTK_WITH_EIGEN OFF)
set(HICTK_WITH_ARROW ON)
set(HICTK_BUILD_TOOLS OFF)
set(HICTK_INSTALL OFF)
FetchContent_MakeAvailable(hictk nanobind)
Expand All @@ -55,6 +56,8 @@ if(WIN32)
target_compile_definitions(hictkpy_project_options INTERFACE NOMINMAX _CRT_SECURE_NO_WARNINGS)
endif()

# Define the HICTK_WITH_ARROW preprocessor symbol for every target that links
# against the hictkpy_project_options interface library.
target_compile_definitions(hictkpy_project_options INTERFACE HICTK_WITH_ARROW)

add_library(hictkpy_project_warnings INTERFACE)
target_compile_options(
hictkpy_project_warnings
Expand Down
121 changes: 121 additions & 0 deletions conanfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Copyright (C) 2024 Roberto Rossini <[email protected]>
#
# SPDX-License-Identifier: MIT

from conan import ConanFile
from conan.tools.build import check_min_cppstd


required_conan_version = ">=1.53.0"


class HictkpyConan(ConanFile):
    """Conan recipe declaring the C++ dependencies needed to build hictkpy.

    The recipe only declares requirements and tunes the options of those
    requirements; it generates CMake dependency files through ``CMakeDeps``.
    """

    name = "hictkpy"
    description = "Python bindings for hictk: read and write .cool and .hic files directly from Python."
    license = "MIT"
    topics = ("hictk", "bioinformatics")
    homepage = "https://github.com/paulsengroup/hictk"
    url = "https://github.com/paulsengroup/hictk"
    package_type = "library"
    settings = ("os", "arch", "compiler", "build_type")

    options = {"shared": [True, False], "fPIC": [True, False]}
    default_options = {"shared": False, "fPIC": True}

    generators = "CMakeDeps"

    @property
    def _min_cppstd(self):
        """Minimum C++ standard accepted by validate()."""
        return 17

    def requirements(self):
        """Declare every dependency, pinned to an exact version and revision hash.

        Each entry pairs a reference with the extra keyword arguments handed to
        ``self.requires``; only boost, thrift and zstd are requested with
        ``force=True``.
        """
        pinned_requirements = (
            ("arrow/16.1.0#f484da4c4c27c1eb3ead5d61b70635cd", {}),
            ("boost/1.85.0#d9ae7996a5b917e0cfd4b738aa976dee", {"force": True}),
            ("bshoshany-thread-pool/4.1.0#be1802a8768416a6c9b1393cf0ce5e9c", {}),
            ("concurrentqueue/1.0.4#1e48e1c712bcfd892087c9c622a51502", {}),
            ("fast_float/6.1.1#e29acaa3d0543dee343abe3f6815346e", {}),
            ("fmt/10.2.1#9199a7a0611866dea5c8849a77467b25", {}),
            ("hdf5/1.14.3#31ccd8d4de83844f5db48471df1944a1", {}),
            ("highfive/2.9.0#c57477beed8b0110fadeb6da8f48bcc5", {}),
            ("libdeflate/1.19#3ea74a4549efc14d4b1202dc4bfbf602", {}),
            ("parallel-hashmap/1.3.12#dc7755096d8a1fac7792fdd85760b6ca", {}),
            ("readerwriterqueue/1.0.6#aaa5ff6fac60c2aee591e9e51b063b83", {}),
            ("span-lite/0.11.0#519fd49fff711674cfed8cd17d4ed422", {}),
            ("spdlog/1.14.1#972bbf70be1da4bc57ea589af0efde03", {}),
            ("thrift/0.20.0#31cd4d031653f03467ba430c4ecdd6d8", {"force": True}),
            ("zstd/1.5.6#afefe79a309bc2a7b9f56c2093504c8b", {"force": True}),
        )
        for reference, extra_kwargs in pinned_requirements:
            self.requires(reference, **extra_kwargs)

    def validate(self):
        """Check the C++ standard, but only when the profile sets compiler.cppstd."""
        if not self.settings.get_safe("compiler.cppstd"):
            return
        check_min_cppstd(self, self._min_cppstd)

    def configure(self):
        """Select the C++ standard library on gcc/clang and set dependency options."""
        if self.settings.compiler in ["clang", "gcc"]:
            self.settings.compiler.libcxx = "libstdc++11"

        # package name -> {option name: value}; applied in declaration order.
        dependency_options = {
            "arrow": {
                "compute": True,
                "parquet": False,
                "with_boost": True,
                "with_re2": True,
                "with_thrift": False,
            },
            "boost": {
                "system_no_deprecated": True,
                "asio_no_deprecated": True,
                "filesystem_no_deprecated": True,
                "filesystem_version": 4,
                "zlib": False,
                "bzip2": False,
                "lzma": False,
                "zstd": False,
                # Only atomic, filesystem and system are left enabled below.
                "without_atomic": False,
                "without_charconv": True,
                "without_chrono": True,
                "without_container": True,
                "without_context": True,
                "without_contract": True,
                "without_coroutine": True,
                "without_date_time": True,
                "without_exception": True,
                "without_fiber": True,
                "without_filesystem": False,
                "without_graph": True,
                "without_graph_parallel": True,
                "without_iostreams": True,
                "without_json": True,
                "without_locale": True,
                "without_log": True,
                "without_math": True,
                "without_mpi": True,
                "without_nowide": True,
                "without_program_options": True,
                "without_python": True,
                "without_random": True,
                "without_regex": True,
                "without_serialization": True,
                "without_stacktrace": True,
                "without_system": False,
                "without_test": True,
                "without_thread": True,
                "without_timer": True,
                "without_type_erasure": True,
                "without_url": True,
                "without_wave": True,
            },
            "fmt": {"header_only": True},
            "hdf5": {
                "enable_cxx": False,
                "hl": False,
                "threadsafe": False,
                "parallel": False,
            },
            "highfive": {
                "with_boost": False,
                "with_eigen": False,
                "with_opencv": False,
                "with_xtensor": False,
            },
            "spdlog": {"header_only": True},
            "thrift": {"with_libevent": False, "with_openssl": False},
            "zstd": {"build_programs": False},
        }
        for package, overrides in dependency_options.items():
            for option_name, option_value in overrides.items():
                setattr(self.options[package], option_name, option_value)
34 changes: 0 additions & 34 deletions conanfile.txt

This file was deleted.

Binary file removed external/hictk-0.0.12.tar.xz
Binary file not shown.
Binary file added external/hictk-v1.0.0.tar.xz
Binary file not shown.
11 changes: 9 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
[build-system]
requires = [
"conan>=2.0.5",
"pyarrow==16.1.0",
"scikit-build-core>=0.8",
"numpy",
"pandas>=2.1.0,!=2.2.0",
Expand Down Expand Up @@ -32,6 +33,7 @@ classifiers = [
dependencies = [
"numpy",
"pandas>=2.1.0,!=2.2.0",
"pyarrow==16.1.0",
"scipy",
]

Expand All @@ -53,8 +55,10 @@ HICTK_BUILD_EXAMPLES = "OFF"
HICTK_BUILD_BENCHMARKS = "OFF"
HICTK_BUILD_TOOLS = "OFF"
HICTK_ENABLE_GIT_VERSION_TRACKING = "OFF"
HICTK_WITH_ARROW = "ON"
HICTK_WITH_EIGEN = "OFF"
BUILD_SHARED_LIBS = "OFF"
CONAN_INSTALL_ARGS = "--settings=compiler.cppstd=17;--build=missing;--update;--options=*/*:shared=False"
CONAN_INSTALL_ARGS = "--settings=compiler.cppstd=17;--build=missing;--build=b2/*;--update;--options=*/*:shared=False"

[tool.setuptools_scm]
write_to = "src/_version.py"
Expand All @@ -73,14 +77,17 @@ filterwarnings = [
skip = ["*musllinux*", "pp*"]
test-command = "python -m pytest {project}/test"
test-extras = ["test"]
test-skip = ["*universal2", "pp*"]
test-skip = ["*universal2"]

# Setuptools bug causes collision between pypy and cpython artifacts
before-build = [
"rm -rf '{project}/build'",
]
environment = { PIP_VERBOSE=1 }

# We are using static linking, thus repairing wheels is not necessary.
repair-wheel-command = ""

[tool.cibuildwheel.macos]
environment = { MACOSX_DEPLOYMENT_TARGET=10.15 }

Expand Down
63 changes: 58 additions & 5 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,59 @@ find_package(
COMPONENTS Interpreter Development.Module
REQUIRED)

# Query the Python interpreter used for the build for the directory holding
# pyarrow's C++ headers. print(..., end='') keeps a trailing newline out of the
# captured value.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include(), end='')"
  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
  RESULT_VARIABLE STATUS)
if(NOT STATUS EQUAL 0)
  message(FATAL_ERROR "Unable to find pyarrow include directory")
endif()
message(STATUS "Found pyarrow include directory: ${PYARROW_INCLUDE_DIR}")

# Query pyarrow for the directories that contain its shared libraries.
# The directories are joined with ';' so that PYARROW_LIB_DIRS is a proper
# CMake list: an unquoted ${PYARROW_LIB_DIRS} then expands to one argument per
# directory, and directories containing spaces are preserved intact.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(';'.join(pyarrow.get_library_dirs()), end='')"
  RESULT_VARIABLE STATUS
  OUTPUT_VARIABLE PYARROW_LIB_DIRS)
if(STATUS EQUAL 0)
  message(STATUS "Found pyarrow link directory: ${PYARROW_LIB_DIRS}")
else()
  message(FATAL_ERROR "Unable to find pyarrow link directory")
endif()

# Query pyarrow for the libraries an extension has to link against, skipping
# 'arrow' itself. The names are joined with ';' so that PYARROW_LIBS is a proper
# CMake list and an unquoted ${PYARROW_LIBS} expands to one argument per library
# instead of a single space-containing item.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c
          "import pyarrow; print(';'.join(lib for lib in pyarrow.get_libraries() if lib != 'arrow'), end='')"
  RESULT_VARIABLE STATUS
  OUTPUT_VARIABLE PYARROW_LIBS)
if(STATUS EQUAL 0)
  message(STATUS "Found pyarrow libraries: ${PYARROW_LIBS}")
else()
  message(FATAL_ERROR "Unable to find pyarrow libraries")
endif()

# Query the Python interpreter used for the build for the directory holding
# numpy's C headers. print(..., end='') keeps a trailing newline out of the
# captured value.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import numpy; print(numpy.get_include(), end='')"
  OUTPUT_VARIABLE NUMPY_INCLUDE_DIR
  RESULT_VARIABLE STATUS)
if(NOT STATUS EQUAL 0)
  message(FATAL_ERROR "Unable to find numpy include directory")
endif()
message(STATUS "Found numpy include directory: ${NUMPY_INCLUDE_DIR}")

# Run pyarrow.create_library_symlinks() at configure time and abort if it fails.
# NOTE(review): presumably needed so the linker can resolve the bundled,
# version-suffixed Arrow libraries by their plain names - confirm against the
# pyarrow documentation.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; pyarrow.create_library_symlinks()"
  RESULT_VARIABLE STATUS)
if(NOT STATUS EQUAL 0)
  message(FATAL_ERROR "Unable to create symlink to arrow libraries")
endif()

# Make the pyarrow and numpy headers, and the pyarrow library directories,
# visible to the targets created below. Both commands are directory-scoped
# rather than tied to a single target.
include_directories("${PYARROW_INCLUDE_DIR}" "${NUMPY_INCLUDE_DIR}")
link_directories(${PYARROW_LIB_DIRS})

nanobind_add_module(
_hictkpy
NB_STATIC
Expand All @@ -22,11 +75,11 @@ nanobind_add_module(
target_include_directories(_hictkpy PRIVATE include)
target_link_libraries(
_hictkpy
PRIVATE hictkpy_project_options
hictkpy_project_warnings
hictk::cooler
hictk::file
hictk::hic)
PRIVATE hictkpy_project_options hictkpy_project_warnings
PUBLIC hictk::cooler
hictk::file
hictk::hic
${PYARROW_LIBS})

install(TARGETS _hictkpy LIBRARY DESTINATION hictkpy)

Expand Down
4 changes: 4 additions & 0 deletions src/hictkpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: MIT

#include <arrow/python/api.h>
#include <nanobind/nanobind.h>
#include <nanobind/operators.h>
#include <nanobind/stl/optional.h>
Expand Down Expand Up @@ -261,6 +262,9 @@ namespace nb = nanobind;
using namespace nb::literals;

NB_MODULE(_hictkpy, m) {
if (arrow::py::import_pyarrow() == -1) {
throw std::runtime_error("failed to initialize pyarrow runtime");
}
[[maybe_unused]] auto np = nb::module_::import_("numpy");
[[maybe_unused]] auto pd = nb::module_::import_("pandas");
[[maybe_unused]] auto ss = nb::module_::import_("scipy.sparse");
Expand Down
6 changes: 6 additions & 0 deletions src/hictkpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
#
# SPDX-License-Identifier: MIT

def _load_arrow_python_lib():
    """Import pyarrow purely for its import-time side effects.

    The module object is deliberately discarded; nothing is returned.
    NOTE(review): presumably this makes the Arrow Python shared library
    available before the compiled extension is imported - confirm.
    """
    __import__("pyarrow")


_load_arrow_python_lib()


from ._hictkpy import (
__doc__,
Expand Down
1 change: 0 additions & 1 deletion src/hictkpy_multires_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include "hictkpy/multires_file.hpp"


namespace hictkpy::multires_file {

void ctor(hictk::MultiResFile* fp, std::string_view path) {
Expand Down
12 changes: 7 additions & 5 deletions src/hictkpy_pixel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,14 @@ nb::object PixelSelector::to_df() const {
if (int_pixels()) {
using T = std::int32_t;
return pixel_iterators_to_df(s->bins(), s->template begin<T>(), s->template end<T>(),
join, mirror);
} else {
using T = double;
return pixel_iterators_to_df(s->bins(), s->template begin<T>(), s->template end<T>(),
join, mirror);
join, mirror)
.attr("to_pandas")();
}

using T = double;
return pixel_iterators_to_df(s->bins(), s->template begin<T>(), s->template end<T>(), join,
mirror)
.attr("to_pandas")();
},
selector);
}
Expand Down
Loading
Loading