Skip to content

Commit

Permalink
implement overlay for subpath graph
Browse files Browse the repository at this point in the history
  • Loading branch information
jeizenga committed Jun 13, 2024
1 parent 321b5a4 commit 67cca98
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 41 deletions.
105 changes: 105 additions & 0 deletions src/subpath_overlay.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include <atomic>
#include "subpath_overlay.hpp"

#include <handlegraph/util.hpp>

namespace vg {

using namespace std;
using namespace handlegraph;

SubpathOverlay::SubpathOverlay(const PathHandleGraph* backing,
const step_handle_t& begin, const step_handle_t& end) : backing_graph(backing) {

for (auto step = begin; step != end; step = backing->get_next_step(step)) {
subpath_handles.emplace_back(backing->get_handle_of_step(step));
}
}

bool SubpathOverlay::has_node(nid_t node_id) const {
return node_id <= subpath_handles.size();
}

handle_t SubpathOverlay::get_handle(const nid_t& node_id, bool is_reverse) const {
return handlegraph::number_bool_packing::pack(node_id, is_reverse);
}

nid_t SubpathOverlay::get_id(const handle_t& handle) const {
return handlegraph::number_bool_packing::unpack_number(handle);
}

bool SubpathOverlay::get_is_reverse(const handle_t& handle) const {
return handlegraph::number_bool_packing::unpack_bit(handle);
}

handle_t SubpathOverlay::flip(const handle_t& handle) const {
return handlegraph::number_bool_packing::toggle_bit(handle);
}

size_t SubpathOverlay::get_length(const handle_t& handle) const {
return backing_graph->get_length(subpath_handles[get_id(handle) - 1]);
}

std::string SubpathOverlay::get_sequence(const handle_t& handle) const {
return backing_graph->get_sequence(get_underlying_handle(handle));
}

size_t SubpathOverlay::get_node_count() const {
return subpath_handles.size();
}

nid_t SubpathOverlay::min_node_id() const {
return 1;
}

nid_t SubpathOverlay::max_node_id() const {
return subpath_handles.size();
}

bool SubpathOverlay::follow_edges_impl(const handle_t& handle, bool go_left,
const std::function<bool(const handle_t&)>& iteratee) const {
if (go_left != get_is_reverse(handle)) {
if (get_id(handle) == 1) {
return true;
}
else {
return iteratee(get_handle(get_id(handle) - 1, get_is_reverse(handle)));
}
}
else {
if (get_id(handle) == subpath_handles.size()) {
return true;
}
else {
return iteratee(get_handle(get_id(handle) + 1, get_is_reverse(handle)));
}
}
}

bool SubpathOverlay::for_each_handle_impl(const std::function<bool(const handle_t&)>& iteratee, bool parallel) const {

if (!parallel) {
bool keep_going = true;
for (size_t i = 1; keep_going && i <= subpath_handles.size(); ++i) {
keep_going = iteratee(get_handle(i, false));
}
return keep_going;
} else {
std::atomic<bool> keep_going(true);
#pragma omp parallel for
for (size_t i = 1; i <= subpath_handles.size(); ++i) {
keep_going = keep_going && iteratee(get_handle(i, false));
}
return keep_going;
}
}

handle_t SubpathOverlay::get_underlying_handle(const handle_t& handle) const {
handle_t underlying = subpath_handles[get_id(handle) - 1];
if (get_is_reverse(handle)) {
underlying = backing_graph->flip(underlying);
}
return underlying;
}

}
107 changes: 107 additions & 0 deletions src/subpath_overlay.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#ifndef VG_SUBGRAPH_OVERLAY_HPP_INCLUDED
#define VG_SUBGRAPH_OVERLAY_HPP_INCLUDED

/**
* \file subgraph_overlay.hpp
*
* Provides SourceSinkOverlay, a HandleGraph implementation that joins all the
* heads and tails of a backing graph to single source and sink nodes.
*
*/


#include "handle.hpp"

#include <handlegraph/util.hpp>


namespace vg {

using namespace handlegraph;

/**
* Overlay that presents a linear graph corresponding to a subinterval of a path
*/
class SubpathOverlay : public ExpandingOverlayGraph {

public:
/**
* Construct new SubpathOverlay between two steps. The 'end' step is past-the-last.
*/
SubpathOverlay(const PathHandleGraph* backing,
const step_handle_t& begin, const step_handle_t& end);
SubpathOverlay() = default;

~SubpathOverlay() = default;

////////////////////////////////////////////////////////////////////////////
// Handle-based interface
////////////////////////////////////////////////////////////////////////////

/// Method to check if a node exists by ID
bool has_node(nid_t node_id) const;

/// Look up the handle for the node with the given ID in the given orientation
handle_t get_handle(const nid_t& node_id, bool is_reverse = false) const;

/// Get the ID from a handle
nid_t get_id(const handle_t& handle) const;

/// Get the orientation of a handle
bool get_is_reverse(const handle_t& handle) const;

/// Invert the orientation of a handle (potentially without getting its ID)
handle_t flip(const handle_t& handle) const;

/// Get the length of a node
size_t get_length(const handle_t& handle) const;

/// Get the sequence of a node, presented in the handle's local forward
/// orientation.
std::string get_sequence(const handle_t& handle) const;

/// Return the number of nodes in the graph
size_t get_node_count() const;

/// Return the smallest ID in the graph, or some smaller number if the
/// smallest ID is unavailable. Return value is unspecified if the graph is empty.
nid_t min_node_id() const;

/// Return the largest ID in the graph, or some larger number if the
/// largest ID is unavailable. Return value is unspecified if the graph is empty.
nid_t max_node_id() const;

///////////////////////////////////
/// ExpandingOverlayGraph interface
///////////////////////////////////

/**
* Returns the handle in the underlying graph that corresponds to a handle in the
* overlay
*/
handle_t get_underlying_handle(const handle_t& handle) const;

protected:

/// Loop over all the handles to next/previous (right/left) nodes. Passes
/// them to a callback which returns false to stop iterating and true to
/// continue. Returns true if we finished and false if we stopped early.
bool follow_edges_impl(const handle_t& handle, bool go_left, const std::function<bool(const handle_t&)>& iteratee) const;

/// Loop over all the nodes in the graph in their local forward
/// orientations, in their internal stored order. Stop if the iteratee
/// returns false. Can be told to run in parallel, in which case stopping
/// after a false return value is on a best-effort basis and iteration
/// order is not defined. Returns true if we finished and false if we
/// stopped early.
bool for_each_handle_impl(const std::function<bool(const handle_t&)>& iteratee, bool parallel = false) const;

/// the backing graph
const HandleGraph* backing_graph = nullptr;

std::vector<handle_t> subpath_handles;
};

}

#endif
136 changes: 95 additions & 41 deletions src/unittest/overlays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

#include "vg/io/json2pb.h"
#include "../split_strand_graph.hpp"
#include "../subpath_overlay.hpp"
#include "../utility.hpp"
#include "../handle.hpp"
#include "random_graph.hpp"

#include "../vg.hpp"
Expand All @@ -21,60 +23,112 @@
namespace vg {
namespace unittest {
using namespace std;
using namespace bdsg;

TEST_CASE("StrandSplitGraph overlay produces graphs that are single stranded", "[overlay][handle]") {
TEST_CASE("StrandSplitGraph overlay produces graphs that are single stranded", "[overlay][handle]") {

int num_tests = 100;
int seq_size = 500;
int var_len = 10;
int var_count = 30;

for (int i = 0; i < num_tests; ++i) {

int num_tests = 100;
int seq_size = 500;
int var_len = 10;
int var_count = 30;
bdsg::HashGraph graph;
random_graph(seq_size, var_len, var_count, &graph);

StrandSplitGraph split(&graph);
REQUIRE(handlealgs::is_single_stranded(&split));

REQUIRE(split.get_node_count() == 2 * graph.get_node_count());

unordered_map<handle_t, int> node_count;

for (int i = 0; i < num_tests; ++i) {
int split_edge_trav_count = 0;
int graph_edge_trav_count = 0;

split.for_each_handle([&](const handle_t& h) {
REQUIRE(split.get_sequence(h) == graph.get_sequence(split.get_underlying_handle(h)));

bdsg::HashGraph graph;
random_graph(seq_size, var_len, var_count, &graph);

StrandSplitGraph split(&graph);
REQUIRE(handlealgs::is_single_stranded(&split));
split.follow_edges(h, false, [&](const handle_t& n) {
REQUIRE(graph.has_edge(split.get_underlying_handle(h),
split.get_underlying_handle(n)));
split_edge_trav_count++;
});
split.follow_edges(h, true, [&](const handle_t& p) {
REQUIRE(graph.has_edge(split.get_underlying_handle(p),
split.get_underlying_handle(h)));
split_edge_trav_count++;
});

REQUIRE(split.get_node_count() == 2 * graph.get_node_count());
node_count[split.get_underlying_handle(h)]++;
});

graph.for_each_handle([&](const handle_t& h) {
REQUIRE(node_count[h] == 1);
REQUIRE(node_count[graph.flip(h)] == 1);
graph.follow_edges(h, false, [&](const handle_t& n) {
graph_edge_trav_count++;
});
graph.follow_edges(h, true, [&](const handle_t& p) {
graph_edge_trav_count++;
});
});

REQUIRE(split_edge_trav_count == 2 * graph_edge_trav_count);
}
}

TEST_CASE("SubpathOverlay works as expected", "[overlay][handle]") {


HashGraph graph;
auto h1 = graph.create_handle("A");
auto h2 = graph.create_handle("C");
auto h3 = graph.create_handle("G");
auto h4 = graph.create_handle("T");

graph.create_edge(h1, h2);
graph.create_edge(h1, h3);
graph.create_edge(h2, h4);
graph.create_edge(h3, h4);

auto p = graph.create_path_handle("p");

vector<step_handle_t> steps;
steps.push_back(graph.append_step(p, h1));
steps.push_back(graph.append_step(p, h2));
steps.push_back(graph.append_step(p, h4));

for (size_t i = 0; i < steps.size(); ++i) {
for (size_t j = i; j < steps.size(); ++j) {

unordered_map<handle_t, int> node_count;
SubpathOverlay subpath(&graph, steps[i], graph.get_next_step(steps[j]));

int split_edge_trav_count = 0;
int graph_edge_trav_count = 0;
REQUIRE(handlealgs::is_single_stranded(&subpath));

split.for_each_handle([&](const handle_t& h) {
REQUIRE(split.get_sequence(h) == graph.get_sequence(split.get_underlying_handle(h)));

split.follow_edges(h, false, [&](const handle_t& n) {
REQUIRE(graph.has_edge(split.get_underlying_handle(h),
split.get_underlying_handle(n)));
split_edge_trav_count++;
});
split.follow_edges(h, true, [&](const handle_t& p) {
REQUIRE(graph.has_edge(split.get_underlying_handle(p),
split.get_underlying_handle(h)));
split_edge_trav_count++;
});

node_count[split.get_underlying_handle(h)]++;
REQUIRE(subpath.get_node_count() == j - i + 1);
size_t manual_node_count = 0;
subpath.for_each_handle([&](const handle_t& h) {
++manual_node_count;
});
REQUIRE(manual_node_count == subpath.get_node_count());

graph.for_each_handle([&](const handle_t& h) {
REQUIRE(node_count[h] == 1);
REQUIRE(node_count[graph.flip(h)] == 1);
graph.follow_edges(h, false, [&](const handle_t& n) {
graph_edge_trav_count++;
});
graph.follow_edges(h, true, [&](const handle_t& p) {
graph_edge_trav_count++;
});
});
auto nodes = handlealgs::lazier_topological_order(&subpath);

REQUIRE(split_edge_trav_count == 2 * graph_edge_trav_count);
for (size_t k = 0; k < nodes.size(); ++k) {
auto s = graph.get_handle_of_step(steps[i + k]);
REQUIRE(subpath.get_underlying_handle(nodes[k]) == s);
REQUIRE(subpath.get_sequence(nodes[k]) == graph.get_sequence(s));

REQUIRE(subpath.get_degree(nodes[k], true) == (k == 0 ? 0 : 1));
REQUIRE(subpath.get_degree(nodes[k], false) == (k + 1 == nodes.size() ? 0 : 1));
}
}
}
}


}
}

0 comments on commit 67cca98

Please sign in to comment.