#include "DropletUtils.h" template std::vector process_list(Rcpp::List incoming) { const size_t nsamples=incoming.size(); std::vector output(nsamples); for (size_t i=0; i void compare_lists(U left, V right) { if (left.size()!=right.size()) { throw std::runtime_error("lists are not of the same length"); } const size_t nsamples=left.size(); for (size_t i=0; i(cells); auto Genes=process_list(genes); auto Umis=process_list(umis); auto Reads=process_list(reads); compare_lists(Cells, Genes); compare_lists(Cells, Umis); compare_lists(Cells, Reads); const double mf=check_numeric_scalar(minfrac, "minimum fraction"); const int diagcode=check_numeric_scalar(diagnostics, "diagcode"); // Setting up the ordering vector. const size_t nsamples=Cells.size(); size_t nmolecules=0; for (size_t i=0; i molecule; std::vector ordering(nmolecules); auto ordIt=ordering.begin(); for (size_t i=0; ifirst = i; ordIt->second = j; ++ordIt; } } // Sorting the indices based on the list values. std::sort(ordering.begin(), ordering.end(), [&](const molecule& left, const molecule& right) { const auto& left_gene=Genes[left.first][left.second]; const auto& right_gene=Genes[right.first][right.second]; if (left_gene < right_gene) { return true; } else if (left_gene > right_gene) { return false; } const auto& left_umi=Umis[left.first][left.second]; const auto& right_umi=Umis[right.first][right.second]; if (left_umi < right_umi) { return true; } else if (left_umi > right_umi) { return false; } const auto& left_cell=Cells[left.first][left.second]; const auto& right_cell=Cells[right.first][right.second]; return left_cell < right_cell; }); // Setting up the output, indicating which values to keep from each sample. std::vector notswapped(nsamples); for (size_t i=0; ifirst][ostart->second]; int total_nreads=max_nread; auto best_mol=ostart; // ostart is always equal to oend at this point, so incrementing the latter to get to the next read. ++oend; while (oend!=ordering.end() && same_combination(*ostart, *oend)) { const int current_nread=Reads[oend->first][oend->second]; if (current_nread > max_nread) { max_nread=current_nread; best_mol=oend; } total_nreads += current_nread; ++oend; } if (double(max_nread)/total_nreads >= mf) { notswapped[best_mol->first][best_mol->second]=1; } if (diagcode) { ++nunique; } ostart=oend; } // Creating the output list. Rcpp::List outlist(nsamples); for (size_t i=0; i= nsamples) { throw std::runtime_error("multiple instances of the same combination observed in a single sample"); } indices[nnzero]=oend->first; values[nnzero]=Reads[oend->first][oend->second]; ++oend; ++nnzero; } diag_out->set_row_indexed(counter, nnzero, indices.begin(), values.begin()); ostart=oend; ++counter; } output[1]=diag_out->yield(); } return output; END_RCPP }