Skip to content

Commit

Permalink
Add injection_order for featuretable, custumized rt correction model
Browse files Browse the repository at this point in the history
  • Loading branch information
yufongpeng committed Jan 4, 2024
1 parent ef6d1e2 commit dd8c2ef
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 185 deletions.
15 changes: 9 additions & 6 deletions src/SphingolipidsID.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export SPDB, LIBRARY_POS, FRAGMENT_POS, ADDUCTCODE, CLASSDB,
initialize_cluster!, analyte2cluster!, select_cluster!,
model_cluster!, compare_models, @model, predfn_cluster!,
expand_cluster!, update_cluster!, replace_cluster!,
show_cluster, model_rt!, apply_rt!, predict_rt, err_rt, abs_err_rt, rt_correction,
show_cluster, model_rt!, apply_rt!, predict_rt, err_rt, abs_err_rt, rt_correction, update_rt_correction!,
# Plots
plot_rt_mw, plotlyjs, gr, histogram_transition

Expand Down Expand Up @@ -414,27 +414,30 @@ propertynames(quant::Quantification) = (:batch, :config, propertynames(getfield(
Type holding rt data and regression line for correcting rt from old batch (data for identification) to new batch (data for quantification).
* `formula`: formula of regression line.
* `data`: raw data from new batch.
* `table`: old transition table generated from `project.analye` and function `analytetable_mrm`.
* `table`: old transition table generated from `project.analyte` and function `analytetable_mrm`.
* `fn`: `Dictionary` containg regression coefficents for each class.
This is a callable object taking a class and rt or vector of class and a vector of rt as input, and returning new rt or a vector of new rt. If the class is not in `fn`, it will return the original rt.
"""
struct RTCorrection
data::AbstractRawData
table::Table
fn::Dictionary
model::Dictionary
end
(fn::RTCorrection)(cls::ClassSP, dt) = model_predict(get(fn.model, cls, nothing), dt)
(fn::RTCorrection)(cls::Vector{<: ClassSP}, dt) = mapreduce(vcat, cls, dt) do c, d
model_predict(get(fn.model, c, nothing), Table([d]))
end
(fn::RTCorrection)(cls::ClassSP, rt) = [1, rt]'get(fn.fn, cls, [0, 1])
(fn::RTCorrection)(cls::Vector{<: ClassSP}, rt::Vector) = [repeat([1], length(rt)) rt]'get.(Ref(fn.fn), cls, Ref([0, 1]))
function getproperty(rtcor::RTCorrection, p::Symbol)
if !in(p, fieldnames(RTCorrection)) && in(p, propertynames(getfield(rtcor, :table)))
getproperty(getfield(rtcor, :table), p)
else
getfield(rtcor, p)
end
end
propertynames(rtcor::RTCorrection) = Tuple(unique((:data, :table, :fn, propertynames(getfield(rtcor, :table))...)))
propertynames(rtcor::RTCorrection) = Tuple(unique((:data, :table, :model, propertynames(getfield(rtcor, :table))...)))

"""
Project <: AbstractProject
Expand Down
126 changes: 48 additions & 78 deletions src/data.jl
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
"""
sort_data(tbl::Table)
Create a new `Table` and sort it by `:datafile`.
Create a new `Table` and sort it by `:injection_order`.
"""
sort_data(tbl::Table) = sort_data!(deepcopy(tbl))
"""
sort_data!(tbl::Table)
Sort the table by `:datafile` in place.
Sort the table by `:injection_order` in place.
"""
function sort_data!(tbl::Table)
sort!(tbl, :datafile)
sort!(tbl, :injection_order)
tbl
end

"""
file_order(tbl::Table)
Return unique `tbl.datafile` for filling CE, mz2 or m/z range by the order.
Return unique `tbl.datafile` for filling CE, mz2 or m/z range by the `injection_order`.
"""
file_order(tbl::Table) = in(:datafile, propertynames(tbl)) ? unique(tbl.datafile) : throw(ArgumentError("This table doesn't contain datafile."))
file_order(tbl::Table) = !in(:datafile, propertynames(tbl)) ? throw(ArgumentError("This table doesn't contain `datafile`.")) :
!in(:datafile, propertynames(tbl)) ? throw(ArgumentError("This table doesn't contain `injection_order`.")) :
map(x -> tbl.datafile[findfirst(==(x), tbl.injection_order)], sort!(unique(tbl.injection_order)))

"""
fill_polarity!(tbl::Table, polarity::Union{<: AbstractVector, <: Tuple})
Expand All @@ -28,13 +30,12 @@ file_order(tbl::Table) = in(:datafile, propertynames(tbl)) ? unique(tbl.datafile
Fill column `polarity` with `polarity`.
If the input is not a number, it requires that column `datafile` exists and uses the order or values as keys to assign polarity.
When the input is a `Vector` or `Tuple`, the values should represent polarity of each data file in the order as same as `file_order(tbl)`.
When the input is a number, it fills all values with the number.
When the input is a `Vector` or `Tuple`, the values should represent polarity of each `tbl.injection_order`.
When the input is a Dictionary, the keys should contain each `tbl.datafile`.
"""
function fill_polarity!(tbl::Table, polarity)
mapping = datafile_mapping(tbl, polarity)
tbl.polarity .= getindex.(Ref(mapping), tbl.datafile)
tbl.polarity .= fill_data(tbl, polarity)
tbl
end

Expand All @@ -52,13 +53,7 @@ Create a table with column `polarity` and fill it with `polarity`.
See `fill_polarity!`.
"""
function fill_polarity(tbl::Table, polarity)
mapping = datafile_mapping(tbl, polarity)
Table(tbl; polarity = getindex.(Ref(mapping), tbl.datafile))
end

fill_polarity(tbl::Table, polarity::Bool) =
Table(tbl; polarity = repeat([polarity], length(tbl)))
fill_polarity(tbl::Table, polarity) = Table(tbl; polarity = fill_data(tbl, polarity))

"""
fill_mz2!(tbl::Table, mz2::Union{<: AbstractVector, <: Tuple})
Expand All @@ -67,13 +62,12 @@ fill_polarity(tbl::Table, polarity::Bool) =
Fill column `mz2` with `mz2`.
If the input is not a number, it requires that column `datafile` exists and uses the order or values as keys to assign mz2.
When the input is a `Vector` or `Tuple`, the values should represent mz2 of each data file in the order as same as `file_order(tbl)`.
When the input is a number, it fills all values with the number.
When the input is a `Vector` or `Tuple`, the values should represent mz2 of each `tbl.injection_order`.
When the input is a Dictionary, the keys should contain each `tbl.datafile`.
"""
function fill_mz2!(tbl::Table, mz2)
mapping = datafile_mapping(tbl, mz2)
tbl.mz2 .= getindex.(Ref(mapping), tbl.datafile)
tbl.mz2 .= fill_data(tbl, mz2)
tbl
end

Expand All @@ -91,13 +85,7 @@ Create a table with column `mz2` and fill it with `mz2`.
See `fill_mz2!`.
"""
function fill_mz2(tbl::Table, mz2)
mapping = datafile_mapping(tbl, mz2)
Table(tbl; mz2 = getindex.(Ref(mapping), tbl.datafile))
end

fill_mz2(tbl::Table, mz2::Float64) =
Table(tbl; mz2 = repeat([mz2], length(tbl)))
fill_mz2(tbl::Table, mz2) = Table(tbl; mz2 = fill_data(tbl, mz2))

"""
fill_ce!(tbl::Table, ce::Union{<: AbstractVector, <: Tuple})
Expand All @@ -106,13 +94,12 @@ fill_mz2(tbl::Table, mz2::Float64) =
Fill column `collision_energy` with `ce`.
If the input is not a number, it requires that column `datafile` exists and uses the order or values as keys to assign ce.
When the input is a `Vector` or `Tuple`, the values should represent ce of each data file in the order as same as `file_order(tbl)`.
When the input is a number, it fills all values with the number.
When the input is a `Vector` or `Tuple`, the values should represent collision energy of each `tbl.injection_order`.
When the input is a Dictionary, the keys should contain each `tbl.datafile`.
"""
function fill_ce!(tbl::Table, ce)
mapping = datafile_mapping(tbl, ce)
tbl.collision_energy .= getindex.(Ref(mapping), tbl.datafile)
tbl.collision_energy .= fill_data(tbl, ce)
tbl
end

Expand All @@ -130,13 +117,7 @@ Create a table with column `collision_energy` and fill it with `ce`.
See `fill_ce!`.
"""
function fill_ce(tbl::Table, ce)
mapping = datafile_mapping(tbl, ce)
Table(tbl; collision_energy = getindex.(Ref(mapping), tbl.datafile))
end

fill_ce(tbl::Table, ce::Float64) =
Table(tbl; collision_energy = repeat([ce], length(tbl)))
fill_ce(tbl::Table, ce) = Table(tbl; collision_energy = fill_data(tbl, ce))

"""
fill_range!(tbl::Table, mzr::Union{<: AbstractVector, <: Tuple})
Expand All @@ -145,13 +126,12 @@ fill_ce(tbl::Table, ce::Float64) =
Fill column `mz_range` with `mzr`.
If the input is not a interval, it requires that column `datafile` exists and uses the order or values as keys to assign ce.
When the input is a `Vector` or `Tuple`, the values should represent ce of each data file in the order as same as `file_order(tbl)`.
When the input is a interval, it fills all values with the interval.
When the input is a `Vector` or `Tuple`, the values should represent m/z range of each `tbl.injection_order`.
When the input is a Dictionary, the keys should contain each `tbl.datafile`.
"""
function fill_range!(tbl::Table, mzr)
mapping = datafile_mapping(tbl, mzr)
tbl.mz_range .= getindex.(Ref(mapping), tbl.datafile)
tbl.mz_range .= fill_data(tbl, mzr)
tbl
end

Expand All @@ -169,22 +149,11 @@ Create a table with column `mz_range` and fill it with `mzr`.
See `fill_range!`.
"""
function fill_range(tbl::Table, mzr)
mapping = datafile_mapping(tbl, mzr)
Table(tbl; mz_range = getindex.(Ref(mapping), tbl.datafile))
end

fill_range(tbl::Table, mzr::RealIntervals) =
Table(tbl; mz_range = repeat([mzr], length(tbl)))
fill_range(tbl::Table, mzr) = Table(tbl; mz_range = fill_data(tbl, mzr))

function datafile_mapping(tbl::Table, data::Union{<: AbstractVector, <: Tuple})
in(:datafile, propertynames(tbl)) || throw(ArgumentError("Column `datafile` is not in the table"))
Dictionary(unique(tbl.datafile), data)
end
function datafile_mapping(tbl::Table, data::Union{<: AbstractDictionary, <: AbstractDict})
in(:datafile, propertynames(tbl)) || throw(ArgumentError("Column `datafile` is not in the table"))
data
end
fill_data(tbl::Table, data::Union{<: AbstractVector, <: Tuple}) = getindex.(Ref(data), tbl.injection_order)
fill_data(tbl::Table, data::Union{<: AbstractDictionary, <: AbstractDict}) = getindex.(Ref(data), tbl.datafile)
fill_data(tbl::Table, data) = repeat([data], length(tbl))

"""
rsd(v)
Expand All @@ -195,7 +164,7 @@ rsd(v) = std(v) / mean(v)
"""
rmae(v)
Relative mean absolute error, i.e., `(maximum(v) - minimum(v)) / 2 / mean(v)`.
Relative mean absolute error.
"""
function rmae(v)
m = mean(v)
Expand Down Expand Up @@ -226,22 +195,23 @@ end
n = 3,
err_fn = default_error,
err_tol = 0.5,
other_fn = Dictionary{Symbol, Any}([:mz_range, :polarity, :datafile], [splat(union), only ∘ unique, nothing]))
other_fn = Dictionary{Symbol, Any}([:mz_range, :polarity, :datafile, :injection_order], [splat(union), only ∘ unique, nothing, nothing]))
Filter out and combine duplicated or unstable features.
Filter out and combine duplicated or unstable features. Each `tbl.id` will map to a unique feature.
* `filter`: whether filter data based on number of detection and signal errors.
* `combine`: whether combine data after grouping based on `mz1`, `mz2`, `rt`, and `collision_energy`. `datafile` is ignored.
* `filter`: whether filter data based on number of detection and signal errors. Only works when `signal` is not `nothing`.
* `combine`: whether combine data after grouping based on `mz1`, `mz2`, `rt`, and `collision_energy`.
* `use_match_id`: whether using `match_id` instead of `mz1`, `mz2`, `rt` to group data.
* `raw_id`: a `Bool` determining whether preserving the original `tbl.id` as `tbl.raw_id` for staying connection to the raw featuretable.
* `raw_id`: a `Bool` determining whether preserving the original `tbl.id` as `tbl.raw_id` for staying connection to the raw featuretable. Only works when `combine` is true.
* `rt_tol`: maximum allowable difference in retention time for two compounds to consider them as the same.
* `mz_tol`: maximum allowable difference in m/z for two compounds to consider them as the same.
* `n`: minimal number of detection. The default is 3.
* `signal`: `:area` or `:height` for concentration estimation. The default is `:area`. If `nothing` is given, this function will not calculate errors.
* `est_fn`: estimation function for calculating estimated value. The default is `mean`.
* `err_fn`: error function. If the length is three or above, the default is `rsd`; otherwise, it is `re`.
* `err_fn`: error function. If the length is three or above, the default is `rsd`; otherwise, it is `rmae`.
* `err_tol`: maximum allowable signal error.
* `other_fn`: a `Dictionary` specifying functions for calculating other signal information.
* `other_fn`: a `Dictionary` specifying functions for combining other signal information. The keys are column names, and the values are functions. If the value is `nothing`, the column will be removed.
For columns not mentioned, `est_fn` will be used for numeric value, and `first` will be used for other types.
"""
function filter_combine_features!(tbl::Table;
filter = true,
Expand All @@ -255,7 +225,7 @@ function filter_combine_features!(tbl::Table;
est_fn = mean,
err_fn = default_error,
err_tol = 0.5,
other_fn = Dictionary{Symbol, Any}([:mz_range, :polarity, :datafile], [splat(union), only unique, nothing]))
other_fn = Dictionary{Symbol, Any}([:mz_range, :polarity, :datafile, :injection_order], [splat(union), only unique, nothing, nothing]))
sort!(tbl, [:mz2, :mz1, :rt])
calc_signal = !isnothing(signal)
filter = calc_signal && filter
Expand Down Expand Up @@ -289,20 +259,20 @@ function filter_combine_features!(tbl::Table;
if raw_id && combine
ids = map(loc -> tbl.id[loc], locs)
end
#@p tbl |> DataFrame |> groupby(__, :id) |> combine(__, All() .=> mean, :area => err => :error, renamecols = false) |> Table
del = [k for (k, v) in pairs(other_fn) if isnothing(v)]
tbl = Table(tbl; (del .=> nothing)...)
calc_signal && set!(other_fn, signal, est_fn)
get!(other_fn, :id, only unique)
for k in propertynames(tbl)
get!(other_fn, k, mean)
end
fill!(tbl.id, 0)
for (i, loc) in enumerate(locs)
tbl.id[loc] .= i
end
filter!(x -> >(x.id, 0), tbl)
sort!(tbl, :id)
combine || return tbl
del = [k for (k, v) in pairs(other_fn) if isnothing(v)]
tbl = Table(tbl; (del .=> nothing)...)
calc_signal && set!(other_fn, signal, est_fn)
get!(other_fn, :id, only unique)
for k in propertynames(tbl)
eltype(getproperty(tbl, k)) isa Number ? get!(other_fn, k, est_fn) : get!(other_fn, k, first)
end
gtbl = @p tbl groupview(getproperty(:id))
calc_signal && (errors = @p gtbl map(err_fn(getproperty(_, signal))) collect)
nt = @p gtbl map(map((k, v) -> k => other_fn[k](v), propertynames(_), columns(_))) map(NamedTuple)
Expand Down
25 changes: 14 additions & 11 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ function read_featuretable_mzmine3(path;
sym = CSV.read(path, Table; select = idsym, silencewarnings, maxwarnings, debug)
datafile = Dict(propertynames(fwhm) .=> map(col -> match(r".*:(.*):.*", string(col))[1], propertynames(fwhm)))
id = findfirst.(!ismissing, fwhm)
uid = unique(id)
tbl = Table(
id = collect(1:n),
mz1 = tbl.mz,
Expand All @@ -67,7 +68,8 @@ function read_featuretable_mzmine3(path;
FWHM = getindex.(fwhm, id),
symmetry = get.(sym, replace!(findfirst.(!ismissing, sym), nothing => :_symmetry), 1.0),
polarity = trues(n),
datafile = getindex.(Ref(datafile), id)
datafile = getindex.(Ref(datafile), id),
injection_order = map(x -> findfirst(==(x), uid), id)
)
tbl
end
Expand Down Expand Up @@ -145,7 +147,8 @@ function read_featuretable_masshunter_mrm(path;
datafile = (@p zip(data.datafile, rep) |> mapmany(repeat([_[1]], _[2])))
)
tbl = tbl[tbl.symmetry .!= "MM"]
Table(tbl, symmetry = parse.(Float64, string.(tbl.symmetry)))
udf = unique(tbl.datafile)
Table(tbl; symmetry = parse.(Float64, string.(tbl.symmetry)), injection_order = map(x -> findfirst(==(x), udf), tbl.datafile))
end

"""
Expand Down Expand Up @@ -331,48 +334,48 @@ end
Base.show(io::IO, analyte::AnalyteID) = print(io, isempty(analyte.compound) ? "?" : last(analyte), " @", round(analyte.rt, digits = 2), " MW=", round(mw(analyte), digits = 4))
Base.show(io::IO, transition::TransitionID) = print(io, transition.compound, transition.quantifier ? "" : " (qualifier)")

Base.show(io::IO, data::PreIS) = print(io, "PreIS in ", data.polarity ? "positive" : "negative", " ion mode with $(length(data.mz2)) products")
Base.show(io::IO, data::PreIS) = print(io, "PreIS in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features")
function Base.show(io::IO, ::MIME"text/plain", data::PreIS)
print(io, data, ":\n")
for dt in zip(data.range, data.mz2)
println(io, " ", dt[1], " -> ", round(dt[2]; digits = 4))
end
end

Base.show(io::IO, data::MRM) = print(io, "MRM in ", data.polarity ? "positive" : "negative", " ion mode with $(length(data.mz2)) products")
Base.show(io::IO, data::MRM) = print(io, "MRM in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features")
function Base.show(io::IO, ::MIME"text/plain", data::MRM)
print(io, data, ":\n")
for (i, m) in enumerate(data.mz2)
v = @view data.table.mz1[data.table.mz2_id == i]
v = @view data.table.mz1[data.table.mz2_id .== i]
v = length(v) > 10 ? string(join(round.(v[1:5]; digits = 4), ", "), ", ..., ", join(round.(v[end - 4:end]; digits = 4))) : join(round.(v; digits = 4), ", ")
println(io, " ", v, " -> ", round(m; digits = 4))
end
end

Base.show(io::IO, data::QuantData{T}) where T = print(io, "QuantData{$T} ", data.raw)
Base.show(io::IO, data::QuantData{T}) where T = print(io, "QuantData{$T} in ", data.raw.polarity ? "positive" : "negative", " ion mode with ", length(data.analyte), " analytes")
function Base.show(io::IO, ::MIME"text/plain", data::QuantData{T}) where T
print(io, data, ":\n")
show(io, MIME"text/plain"(), data.table)
end
Base.show(io::IO, data::QCData{T}) where T = print(io, "QCData{$T} ", data.raw)
Base.show(io::IO, data::QCData{T}) where T = print(io, "QCData{$T} in ", data.raw.polarity ? "positive" : "negative", " ion mode with ", length(data.analyte), " analytes")
function Base.show(io::IO, ::MIME"text/plain", data::QCData{T}) where T
print(io, data, ":\n")
show(io, MIME"text/plain"(), qualitytable(data))
end
Base.show(io::IO, data::SerialDilution{T}) where T = print(io, "SerialDilution{$T} ", data.raw)
Base.show(io::IO, data::SerialDilution{T}) where T = print(io, "SerialDilution{$T} in ", data.raw.polarity ? "positive" : "negative", " ion mode with ", length(data.analyte), " analytes")
function Base.show(io::IO, ::MIME"text/plain", data::SerialDilution{T}) where T
print(io, data, ":\n")
show(io, MIME"text/plain"(), qualitytable(data))
end
Base.show(io::IO, qt::Quantification) = print(io, "Quantification")
Base.show(io::IO, qt::Quantification) = print(io, "Quantification with ", length(qt.analyte), " analytes")
function Base.show(io::IO, ::MIME"text/plain", qt::Quantification)
print(io, qt, ":\n")
show(io, MIME"text/plain"(), qt.batch)
end
Base.show(io::IO, rtcor::RTCorrection) = print(io, "RTCorrection")
Base.show(io::IO, rtcor::RTCorrection) = print(io, "RTCorrection with ", length(rtcor.model), " correction functions")
function Base.show(io::IO, ::MIME"text/plain", rtcor::RTCorrection)
print(io, rtcor, ":\n")
show(io, MIME"text/plain"(), rtcor.fn)
show(io, MIME"text/plain"(), rtcor.model)
end

Base.show(io::IO, pj::Project) = print(io, "Project with ", length(pj), " analytes")
Expand Down
Loading

0 comments on commit dd8c2ef

Please sign in to comment.