coelution_isobar, coelution_isf, set_state!, using @info instead of print, kw update for ChemistryQuantitativeAnalysis v0.4
yufongpeng · Jan 18, 2024 · 2d937bd · 2d937bd
1 parent 8ca6fff
commit 2d937bd
Show file tree

Hide file tree

Showing 421 changed files with 14,679 additions and 19,070 deletions.
diff --git a/Project.toml b/Project.toml
@@ -30,7 +30,7 @@ UnitfulMoles = "999f2bd7-36bf-5ba7-9bc1-c9473aa75374"
 [compat]
 AnovaGLM = "0.2"
 CSV = "0.10"
-ChemistryQuantitativeAnalysis = "0.3"
+ChemistryQuantitativeAnalysis = "0.4"
 Clustering = "0.15"
 DataPipes = "0.3"
 Dictionaries = "0.3"

diff --git a/src/SphingolipidsID.jl b/src/SphingolipidsID.jl
@@ -54,13 +54,13 @@ export SPDB, LIBRARY_POS, FRAGMENT_POS, ADDUCTCODE, CLASSDB,
  analytetable_mrm, transitiontable, concurrent_transition, write_transition, 
  read_transition, union_transition!, union_transition, diff_transition!, diff_transition,
  # Quantification
- quantification_mrm, set_quantification_mrm!, match, match!, quantify, quantify!, set_qc_id!, qctable, set_serialdilution!, run_serialdilution, 
+ quantification_mrm, set_quantification_mrm!, match, match!, quantify, quantify!, set_qc_id!, qctable, set_serialdilution_id!, run_serialdilution, 
  run_serialdilution!, serialdilutiontable,
  # Utils/Query
  q!, qand, qor, qnot, spid, analyteid, transitionid, lcb, acyl, acylα, acylβ, reuse,
  new_project, allow_unknown, only_known, ncb, ndb, nox, ndbox, compound_formula, 
  mw, mz, class, chain, sumcomp, rt, cluster, incluster, @ri_str,
- assign_parent!, assign_isf_parent!, mode,
+ assign_parent!, assign_isf_parent!, mode, coeluting_isf, coeluting_isobar, set_state!,
  # RT prediction
  initialize_cluster!, analyte2cluster!, select_cluster!,
  model_cluster!, compare_models, @model, predfn_cluster!,
@@ -71,7 +71,7 @@ export SPDB, LIBRARY_POS, FRAGMENT_POS, ADDUCTCODE, CLASSDB,
 
 import Base: getproperty, propertynames, show, print, isless, isequal, isempty, keys, length, iterate, getindex, view, firstindex, lastindex, sort, sort!, in,
  union, union!, intersect, setdiff, deleteat!, delete!, push!, pop!, popat!, popfirst!, reverse, reverse!, getproperty, copy, convert, match
-
+import JLD2: wconvert, rconvert, writeas
 """
  const SPDB
 

diff --git a/src/io.jl b/src/io.jl
@@ -326,57 +326,63 @@ function read_preis(file::String)
  PreIS(Table(tbl; mz2_id = map(x -> findfirst(==(x), mz2), tbl.mz2), mz2 = nothing, range = nothing, polarity = nothing), range, mz2, polarity, delete!(config, :table))
 end
 
-function read_project(file::String)
- endswith(file, ".project") || throw(ArgumentError("The file is not a valid Project directory"))
- spdb = JLD2.load_object(joinpath(file, "SPDB.jld2"))
- for (i, p) in enumerate(spdb[:SCORE].param)
- in(p, SPDB[:SCORE].param) && continue
- if length(p) == 2
- push!(SPDB[:SCORE].param, p)
- id = lastindex(SPDB[:SCORE].param)
- objective = eval(p.objective)
- push!(SPDB[:SCORE].fn, (analyte, mutate) -> mutate ? (analyte.score = id => objective(analyte)) : (id => objective(analyte)))
- else
- weight = @match p.weight begin
- 1 => :w1
- _ => p.weight
- end # any 2-arg fn: (analyte, cpd) -> Number
- replace_int = @λ begin
- x::Int => Expr(:call, :get!, :dict, x, 0)
- :all => Expr(:call, :sum, Expr(:call, :values, :dict))
- x::Expr => Expr(x.head, map(replace_int, x.args)...)
- x => x
- end
- transform_score(dict) = eval(replace_int(p.objective))
- converter = eval(p.converter)
- weight = eval(weight)
- push!(SPDB[:SCORE].param, p)
- i = lastindex(SPDB[:SCORE].param)
- fn = (analyte::AnalyteSP, mutate::Bool) -> begin
- dict = Dict{Int, Float64}()
- for cpd in analyte
- uw = converter(cpd.state)
- dict[uw] = get!(dict, uw, 0) + weight(analyte, cpd)
- end
- result = i => transform_score(dict)
- mutate ? (analyte.cpdsc = result) : result
+function cons_score!(p)
+ if length(p) == 2
+ push!(SPDB[:SCORE].param, p)
+ id = lastindex(SPDB[:SCORE].param)
+ objective = eval(p.objective)
+ push!(SPDB[:SCORE].fn, (analyte, mutate) -> mutate ? (analyte.score = id => objective(analyte)) : (id => objective(analyte)))
+ else
+ weight = @match p.weight begin
+ 1 => :w1
+ _ => p.weight
+ end # any 2-arg fn: (analyte, cpd) -> Number
+ replace_int = @λ begin
+ x::Int => Expr(:call, :get!, :dict, x, 0)
+ :all => Expr(:call, :sum, Expr(:call, :values, :dict))
+ x::Expr => Expr(x.head, map(replace_int, x.args)...)
+ x => x
+ end
+ transform_score(dict) = eval(replace_int(p.objective))
+ converter = eval(p.converter)
+ weight = eval(weight)
+ push!(SPDB[:SCORE].param, p)
+ i = lastindex(SPDB[:SCORE].param)
+ fn = (analyte::AnalyteSP, mutate::Bool) -> begin
+ dict = Dict{Int, Float64}()
+ for cpd in analyte
+ uw = converter(cpd.state)
+ dict[uw] = get!(dict, uw, 0) + weight(analyte, cpd)
  end
- push!(SPDB[:SCORE].fn, fn)
+ result = i => transform_score(dict)
+ mutate ? (analyte.cpdsc = result) : result
  end
+ push!(SPDB[:SCORE].fn, fn)
  end
- for (k, v) in pairs(spdb)
- if get(SPDB, k, nothing) != v
- SPDB[k] = v
- end
+end
+
+function read_project(file::String)
+ endswith(file, ".project") || throw(ArgumentError("The file is not a valid Project directory"))
+ @info "Read | $file as Project"
+ @info "Read | SPDB"
+ spdb = JLD2.load_object(joinpath(file, "SPDB.jld2"))
+ @info "Read | Reconstructing score function"
+ for p in spdb[:SCORE].param
+ in(p, SPDB[:SCORE].param) || cons_score!(p)
  end
+ @info "Read | appendix"
  appendix = JLD2.load_object(joinpath(file, "appendix.jld2"))
+ @info "Read | quantification"
  quantification = read_quantification(joinpath(file, "quantification.qt"))
+ @info "Read | analyte"
  analyte = JLD2.load_object(joinpath(file, "analyte.jld2"))
  project = last(first(analyte)).project
  projects = map(JLD2.load_object(joinpath(file, "projects.jld2"))) do x
  isnothing(x) ? project : x
  end
+ @info "Read | data"
  rt_corrections = JLD2.load_object(joinpath(file, "rt_corrections.jld2"))
+ # reconstruct model function?
  data = map(x -> read(x), readdir(joinpath(file, "data"); join = true))
  for (dt, p, r) in zip(data, projects, rt_corrections)
  ismissing(p) || set!(dt.config, :project, p)
@@ -386,6 +392,12 @@ function read_project(file::String)
  project.data = data
  project.quantification = quantification
  project.appendix = appendix
+ #=
+ if haskey(appendix, :predfn_replaces)
+ @info "Read | Reconstructing cluster_predict function"
+ predfn_cluster!(project; appendix[:predfn_replaces]...)
+ end
+ =#
  project
 end
 
@@ -572,49 +584,51 @@ end
 Base.show(io::IO, analyte::AnalyteID) = print(io, isempty(analyte.compound) ? "?" : last(analyte), " @", round(analyte.rt, digits = 2), " MW=", round(mw(analyte), digits = 4))
 Base.show(io::IO, transition::TransitionID) = print(io, transition.compound, transition.quantifier ? "" : " (qualifier)")
 
-Base.show(io::IO, data::PreIS) = print(io, "PreIS in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features")
+Base.show(io::IO, data::PreIS) = print(io, "PreIS(", data.table, ", ", data.range, ", ", data.mz2, ", ", data.polarity, ", ", data.config, ")") # compact constructor-like form; fields printed in order, parentheses balanced
 function Base.show(io::IO, ::MIME"text/plain", data::PreIS)
- print(io, data, ":\n")
+ print(io, typeof(data), " in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features:")
  for dt in zip(data.range, data.mz2)
- println(io, " ", dt[1], " -> ", round(dt[2]; digits = 4))
+ print(io, "\n ", dt[1], " -> ", round(dt[2]; digits = 4))
  end
 end
 
-Base.show(io::IO, data::MRM) = print(io, "MRM in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features")
+Base.show(io::IO, data::MRM) = print(io, "MRM(", data.table, ", ", data.mz2, ", ", data.polarity, ", ", data.config, ")") # compact constructor-like form; fields printed in order, parentheses balanced
 function Base.show(io::IO, ::MIME"text/plain", data::MRM)
- print(io, data, ":\n")
+ print(io, typeof(data), " in ", data.polarity ? "positive" : "negative", " ion mode with ", length(data.table), " features:")
  for (i, m) in enumerate(data.mz2)
  v = @view data.table.mz1[data.table.mz2_id .== i]
- v = length(v) == 1 ? string(round(first(v); digits = 4)) : length(v) > 10 ? string(join(round.(v[1:5]; digits = 4), ", "), ", ..., ", join(round.(v[end - 4:end]; digits = 4), ", ")) : join(round.(v; digits = 4), ", ")
- println(io, " ", v, " -> ", round(m; digits = 4))
+ v = length(v) == 1 ? string(round(first(v); digits = 4)) : length(v) > 10 ? string(join(round.(v[1:5]; digits = 4), ", "), ", …, ", join(round.(v[end - 4:end]; digits = 4), ", ")) : join(round.(v; digits = 4), ", ")
+ print(io, "\n ", v, " -> ", round(m; digits = 4))
  end
 end
 
-Base.show(io::IO, rtcor::RTCorrection) = print(io, "RTCorrection with ", length(rtcor.model), " correction functions")
+Base.show(io::IO, rtcor::RTCorrection) = print(io, "RTCorrection(", rtcor.data, ", ", rtcor.table, ", ", rtcor.model, ", ", rtcor.config, ")") # compact constructor-like form; fields printed in order, parentheses balanced
 function Base.show(io::IO, ::MIME"text/plain", rtcor::RTCorrection)
- print(io, rtcor, ":\n")
+ print(io, typeof(rtcor), " with ", length(rtcor.model), " correction functions:\n")
  show(io, MIME"text/plain"(), rtcor.model)
- println(io)
 end
 
-Base.show(io::IO, pj::Project) = print(io, "Project with ", length(pj), " analytes")
+Base.show(io::IO, qt::Quantification) = print(io, "Quantification(", qt.batch, ", ", qt.config, ")") # compact constructor-like form; fields printed in order, parentheses balanced
+function Base.show(io::IO, ::MIME"text/plain", qt::Quantification)
+ print(io, typeof(qt), " of ")
+ show(io, MIME"text/plain"(), qt.batch)
+end
+Base.show(io::IO, pj::Project) = print(io, "Project(", pj.analyte, ", ", pj.data, ", ", pj.quantification, ", ", pj.appendix, ")") # compact constructor-like form; fields printed in order, parentheses balanced
 function Base.show(io::IO, ::MIME"text/plain", pj::Project)
- print(io, pj, ":\n")
+ print(io, typeof(pj), " with ", length(pj), " analytes:\n")
  println(io, "∘ Salt: ", get(pj.appendix, :anion, "unknown"))
  println(io, "∘ Signal: ", get(pj.appendix, :signal, "unknown"))
  println(io, "∘ Data: ")
  for data in pj.data
  show(io, MIME"text/plain"(), data)
  println(io)
  end
- print(io, "∘ Analytes: ")
+ print(io, "\n∘ Analytes: ")
  if length(pj) > 10
  for analyte in @view pj[1:5]
  print(io, "\n ", analyte)
  end
- println(io, "\n\t", ".")
- println(io, "\t", ".")
- print(io, "\t", ".")
+ print(io, "\n\t", "⋮")
  for analyte in @view pj[end - 4:end]
  print(io, "\n ", analyte)
  end
@@ -623,6 +637,12 @@ function Base.show(io::IO, ::MIME"text/plain", pj::Project)
  print(io, "\n ", analyte)
  end
  end
+ if !isnothing(pj.quantification)
+ print(io, "\n\n∘ Quantification:\n")
+ print(io, lpad("\n", 40, "-"))
+ show(io, MIME"text/plain"(), pj.quantification)
+ print(io, lpad("\n", 40, "-"))
+ end
 end
 
 function Base.show(io::IO, qcmd::QueryCommands)
@@ -664,9 +684,7 @@ function Base.show(io::IO, aquery::Query)
  for r in @view aquery[1:5]
  print(io, "\n ", r)
  end
- println(io, "\n\t", ".")
- println(io, "\t", ".")
- print(io, "\t", ".")
+ print(io, "\n\t", "⋮")
  for r in @view aquery[end - 4:end]
  print(io, "\n ", r)
  end

diff --git a/src/preis.jl b/src/preis.jl
@@ -130,8 +130,7 @@ function preis!(project::Project, preis::PreIS; data_id = -1)
  db = SPDB[preis.polarity ? :LIBRARY_POS : :LIBRARY_NEG]
  for subft in groupview(getproperty(:mz2_id), ft)
  products = id_product(dt.mz2[subft.mz2_id[1]], preis.polarity; mz_tol = preis.config[:mz_tol])
- printstyled("PreIS> ", color = :green, bold = true)
- println(products)
+ @info string("PreIS | ", products)
  for data in subft
  cpdsvanilla = mapreduce(vcat, eachindex(db)) do db_id
  abs(db.mz[db_id] - data.mz1) > preis.config[:mz_tol] ? CompoundSPVanilla[] :
@@ -434,8 +433,7 @@ function preis!(
 
  for subft in groupview(getproperty(:mz2_id), ft)
  products = id_product(dt.mz2[subft.mz2_id[1]], polarity; mz_tol)
- printstyled("PreIS> ", color = :green, bold = true)
- println(products)
+ @info string("PreIS | ", products)
  for data in subft
  cpdsvanilla = mapreduce(vcat, eachindex(db)) do db_id
  abs(db.mz[db_id] - data.mz1) > mz_tol ? CompoundSPVanilla[] :
@@ -469,16 +467,14 @@ Sort, merge, split, and delete analytes after all PreIS data are added.
 function finish_profile!(project::Project; rt_tol = 0.1, err_tol = 0.3)
  set!(project.appendix, :rt_tol, rt_tol)
  set!(project.appendix, :err_tol, err_tol)
- printstyled("PreIS> ", color = :green, bold = true)
- println("Sorting compounds")
+ @info "PreIS | Sorting compounds"
  for analyte in project
  sort!(analyte, lt = isless_class)
  for cpd in analyte
  sort!(cpd.fragment, :ion1; lt = isless_ion)
  end
  end
- printstyled("PreIS> ", color = :green, bold = true)
- println("Merging, splitting and deleting analytes")
+ @info "PreIS | Merging, splitting and deleting analytes"
  del = Int[]
  for (i, analyte) in enumerate(project)
  signal = last(analyte).signal
@@ -518,29 +514,43 @@ function finish_profile!(project::Project; rt_tol = 0.1, err_tol = 0.3)
  end
  unique!(del)
  deleteat!(project, del)
+ @info "PreIS | Assigning ISF"
  assign_isf_parent!(project)
  for analyte in project
+ cpd = last(analyte)
+ id = findall(ion -> in(ion.adduct, class_db_index(ion.molecule).parent_ion), cpd.fragment.ion1)
+ v = query_data.(Ref(project), cpd.fragment.source[id], cpd.fragment.id[id], :isf)
+ analyte.state[state_id(:isf)] = analyte.state[state_id(:isf)] == -1 ? -1 : any(==(1), v) ? 1 : any(==(-1), v) ? -1 : 0
  for cpd in analyte
  cpd.project = project
  end
  end
- printstyled("PreIS> ", color = :green, bold = true)
- println("Sorting analytes")
+ @info "PreIS | Sorting analytes"
  sort!(project; by = x -> (mw(x), rt(x)))
  project
 end
 
-assign_isf_parent!(project::Project) =
- for analyte in project
+function assign_isf_parent!(project::Project)
+ aq = q!(project, qnot(:isf!))
+ for analyte in aq
  for cpd in @view analyte[1:end - 1]
- set_data!.(Ref(project), cpd.fragment.source, cpd.fragment.id, :isf, 1)
+ set_data!.(Ref(project), cpd.fragment.source, cpd.fragment.id, :isf, -1)
  end
+ end
+ ids = Dict{Int, Vector{Int}}()
+ for analyte in aq
  cpd = last(analyte)
  id = findall(ion -> !in(ion.adduct, class_db_index(ion.molecule).parent_ion), cpd.fragment.ion1)
- set_data_if!.(Ref(project), cpd.fragment.source[id], cpd.fragment.id[id], :isf, -1, <(1))
+ set_data!.(Ref(project), cpd.fragment.source[id], cpd.fragment.id[id], :isf, -1)
  id = setdiff(eachindex(cpd.fragment), id)
- set_data!.(Ref(project), cpd.fragment.source[id], cpd.fragment.id[id], :isf, 1)
+ for i in id
+ push!(get!(ids, cpd.fragment.source[i], Int[]), cpd.fragment.id[i])
+ end
+ end
+ for (k, v) in pairs(ids)
+ replace_data!.(Ref(project), k, unique!(v), :isf, 0 => 1, -1 => 0)
  end
+end
 
 assign_parent!(project::Project) =
  for analyte in project