Skip to content

Commit

Permalink
Merge pull request #16 from kgori/scale_q
Browse files Browse the repository at this point in the history
Methods to scale all Q and P matrices such that a branch length of 1 = 1 expected substitution
  • Loading branch information
jangevaare authored May 27, 2019
2 parents 7e83139 + 32a1d6a commit b730fed
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/nucleic_acid/k80/absolute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ end
end
α = mod.α; β = mod.β

e₁ = 0.25 * exp(-4 * β * t)
e₂ = 0.5 * exp(-2 *+ β) * t)
e₁ = 0.25 * exp(-β * t)
e₂ = 0.5 * exp(-0.5 *+ β) * t)

P₁ = 0.25 + e₁ + e₂
P₂ = 0.25 + e₁ - e₂
Expand Down
4 changes: 2 additions & 2 deletions src/nucleic_acid/k80/relative.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ end
end
κ = mod.κ

e₁ = 0.25 * exp(-4 * t/+ 2))
e₂ = 0.5 * exp(-2 * t *+ 1)/+ 2))
e₁ = 0.25 * exp(-t)
e₂ = 0.5 * exp(-0.5 * t *+ 1))

P₁ = 0.25 + e₁ + e₂
P₂ = 0.25 + e₁ - e₂
Expand Down
43 changes: 43 additions & 0 deletions src/nucleic_acid/nucleic_acid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ Q_{A, A} & Q_{A, C} & Q_{A, G} & Q_{A, T} \\\\
Q_{C, A} & Q_{C, C} & Q_{C, G} & Q_{C, T} \\\\
Q_{G, A} & Q_{G, C} & Q_{G, G} & Q_{G, T} \\\\
Q_{T, A} & Q_{T, C} & Q_{T, G} & Q_{T, T} \\end{bmatrix}\$
Call as either
1) `Q(model)`, or
2) `Q(model, bool)`
Form (2) scales the matrix so that ``_π(model) ⋅ -diag(Q(model)) = 1``
when bool=true. `Q(model, false)` is equivalent to `Q(model)`.
"""
@inline function Q(mod::NASM)
α = (mod)
Expand All @@ -41,6 +47,24 @@ Q_{T, A} & Q_{T, C} & Q_{T, G} & Q_{T, T} \\end{bmatrix}\$
β * πT, α * πT, γ * πT, -* πA + α * πC + γ * πG))
end


function Q(mod::NASM, scale::Bool)
if scale
q = Q(mod)
scale = 1/(-diag(q)' * (mod))
return q * scale
else
Q(mod)
end
end


scale_generic(mod::NASM) = 1/(-diag(Q(mod))' * (mod))


_scale(mod::NASM) = scale_generic(mod)


@inline function P_generic(mod::NASM, t::Float64)
if t < 0.0
error("t must be positive")
Expand All @@ -49,6 +73,15 @@ end
end


function P_generic(mod::NASM, t, scale::Bool)
if scale
P(mod, t * _scale(mod))
else
P(mod, t)
end
end


function P_generic(mod::NASM, t::Array{Float64})
if any(t .< 0.0)
error("t must be positive")
Expand Down Expand Up @@ -99,5 +132,15 @@ P_{G, A} & P_{G, C} & P_{G, G} & P_{G, T} \\\\
P_{T, A} & P_{T, C} & P_{T, G} & P_{T, T} \\end{bmatrix}\$
for specified time
Call as either
1) `P(model, t)`, or
2) `P(model, t, bool)`
Form (2) obtains its probabilities from the scaled Q matrix if bool=true.
Branch lengths estimated from a scaled P matrix are in units of expected
number of substitutions per site. `P(model, t, false)` is equivalent to
`P(model, t)`.
"""
P(mod::NASM, t) = P_generic(mod, t)

P(mod::NASM, t, scale) = P_generic(mod, t, scale)
100 changes: 96 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,58 @@ import SubstitutionModels._π,

function Qtest(mod::NASM)
x = Q(mod)
return x[DNA_A, DNA_A] == -(x[DNA_A, DNA_C] +
return x[DNA_A, DNA_A] -(x[DNA_A, DNA_C] +
x[DNA_A, DNA_G] +
x[DNA_A, DNA_T]) &&
x[DNA_C, DNA_C] == -(x[DNA_C, DNA_A] +
x[DNA_C, DNA_C] -(x[DNA_C, DNA_A] +
x[DNA_C, DNA_G] +
x[DNA_C, DNA_T]) &&
x[DNA_G, DNA_G] == -(x[DNA_G, DNA_A] +
x[DNA_G, DNA_G] -(x[DNA_G, DNA_A] +
x[DNA_G, DNA_C] +
x[DNA_G, DNA_T]) &&
x[DNA_T, DNA_T] == -(x[DNA_T, DNA_A] +
x[DNA_T, DNA_T] -(x[DNA_T, DNA_A] +
x[DNA_T, DNA_C] +
x[DNA_T, DNA_G])
end


function test_scaling(mod::NASM)
q = Q(mod, true)
return (mod) -diag(q) 1.0
end


"""
Test that P(t) = exp(Q × t) and Q × t = log(P(t))
"""
function test_p_q_relationship(mod::NASM, scaled::Bool)
q = Q(mod, scaled)
p_1 = P(mod, 1.0, scaled)
p_2 = P(mod, 2.0, scaled)

return p_1 exp(q) &&
p_2 exp(q * 2.0) &&
q real(log(Array(p_1))) &&
q * 2.0 real(log(Array(p_2)))
end

@testset "JC69" begin
testmod1 = JC69()
testmod2 = JC69(1.0)
testmod3 = JC69(2.0)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P_generic(testmod1, [1.5 3.0])[1] P(testmod1, 1.5)
@test P_generic(testmod1, [1.5 3.0])[2] P(testmod1, 3.0)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -45,8 +77,18 @@ end
@testset "K80" begin
testmod1 = K80(0.5)
testmod2 = K80(0.5, 1.0)
testmod3 = K80(0.5, 2.0)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -61,8 +103,18 @@ end
@testset "F81" begin
testmod1 = F81(0.1, 0.2, 0.3, 0.4)
testmod2 = F81(1.0, 0.1, 0.2, 0.3, 0.4)
testmod3 = F81(2.0, 0.1, 0.2, 0.3, 0.4)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -77,8 +129,18 @@ end
@testset "F84" begin
testmod1 = F84(0.75, 0.1, 0.2, 0.3, 0.4)
testmod2 = F84(0.75, 1.0, 0.1, 0.2, 0.3, 0.4)
testmod3 = F84(0.75, 2.0, 0.1, 0.2, 0.3, 0.4)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -93,8 +155,18 @@ end
@testset "HKY85" begin
testmod1 = HKY85(0.75, 0.1, 0.2, 0.3, 0.4)
testmod2 = HKY85(0.75, 1.0, 0.1, 0.2, 0.3, 0.4)
testmod3 = HKY85(0.75, 2.0, 0.1, 0.2, 0.3, 0.4)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -109,8 +181,18 @@ end
@testset "TN93" begin
testmod1 = TN93(0.6, 0.7, 0.1, 0.2, 0.3, 0.4)
testmod2 = TN93(0.6, 0.7, 1.0, 0.1, 0.2, 0.3, 0.4)
testmod3 = TN93(0.6, 0.7, 2.0, 0.1, 0.2, 0.3, 0.4)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test isapprox(diag(P(testmod1, 1.0e9)), (testmod1), atol = 1.0e-5)
Expand All @@ -125,8 +207,18 @@ end
@testset "GTR" begin
testmod1 = GTR(1.0, 2.0, 3.0, 4.0, 5.0, 0.1, 0.2, 0.3, 0.4)
testmod2 = GTR(1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 0.1, 0.2, 0.3, 0.4)
testmod3 = GTR(1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 0.1, 0.2, 0.3, 0.4)
@test Qtest(testmod1)
@test Q(testmod1) == Q(testmod2)
@test test_scaling(testmod1)
@test test_scaling(testmod2)
@test test_scaling(testmod3)
@test test_p_q_relationship(testmod1, true)
@test test_p_q_relationship(testmod2, true)
@test test_p_q_relationship(testmod3, true)
@test test_p_q_relationship(testmod1, false)
@test test_p_q_relationship(testmod2, false)
@test test_p_q_relationship(testmod3, false)
@test P(testmod1, 1.0e2) P_generic(testmod1, 1.0e2)
@test P(testmod2, 1.0e2) P_generic(testmod2, 1.0e2)
@test P(testmod1, [1.0 2.0])[1] P_generic(testmod1, 1.0)
Expand Down

0 comments on commit b730fed

Please sign in to comment.