Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V0.4 #127

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
GeneticVariantBase = "2447270c-d849-4bf9-ac0d-b5c0b265991c"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Expand Down
7 changes: 6 additions & 1 deletion src/SnpArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ __precompile__()

module SnpArrays

using GeneticVariantBase
using CodecZlib, CodecXz, CodecBzip2, CodecZstd, TranscodingStreams
using Adapt, Glob, LinearAlgebra, LoopVectorization, Missings, Mmap, Printf
using Requires, SparseArrays, Statistics, StatsBase
Expand All @@ -18,14 +19,16 @@ import Tables: table
export AbstractSnpArray, AbstractSnpBitMatrix, AbstractSnpLinAlg
export SnpArray, SnpBitMatrix, SnpLinAlg, SnpData, StackedSnpArray
export compress_plink, decompress_plink, split_plink, merge_plink, write_plink
export counts, grm, grm_admixture, maf, mean, minorallele, missingpos, missingrate
export counts, grm, grm_admixture, maf, maf!, mean, minorallele, missingpos, missingrate
export std, var, vcf2plink
export counts, grm, maf, mean, minorallele, missingpos, missingrate, std, var
export vcf2plink, kinship_pruning
export ADDITIVE_MODEL, DOMINANT_MODEL, RECESSIVE_MODEL
export CuSnpArray
import VariantCallFormat: findgenokey, VCF, header

# The export statements above expose the functions needed by users of this package.

const ADDITIVE_MODEL = Val(1)
const DOMINANT_MODEL = Val(2)
const RECESSIVE_MODEL = Val(3)
Expand All @@ -43,6 +46,8 @@ include("linalg_bitmatrix.jl")
include("reorder.jl")
include("vcf2plink.jl")
include("admixture.jl")
include("iterator.jl")

AbstractSnpArray = Union{SnpArray, SubArray{UInt8, 1, SnpArray}, SubArray{UInt8, 2, SnpArray},
StackedSnpArray, SubArray{UInt8, 1, StackedSnpArray}, SubArray{UInt8, 2, StackedSnpArray}}

Expand Down
109 changes: 109 additions & 0 deletions src/iterator.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
    SnpArrayIterator(snpdata::SnpData)

Variant iterator backed by a `SnpData`. Iterating it yields one `SnpArrayIndex`
per column of `snpdata.snparray`.
"""
mutable struct SnpArrayIterator <: VariantIterator
    snpdata::SnpData  # data set whose SNP columns are traversed
end

"""
    SnpArrayIndex(index::Int)

Variant handle produced by iterating a `SnpArrayIterator`. It stores only the
integer `index` used to look up a SNP column / `snp_info` row.
"""
mutable struct SnpArrayIndex <: Variant
    index::Int  # column of the SnpArray (and row of `snp_info`) this handle refers to
end

"""
    Base.eltype(::Type{SnpArrayIterator})

Element type produced by iterating a `SnpArrayIterator`: always `SnpArrayIndex`.

The method is deliberately restricted to `SnpArrayIterator`. Defining it on
`Type{<:VariantIterator}` would be type piracy (neither `Base.eltype` nor the
abstract `VariantIterator` type belongs to this package) and would make every
other `VariantIterator` subtype report `SnpArrayIndex` as its element type.
"""
Base.eltype(::Type{SnpArrayIterator}) = SnpArrayIndex

"""
    Base.iterate(itr::SnpArrayIterator, state=1)

Advance the iterator. `state` is the column number of the next SNP to yield.

Returns `(SnpArrayIndex(state), state + 1)` while `state` does not exceed the
number of columns of `itr.snpdata.snparray`, and `nothing` once it does.
Throws a `BoundsError` when `state <= 0`.
"""
function Base.iterate(itr::SnpArrayIterator, state=1)
    # Reject non-positive states before touching the data.
    state <= 0 && throw(BoundsError(itr, state))

    nsnps = size(itr.snpdata.snparray, 2)
    return state > nsnps ? nothing : (SnpArrayIndex(state), state + 1)
end

"""
    Base.length(itr::SnpArrayIterator)

Number of items the iterator yields: the column count of `itr.snpdata.snparray`.
"""
@inline Base.length(itr::SnpArrayIterator) = size(itr.snpdata.snparray, 2)

"""
    iterator(s::SnpData)

Wrap `s` in a `SnpArrayIterator` and return it.
"""
# NOTE(review): if GeneticVariantBase already provides a generic `iterator`, this
# definition may need to be qualified to extend it — confirm against that package.
iterator(s::SnpData) = SnpArrayIterator(s)

"""
    chrom(s::SnpData, snpindex::SnpArrayIndex)::String

Return the `:chromosome` entry of `s.snp_info` at row `snpindex.index`,
converted to `String` by the return-type annotation.
"""
function chrom(s::SnpData, snpindex::SnpArrayIndex)::String
    row = snpindex.index
    return s.snp_info[row, :chromosome]
end

"""
    pos(s::SnpData, snpindex::SnpArrayIndex)::Int

Return the `:position` entry of `s.snp_info` at row `snpindex.index`,
converted to `Int` by the return-type annotation.
"""
function pos(s::SnpData, snpindex::SnpArrayIndex)::Int
    row = snpindex.index
    return s.snp_info[row, :position]
end

"""
    rsid(s::SnpData, snpindex::SnpArrayIndex)::String

Return the `:snpid` entry of `s.snp_info` at row `snpindex.index`,
converted to `String` by the return-type annotation.
"""
function rsid(s::SnpData, snpindex::SnpArrayIndex)::String
    row = snpindex.index
    return s.snp_info[row, :snpid]
end

# `SnpData` is declared as a subtype of `GeneticData` (see snpdata.jl).

"""
    alleles(s::SnpData, snpindex::SnpArrayIndex)::Vector{String}

Return a two-element vector `[allele1, allele2]` taken from the `:allele1` and
`:allele2` columns of `s.snp_info` at row `snpindex.index`.
"""
function alleles(s::SnpData, snpindex::SnpArrayIndex)::Vector{String}
    row = snpindex.index
    info = s.snp_info
    return [info[row, :allele1], info[row, :allele2]]
end

"""
    alt_allele(s::SnpData, snpindex::SnpArrayIndex)::String

Return the alternate allele, which this implementation reads from the
`:allele2` column of `s.snp_info` at row `snpindex.index`.
"""
function alt_allele(s::SnpData, snpindex::SnpArrayIndex)::String
    row = snpindex.index
    return s.snp_info[row, :allele2]
end

"""
    ref_allele(s::SnpData, snpindex::SnpArrayIndex)::String

Return the reference allele, which this implementation reads from the
`:allele1` column of `s.snp_info` at row `snpindex.index`.
"""
function ref_allele(s::SnpData, snpindex::SnpArrayIndex)::String
    row = snpindex.index
    return s.snp_info[row, :allele1]
end

"""
    MAFData(maf_vector::Vector{Float64})

Immutable container for a vector of minor allele frequencies, indexed by SNP
via `maf_index`.
"""
struct MAFData
    maf_vector::Vector{Float64}  # one frequency per SNP
end

"""
    calculate_maf_data(s::SnpData)

Compute `maf(s.snparray)` and wrap the resulting vector in a `MAFData`.
"""
calculate_maf_data(s::SnpData) = MAFData(maf(s.snparray))

"""
    maf_index(maf_data::MAFData, snpindex::SnpArrayIndex)

Return the entry of `maf_data.maf_vector` at position `snpindex.index`.
"""
function maf_index(maf_data::MAFData, snpindex::SnpArrayIndex)
    position = snpindex.index
    return maf_data.maf_vector[position]
end

"""
    hwepval(s::SnpData, snpindex::SnpArrayIndex)

Count the genotype codes `0x00`, `0x02` and `0x03` in column `snpindex.index`
of `s.snparray` and return `hwe(n00, n01, n11)` (presumably the Hardy-Weinberg
equilibrium p-value — confirm against `hwe`).

Entries with any other code (`0x01`, noted as "missing" in this file) are not
counted.
"""
function hwepval(s::SnpData, snpindex::SnpArrayIndex)
    # A view avoids copying the whole column out of the SnpArray.
    genotypes = @view s.snparray[:, snpindex.index]

    # `count` with a predicate tallies matches in one pass each, without
    # allocating a temporary boolean array per genotype code.
    n00 = count(==(0x00), genotypes)
    n01 = count(==(0x02), genotypes)
    n11 = count(==(0x03), genotypes)

    return hwe(n00, n01, n11)
end

# Genotype codes stored in a SnpArray:
#   0x00 - homozygous for allele 1
#   0x02 - heterozygous
#   0x03 - homozygous for allele 2
#   0x01 - missing genotype

"""
    alt_dosages!(arr::AbstractArray{T}, s::SnpData, snpindex::SnpArrayIndex) where T <: Real

Fill `arr` in place by calling `copyto!` with a view of column `snpindex.index`
of `s.snparray`, then return `arr`.
"""
function alt_dosages!(
    arr::AbstractArray{T}, s::SnpData, snpindex::SnpArrayIndex
) where T <: Real
    column = view(s.snparray, :, snpindex.index)
    copyto!(arr, column)
    return arr
end

"""
    alt_genotypes!(arr::AbstractArray{T}, s::SnpData, snpindex::SnpArrayIndex) where T <: Real

Fill `arr` in place by calling `copyto!` with a view of column `snpindex.index`
of `s.snparray`, then return `arr`.
"""
function alt_genotypes!(
    arr::AbstractArray{T}, s::SnpData, snpindex::SnpArrayIndex
) where T <: Real
    column = view(s.snparray, :, snpindex.index)
    copyto!(arr, column)
    return arr
end
2 changes: 1 addition & 1 deletion src/snpdata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const PERSON_INFO_KEYS = [:fid, :iid, :father, :mother, :sex, :phenotype]

Type to store SNP and person information along with the SnpArray.
"""
struct SnpData
struct SnpData <: GeneticData
people::Int
snps::Int
snparray::SnpArray
Expand Down
Loading