Removes low-quality sequences based on base quality, sequence length, or the number of unknown bases ("N").

bc_seq_filter(
  x,
  min_average_quality = 30,
  min_read_length = 0,
  N_threshold = 0,
  sample_name = ""
)

# S4 method for ShortReadQ
bc_seq_filter(
  x,
  min_average_quality = 30,
  min_read_length = 0,
  N_threshold = 0
)

# S4 method for DNAStringSet
bc_seq_filter(x, min_read_length = 0, N_threshold = 0)

# S4 method for data.frame
bc_seq_filter(x, min_read_length = 0, N_threshold = 0)

# S4 method for character
bc_seq_filter(
  x,
  min_average_quality = 30,
  min_read_length = 0,
  N_threshold = 0,
  sample_name = basename(x)
)

# S4 method for integer
bc_seq_filter(x, min_read_length = 0, N_threshold = 0)

# S4 method for list
bc_seq_filter(
  x,
  min_average_quality = 30,
  min_read_length = 0,
  N_threshold = 0,
  sample_name = names(x)
)

Arguments

x

A single Fastq file, ShortReadQ, DNAStringSet, data.frame, or integer vector, or a list of them.

min_average_quality

A numeric value or a numeric vector, specifying the minimum average base quality a sequence must have to be kept.

min_read_length

A single integer or an integer vector, specifying the minimum sequence length threshold.

N_threshold

An integer or an integer vector, specifying the maximum number of "N" bases allowed in a sequence.

sample_name

A string vector, specifying the sample names in the output.

Value

A ShortReadQ or DNAStringSet object containing the sequences that passed the filters.

Examples

library(ShortRead)

fq_file <- system.file("extdata", "simple.fq", package="CellBarcode")

# apply a filter to fastq files
bc_seq_filter(fq_file)
#> $simple.fq
#> class: ShortReadQ
#> length: 1 reads; width: 12 cycles
#> 

# Read in fastq files to get ShortReadQ object
sr <- readFastq(fq_file[1])
# apply sequencing quality filter to ShortReadQ
bc_seq_filter(sr)
#> class: ShortReadQ
#> length: 1 reads; width: 12 cycles

# get DNAStringSet object
ds <- sread(sr)
# apply the length and N filters to DNAStringSet (this method takes no quality threshold)
bc_seq_filter(ds)
#> DNAStringSet object of length 1:
#>     width seq
#> [1]    12 AAAAAGGCCCCC

###