HiCPipeline


class chr3d.HiCPipeline(
    genome_index: str,
    chrom_sizes: str,
    threads: int = 1,
    assembly: str = 'hg38',
    min_mapq: int = 30,
    min_distance: int = 1000,
    resolutions: Optional[List[int]] = None,
    n_splits: int = 0,
    call_tads: bool = True,
    tad_windows: Optional[List[int]] = None,
    call_loops: bool = True,
    loop_fdr: float = 0.1,
    call_compartments: bool = True,
    compartment_phasing_track: Optional[str] = None,
    fragment_bed: Optional[str] = None,
)

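End-to-end Hi-C data processing pipeline orchestrator. Chains alignment, SAM/BAM processing, pairs filtering, and contact-matrix generation into a single pipeline, with optional TAD, loop, and A/B compartment calling.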

Parameters

Parameter	Type	Description
genome_index	`str`	Path to BWA-indexed genome FASTA
chrom_sizes	`str`	Path to chromosome sizes file
threads	`int`	Number of threads for parallel processing (default: 1)
assembly	`str`	Genome assembly name (default: `'hg38'`)
min_mapq	`int`	Minimum mapping quality (default: 30)
min_distance	`int`	Minimum pair distance in bp (default: 1000)
resolutions	`Optional[List[int]]`	List of matrix resolutions in bp (default: `None`, which uses `[1000, 5000, 10000, 25000, 50000, 100000]`)
n_splits	`int`	Split FASTQ into N chunks for parallel alignment; 0 = no splitting (default: 0)
call_tads	`bool`	Run TAD/insulation calling after matrix generation (default: `True`)
tad_windows	`Optional[List[int]]`	Window sizes in bp for insulation scoring (default: `None`, which uses the library's default window sizes)
call_loops	`bool`	Run loop calling after matrix generation (default: `True`)
loop_fdr	`float`	FDR threshold for loop significance (default: 0.1)
call_compartments	`bool`	Run A/B compartment calling (default: `True`)
compartment_phasing_track	`Optional[str]`	Path to a BED file used to phase (orient) the sign of the E1 compartment eigenvector (default: `None`)
fragment_bed	`Optional[str]`	Path to a restriction fragment BED file (default: `None`)

Methods

run


def run(
    self,
    fastq1: Optional[str] = None,
    fastq2: Optional[str] = None,
    output_dir: str = './results',
    sample_id: str = 'sample',
    cleanup: bool = False,
    start_from: int = 1,
) -> Dict[str, Any]

Run the complete Hi-C pipeline, or resume from a later step.

Parameters:

Parameter	Type	Description
fastq1	`Optional[str]`	Path to R1 FASTQ file (default: `None`)
fastq2	`Optional[str]`	Path to R2 FASTQ file (default: `None`)
output_dir	`str`	Output directory (default: `'./results'`)
sample_id	`str`	Sample identifier (default: `'sample'`)
cleanup	`bool`	Remove intermediate files (default: `False`)
start_from	`int`	Step to resume from: 1=alignment, 2=SAM/BAM, 3=pairs, 4=matrix (default: 1)

Returns:

`Dict[str, Any]` with the following keys:

'output_sam': Path to aligned SAM file
'sorted_bam': Path to sorted BAM file
'filtered_pairs': Path to filtered pairs file
'cool_file': Path to contact matrix .cool file
'mcool_file': Path to multi-resolution .mcool file
'timing': Step-by-step timing breakdown

Example:


import chr3d as c3d
 
hic = c3d.HiCPipeline(
    genome_index="/data/genomes/hg38.fa",
    chrom_sizes="/data/genomes/hg38.chrom.sizes",
    threads=24
)
 
stats = hic.run(
    fastq1="sample_R1.fastq.gz",
    fastq2="sample_R2.fastq.gz",
    output_dir="results/",
    sample_id="sample1"
)
 
print(f"Output mcool: {stats['mcool_file']}")