{"lab": {"@id": "/labs/4dn-dcic-lab/", "uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "display_title": "4DN DCIC, HMS", "@type": ["Lab", "Item"], "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "award": {"@id": "/awards/1U01CA200059-01/", "display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "title": "ATAC-seq", "status": "released", "cfde_term": {"status": "released", "uuid": "16b1d5fa-60e1-4492-8455-07907c159ae1", "display_title": "bulk assay for transposase-accessible chromatin using sequencing", "@id": "/ontology-terms/OBI:0003089/", "@type": ["OntologyTerm", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "other_tags": ["1D"], "date_created": "2019-03-28T18:52:07.876815+00:00", "submitted_by": {"error": "no view permissions"}, "last_modified": {"modified_by": {"error": "no view permissions"}, "date_modified": "2023-03-01T20:01:48.836342+00:00"}, "raw_file_types": "Reads (fastq) provided by lab", "reference_pubs": [{"authors": ["Buenrostro JD", "Giresi PG", "Zaba LC", "Chang HY", "Greenleaf WJ"], "short_attribution": "Buenrostro JD et al. (2013)", "uuid": "081e8f0e-fb2e-4943-8805-932367d4fa42", "journal": "Nature methods", "@type": ["Publication", "Item"], "@id": "/publications/081e8f0e-fb2e-4943-8805-932367d4fa42/", "status": "current", "display_title": "Buenrostro JD et al. 
(2013) PMID:24097267", "date_published": "2013-12", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}], "schema_version": "1", "static_content": [{"content": {"filetype": "md", "display_title": "Overview", "title": "Overview", "uuid": "028571fa-7f64-4fa1-9afa-1a1444df1e0f", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/028571fa-7f64-4fa1-9afa-1a1444df1e0f/", "name": "resources.data-analysis.atacseq-processing-pipeline.overview", "content": "The 4DN ATAC-seq data processing pipeline uses the ENCODE ATAC-seq pipeline v1.1.1. We have modified the logistics of the pipeline execution without changing the content of the pipeline.\n\nWe have split the pipeline into two sub-pipelines: 1) alignment and filtering and 2) peak calling. A quality control report is generated at each step.\n\nFor certain cases, we add a replicate-merging step between the two steps (not a part of the ENCODE pipeline).\n* If an experiment set has >1 biological replicates and >1 technical replicates in a biological replicate, we merge the technical replicates.\n\nWe perform the basic QC that comes with the ATAC-seq pipeline, but we do not perform ATAQC, an additional QC that requires a set of reference files that do not yet have official release/documentation, i.e. 
we use the flag `atac.disable_ataqc=True`.\n\nFor more detail, see description/documentation from ENCODE.\n\n<dl> <a href=\"https://s3.amazonaws.com/4dn-dcic-public/static-pages/atac-pipeline.png\" target=\"_blank\"> <img src=\"https://s3.amazonaws.com/4dn-dcic-public/static-pages/atac-pipeline.png\" /> </a> </dl>\n", "options": {"filetype": "md", "collapsible": false, "default_open": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:data_processing"}, {"content": {"filetype": "md", "display_title": "Alignment and filtering", "title": "Alignment and filtering", "uuid": "1bc67741-3be5-4bae-bd3d-2694eb3a9211", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/1bc67741-3be5-4bae-bd3d-2694eb3a9211/", "name": "resources.data-analysis.atacseq-processing-pipeline.aln", "content": "The first step is run on the fastq files that correspond to a single technical replicate (a single technical replicate may contain multiple sequencing replicates).\n\nReads are aligned to the reference genome with `bowtie2` and filtered. 
The output (tagAlign) is a set of read positions in gzipped bed format.\n\nThis step is equivalent to running the ENCODE ATAC-seq pipeline with parameter `align_only=True`.\n\nA quality control report is linked from the main output `tagAlign` file.\n\nA more detailed description of this step can be found at : [Workflow graph and metadata](https://data.4dnucleome.org/workflows/d497c075-ab57-4c5b-8c65-7bb8410f26fa/)", "options": {"filetype": "md", "collapsible": false, "default_open": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:data_processing"}, {"content": {"filetype": "md", "display_title": "Merging", "title": "Merging", "uuid": "a48b793f-45b6-4ea5-a1e7-5a00fd12e2e6", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/a48b793f-45b6-4ea5-a1e7-5a00fd12e2e6/", "name": "resources.data-analysis.atacseq-processing-pipeline.merge", "content": "In some cases, replicates are merged after the first step. 
The merging rule is as below.\n\n* If an experiment set has more than 1 biological replicate, and each biological replicate has more than 1 technical replicate, then the technical replicates are merged.\n* If an experiment set has 1 biological replicate, the technical replicates are treated as if they were biological replicates in the subsequent step. \n\nA more detailed description of this step can be found at : [Workflow graph and metadata](https://data.4dnucleome.org/workflows/2b10e472-065e-43ed-992c-fccad6417b65)\n", "options": {"filetype": "md", "collapsible": false, "default_open": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:data_processing"}, {"content": {"filetype": "md", "display_title": "Peak calling and Quality Report Generation", "title": "Peak calling and Quality Report Generation", "uuid": "ce510b9b-f3fe-465a-ac40-a0493b6d0ac1", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/ce510b9b-f3fe-465a-ac40-a0493b6d0ac1/", "name": "resources.data-analysis.atacseq-processing-pipeline.post-aln", "content": "Using TagAlign files obtained from the earlier step, a signal fold change track (in `bigwig` format) is calculated using MACS2. 
Peaks are also called using MACS2 and two final call sets (optimal peaks and conservative peaks, in `bigbed` format) are reported after applying an overlap method. A quality control report is linked from the output signal fold change bigwig file.\n\nA more detailed description of this step can be found at : [Workflow graph and metadata](https://data.4dnucleome.org/workflows/510d321f-8a79-479d-95eb-c8fd703972a7/)", "options": {"filetype": "md", "collapsible": false, "default_open": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:data_processing"}, {"content": {"filetype": "md", "display_title": "Source files", "title": "Source files", "uuid": "33742072-356e-4691-9c31-69fd3e9f20f3", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/33742072-356e-4691-9c31-69fd3e9f20f3/", "name": "resources.data-analysis.chipseq-processing-pipeline.source", "content": "The pipeline components are pre-installed in a publicly available Docker image on Docker Hub (`4dn-dcic/encode-chipseq:v2.1.6`), which is adapted from the ENCODE docker image (`quay.io/encode-dcc/chip-seq-pipeline:v2.1.6`). 
The pipeline structure is described in Workflow Description Language (WDL) and has been modified from the original ENCODE WDL. The source code for the Docker image and the WDL code can be found on GitHub.\n\nLatest runs:\n\n* 4DN WDL/Docker : https://github.com/4dn-dcic/chip-seq-pipeline2\n* Original ENCODE WDL/Docker : https://github.com/ENCODE-DCC/chip-seq-pipeline2\n\nThe Docker image for the previous version of the pipeline (`4dn-dcic/encode-chipseq:v1.1.1`) is identical to the ENCODE docker image (`quay.io/encode-dcc/chip-seq-pipeline:v1.1.1`). The WDL for this version has also been modified from the original ENCODE WDL.", "options": {"filetype": "md", "collapsible": false, "default_open": true, "convert_ext_links": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:data_processing"}, {"content": {"filetype": "html", "display_title": "atacseq-processed-files", "uuid": "1eaaab45-2442-4304-947d-9f8f1313ea2f", "lab": {"uuid": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989", "@id": "/labs/4dn-dcic-lab/", "@type": ["Lab", "Item"], "display_title": "4DN DCIC, HMS", "status": "current", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.lab_submitter", "submits_for.828cd4fe-ebb0-4b36-a94a-d2e3a36cc989"]}}, "@id": "/static-sections/1eaaab45-2442-4304-947d-9f8f1313ea2f/", "name": "atacseq-processed-files", "content": "Below is a description of the file formats available on the portal as outputs of the data processing pipeline. 
\n<br><br> \n\n<style> \n    table, th, td { \n        border: 1px solid #ddd; \n        font-size: 100%; \n        padding: 20px; \n    } \n</style> \n<table style=\"width:100%\"> \n    <thead> \n        <tr> \n            <th style=\"text-align:left; padding:15px; width:160px\">File Type</th>\n            <th style=\"text-align:left; padding:15px; width:130px\">File Format</th> \n            <th style=\"text-align:left; padding:15px\">Description</th> \n        </tr> \n    </thead> \n    <tr> \n        <td style=\"padding:20px\">Read positions</td>\n        <td style=\"padding:20px\"><a href=\"http://genome.ucsc.edu/FAQ/FAQformat.html#format1\">bed</a></td>  \n        <td style=\"padding:20px\">Positions of aligned reads in bed format.</td> \n    </tr> \n    <tr> \n        <td style=\"padding:20px\">Peaks</td>\n        <td style=\"padding:20px\"><a href=\"http://genome.ucsc.edu/goldenPath/help/bigBed.html\">bigBed</a> (.bb)</td> \n        <td style=\"padding:20px\">A set of optimal peaks. After peak calling, IDR (irreproducible discovery rate) analysis is performed, which looks for consistency in peak calls. The optimal set consists of the largest set of peaks that pass IDR analysis, derived from either the biological replicates, or from pseudoreplicates (subsampled sets of pooled reads).</td> \n    </tr> \n    <tr> \n        <td style=\"padding:20px\">Conservative peaks</td>\n        <td style=\"padding:20px\"><a href=\"http://genome.ucsc.edu/goldenPath/help/bigBed.html\">bigBed</a> (.bb)</td>\n        <td style=\"padding:20px\">A set of conservative peaks. Specifically, the set of peak calls that pass IDR analysis of biological replicates.</td>\n    </tr>\n    <tr> \n        <td style=\"padding:20px\">Signal fold change</td>\n        <td style=\"padding:20px\"><a href=\"http://genome.ucsc.edu/goldenPath/help/bigWig.html\">bigwig</a> (.bw)</td>\n        <td style=\"padding:20px\">Fold change in signal. 
This file can be visualized on the data portal in <a href=\"https://higlass.io\">HiGlass</a>, and has a QC metric associated with it.</td> \n    </tr> \n</table>", "options": {"filetype": "html", "collapsible": false, "default_open": true}, "status": "released", "award": {"display_title": "4D NUCLEOME NETWORK DATA COORDINATION AND INTEGRATION CENTER - PHASE I", "uuid": "b0b9c607-f8b4-4f02-93f4-9895b461334b", "@type": ["Award", "Item"], "status": "current", "@id": "/awards/1U01CA200059-01/", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "@type": ["StaticSection", "UserContent", "Item"], "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin", "role.owner", "userid.986b362f-4eb6-4a9c-8173-3ab267307e3a"]}}, "location": "tab:processed_files"}], "controlled_term": {"term_url": "http://www.ebi.ac.uk/efo/EFO_0007045", "uuid": "b46d437b-8a2c-4439-b194-8e3339030d86", "@type": ["OntologyTerm", "Item"], "display_title": "ATAC-seq", "status": "released", "term_name": "ATAC-seq", "term_id": "EFO:0007045", "@id": "/ontology-terms/EFO:0007045/", "preferred_name": "ATAC-seq", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}}, "experiment_name": "atac-seq", "valid_item_types": ["ExperimentAtacseq"], "experiment_category": "Sequencing", "assay_classification": "Linear DNA Enrichment", "assay_subclass_short": "Open Chromatin", "assay_subclassification": "Open Chromatin", "@id": "/experiment-types/atac-seq/", "@type": ["ExperimentType", "Item"], "uuid": "5b35245d-5777-4983-b7e8-8dffd9ab83ab", "principals_allowed": {"view": ["system.Everyone"], "edit": ["group.admin"]}, "display_title": "ATAC-seq", "external_references": [], "@context": "/terms/", "aggregated-items": {}, "validation-errors": []}