{"id":16265053,"url":"https://github.com/robaina/parallelbam","last_synced_at":"2026-03-12T12:10:21.674Z","repository":{"id":57450704,"uuid":"399551602","full_name":"Robaina/parallelBAM","owner":"Robaina","description":"Tools to parallelize operations on large BAM files","archived":false,"fork":false,"pushed_at":"2022-09-07T08:09:20.000Z","size":334,"stargazers_count":2,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"main","last_synced_at":"2025-03-16T22:27:14.430Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"cc-by-4.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/Robaina.png","metadata":{"files":{"readme":"README.ipynb","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2021-08-24T17:31:15.000Z","updated_at":"2023-11-24T04:38:19.000Z","dependencies_parsed_at":"2022-09-10T05:51:29.041Z","dependency_job_id":null,"html_url":"https://github.com/Robaina/parallelBAM","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Robaina%2FparallelBAM","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Robaina%2FparallelBAM/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Robaina%2FparallelBAM/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Robaina%2FparallelBAM/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/Robaina","download_url":"https://codeload.github.com/Robaina/parallelBAM/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories
_count":244029438,"owners_count":20386407,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-10-10T17:05:52.326Z","updated_at":"2026-03-12T12:10:21.587Z","avatar_url":"https://github.com/Robaina.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Parallelizing operations on SAM/BAM files\\n\",\n    \"\\n\",\n    \"SAM/BAM files are typically large, thus, operations on these files are time intensive. This project provides tools to parallelize operations on SAM/BAM files. The workflow follows:\\n\",\n    \"\\n\",\n    \"1. Split BAM/SAM file in _n_ chunks\\n\",\n    \"2. Perform operation in each chunk in a dedicated process and save resulting SAM/BAM chunk \\n\",\n    \"3. Merge results back into a single SAM/BAM file\\n\",\n    \"\\n\",\n    \"Depends on:\\n\",\n    \"\\n\",\n    \"1. Samtools\\n\",\n    \"\\n\",\n    \"# Installation\\n\",\n    \"\\n\",\n    \"1. Git clone project\\n\",\n    \"2. cd to cloned project directory\\n\",\n    \"3. ```sudo python setup.py install```\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Usage\\n\",\n    \"\\n\",\n    \"There is one main function named ```parallelizeBAMoperation```. This function takes as mandatory arguments:\\n\",\n    \"\\n\",\n    \"1. path to original bam file (should be ordered)\\n\",\n    \"2. 
a callable function to perform the operation on each bam file chunk\\n\",\n    \"\\n\",\n    \"The callable function must accept the following two first arguments: (i) path to input bam file and (ii) path to resulting output bam file, in this order.\\n\",\n    \"\\n\",\n    \"# Note\\n\",\n    \"\\n\",\n    \"Preparing a bam file to run an operation in parallel takes a while, thus it is not worth it when the operation itself takes a short time. For example, preparing a typical bam file for parallelization (in 8 processes) can take almost a minute.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import shutil\\n\",\n    \"from parallelbam.parallelbam import parallelizeBAMoperation, getNumberOfReads\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def foo(input_bam, output_bam):\\n\",\n    \"    shutil.copyfile(input_bam, output_bam)\\n\",\n    \"    \\n\",\n    \"    \\n\",\n    \"parallelizeBAMoperation('parallelbam2/tests/toy_sample.bam',\\n\",\n    \"                        foo, output_path=None,\\n\",\n    \"                        n_processes=4)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"1000\"\n      ]\n     },\n     \"execution_count\": 5,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"getNumberOfReads('parallelbam2/tests/toy_sample.bam')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"1000\"\n      ]\n     },\n     \"execution_count\": 6,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    
\"getNumberOfReads('parallelbam2/tests/processed.bam')\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.10.6 ('parallelbam')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.10.6\"\n  },\n  \"vscode\": {\n   \"interpreter\": {\n    \"hash\": \"d7acfd6951bc0fa0485d92016d3c99713d079b52e001e9aed733ba0bc441773f\"\n   }\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Frobaina%2Fparallelbam","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Frobaina%2Fparallelbam","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Frobaina%2Fparallelbam/lists"}