{"id":23116068,"url":"https://github.com/micmurawski/cloud-array","last_synced_at":"2025-08-12T23:03:53.935Z","repository":{"id":59472851,"uuid":"471023776","full_name":"micmurawski/cloud-array","owner":"micmurawski","description":"cloud-array is an open-source Python library for storing and streaming large Numpy Arrays on local file systems and major cloud proviers CDNs.","archived":false,"fork":false,"pushed_at":"2024-10-24T13:55:02.000Z","size":44,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"main","last_synced_at":"2025-07-07T22:12:49.443Z","etag":null,"topics":["aws","azure","big-data","bigarray","blob-storage","cloud","data-structures","digitalocean-spaces","gcp","gcp-cloud-storage","ibm-cloud-object-storage","numpy","s3","stream-processing","streaming","zadara"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/micmurawski.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2022-03-17T14:29:43.000Z","updated_at":"2022-04-05T14:51:27.000Z","dependencies_parsed_at":"2025-06-01T18:48:31.230Z","dependency_job_id":null,"html_url":"https://github.com/micmurawski/cloud-array","commit_stats":null,"previous_names":[],"tags_count":5,"template":false,"template_full_name":null,"purl":"pkg:github/micmurawski/cloud-array","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/micmurawski%2Fcloud-array","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/micmurawski%2Fcloud-array/ta
gs","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/micmurawski%2Fcloud-array/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/micmurawski%2Fcloud-array/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/micmurawski","download_url":"https://codeload.github.com/micmurawski/cloud-array/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/micmurawski%2Fcloud-array/sbom","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":270149345,"owners_count":24535728,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-08-12T02:00:09.011Z","response_time":80,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["aws","azure","big-data","bigarray","blob-storage","cloud","data-structures","digitalocean-spaces","gcp","gcp-cloud-storage","ibm-cloud-object-storage","numpy","s3","stream-processing","streaming","zadara"],"created_at":"2024-12-17T04:10:51.530Z","updated_at":"2025-08-12T23:03:53.881Z","avatar_url":"https://github.com/micmurawski.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Cloud Array\n\n`cloud-array` is an open-source Python library for storing and streaming large Numpy Arrays on local file systems and major cloud providers CDNs. 
It automatically chunks a large array of data into arbitrary chunk sizes and uploads them into the targeted directory.\n \n ```python\nimport numpy as np\nfrom cloud_array import CloudArray\n\nshape = (10000, 100, 100)\nchunk_shape = (10, 10, 10)\n\nf = np.memmap(\n    'memmapped.dat',\n    dtype=np.float32,\n    mode='w+',\n    shape=shape\n)\n\narray = CloudArray(\n    chunk_shape=chunk_shape,\n    array=f,\n    url=\"s3://example_bucket/dataset0\"\n)\narray.save()\nprint(array[:100,:100,:100])\n\n ```\n ## Links\n* https://pypi.org/project/cloud-array/","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmicmurawski%2Fcloud-array","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmicmurawski%2Fcloud-array","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmicmurawski%2Fcloud-array/lists"}