{"id":20450013,"url":"https://github.com/turboway/dorisclient","last_synced_at":"2025-04-13T02:11:08.401Z","repository":{"id":37083106,"uuid":"503685500","full_name":"TurboWay/DorisClient","owner":"TurboWay","description":null,"archived":false,"fork":false,"pushed_at":"2024-11-29T06:19:51.000Z","size":104,"stargazers_count":17,"open_issues_count":0,"forks_count":4,"subscribers_count":2,"default_branch":"main","last_synced_at":"2025-03-26T19:45:40.674Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/TurboWay.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2022-06-15T08:46:11.000Z","updated_at":"2024-11-29T06:19:55.000Z","dependencies_parsed_at":"2024-07-08T10:14:52.913Z","dependency_job_id":"caf05440-7cf5-4f8e-b13b-91fbcd9cca90","html_url":"https://github.com/TurboWay/DorisClient","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/TurboWay%2FDorisClient","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/TurboWay%2FDorisClient/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/TurboWay%2FDorisClient/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/TurboWay%2FDorisClient/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/TurboWay","download_url":"https://codeload.github.com/TurboWay/DorisClient/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":248654090,"owners_count":21140236,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-11-15T10:49:40.070Z","updated_at":"2025-04-13T02:11:08.396Z","avatar_url":"https://github.com/TurboWay.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# DorisClient\n\npython for apache-doris\n\n# Install\n\n```shell\npip install DorisClient\n```\n\n# Use\n\n## Create Test Table\n\n```sql\nCREATE TABLE `streamload_test` (\n  `id` int(11) NULL COMMENT \"\",\n  `shop_code` varchar(64) NULL COMMENT \"\",\n  `sale_amount` decimal(18, 2) NULL COMMENT \"\"\n) ENGINE=OLAP\nUNIQUE KEY(`id`)\nCOMMENT \"test\"\nDISTRIBUTED BY HASH(`id`) BUCKETS 3\nPROPERTIES (\n\"replication_allocation\" = \"tag.location.default: 3\",\n\"in_memory\" = \"false\",\n\"storage_format\" = \"V2\"\n);\n\n-- If you want to enable sequence streamload, make sure Doris table enable sequence load first\n-- ALTER TABLE streamload_test ENABLE FEATURE \"SEQUENCE_LOAD\" WITH PROPERTIES (\"function_column.sequence_type\" = \"bigint\");\n```\n\n## streamload\n\n```python\nfrom DorisClient import DorisSession, DorisLogger, Logger\n\n# DorisLogger.setLevel('ERROR')  # default:INFO\n\ndoris_cfg = {\n    'fe_servers': ['10.211.7.131:8030', '10.211.7.132:8030', '10.211.7.133:8030'],\n    'database': 'testdb',\n    'user': 'test',\n    'passwd': '123456',\n}\ndoris = DorisSession(**doris_cfg)\n\n# append\ndata = [\n    {'id': '1', 'shop_code': 'sdd1', 'sale_amount': '99'},\n    {'id': '2', 'shop_code': 'sdd2', 'sale_amount': '5'},\n    {'id': '3', 'shop_code': 'sdd3', 'sale_amount': '3'},\n]\ndoris.streamload('streamload_test', data)\n\n# delete\ndata = [\n    {'id': '1'},\n]\ndoris.streamload('streamload_test', data, merge_type='DELETE')\n\n# merge\ndata = [\n    {'id': '10', 'shop_code': 'sdd1', 'sale_amount': '99', 'delete_flag': 0},\n    {'id': '2', 'shop_code': 'sdd2', 'sale_amount': '5', 'delete_flag': 1},\n    {'id': '3', 'shop_code': 'sdd3', 'sale_amount': '3', 'delete_flag': 1},\n]\ndoris.streamload('streamload_test', data, merge_type='MERGE', delete='delete_flag=1')\n\n# Sequence append\ndata = [\n    {'id': '1', 'shop_code': 'sdd1', 'sale_amount': '99', 'source_sequence': 11, },\n    {'id': '1', 'shop_code': 'sdd2', 'sale_amount': '5', 'source_sequence': 2},\n    {'id': '2', 'shop_code': 'sdd3', 'sale_amount': '3', 'source_sequence': 1},\n]\ndoris.streamload('streamload_test', data, sequence_col='source_sequence')\n\n# Sequence merge\ndata = [\n    {'id': '1', 'shop_code': 'sdd1', 'sale_amount': '99', 'source_sequence': 100, 'delete_flag': 0},\n    {'id': '1', 'shop_code': 'sdd2', 'sale_amount': '5', 'source_sequence': 120, 'delete_flag': 0},\n    {'id': '2', 'shop_code': 'sdd3', 'sale_amount': '3', 'source_sequence': 100, 'delete_flag': 1},\n]\ndoris.streamload('streamload_test', data, sequence_col='source_sequence', merge_type='MERGE',\n                 delete='delete_flag=1')\n\n\n# streamload default retry config:  max_retry=3, retry_diff_seconds=3\n# if you don't want to retry, \"_streamload\" can help you\ndoris._streamload('streamload_test', data)\n\n# if you want to changed retry config, follow code will work \nfrom DorisClient import DorisSession, Retry\n\nmax_retry = 5\nretry_diff_seconds = 10\n\n\nclass MyDoris(DorisSession):\n\n    @Retry(max_retry=max_retry, retry_diff_seconds=retry_diff_seconds)\n    def streamload(self, table, dict_array, **kwargs):\n        return self._streamload(table, dict_array, **kwargs)\n\n\ndoris = MyDoris(**doris_cfg)\ndoris.streamload('streamload_test', data)\n```\n\n## execute doris-sql\n\n```python\nfrom DorisClient import DorisSession\n\ndoris_cfg = {\n    'fe_servers': ['10.211.7.131:8030', '10.211.7.132:8030', '10.211.7.133:8030'],\n    'database': 'testdb',\n    'user': 'test',\n    'passwd': '123456',\n}\ndoris = DorisSession(**doris_cfg)\n\nsql = 'select * from streamload_test limit 1'\n\n# fetch all the rows by sql, return dict array\nrows = doris.read(sql)\nprint(rows)\n\n# fetch all the rows by sql, return tuple array\nrows = doris.read(sql, cursors=None)\nprint(rows)\n\n# execute sql commit\ndoris.execute('truncate table streamload_test')\n```\n\n## collect meta\n\n```python\nfrom DorisClient import DorisMeta\n\ndoris_cfg = {\n    'fe_servers': ['10.211.7.131:8030', '10.211.7.132:8030', '10.211.7.133:8030'],\n    'database': 'testdb',\n    'user': 'test',\n    'passwd': '123456',\n}\ndm = DorisMeta(**doris_cfg)\n\n# auto create table for collect doris meta\n# 1. meta_table for saving all table meta\n# 2. meta_tablet for saving all tablet meta\n# 3. meta_partition for saving all partition meta\n# 4. meta_size for saving all table size meta\n# 5. meta_table_count for saving all table row count\n# 6. meta_materialized_view for saving all materialized view\n# 6. meta_backup for saving all backup view\ndm.create_tables()\n\n# collect table meta \u003e\u003e meta_table\ndm.collect_table()\n\n# collect partition meta \u003e\u003e meta_partition\ndm.collect_partition()\n\n# collect tablet meta \u003e\u003e meta_tablet \n# deploy collect_partition\ndm.collect_tablet()\n\n# collect table size meta \u003e\u003e meta_size\ndm.collect_size()\n\n# collect table row count \u003e\u003e meta_table_count\ndm.collect_table_count()\n\n# collect materialized view meta \u003e\u003e meta_materialized_view\ndm.collect_materialized_view(only_insert=True)\n\n# collect backup meta \u003e\u003e meta_backup\ndm.collect_backup()\n```\n\n\n## modify buckets\n\n```python\nfrom DorisClient import DorisAdmin\n\n# # debug\n# import logging\n# logger = logging.getLogger()\n# logger.setLevel(logging.DEBUG) \n\ndoris_cfg = {\n    'fe_servers': ['10.211.7.131:8030', '10.211.7.132:8030', '10.211.7.133:8030'],\n    'database': 'testdb',\n    'user': 'test',\n    'passwd': '123456',\n}\nda = DorisAdmin(**doris_cfg)\n\n# modify the number and method of buckets for the specified table\nda.modify(database_name='testdb', table_name='streamload_test', distribution_key='id,shop_code', buckets=1)\n\n# modify the number and method of buckets for partition\nda.modify(database_name='testdb', table_name='partition_tb', partition_name='p20231214', buckets=2)\n\n# only rebuild table and remove unsupport properties\nda.modify(database_name='testdb', table_name='streamload_test', only_rebuild=True, ignore_properties='in_memory')\n\n# only rebuild table and add properties\nda.modify(database_name='testdb', table_name='streamload_test', only_rebuild=True, add_properties='\"enable_unique_key_merge_on_write\" = \"true\"')\n```","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fturboway%2Fdorisclient","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fturboway%2Fdorisclient","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fturboway%2Fdorisclient/lists"}