{"id":15662536,"url":"https://github.com/bluejoe2008/spark-http-stream","last_synced_at":"2025-10-12T13:04:05.859Z","repository":{"id":49202334,"uuid":"101147195","full_name":"bluejoe2008/spark-http-stream","owner":"bluejoe2008","description":"spark structured streaming via HTTP communication","archived":false,"fork":false,"pushed_at":"2022-07-07T21:01:11.000Z","size":212,"stargazers_count":18,"open_issues_count":7,"forks_count":9,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-07-12T21:01:32.330Z","etag":null,"topics":["http","spark","spark-structured-streaming"],"latest_commit_sha":null,"homepage":"","language":"Scala","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-2-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/bluejoe2008.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2017-08-23T06:45:34.000Z","updated_at":"2022-10-23T12:02:20.000Z","dependencies_parsed_at":"2022-09-11T08:20:14.866Z","dependency_job_id":null,"html_url":"https://github.com/bluejoe2008/spark-http-stream","commit_stats":null,"previous_names":[],"tags_count":1,"template":false,"template_full_name":null,"purl":"pkg:github/bluejoe2008/spark-http-stream","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/bluejoe2008%2Fspark-http-stream","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/bluejoe2008%2Fspark-http-stream/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/bluejoe2008%2Fspark-http-stream/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/bluejoe2008%2Fspark-http-stream/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/bluejoe
2008","download_url":"https://codeload.github.com/bluejoe2008/spark-http-stream/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/bluejoe2008%2Fspark-http-stream/sbom","scorecard":{"id":244554,"data":{"date":"2025-08-11","repo":{"name":"github.com/bluejoe2008/spark-http-stream","commit":"ede3c2e8f20b5c265c8e29157e12ef455d5749f1"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":1.7,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Code-Review","score":0,"reason":"Found 0/30 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the 
repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Maintained","score":0,"reason":"0 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"SAST","score":0,"reason":"no SAST tool detected","details":["Warn: no pull requests merged into dev branch"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices 
Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: BSD 2-Clause \"Simplified\" License: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Vulnerabilities","score":0,"reason":"143 existing vulnerabilities detected","details":["Warn: Project is vulnerable to: GHSA-h46c-h94j-95f3","Warn: Project is vulnerable to: GHSA-wf8f-6423-gfxg","Warn: Project is vulnerable to: GHSA-288c-cq4h-88gq","Warn: Project is vulnerable to: GHSA-4gq5-ch57-c2mg","Warn: Project is vulnerable to: GHSA-4w82-r329-3q67","Warn: Project is vulnerable to: GHSA-57j2-w4cx-62h2","Warn: Project is vulnerable 
to: GHSA-5949-rw7g-wx7w","Warn: Project is vulnerable to: GHSA-5r5r-6hpj-8gg9","Warn: Project is vulnerable to: GHSA-5ww9-j83m-q7qx","Warn: Project is vulnerable to: GHSA-645p-88qh-w398","Warn: Project is vulnerable to: GHSA-6fpp-rgj9-8rwc","Warn: Project is vulnerable to: GHSA-85cw-hj65-qqv9","Warn: Project is vulnerable to: GHSA-89qr-369f-5m5x","Warn: Project is vulnerable to: GHSA-8c4j-34r4-xr8g","Warn: Project is vulnerable to: GHSA-8w26-6f25-cm9x","Warn: Project is vulnerable to: GHSA-9gph-22xh-8x98","Warn: Project is vulnerable to: GHSA-9m6f-7xcq-8vf8","Warn: Project is vulnerable to: GHSA-c8hm-7hpq-7jhg","Warn: Project is vulnerable to: GHSA-cf6r-3wgc-h863","Warn: Project is vulnerable to: GHSA-cggj-fvv3-cqwv","Warn: Project is vulnerable to: GHSA-cjjf-94ff-43w7","Warn: Project is vulnerable to: GHSA-cmfg-87vq-g5g4","Warn: Project is vulnerable to: GHSA-cvm9-fjm9-3572","Warn: Project is vulnerable to: GHSA-f3j5-rmmp-3fc5","Warn: Project is vulnerable to: GHSA-f9xh-2qgp-cq57","Warn: Project is vulnerable to: GHSA-fmmc-742q-jg75","Warn: Project is vulnerable to: GHSA-fqwf-pjwf-7vqv","Warn: Project is vulnerable to: GHSA-gjmw-vf9h-g25v","Warn: Project is vulnerable to: GHSA-gwp4-hfv6-p7hw","Warn: Project is vulnerable to: GHSA-gww7-p5w4-wrfv","Warn: Project is vulnerable to: GHSA-h3cw-g4mq-c5x2","Warn: Project is vulnerable to: GHSA-h592-38cm-4ggp","Warn: Project is vulnerable to: GHSA-h822-r4r5-v8jg","Warn: Project is vulnerable to: GHSA-jjjh-jjxp-wpff","Warn: Project is vulnerable to: GHSA-m6x4-97wx-4q27","Warn: Project is vulnerable to: GHSA-mph4-vhrx-mv67","Warn: Project is vulnerable to: GHSA-mx7p-6679-8g3q","Warn: Project is vulnerable to: GHSA-p43x-xfjf-5jhr","Warn: Project is vulnerable to: GHSA-q93h-jc49-78gg","Warn: Project is vulnerable to: GHSA-qjw2-hr98-qgfh","Warn: Project is vulnerable to: GHSA-qr7j-h6gg-jmgc","Warn: Project is vulnerable to: GHSA-qxxx-2pp7-5hmx","Warn: Project is vulnerable to: GHSA-r3gr-cxrf-hg25","Warn: Project is vulnerable 
to: GHSA-r695-7vr9-jgc2","Warn: Project is vulnerable to: GHSA-rfx6-vp9g-rh7v","Warn: Project is vulnerable to: GHSA-rgv9-q543-rqg4","Warn: Project is vulnerable to: GHSA-rpr3-cw39-3pxh","Warn: Project is vulnerable to: GHSA-v585-23hc-c647","Warn: Project is vulnerable to: GHSA-vfqx-33qm-g869","Warn: Project is vulnerable to: GHSA-w3f4-3q6j-rh82","Warn: Project is vulnerable to: GHSA-wh8g-3j2c-rqj5","Warn: Project is vulnerable to: GHSA-5mg8-w23w-74h3","Warn: Project is vulnerable to: GHSA-7g45-4rm6-3mm3","Warn: Project is vulnerable to: GHSA-mvr2-9pj6-7w5j","Warn: Project is vulnerable to: GHSA-4gg5-vx3j-xwc7","Warn: Project is vulnerable to: GHSA-735f-pc8j-v9w8","Warn: Project is vulnerable to: GHSA-77rm-9x9h-xj3g","Warn: Project is vulnerable to: GHSA-g5ww-5jh7-63cx","Warn: Project is vulnerable to: GHSA-h4h5-3hr4-j3g2","Warn: Project is vulnerable to: GHSA-wrvw-hg22-4m67","Warn: Project is vulnerable to: GHSA-6phf-73q6-gh87","Warn: Project is vulnerable to: GHSA-wxr5-93ph-8wr9","Warn: Project is vulnerable to: GHSA-6hgm-866r-3cjv","Warn: Project is vulnerable to: GHSA-fjq5-5j5f-mvxh","Warn: Project is vulnerable to: GHSA-pvp8-3xj6-8c6x","Warn: Project is vulnerable to: GHSA-3832-9276-x7gf","Warn: Project is vulnerable to: GHSA-78wr-2p64-hpwj","Warn: Project is vulnerable to: GHSA-gwrp-pvrq-jmwv","Warn: Project is vulnerable to: GHSA-j288-q9x7-2f5v","Warn: Project is vulnerable to: GHSA-cgp8-4m63-fhh5","Warn: Project is vulnerable to: GHSA-5mcr-gq6c-3hq2","Warn: Project is vulnerable to: GHSA-7vpq-g998-qpv7","Warn: Project is vulnerable to: GHSA-9vjp-v76f-g363","Warn: Project is vulnerable to: GHSA-cqqj-4p63-rrmm","Warn: Project is vulnerable to: GHSA-f256-j965-7f32","Warn: Project is vulnerable to: GHSA-grg4-wf29-r9vv","Warn: Project is vulnerable to: GHSA-p2v9-g2qv-p635","Warn: Project is vulnerable to: GHSA-wm47-8v5p-wjpj","Warn: Project is vulnerable to: GHSA-wx5j-54mm-rqqq","Warn: Project is vulnerable to: GHSA-xfv3-rrfm-f2rv","Warn: Project is vulnerable 
to: GHSA-p979-4mfw-53vg","Warn: Project is vulnerable to: GHSA-2qrg-x229-3v8q","Warn: Project is vulnerable to: GHSA-65fg-84f6-3jq3","Warn: Project is vulnerable to: GHSA-f7vh-qwp3-x37m","Warn: Project is vulnerable to: GHSA-fp5r-v3w9-4333","Warn: Project is vulnerable to: GHSA-w9p3-5cr8-m3jj","Warn: Project is vulnerable to: GHSA-r7pg-v2c8-mfg3","Warn: Project is vulnerable to: GHSA-rhrv-645h-fjfh","Warn: Project is vulnerable to: GHSA-4g9r-vxhx-9pgx","Warn: Project is vulnerable to: GHSA-7hfm-57qf-j43q","Warn: Project is vulnerable to: GHSA-crv7-7245-f45f","Warn: Project is vulnerable to: GHSA-mc84-pj99-q6hh","Warn: Project is vulnerable to: GHSA-xqfj-vm6h-2x34","Warn: Project is vulnerable to: GHSA-jpmf-8cj2-595g","Warn: Project is vulnerable to: GHSA-pr9x-qmp5-j3rr","Warn: Project is vulnerable to: GHSA-qm7f-r83w-3p46","Warn: Project is vulnerable to: GHSA-8r28-r8cp-g6cp","Warn: Project is vulnerable to: GHSA-8wm5-8h9c-47pc","Warn: Project is vulnerable to: GHSA-9r7g-325h-mxrm","Warn: Project is vulnerable to: GHSA-f5fw-25gw-5m92","Warn: Project is vulnerable to: GHSA-f8vc-wfc8-hxqh","Warn: Project is vulnerable to: GHSA-gx2c-fvhc-ph4j","Warn: Project is vulnerable to: GHSA-h24p-qwf4-84q8","Warn: Project is vulnerable to: GHSA-mf7c-35mq-75pj","Warn: Project is vulnerable to: GHSA-rmpj-7c96-mrg8","Warn: Project is vulnerable to: GHSA-58jx-f5rf-qgqf","Warn: Project is vulnerable to: GHSA-7r82-7xv7-xcpj","Warn: Project is vulnerable to: GHSA-2jc4-r94c-rp7h","Warn: Project is vulnerable to: GHSA-94rr-4jr5-9h2p","Warn: Project is vulnerable to: GHSA-wv7w-rj2x-556x","Warn: Project is vulnerable to: GHSA-xm78-4m3g-7wm7","Warn: Project is vulnerable to: GHSA-6mqq-8r44-vmjc","Warn: Project is vulnerable to: GHSA-8cw6-5qvp-q3wj","Warn: Project is vulnerable to: GHSA-8rhc-48pp-52gr","Warn: Project is vulnerable to: GHSA-fp5j-3fpf-mhj5","Warn: Project is vulnerable to: GHSA-phg2-9c5g-m4q7","Warn: Project is vulnerable to: GHSA-r34r-f84j-5x4x","Warn: Project is vulnerable 
to: GHSA-w4r4-65mg-45x2","Warn: Project is vulnerable to: GHSA-2hw2-62cp-p9p7","Warn: Project is vulnerable to: GHSA-7286-pgfv-vxvh","Warn: Project is vulnerable to: GHSA-7cwj-j333-x7f7","Warn: Project is vulnerable to: GHSA-ccqf-c5hq-77mp","Warn: Project is vulnerable to: GHSA-c27h-mcmw-48hv","Warn: Project is vulnerable to: GHSA-r6j9-8759-g62w","Warn: Project is vulnerable to: GHSA-cj7v-27pg-wf7q","Warn: Project is vulnerable to: GHSA-qh8g-58pp-2wxh","Warn: Project is vulnerable to: GHSA-26vr-8j45-3r4w","Warn: Project is vulnerable to: GHSA-6x9x-8qw9-9pp6","Warn: Project is vulnerable to: GHSA-7vx9-xjhr-rw6h","Warn: Project is vulnerable to: GHSA-84q7-p226-4x5w","Warn: Project is vulnerable to: GHSA-ghgj-3xqr-6jfm","Warn: Project is vulnerable to: GHSA-m6cp-vxjx-65j6","Warn: Project is vulnerable to: GHSA-p26g-97m4-6q7c","Warn: Project is vulnerable to: GHSA-qw69-rqj8-6qw8","Warn: Project is vulnerable to: GHSA-vgg8-72f2-qm23","Warn: Project is vulnerable to: GHSA-wfcc-pff6-rgc5","Warn: Project is vulnerable to: GHSA-xc67-hjx6-cgg6","Warn: Project is vulnerable to: GHSA-gwcr-j4wh-j3cq","Warn: Project is vulnerable to: GHSA-qvxv-pmq9-4q7g","Warn: Project is vulnerable to: GHSA-55g7-9cwv-5qfv","Warn: Project is vulnerable to: GHSA-fjpj-2g6w-x25r","Warn: Project is vulnerable to: GHSA-pqr6-cmr2-h8hf","Warn: Project is vulnerable to: GHSA-qcwq-55hx-v3vh"],"documentation":{"short":"Determines if the project has open, known unfixed 
vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-17T07:19:51.465Z","repository_id":49202334,"created_at":"2025-08-17T07:19:51.465Z","updated_at":"2025-08-17T07:19:51.465Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":279011467,"owners_count":26084947,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-10-12T02:00:06.719Z","response_time":53,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["http","spark","spark-structured-streaming"],"created_at":"2024-10-03T13:33:08.757Z","updated_at":"2025-10-12T13:04:05.807Z","avatar_url":"https://github.com/bluejoe2008.png","language":"Scala","funding_links":[],"categories":[],"sub_categories":[],"readme":"# spark-http-stream\n\nspark-http-stream transfers Spark structured stream over HTTP protocol. Unlike tcp streams, Kafka streams and HDFS file streams, http streams often flow across distributed big data clusters on the Web. 
This feature is very helpful to build global data processing pipelines across different data centers (scientific research institutes, for example) who own separated data sets.\n\nspark-http-stream provides:\n* `HttpStreamServer`: an HTTP server which receives, collects and provides http streams \n* `HttpStreamSource`: reads messages from a `HttpStreamServer`, acts as a structured streaming `Source`\n* `HttpStreamSink`: sends messages to a `HttpStreamServer` using HTTP-POST commands, acts as a structured streaming `Sink`\n\nalso spark-http-stream provides:\n* `HttpStreamClient`: a client used to communicate with a `HttpStreamServer`, developed upon HttpClient\n* `HttpStreamSourceProvider`: a StreamSourceProvider which creates `HttpStreamSource`\n* `HttpStreamSinkProvider`: a StreamSinkProvider which creates `HttpStreamSink`\n\nThe simple architecture of spark-http-stream is shown below:\n\n\u003cimg src=\"https://github.com/bluejoe2008/spark-http-stream/blob/master/docs/arch.png?raw=true\" width=\"600\"\u003e\n\n## importing spark-http-stream\n\nuse maven to import spark-http-stream:\n\n\t\u003cdependency\u003e\n\t    \u003cgroupId\u003eorg.grapheco\u003c/groupId\u003e\n\t    \u003cartifactId\u003espark-http-stream\u003c/artifactId\u003e\n\t    \u003cversion\u003e0.9.1\u003c/version\u003e\n\t\u003c/dependency\u003e\n\n## Starts a standalone HttpStreamServer\n\n`HttpStreamServer` is actually a Jetty server with a `HttpStreamServlet`, it can be started using the following code:\n\n\tval server = HttpStreamServer.start(\"/xxxx\", 8080);\n    \nWhen `http://localhost:8080/xxxx` is requested, the `HttpStreamServlet` will use an embedded `ActionsHandler` to \nparse request message, perform certain action(`fetchSchema`, `fetchStream`, etc), and return response message.\n\nBy default, a `NullActionsHandler` is provided. 
Of course it can be replaced with a `MemoryBufferAsReceiver`:\n\n\tserver.withBuffer()\n\t\t.addListener(new ObjectArrayPrinter())\n\t\t.createTopic[(String, Int, Boolean, Float, Double, Long, Byte)](\"topic-1\")\n\t\t.createTopic[String](\"topic-2\");\n      \nor with a `KafkaAsReceiver`:\n\n\tserver.withKafka(\"vm105:9092,vm106:9092,vm107:9092,vm181:9092,vm182:9092\")\n\t\t.addListener(new ObjectArrayPrinter());\n\nas shown above, several kinds of `ActionsHandler` are defined in spark-http-stream:\n\n* `NullActionsHandler`: does nothing\n* `MemoryBufferAsReceiver`: maintains a local memory buffer, stores data sent from producers into buffer, and allows consumers to fetch data in batch\n* `KafkaAsReceiver`: forwards all received data to Kafka\n\nNote that MemoryBufferAsReceiver maintains a server-side message buffer, while KafkaAsReceiver only forwards messages to the Kafka cluster.\n\n## HttpStreamSource, HttpStreamSink\n\nThe following code shows how to load messages from a `HttpStreamSource`:\n\n\tval lines = spark.readStream.format(classOf[HttpStreamSourceProvider].getName)\n\t\t.option(\"httpServletUrl\", \"http://localhost:8080/xxxx\")\n\t\t.option(\"topic\", \"topic-1\")\n\t\t.option(\"includesTimestamp\", \"true\")\n\t\t.load();\n\t\t\noptions:\n\n* `httpServletUrl`: path to the servlet\n* `topic`: topic name of messages to be consumed\n* `includesTimestamp`: tells if each row in the loaded DataFrame includes a time stamp or not, default value is `false`\n* `timestampColumnName`: name assigned to the time stamp column, default value is '\\_TIMESTAMP\\_'\n* `msFetchPeriod`: time interval in milliseconds for message polling, default value is `1`(1ms)\n\nThe following code shows how to output messages to a `HttpStreamSink`:\n\n\tval query = lines.writeStream\n\t\t.format(classOf[HttpStreamSinkProvider].getName)\n\t\t.option(\"httpServletUrl\", \"http://localhost:8080/xxxx\")\n\t\t.option(\"topic\", \"topic-1\")\n\t\t.start();\n\t\t\noptions:\n\n* httpServletUrl: 
path to the servlet\n* topic: topic name of produced messages\n* maxPacketSize: max size in bytes of each message packet, if the actual DataFrame is too large, it will be split into several packets, default value is `10*1024*1024`(10M)\n\nNote that `HttpStreamSource` is only available when the `HttpStreamServer` is equipped with a `MemoryBufferAsReceiver` (use `withBuffer`, as shown above). If the HttpStreamServer chooses Kafka as its back-end message system (use `withKafka`), it is wrong to consume data from `HttpStreamSource`, just use `KafkaSource` (see http://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html) instead:\n\n\tval df = spark\n\t\t.readStream\n\t\t.format(\"kafka\")\n\t\t.option(\"kafka.bootstrap.servers\", \"vm105:9092,vm106:9092,vm107:9092,vm181:9092,vm182:9092\")\n\t\t.option(\"subscribe\", \"topic-1\")\n\t\t.load()\n\nsee https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamSourceSinkTest.scala and https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamKafkaTest.scala to get complete example code.\n\n## Understanding ActionsHandler\n\nas shown in the previous section, several kinds of `ActionsHandler` are defined in spark-http-stream: `NullActionsHandler`, \n`MemoryBufferAsReceiver`, `KafkaAsReceiver`.\n\nusers can also customize their own `ActionsHandler` as they wish. The interface looks like:\n\n\ttrait ActionsHandler {\n\t\tdef listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries;\n\t\tdef destroy();\n\t}\n\t\nhere `ActionHandlerEntries` is just an alias of `PartialFunction[String, Map[String, Any]]`, which accepts an input argument `action: String`, and returns an output argument `responseBody: Map[String, Any]`. 
the `listActionHandlerEntries` method is often written as a set of `case` expressions:\n\n\toverride def listActionHandlerEntries(requestBody: Map[String, Any])\n\t\t: PartialFunction[String, Map[String, Any]] = {\n\t\tcase \"actionSendStream\" ⇒ handleSendStream(requestBody);\n\t}\n\nthe code shown above says: this `ActionsHandler` only handles action `actionSendStream`, in this case, it calls the method `handleSendStream(requestBody)` to handle request and output its return value as response. If any other action is requested, an `UnsupportedActionException` will be thrown by the HttpStreamServer. \n\n`ActionsHandlerFactory` is defined to tell how to create an ActionsHandler with required parameters:\n\n\ttrait ActionsHandlerFactory {\n\t\tdef createInstance(params: Params): ActionsHandler;\n\t}\n\n## Embedding HttpStreamServer in Web application servers\n\nspark-http-stream provides a servlet named `ConfigurableHttpStreamServlet`, users can configure the servlet in web.xml:\n\n\t\u003cservlet\u003e\n\t\t\u003cservlet-name\u003ehttpStreamServlet\u003c/servlet-name\u003e\n\t\t\u003cservlet-class\u003eorg.apache.spark.sql.execution.streaming.http.ConfigurableHttpStreamServlet\u003c/servlet-class\u003e\n\t\t\u003cinit-param\u003e\n\t\t\t\u003cparam-name\u003ehandlerFactoryName\u003c/param-name\u003e\n\t\t\t\u003cparam-value\u003eorg.apache.spark.sql.execution.streaming.http.KafkaAsReceiverFactory\u003c/param-value\u003e\n\t\t\u003c/init-param\u003e\n\t\t\u003cinit-param\u003e\n\t\t\t\u003cparam-name\u003ebootstrapServers\u003c/param-name\u003e\n\t\t\t\u003cparam-value\u003evm105:9092,vm106:9092,vm107:9092,vm181:9092,vm182:9092\u003c/param-value\u003e\n\t\t\u003c/init-param\u003e\n\t\u003c/servlet\u003e\n\n\t\u003cservlet-mapping\u003e\n\t\t\u003cservlet-name\u003ehttpStreamServlet\u003c/servlet-name\u003e\n\t\t\u003curl-pattern\u003e/xxxx\u003c/url-pattern\u003e\n\t\u003c/servlet-mapping\u003e\n\t\nAs shown above, a servlet of `ConfigurableHttpStreamServlet` is defined 
with an ActionsHandlerFactory `KafkaAsReceiverFactory`, required parameters for the `ActionsHandlerFactory` (`bootstrapServers`, for example), are defined as a set of `init-param`s.\n\n## Using HttpStreamClient\n\n`HttpStreamClient` provides an HTTP client used to communicate with a `HttpStreamServer`. It contains several methods:\n\n* `sendDataFrame`: send a `DataFrame` to the server, if the `DataFrame` is too large, it will be split into smaller packets\n* `sendRows`: send data (as `Array[Row]`) to server\n* `fetchSchema`: retrieves schema of certain topic\n* `fecthStream`: retrieves data (as `Array[RowEx]`) from server\n* `subscribe`: subscribe a topic and retrieves a subscriberId\n* `unsubscribe`: unsubscribe\n\nNote that some methods are only available when the server is equipped with the correct `ActionsHandler`. As an example, the `KafkaAsReceiver` only handles action `actionSendStream`, that means, if you call the `sendRows` and `sendDataFrame` methods of the HttpStreamClient, they work well. 
But it will fail and throw an `UnsupportedActionException` when you call the `subscribe` method.\n\n```\n+---------------+------------------------+-----------------+\n|  methods      | MemoryBufferAsReceiver | KafkaAsReceiver |\n+---------------+------------------------+-----------------+\n| sendDataFrame |             √          |        √        |\n+---------------+------------------------+-----------------+\n| sendRows      |             √          |        √        |\n+---------------+------------------------+-----------------+\n| fetchSchema   |             √          |        X        |\n+---------------+------------------------+-----------------+\n| fecthStream   |             √          |        X        |\n+---------------+------------------------+-----------------+\n| subscribe     |             √          |        X        |\n+---------------+------------------------+-----------------+\n| unsubscribe   |             √          |        X        |\n+---------------+------------------------+-----------------+\n```\n\n## StreamListener\n\n`StreamListener` works when new data arrives and will be consumed by `ActionsHandler`:\n\n\ttrait StreamListener {\n\t\tdef onArrive(topic: String, objects: Array[RowEx]);\n\t}\n\t\nTwo kinds of `StreamListener`s are provided:\n\n* `StreamCollector`: collects data in a local memory buffer\n* `StreamPrinter`: prints data while arriving\n\nexample messages look like this:\n\n\t++++++++topic=topic-1++++++++\n\tRowEx([hello1,1,true,0.1,0.1,1,49],1,0,2017-08-27 20:37:56.432)\n\tRowEx([hello2,2,false,0.2,0.2,2,50],1,1,2017-08-27 20:37:56.432)\n\tRowEx([hello3,3,true,0.3,0.3,3,51],1,2,2017-08-27 20:37:56.432)\n\t\n## Schema, data types, RowEx\n\nspark-http-stream only supports data types which can be recognized by Spark Encoders. These data types include: `String`, `Boolean`, `Int`, `Long`, `Float`, `Double`, `Byte`, `Array[]`.\n\nA row will be wrapped as a `RowEx` object on receiving. 
`RowEx` is a data structure richer than `Row`. It contains some members and methods:\n\n* `originalRow`: original row\n* `batchId`: batch id passed by Spark\n* `offsetInBatch`: offset of this row in current batch\n* `withTimestamp()`: returns a `Row` with a timestamp\n* `withId()`: returns a `Row` with its id\n* `extra()`: returns a triple (batchId, offsetInBatch, timestamp)\n\nConsidering an original row has values [hello1,1,true,0.1,0.1,1,49], following code show contents of mentioned structures:\n\n### originalRow:\n```\n+---------------+-------+--------------+-----------+------------+--------+---------+\n| String:hello1 | Int:1 | Boolean:true | Float:0.1 | Double:0.1 | Long:1 | Byte:49 | \n+---------------+-------+--------------+-----------+------------+--------+---------+\n```\n\n### RowEx:\n```\n+---------------+-------+--------------+-----++--------+-------+-------------------------------+\n| String:hello1 | Int:1 | Boolean:true | ... || Long:1 | Int:0 | Timestamp:2017-08-27 20:37:56 |\n+---------------+-------+--------------+-----++--------+-------+-------------------------------+\n```\n\n### withTimestamp():\n```\n+---------------+-------+--------------+-----------+-----+-------------------------------+\n| String:hello1 | Int:1 | Boolean:true | Float:0.1 | ... 
| Timestamp:2017-08-27 20:37:56 |\n+---------------+-------+--------------+-----------+-----+-------------------------------+\n```\n\n### withId():\n```\n+---------------+-------+--------------+-----------+------------+--------+---------+------------+\n| String:hello1 | Int:1 | Boolean:true | Float:0.1 | Double:0.1 | Long:1 | Byte:49 | String:1-0 |\n+---------------+-------+--------------+-----------+------------+--------+---------+------------+\n```\n\n### extra():\n```\n+--------+-------+-------------------------------+\n| Long:1 | Int:0 | Timestamp:2017-08-27 20:37:56 |\n+--------+-------+-------------------------------+\n```\n\n## SerDe\n\nspark-http-stream defines a SerializerFactory to create a SerializerInstance:\n\n\ttrait SerializerFactory {\n\t\tdef getSerializerInstance(serializerName: String): SerializerInstance;\n\t}\n\t\na `SerializerFactory.DEFAULT` object is provided which is able to create two kinds of serializers:\n\n* `java`: creates a JavaSerializer\n* `kryo`: creates a KryoSerializer\n\nNew kind of Serializer, `json` serializer, for example, is welcome. \n\nBy default, `HttpStreamClient` and `HttpStreamServer` use the `kryo` serializer.\n\n## Tests\n\n* `HttpStreamServerClientTest`: tests HttpStreamServer/Client, https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamServerClientTest.scala\n* `HttpStreamSourceSinkTest`: tests HttpStreamSource and HttpStreamSink, https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamSourceSinkTest.scala\n* `HttpStreamKafkaTest`: tests HttpStreamSink with Kafka as underlying message receiver, https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamKafkaTest.scala\n* `HttpStreamDemo`: a tool that helps to test HttpTextStream and HttpTextSink, https://github.com/bluejoe2008/spark-http-stream/blob/master/src/test/scala/HttpStreamDemo.scala\n\nsteps to test HttpStreamDemo:\n\n1. 
choose machine A, run `HttpStreamDemo start-server-on 8080 /xxxx`, this starts a HTTP server which receives data from machine B\n2. choose machine B, run `nc -lk 9999`\n3. run `HttpStreamDemo read-from http://machine-a-host:8080/xxxx` on machine B\n4. run `HttpStreamDemo write-into http://machine-a-host:8080/xxxx` on machine C\n5. type some text in nc, data will be received by HttpStreamSink and then consumed as HttpStreamSource, finally displayed on console\n\n## dependencies\n\n* `kafka-clients-0.10`: used by `KafkaAsReceiver`\n* `httpclient-4.5`: HttpStreamClient uses HttpClient project\n* `jetty-9.0`: HttpStreamServer is developed upon Jetty\n* `spark-2.1`: spark structured streaming library\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbluejoe2008%2Fspark-http-stream","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fbluejoe2008%2Fspark-http-stream","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbluejoe2008%2Fspark-http-stream/lists"}