{"id":15732373,"url":"https://github.com/stopsopa/docker-puppeteer-html-scraper","last_synced_at":"2026-04-30T22:35:47.815Z","repository":{"id":84268999,"uuid":"135635242","full_name":"stopsopa/docker-puppeteer-html-scraper","owner":"stopsopa","description":"(Deprecated -\u003e use better https://github.com/stopsopa/html-scraper-browserless) Microservice tool to scraping html from \"any\" page","archived":false,"fork":false,"pushed_at":"2023-09-01T23:12:43.000Z","size":1380,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":2,"default_branch":"master","last_synced_at":"2025-06-28T05:44:37.098Z","etag":null,"topics":["docker","html-scraper","node","nodejs","puppeteer","scraper"],"latest_commit_sha":null,"homepage":"https://stopsopa.github.io/docker-puppeteer-html-scraper/example.html","language":"JavaScript","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/stopsopa.png","metadata":{"files":{"readme":"Readme.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2018-05-31T21:04:50.000Z","updated_at":"2022-01-17T15:53:12.000Z","dependencies_parsed_at":"2024-10-25T03:06:18.650Z","dependency_job_id":null,"html_url":"https://github.com/stopsopa/docker-puppeteer-html-scraper","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/stopsopa/docker-puppeteer-html-scraper","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stopsopa%2Fdocker-puppeteer-html-scraper","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stopsopa%2Fdocker-puppeteer-html-scraper/tag
s","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stopsopa%2Fdocker-puppeteer-html-scraper/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stopsopa%2Fdocker-puppeteer-html-scraper/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/stopsopa","download_url":"https://codeload.github.com/stopsopa/docker-puppeteer-html-scraper/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stopsopa%2Fdocker-puppeteer-html-scraper/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":32479448,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-04-30T13:12:12.517Z","status":"ssl_error","status_checked_at":"2026-04-30T13:12:06.837Z","response_time":57,"last_error":"SSL_read: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["docker","html-scraper","node","nodejs","puppeteer","scraper"],"created_at":"2024-10-04T00:09:07.638Z","updated_at":"2026-04-30T22:35:42.807Z","avatar_url":"https://github.com/stopsopa.png","language":"JavaScript","funding_links":[],"categories":[],"sub_categories":[],"readme":"# DEPRECATED\ncreated in 2018, but I'll leave it here\n\nuse better https://github.com/stopsopa/html-scraper-browserless Microservice tool to scraping html from \"any\" page\n\n(above suggestion is also deprecated. 
I wouldn't suggest it anymore)\n\n\n# Installation:\n\n    git clone this repository and go to main directory\n    yarn\n    make build\n    cp config.js.dist config.js\n    \n    # manually change password in config.js for basic auth\n    \n    node server.js --port 7778\n    \n\n# Using:\n    \nJust visit:\n\n        http://localhost:7778/generate\n\n\n# cli tool:\n\n    /bin/bash pdf.sh \"https://www.google.com/search?ncr=\u0026q=puppeteer\"\n\n    /bin/bash pdf.sh \"https://www.google.com/search?ncr=\u0026q=puppeteer\" html.html\n    \n    \n# Current execution environment:\n\n    - node v8.9.4\n    - yarn\n    - Docker version 18.03.1-ce, build 9ee9f40\n    \n# Puppeteer in Docker:\n    \n[Running puppeteer in Docker](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker)       \n    \n# Test server with redirections:\n\n    # run server\n    make test   \n    # then render http://\u003cyour_local_machine_ip\u003e/one through http://localhost:7778/generate page \n    \n# Ping:\n    \n    http://xx.xx.xx.xx:7778/pdf-generator-check \n    http://slowwly.robertomurray.co.uk/delay/32000/url/https://github.com/stopsopa/docker-puppeteer-pdf-generator\n    \n# Useful things:  \n        \n    docker run -it --rm puppeteer-alpine-generate-pdf /usr/bin/chromium-browser --version        \n        $ Chromium 64.0.3282.168\n        \n    or if you follow node:8-slim : https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker\n        docker run -it --rm --cap-add=SYS_ADMIN --rm puppeteer-chrome-linux /usr/bin/google-chrome-unstable --version\n            Google Chrome 68.0.3438.3 dev\n        \n    on mac:\n        /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --version\n            $ Google Chrome 66.0.3359.181\n        /Applications/Google\\ Chrome\\ Canary.app/Contents/MacOS/Google\\ Chrome\\ Canary --version\n            $ Google Chrome 69.0.3445.0 
canary\n    \n        \n# Dev notes:\n    \nI've built this \"on top\" of docker, not \"in\" docker, which means that I have an instance of a node server, and this server creates an on-demand container with puppeteer to generate one pdf and then kills this container, because there is a chance that this approach will make the entire solution more stable.\nAnd this server was meant to be built as fast as possible to do its job. The main priority was to build it fast and make it work, and it seems to work so far.\n\n# Issues\n\n- https://github.com/GoogleChrome/puppeteer/issues/902\n           \n    \n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstopsopa%2Fdocker-puppeteer-html-scraper","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fstopsopa%2Fdocker-puppeteer-html-scraper","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstopsopa%2Fdocker-puppeteer-html-scraper/lists"}