{"id":34032369,"url":"https://github.com/machinia/scraper-factory","last_synced_at":"2026-04-08T15:33:11.203Z","repository":{"id":35063847,"uuid":"199960505","full_name":"machinia/scraper-factory","owner":"machinia","description":"Scraping library to retrieve data from useful pages, such as Amazon wishlists","archived":false,"fork":false,"pushed_at":"2022-11-04T19:35:58.000Z","size":67,"stargazers_count":1,"open_issues_count":4,"forks_count":0,"subscribers_count":1,"default_branch":"master","last_synced_at":"2026-02-14T23:16:07.081Z","etag":null,"topics":["amazon","amazon-wishlist","scrape","scrapers","scraping","scraping-python","scraping-websites","scrapy","scrapy-crawler","scrapy-spider","scrapy-spiders","wishlist"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/machinia.png","metadata":{"files":{"readme":"README.rst","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2019-08-01T02:21:10.000Z","updated_at":"2024-06-04T02:35:46.000Z","dependencies_parsed_at":"2023-01-15T13:03:38.169Z","dependency_job_id":null,"html_url":"https://github.com/machinia/scraper-factory","commit_stats":null,"previous_names":[],"tags_count":3,"template":false,"template_full_name":null,"purl":"pkg:github/machinia/scraper-factory","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinia%2Fscraper-factory","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinia%2Fscraper-factory/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinia%2Fscraper-factory/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinia%2Fscraper-factory/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/machinia","download_url":"https://codeload.github.com/machinia/scraper-factory/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinia%2Fscraper-factory/sbom","scorecard":{"id":609470,"data":{"date":"2025-08-11","repo":{"name":"github.com/machinia/scraper-factory","commit":"79804f127ec9be692795ff1603f95730164a61f7"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.3,"checks":[{"name":"Code-Review","score":10,"reason":"all changesets reviewed","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Maintained","score":0,"reason":"0 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: MIT License: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 30 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-21T02:17:55.664Z","repository_id":35063847,"created_at":"2025-08-21T02:17:55.664Z","updated_at":"2025-08-21T02:17:55.664Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":31562688,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-04-08T14:31:17.711Z","status":"ssl_error","status_checked_at":"2026-04-08T14:31:17.202Z","response_time":54,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.5:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["amazon","amazon-wishlist","scrape","scrapers","scraping","scraping-python","scraping-websites","scrapy","scrapy-crawler","scrapy-spider","scrapy-spiders","wishlist"],"created_at":"2025-12-13T18:45:39.939Z","updated_at":"2026-04-08T15:33:11.195Z","avatar_url":"https://github.com/machinia.png","language":"Python","readme":"Scraping Factory\n================\n\n|travis| |coverage| |pypi| |pyversion|\n\nScraping library to retrieve data from useful pages, such as Amazon wishlists\n\nAPI\n---\n\nThe API to use the library, scrape data and manage spiders is the following:\n\n- ``scrape(SPIDER_NAME, URL)``: scrapes the given ``URL`` using the spider referenced on ``SPIDER_NAME``.\n- ``spiders()``: list all spiders found by the library.\n\n\nCustom Spiders\n--------------\n\nUsing custom spiders is possible, as long as they:\n\n- They must be implemented as a class, and inherit from ``BaseSpider``.\n\n- The spider file need to be either on ``scraper_factory/spiders``, or in a custom location, as long as the environment variable ``$SPIDER_PATH`` is set to the directory where the spider is located.\n\nUsage example\n-------------\n\n.. code-block:: python\n\n    \u003e\u003e\u003e import scraper_factory as SF\n    \u003e\u003e\u003e SF.scrape('amazon-wishlist', 'https://www.amazon.com/hz/wishlist/ls/24XY9873RPAYN')\n    [{\n    \t'id': 'I1MZVK8RDPYK8P',\n    \t'title': 'AmazonBasics Heavy Weight Ruled Lined Index Cards, White, 3x5 Inch Card, 100-Count - AMZ63500',\n    \t'byline': None,\n    \t'price': None,\n    \t'link': 'https://www.amazon.com/dp/B06XSRLP51/',\n    \t'img': 'https://images-na.ssl-images-amazon.com/images/I/71i7LVTzpsL._SS135_.jpg'\n    }, {\n    \t'id': 'I14TUJ6TADACU5',\n    \t'title': \"Women's Walking Shoes Sock Sneakers - Mesh Slip On Air Cushion Lady Girls Modern Jazz Dance Easy Shoes Platform Loafers\",\n    \t'byline': None,\n    \t'price': None,\n    \t'link': 'https://www.amazon.com/dp/B07MWCDJ9X/',\n    \t'img': 'https://images-na.ssl-images-amazon.com/images/I/61sHA7o-bxL._SS135_.jpg'\n    }, {\n    \t'id': 'I3C97JA2JR06PN',\n    \t'title': 'Tenergy Redigrill\\xa0Smoke-Less Infrared Grill, Indoor Grill, Heating\\xa0Electric Tabletop Grill, Non-Stick Easy to Clean\\xa0BBQ Grill, for Party/Home, ETL Certified',\n    \t'byline': None,\n    \t'price': '$179.99',\n    \t'link': 'https://www.amazon.com/dp/B07BZ412HT/',\n    \t'img': 'https://images-na.ssl-images-amazon.com/images/I/41uGvSPg-ML._SS135_.jpg'\n    }, {\n    \t'id': 'I1C7RJI2H0VWZ7',\n    \t'title': 'Shelf Liners for Wire Shelf Liner Set of 4 - Graphite (14-Inch-by-36-Inch)',\n    \t'byline': None,\n    \t'price': '$29.99',\n    \t'link': 'https://www.amazon.com/dp/B01N9V4A9A/',\n    \t'img': 'https://images-na.ssl-images-amazon.com/images/I/71Lg6J7sGHL._SS135_.jpg'\n    },\n    ...]\n\nInstallation\n------------\n\nLatest release through PyPI:\n\n.. code-block:: sh\n\n    $ pip install scraper_factory\n\nDevelopment version:\n\n.. code-block:: sh\n\n    $ git clone git@github.com:machinia/scraper-factory.git\n    $ cd scraper_factory\n    $ pip install -e .\n\n\n.. |travis| image:: https://img.shields.io/travis/machinia/scraper-factory\n    :target: https://travis-ci.org/machinia/scraper-factory\n    :alt: Travis Build\n.. |coverage| image:: https://coveralls.io/repos/github/machinia/scraper-factory/badge.svg\n    :target: https://coveralls.io/github/machinia/scraper-factory\n    :alt: Test coverage\n.. |pypi| image:: https://badge.fury.io/py/scraper-factory.svg\n    :target: https://badge.fury.io/py/scraper-factory\n    :alt: PyPI - Latest version\n.. |pyversion| image:: https://img.shields.io/pypi/pyversions/scraper_factory\n    :target: https://pypi.org/project/scraper-factory/\n    :alt: PyPI - Python Version\n\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmachinia%2Fscraper-factory","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmachinia%2Fscraper-factory","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmachinia%2Fscraper-factory/lists"}