{"id":15304834,"url":"https://github.com/asirihewage/simplest-xpath-web-scraper","last_synced_at":"2026-01-29T21:02:54.894Z","repository":{"id":136072200,"uuid":"453634968","full_name":"asirihewage/simplest-xpath-web-scraper","owner":"asirihewage","description":"Simplest web scraper created using Python3 and MongoDB","archived":false,"fork":false,"pushed_at":"2022-11-21T23:43:40.000Z","size":154,"stargazers_count":2,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"main","last_synced_at":"2025-06-07T23:51:01.290Z","etag":null,"topics":["data","data-mining","python3","scraper","web","webscrping"],"latest_commit_sha":null,"homepage":"https://w3genesis.com","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/asirihewage.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2022-01-30T09:15:38.000Z","updated_at":"2023-03-04T23:46:24.000Z","dependencies_parsed_at":null,"dependency_job_id":"4c0b464e-2aa4-4b4e-ac25-635c7134c3ee","html_url":"https://github.com/asirihewage/simplest-xpath-web-scraper","commit_stats":null,"previous_names":[],"tags_count":2,"template":false,"template_full_name":null,"purl":"pkg:github/asirihewage/simplest-xpath-web-scraper","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/asirihewage%2Fsimplest-xpath-web-scraper","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/asirihewage%2Fsimplest-xpath-web-scraper/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/asirihewage%2Fsimplest-xpath-web-scraper/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/asirihewage%2Fsimplest-xpath-web-scraper/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/asirihewage","download_url":"https://codeload.github.com/asirihewage/simplest-xpath-web-scraper/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/asirihewage%2Fsimplest-xpath-web-scraper/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28884706,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-01-29T19:55:09.949Z","status":"ssl_error","status_checked_at":"2026-01-29T19:55:08.490Z","response_time":59,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.5:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["data","data-mining","python3","scraper","web","webscrping"],"created_at":"2024-10-01T07:58:12.993Z","updated_at":"2026-01-29T21:02:54.863Z","avatar_url":"https://github.com/asirihewage.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Simplest xpath web scraper\nSimples web scraper created using Python3\n- extract data using multiple xpaths from multiple urls\n- save response in MongoDB\n- exceptions and error handling\n- only for basic web sraping work from static HTML web pages\n\n## setup Data.py for each url with xpath\n```json\n    {\n        \"url\": \"https://www.technology.pitt.edu/blog/zoom10faq\",\n        \"xpaths\": [\n            {\n                \"questions\": '//div[@class=\"field-item even\"]/h2/text()',\n                \"answers\": '//div[@class=\"field-item even\"]/p/text()',\n                \"correct_answer\": '//div[@class=\"field-item even\"]/p[0]/text()'\n            }\n        ]\n    }\n```\n## setup mongodb database connection string\n```python\nmyclient = pymongo.MongoClient(\"mongodb://host:port/\") # or add the connection url\nmydb = myclient[\"database\"]\nmycol = mydb[\"collection\"]\n```\n\n## install python dependancies\n```commandline\npip3 install -r requirements.txt\n```\n\n## run\n```commandline\npython3 main.py\n```\n\n## response\n ![Simplest xpath web scraper](47dcf6e5-0d63-4824-9135-e2b4171a171f.jfif)\n\n### Author : Asiri Hewage","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fasirihewage%2Fsimplest-xpath-web-scraper","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fasirihewage%2Fsimplest-xpath-web-scraper","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fasirihewage%2Fsimplest-xpath-web-scraper/lists"}