{"id":13432316,"url":"https://github.com/mozilla/fathom","last_synced_at":"2025-05-14T20:07:36.621Z","repository":{"id":6083633,"uuid":"54228300","full_name":"mozilla/fathom","owner":"mozilla","description":"A framework for extracting meaning from web pages","archived":false,"fork":false,"pushed_at":"2023-11-25T13:34:18.000Z","size":24754,"stargazers_count":1974,"open_issues_count":112,"forks_count":74,"subscribers_count":54,"default_branch":"master","last_synced_at":"2025-05-12T15:27:13.795Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":"http://mozilla.github.io/fathom/","language":"JavaScript","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mpl-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/mozilla.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":"CODE_OF_CONDUCT.md","threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null}},"created_at":"2016-03-18T20:03:05.000Z","updated_at":"2025-04-25T08:13:11.000Z","dependencies_parsed_at":"2024-04-20T20:46:42.730Z","dependency_job_id":"8f5d8dc6-ec7b-4c04-933c-b899b60d6190","html_url":"https://github.com/mozilla/fathom","commit_stats":{"total_commits":1400,"total_committers":23,"mean_commits":"60.869565217391305","dds":"0.25357142857142856","last_synced_commit":"2b2c84eace185b4cc6fa4f75d00d028728a30f8a"},"previous_names":[],"tags_count":26,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mozilla%2Ffathom","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mozilla%2Ffathom/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mozilla%2Ffathom/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mozilla%2Ffathom/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/mozilla","download_url":"https://codeload.github.com/mozilla/fathom/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":254219373,"owners_count":22034397,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-07-31T02:01:10.272Z","updated_at":"2025-05-14T20:07:36.568Z","avatar_url":"https://github.com/mozilla.png","language":"JavaScript","readme":"# Fathom\n\nFathom is a supervised-learning system for recognizing parts of web pages—pop-ups, address forms, slideshows—or for classifying a page as a whole. A DOM flows in one side, and DOM nodes flow out the other, tagged with types and probabilities that those types are correct. A Prolog-like language makes it straightforward to specify the “smells” that suggest each type, and a neural-net-based trainer determines the optimal contribution of each smell. Finally, the FathomFox web extension lets you collect and label a corpus of web pages for training.\n\nContinue reading at \u003chttps://mozilla.github.io/fathom/intro.html#why\u003e.\n\n__[Documentation](https://mozilla.github.io/fathom)__\n","funding_links":[],"categories":["JavaScript"],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmozilla%2Ffathom","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmozilla%2Ffathom","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmozilla%2Ffathom/lists"}