{"id":49453664,"url":"https://github.com/flickz/newspaperjs","last_synced_at":"2026-06-02T05:00:45.675Z","repository":{"id":19888790,"uuid":"88191751","full_name":"flickz/newspaperjs","owner":"flickz","description":"News extraction and scraping. Article Parsing","archived":true,"fork":false,"pushed_at":"2023-03-04T02:58:40.000Z","size":4183,"stargazers_count":75,"open_issues_count":12,"forks_count":20,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-10-22T08:37:14.088Z","etag":null,"topics":["crawler","news","news-aggregator","nodejs","scraper","webcrawling","webscraping"],"latest_commit_sha":null,"homepage":"","language":"HTML","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/flickz.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"License.md","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2017-04-13T17:52:14.000Z","updated_at":"2025-05-14T04:04:58.000Z","dependencies_parsed_at":"2024-06-11T19:07:51.155Z","dependency_job_id":"20aaa1f6-90b6-4dc8-b63d-d9e58d1b3703","html_url":"https://github.com/flickz/newspaperjs","commit_stats":{"total_commits":53,"total_committers":4,"mean_commits":13.25,"dds":0.4716981132075472,"last_synced_commit":"ef473b1a1c080586ffd9fb2f12dcdc04c7120794"},"previous_names":[],"tags_count":4,"template":false,"template_full_name":null,"purl":"pkg:github/flickz/newspaperjs","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/flickz%2Fnewspaperjs","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/flickz%2Fnewspaperjs/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/flickz%2Fnewspaperjs/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/flickz%2Fnewspaperjs/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/flickz","download_url":"https://codeload.github.com/flickz/newspaperjs/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/flickz%2Fnewspaperjs/sbom","scorecard":{"id":403212,"data":{"date":"2025-08-11","repo":{"name":"github.com/flickz/newspaperjs","commit":"ef473b1a1c080586ffd9fb2f12dcdc04c7120794"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":2,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Code-Review","score":0,"reason":"Found 0/6 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Maintained","score":0,"reason":"project is archived","details":["Warn: Repository is archived."],"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: License.md:0","Info: FSF or OSI recognized license: MIT License: License.md:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":-1,"reason":"internal error: error during branchesHandler.setup: internal error: githubv4.Query: Resource not accessible by integration","details":null,"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 26 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Vulnerabilities","score":0,"reason":"33 existing vulnerabilities detected","details":["Warn: Project is vulnerable to: GHSA-93q8-gq69-wqmw","Warn: Project is vulnerable to: GHSA-67hx-6x53-jw92","Warn: Project is vulnerable to: GHSA-v6h2-p8h4-qcjw","Warn: Project is vulnerable to: GHSA-cwfw-4gq5-mrqx","Warn: Project is vulnerable to: GHSA-g95f-p29q-9xw4","Warn: Project is vulnerable to: GHSA-grv7-fg5c-xmjg","Warn: Project is vulnerable to: GHSA-3xgq-45jj-v275","Warn: Project is vulnerable to: GHSA-9vvw-cc9w-f27h","Warn: Project is vulnerable to: GHSA-gxpj-cx7g-858c","Warn: Project is vulnerable to: GHSA-2j2x-2gpw-g8fm","Warn: Project is vulnerable to: GHSA-fjxv-7rqg-78g4","Warn: Project is vulnerable to: GHSA-ww39-953v-wcq6","Warn: Project is vulnerable to: GHSA-896r-f27r-55mw","Warn: Project is vulnerable to: GHSA-fvqr-27wr-82fm","Warn: Project is vulnerable to: GHSA-4xc9-xhrj-v574","Warn: Project is vulnerable to: GHSA-x5rq-j2xg-h7qm","Warn: Project is vulnerable to: GHSA-jf85-cpcp-j695","Warn: Project is vulnerable to: GHSA-p6mc-m468-83gw","Warn: Project is vulnerable to: GHSA-29mw-wpgm-hmr9","Warn: Project is vulnerable to: GHSA-35jh-r3h4-6jhm","Warn: Project is vulnerable to: GHSA-952p-6rrq-rcjv","Warn: Project is vulnerable to: GHSA-f8q6-p94x-37v3","Warn: Project is vulnerable to: GHSA-vh95-rmgr-6w4m","Warn: Project is vulnerable to: GHSA-xvch-5gv4-984h","Warn: Project is vulnerable to: GHSA-w9mr-4mfr-499f","Warn: Project is vulnerable to: GHSA-rp65-9cf3-cjxr","Warn: Project is vulnerable to: GHSA-hrpp-h998-j3pp","Warn: Project is vulnerable to: GHSA-6g33-f262-xjp4","Warn: Project is vulnerable to: GHSA-p8p7-x288-28g6","Warn: Project is vulnerable to: GHSA-c2qf-rxjj-qqgw","Warn: Project is vulnerable to: GHSA-52f5-9888-hmc6","Warn: Project is vulnerable to: GHSA-72xf-g2v4-qvf3","Warn: Project is vulnerable to: GHSA-j8xg-fqg3-53r7"],"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-18T20:34:17.390Z","repository_id":19888790,"created_at":"2025-08-18T20:34:17.390Z","updated_at":"2025-08-18T20:34:17.390Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":33806987,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-26T15:22:16.424Z","status":"online","status_checked_at":"2026-06-02T02:00:07.132Z","response_time":109,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["crawler","news","news-aggregator","nodejs","scraper","webcrawling","webscraping"],"created_at":"2026-04-30T04:01:03.703Z","updated_at":"2026-06-02T05:00:45.669Z","avatar_url":"https://github.com/flickz.png","language":"HTML","funding_links":[],"categories":["📦 Legacy \u0026 Inactive Projects"],"sub_categories":[],"readme":"# Newspaperjs\nNews extraction and scraping. Maximizing the power of [Request](https://github.com/request/request) and [Cheerio](https://github.com/cheeriojs/cheerio). \n\nInspired by \"[Codelucas - Python Newspaper lib](https://github.com/codelucas/newspaper)\"\n\n## Features\n* News url identification\n* News Categories extraction\n* Text extraction from html\n* Top image extraction from html\n* Description extraction from html\n* Keyword extraction from html\n* Author extraction from html\n\n## Installation\n```\nnpm install newspaperjs\n```\n## Using API\n```js\nconst Build = require('newspaperjs').Build;\nconst Article = require('newspaperjs').Article\n```\n### Building a news source\nBuilding a Source will extract its categories and articles url with two simple methods.\n#### .getCategoriesUrl(url{string}, cateOfInterest[array])\nGet all categories url. When **cateOfInterest** is specified, it's only extract their links if found. Returns Promise, an array of categories url.\n\n```js\nBuild.getCategoriesUrl('https://www.nytimes.com', ['politics', 'sports', 'technology']).then(categories=\u003e{\n    console.log(categories); \n}).catch(reason=\u003e{\n    console.log(reason);\n})\n//[\n     'https://www.nytimes.com/pages/politics'\n     'https://www.nytimes.com/pages/sports',\n     'https://www.nytimes.com/pages/technology'\n  ]\n```\n#### .getArticlesUrl(categoriesUrl{string})\nGet all articles url from a category url. Returns Promise, array of articles url.\n\n```js\n Build.getArticlesUrl('https://www.nytimes.com/pages/politics').then(result=\u003e{\n    console.log(result);\n}).catch(reason=\u003e{\n    console.log(reason)\n})\n//[\n   'https://www.nytimes.com/2017/06/12/us/politics/trump-travel-ban-court-of-appeals.html',\n  'https://www.nytimes.com/aponline/2017/06/12/us/politics/ap-us-trump-lawsuit-the-latest.html',\n  'https://www.nytimes.com/aponline/2017/06/12/us/politics/ap-us-supreme-court-biotech-drugs.html',\n  'https://www.nytimes.com/2017/06/12/us/trump-lawsuit-private-businesses.html',\n  'https://www.nytimes.com/2017/06/12/us/politics/ivanka-trump-comey-donald-trump-fox-and-friends.html',\n  'https://www.nytimes.com/2017/06/12/us/politics/unions-come-into-the-justices-cross-hairs-again.html',\n  'https://www.nytimes.com/2017/06/11/us/politics/ducks-washington-reflecting-pool-unity.html',\n  'https://www.nytimes.com/2017/06/11/us/politics/preet-bharara-trump-contacts.html',\n  'https://www.nytimes.com/2017/06/11/us/politics/jeff-sessions-russia-trump-attorney-general-senate.html',\n  'https://www.nytimes.com/2017/06/11/us/politics/defense-secretary-jim-mattis-trump.html',\n...]\n```\n### Extracting and Parsing News Article.\nExtract news article using the article url provided and parse the content.\n#### .Article(url{string})\nExtract and Parse news article, in order to access title, text, topImage, date, author, description and keywords of the article.\n\n```js\nArticle('https://www.nytimes.com/2017/06/10/us/politics/sessions-senate-russia-election.html')\n.then(result=\u003e{\n    console.log(result);\n}).catch(reason=\u003e{\n    console.log(reason);\n})\n{\n    title: 'Sessions Will Testify in Senate on Russian Meddling in Election',\n\n    text: \" AdvertisementBy CHARLIE SAVAGEJUNE 10, 2017\\nWASHINGTON — Attorney General Jeff Sessions told Congress on Saturday that he would testify before the Senate Intelligence Committee on Tuesday about issues related to Russia’s interference in the 2016 election.  Mr. Sessions had been scheduled to testify before other committees about the Justice Department’s budget that day, but he will instead appear before the intelligence panel. Mr.Sessions said he would send Rod J. Rosenstein, the deputy attorney general, to testify about the department’s budget before the House and Senate appropriations panels.... \",\n\n    topImage:'https://static01.nyt.com/images/2017/06/11/us/11dcSESSIONS/11dcSESSIONS-facebookJumbo.jpg',\n\n    date: '2017-06-10T20:08:09-04:00',\n\n    author: 'Charlie Savage',\n\n    description: 'Instead of discussing the Justice Department budget, Attorney General Jeff Sessions will face questions from members of Congress who have access to intelligence materials on the Russia inquiry.',\n\n    keywords: [ 'Russian Interference in 2016 US Elections and Ties to Trump Associates', 'Sessions  Jefferson B III', \n    'Justice Department', \n    'United States Politics and Government', 'Attorneys General', \n    'Senate Committee on Intelligence','Trump  Donald J', 'Comey  James B' ]\n}\n```\n## Author\nAuthored and maintained by **Oluwaseun Omoyajowo**. Like to get in touch?\n\nEmail: [omoyajowo2015@gmail.com](mailto:omoyajowo2015@gmail.com)\n\nTwitter: [@oluwaseunOmoya](https://twitter.com/oluwaseunOmoya)\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fflickz%2Fnewspaperjs","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fflickz%2Fnewspaperjs","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fflickz%2Fnewspaperjs/lists"}