{"id":21815297,"url":"https://github.com/knovour/json-web-crawler","last_synced_at":"2025-10-06T13:51:17.593Z","repository":{"id":26334965,"uuid":"29783654","full_name":"Knovour/json-web-crawler","owner":"Knovour","description":"Use JSON to list all elements (with css 3 and jquery selector) that you want to crawl.","archived":false,"fork":false,"pushed_at":"2023-07-19T07:00:46.000Z","size":1583,"stargazers_count":17,"open_issues_count":2,"forks_count":2,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-09-14T03:59:17.377Z","etag":null,"topics":["crawler","javascript","jquery","json","web-crawler"],"latest_commit_sha":null,"homepage":null,"language":"JavaScript","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/Knovour.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null}},"created_at":"2015-01-24T16:52:44.000Z","updated_at":"2023-09-08T16:54:05.000Z","dependencies_parsed_at":"2024-01-15T03:57:30.323Z","dependency_job_id":"d7994343-b6d5-4724-bbbd-a26e32d4decc","html_url":"https://github.com/Knovour/json-web-crawler","commit_stats":{"total_commits":62,"total_committers":3,"mean_commits":"20.666666666666668","dds":"0.32258064516129037","last_synced_commit":"5f3c85b2b174977d26f739aa63799b23965467f0"},"previous_names":[],"tags_count":2,"template":false,"template_full_name":null,"purl":"pkg:github/Knovour/json-web-crawler","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Knovour%2Fjson-web-crawler","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Knovour%2Fjson-web-crawler/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Knovour%2Fjson-web-crawler/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Knovour%2Fjson-web-crawler/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/Knovour","download_url":"https://codeload.github.com/Knovour/json-web-crawler/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Knovour%2Fjson-web-crawler/sbom","scorecard":{"id":79508,"data":{"date":"2025-08-11","repo":{"name":"github.com/Knovour/json-web-crawler","commit":"5f3c85b2b174977d26f739aa63799b23965467f0"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":1.3,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Maintained","score":0,"reason":"0 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Code-Review","score":0,"reason":"Found 0/28 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":0,"reason":"license file not detected","details":["Warn: project does not have a license file"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 2 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Vulnerabilities","score":0,"reason":"11 existing vulnerabilities detected","details":["Warn: Project is vulnerable to: GHSA-wf5p-g6vw-rhxx","Warn: Project is vulnerable to: GHSA-jr5f-v2jv-69x6","Warn: Project is vulnerable to: GHSA-v6h2-p8h4-qcjw","Warn: Project is vulnerable to: GHSA-jchw-25xp-jwwc","Warn: Project is vulnerable to: GHSA-cxjh-pqwp-8mfp","Warn: Project is vulnerable to: GHSA-fjxv-7rqg-78g4","Warn: Project is vulnerable to: GHSA-pq67-2wwv-3xjx","Warn: Project is vulnerable to: GHSA-8cj5-5rvv-wf4v","Warn: Project is vulnerable to: GHSA-72xf-g2v4-qvf3","Warn: Project is vulnerable to: GHSA-j8xg-fqg3-53r7","Warn: Project is vulnerable to: GHSA-3h5v-q93c-6h6q"],"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-15T05:29:54.969Z","repository_id":26334965,"created_at":"2025-08-15T05:29:54.969Z","updated_at":"2025-08-15T05:29:54.969Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":278621839,"owners_count":26017253,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-10-06T02:00:05.630Z","response_time":65,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["crawler","javascript","jquery","json","web-crawler"],"created_at":"2024-11-27T15:17:28.405Z","updated_at":"2025-10-06T13:51:17.571Z","avatar_url":"https://github.com/Knovour.png","language":"JavaScript","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Json Web Crawler\n\n[![NPM version](https://d25lcipzij17d.cloudfront.net/badge.svg?id=gh\u0026type=6\u0026v=0.8.1\u0026x2=0)](https://www.npmjs.com/package/json-web-crawler)\n[![Node version](https://img.shields.io/badge/node-%3E=%208.0.0-brightgreen.svg)]()\n[![Open Source Love](https://badges.frapsoft.com/os/mit/mit.svg?v=102)](https://github.com/ellerbrock/open-source-badge/)\n\nUse JSON to list all elements (with css 3 and jquery selector) that you want to crawl.\n\n**[Demo]**\n\n## Usage\n\n```javascript\nnpm i json-web-crawler --save\n```\n```javascript\nconst crawl = require('json-web-crawler');\n\ncrawl('HTML content', your json setting)\n  .then(console.log)\n  .catch(console.error);\n```\n\n## Settings\n\n### type\n\nThe default type is `content`.\n\n- content: crawl specific $container to a single json.\n- list: crawl a list like Google search result into multi data.\n\n### container\n\nDOM element that will focus on. If type is `list`, it will crawl each container class.\n\n### listOption\n\nOptional, enable in `list` type only, use when you don't want to crawl the whole list. ** ALL STRAT FROM 0 **\n\n- `[ 'limit', 10 ]`: ten elements only (eq(0) ~ eq(9)).\n- `[ 'range', 6, 12 ]`: from eq(6) to eq(12 - 1). If without end, it will continue to the last one.\n- `[ 'focus', 0, 3, 7, ... ]`: specific elements in list (eq(0), eq(3), eq(7), ...). You can use -1, -2 to count from backward.\n- `[ 'ignore', 1, 2, 5 ]`: elements you want to ignore it. You can use -1, -2 to count from backward.\n\n### crawl\n\n`keyName: { options }` =\u003e `keyName: data`\n\n```javascript\ncrawl: {\n  image: {\n    elem: 'img',\n    get: 'src'\n  }\n}\n\n// will become\nimage: IMAGE_SRC_URL\n```\n\n#### options\n\n- elem: element inside `container`. If empty or undefined, it will use `container` or `listElems` instead\n- noChild (boolean): remove all children elem under $(elem)\n- outOfContainer (boolean): If exist, It will use $('html').find(elem)\n- get: return type of element\n  - `text`\n  - `num`\n  - `length`: $element.length\n  - `attrName`: $element.attr('attrName')\n  - `data-dataName`: $element.data('dataNAme')\n  - `data-dataName:X`: `X` is optional.\n    - If data is an array, set `data-dataName:0` will return `$elem.data('dataAttribute')[0]`.\n    - If data is an object, set `data-dataName:id` will return `$elem.data('dataAttribute')['id']`.\n    - If X not exist, it will return the whole data.\n- process: If you want to do something else after 'get' (string type only)\n\n```javascript\n// You can use some simple functions that existed in lodash.\nprocess: [\n  ['match', /regex here/, number],  // =\u003e str.match(/regex here/)[number], return array if no number, but will cause other process won't work\n  ['split', ',', number],           // =\u003e str.split(',')[number], return array if no number, but will cause other process won't work\n  ['replace', 'one', 'two'],\n  ['substring', 0, 3],\n  ['prepend', 'text'],              // =\u003e 'text' + value\n  ['append', 'text'],               // =\u003e value + 'text'\n  ['indexOf', 'text']               // =\u003e return number\n  ['INDENPENDENT_FUNCTION'],        // like encodeURI, encodeURIComponent, unescape, etc...\n  /**\n    * Due to lodash has the same name `escape` \u0026 `unescape` functions with\n    * different behavior, the origin `escape` \u0026 `unescape` function will\n    * renamed to `encode` \u0026 `decode` instead.\n    */\n],\n\n// Or you want to DIY, you can use function instead\nprocess(value, $elem /* jquery dom */) {\n  // do something\n\n  return newValue;\n}\n```\n\n- collect: If the value you want is sperated to several elements, use collect to find them all.\n  - elems: contain multi elements array.\n  - loop (boolean): It will run all elems (like `li`) you want to get\n  - combineWith: without this, collect will return array\n\n- default: return default value when elem not found, null or undefined (`process` will be ignored)\n\n### pageNotFound\n\nIf match, it will return page not found error.\n\n- elem\n- get\n- check: like `process`, but only one step\n\n## Example\n\n### Content Type\n\nSteam Dota2 page in `demo`.\n\n```javascript\nconst setting = {\n  type: 'content',\n  container: '#game_highlights .rightcol',\n  crawl: {\n    appId: {\n      elem: '.glance_tags',\n      get:  'data-appid'\n    },\n    appName: {\n      outOfContainer: true,\n      elem: '.apphub_AppName',\n      get:  'text'\n    },\n    image: {\n      elem: '.game_header_image_full',\n      get:  'src'\n    },\n    reviews: {\n      elem: '.game_review_summary:eq(0)',\n      get:  'text',\n    },\n    tags: {\n      elem: '.glance_tags',\n      collect: {\n        elems: [{\n          elem: 'a.app_tag:eq(0)',\n          get:  'text'\n        }, {\n          elem: 'a.app_tag:eq(1)',\n          get:  'text'\n        }, {\n          elem: 'a.app_tag:eq(2)',\n          get:  'text'\n        }],\n        combineWith: ', '\n      }\n    },\n    allTags: {\n      elem: '.glance_tags a.app_tag',\n      collect: {\n        loop: true,\n        get:  'text',\n        combineWith: ', '\n      }\n    },\n    description: {\n      elem: '.game_description_snippet',\n      get:  'text',\n      process(value, $elem) {\n        return value.split(', ');\n      }\n    },\n    releaseDate: {\n      elem: '.release_date .date',\n      get:  'text'\n    }\n  }\n};\n```\n\n### List Type\n\nKickStarter popular list in `demo`.\n\n```javascript\nconst setting = {\n  pageNotFound: [{\n    elem: '.grey-frame-inner h1',\n    get:  'text',\n    check: ['equal', '404']\n  }],\n  type: 'list',\n  container: '#projects_list .project-card',\n  listOption: [ 'limit', 3 ],\n  // listOption: [ 'range', 0, 10 ],\n  // listOption: [ 'ignore', 0, 2, -1 ],\n  // listOption: [ 'focus', 3, -3 ],\n  crawl: {\n    projectID: {\n      get: 'data-pid',\n    },\n    name: {\n      elem: '.project-title',\n      get:  'text',\n    },\n    image: {\n      elem: '.project-thumbnail img',\n      get:  'src'\n    },\n    link: {\n      elem: '.project-title a',\n      get:  'href',\n      process: [\n        [ 'split', '?', 0 ],\n        [ 'prepend', 'https://www.kickstarter.com' ]\n      ]\n    },\n    description: {\n      elem: '.project-blurb',\n      get:  'text'\n    },\n    funded: {\n      elem: '.project-stats-value:eq(0)',\n      get:  'text'\n    },\n    percentPledged: {\n      elem: '.project-percent-pledged',\n      get:  'style',\n      process: [\n        [ 'split', /:\\s?/g, 1 ]\n      ]\n    },\n    pledged: {\n      elem: '.money.usd',\n      get:  'num'\n    }\n  }\n};\n```\n\n[Demo]: https://tonicdev.com/knovour/json-web-crawler-demo\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fknovour%2Fjson-web-crawler","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fknovour%2Fjson-web-crawler","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fknovour%2Fjson-web-crawler/lists"}