{"id":17458161,"url":"https://github.com/pocesar/actor-sitemap-to-request-queue","last_synced_at":"2025-04-02T21:29:48.946Z","repository":{"id":138843208,"uuid":"299176134","full_name":"pocesar/actor-sitemap-to-request-queue","owner":"pocesar","description":null,"archived":false,"fork":false,"pushed_at":"2020-10-02T17:23:59.000Z","size":7,"stargazers_count":1,"open_issues_count":0,"forks_count":0,"subscribers_count":2,"default_branch":"master","last_synced_at":"2024-10-18T06:28:49.502Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"JavaScript","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/pocesar.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2020-09-28T03:24:22.000Z","updated_at":"2023-03-04T05:53:58.000Z","dependencies_parsed_at":"2023-03-17T23:15:49.802Z","dependency_job_id":null,"html_url":"https://github.com/pocesar/actor-sitemap-to-request-queue","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/pocesar%2Factor-sitemap-to-request-queue","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/pocesar%2Factor-sitemap-to-request-queue/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/pocesar%2Factor-sitemap-to-request-queue/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/pocesar%2Factor-sitemap-to-request-queue/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/pocesar","download_url":"https://codeload.github.com/pocesar/actor-sitemap-to-request-queue/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":246895500,"owners_count":20851281,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-10-18T03:55:45.761Z","updated_at":"2025-04-02T21:29:48.923Z","avatar_url":"https://github.com/pocesar.png","language":"JavaScript","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Sitemap to RequestQueue\n\nDownloads a sitemap.xml files and append them to a RequestQueue of your choice.\n\n## Example\n\n```js\n// this is your actor\nApify.main(async () =\u003e {\n  const { proxyConfig } = await Apify.getInput();\n  const requestQueue = await Apify.openRequestQueue();\n\n  // this is needed so it doesn't execute everytime there's a migration\n  const run = (await Apify.getValue('SITEMAP-CALL', run)) || { runId: '', actorId: '' };\n\n  if (!run || !run.runId) {\n    // this might take a while!\n    const runCall = await Apify.call('pocesar/sitemap-to-request-queue', {\n      // required proxy configuration, like { useApifyProxy: true, apifyProxyGroups: ['SHADER'] }\n      proxyConfig,\n      // use this for this run's RequestQueue, but can be a named one, or if you\n      // leave it empty, it will be placed on the remote run RQ\n      targetRQ: requestQueue.queueId,\n      // required sitemaps\n      startUrls: [{\n        url: \"http://example.com/sitemap1.xml\",\n        userData: {\n          label: \"DETAILS\" // userData will passthrough\n        }\n      }, {\n        url: \"http://example.com/sitemap2.xml\",\n      }],\n      // Provide your own transform callback to filter or alter the request before adding it to the queue\n      transform: ((request) =\u003e {\n        if (!request.url.includes('detail')) {\n          return null;\n        }\n\n        request.userData.label = request.url.includes('/item/') ? 'DETAILS' : 'CATEGORY';\n\n        return request;\n      }).toString()\n    }, { waitSecs: 0 });\n\n    run.runId = runCall.id;\n    run.actorId = runCall.actId;\n\n    await Apify.setValue('SITEMAP-CALL', run);\n  }\n\n  await Apify.utils.waitForRunToFinish(run);\n\n  const crawler = new Apify.PuppeteerCrawler({\n    requestQueue, // ready to use!\n    //...\n  });\n\n  await crawler.run();\n});\n```\n\n## License\n\nApache 2.0\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fpocesar%2Factor-sitemap-to-request-queue","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fpocesar%2Factor-sitemap-to-request-queue","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fpocesar%2Factor-sitemap-to-request-queue/lists"}