{"id":23863478,"url":"https://github.com/beechit/solrfal-textextract","last_synced_at":"2025-08-18T03:37:15.733Z","repository":{"id":53688192,"uuid":"78626202","full_name":"beechit/solrfal-textextract","owner":"beechit","description":"dd text extracting for SOLR indexing of FileAbstractionLayer based files in TYPO3 CMS","archived":false,"fork":false,"pushed_at":"2021-03-19T10:26:02.000Z","size":21,"stargazers_count":1,"open_issues_count":0,"forks_count":3,"subscribers_count":1,"default_branch":"master","last_synced_at":"2025-07-02T10:51:17.043Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"PHP","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/beechit.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2017-01-11T10:02:34.000Z","updated_at":"2021-03-19T10:25:30.000Z","dependencies_parsed_at":"2022-09-05T04:01:59.049Z","dependency_job_id":null,"html_url":"https://github.com/beechit/solrfal-textextract","commit_stats":null,"previous_names":[],"tags_count":5,"template":false,"template_full_name":null,"purl":"pkg:github/beechit/solrfal-textextract","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beechit%2Fsolrfal-textextract","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beechit%2Fsolrfal-textextract/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beechit%2Fsolrfal-textextract/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beechit%2Fsolrfal-textextract/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/beechit","download_url":"https://codeload.github.com/beechit/solrfal-textextract/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beechit%2Fsolrfal-textextract/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":270940594,"owners_count":24671676,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-08-18T02:00:08.743Z","response_time":89,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2025-01-03T07:59:54.692Z","updated_at":"2025-08-18T03:37:15.689Z","avatar_url":"https://github.com/beechit.png","language":"PHP","funding_links":[],"categories":[],"sub_categories":[],"readme":"Text extraction for Apache Solr + TYPO3\n=======================================\n\nThis TYPO3 extension provides a hook/aspect that uses the signal of ext:solrfal during indexing to extract the contents \nof known text files.\n \nIt uses the binary `pdftotext` for this (when present on the machine) and has a fallback to the standalone apache Tika jar (when present on the system).\n\nThere are some additional checks when processing pdf files to determine if the contents is encrypted. \nIf encrypted it tries the fallback to `tika`. \n\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbeechit%2Fsolrfal-textextract","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fbeechit%2Fsolrfal-textextract","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbeechit%2Fsolrfal-textextract/lists"}