{"id":15389132,"url":"https://github.com/splitbrain/gotextextract","last_synced_at":"2025-07-25T23:36:25.006Z","repository":{"id":237449175,"uuid":"637691074","full_name":"splitbrain/gotextextract","owner":"splitbrain","description":"Cross Platform text extractor for common office files","archived":false,"fork":false,"pushed_at":"2023-06-20T07:20:50.000Z","size":8,"stargazers_count":3,"open_issues_count":0,"forks_count":0,"subscribers_count":2,"default_branch":"master","last_synced_at":"2025-02-28T20:04:27.239Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":"","language":"Go","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/splitbrain.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2023-05-08T07:56:43.000Z","updated_at":"2025-02-19T00:27:27.000Z","dependencies_parsed_at":null,"dependency_job_id":"5ddcbe3e-e46f-41cf-be8a-2209d6dd52d4","html_url":"https://github.com/splitbrain/gotextextract","commit_stats":{"total_commits":9,"total_committers":1,"mean_commits":9.0,"dds":0.0,"last_synced_commit":"5adaabe590ad7815409fb6550195a2c87b700f63"},"previous_names":["splitbrain/gotextextract"],"tags_count":4,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/splitbrain%2Fgotextextract","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/splitbrain%2Fgotextextract/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/splitbrain%2Fgotextextract/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repo
sitories/splitbrain%2Fgotextextract/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/splitbrain","download_url":"https://codeload.github.com/splitbrain/gotextextract/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":252914758,"owners_count":21824429,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-10-01T14:59:31.073Z","updated_at":"2025-05-07T16:27:41.424Z","avatar_url":"https://github.com/splitbrain.png","language":"Go","readme":"# Go Text Extractor\n\nThis is meant as a simple way to extract raw text contents from different file formats to be used in search indexing. It is not meant to display contents true to their original layout.\n\nIt currently supports the following file formats:\n\n  * `pdf` -- PDF using [ledongthuc/pdf](https://github.com/ledongthuc/pdf) \n  * `docx` -- Microsoft Word, naive extraction from the xml\n  * `odt` -- Open/Libreoffice Document, naive extraction from the xml\n  * `pptx` -- Microsoft Powerpoint, naive extraction from the xml\n  * `odp` -- Open/Libreoffice Presentation, naive extraction from the xml\n\n## Usage\n\n    gotextextract [--type \u003ctype\u003e] \u003cfile\u003e\n\nSimply give the file to extract as argument. If no file type (see above) is given, it will try to guess it from the file extension. 
The extracted text will be printed to stdout.\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsplitbrain%2Fgotextextract","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fsplitbrain%2Fgotextextract","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsplitbrain%2Fgotextextract/lists"}