{"id":19252371,"url":"https://github.com/ssciwr/mailcom","last_synced_at":"2025-04-21T13:30:59.645Z","repository":{"id":37866111,"uuid":"479280298","full_name":"ssciwr/mailcom","owner":"ssciwr","description":"Recognize and pseudonymize named entities in emails","archived":false,"fork":false,"pushed_at":"2025-04-17T15:28:58.000Z","size":16800,"stargazers_count":1,"open_issues_count":8,"forks_count":1,"subscribers_count":2,"default_branch":"main","last_synced_at":"2025-04-17T21:24:28.371Z","etag":null,"topics":["anonymization","data-privacy","llm-inference","pseudonymization","text-preprocessing"],"latest_commit_sha":null,"homepage":"https://ssciwr.github.io/mailcom/","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/ssciwr.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2022-04-08T06:59:17.000Z","updated_at":"2025-04-11T10:28:04.000Z","dependencies_parsed_at":"2024-09-13T20:12:21.005Z","dependency_job_id":"e19755c8-3acc-4f39-ba0e-b7a0af9c1ac1","html_url":"https://github.com/ssciwr/mailcom","commit_stats":null,"previous_names":["ssciwr/mailcom","ssciwr/anonymize"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ssciwr%2Fmailcom","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ssciwr%2Fmailcom/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ssciwr%2Fmailcom/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ssciwr%2Fmailcom/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/ssciwr","download_url":"https://codeload.github.com/ssciwr/mailcom/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":250064558,"owners_count":21368927,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["anonymization","data-privacy","llm-inference","pseudonymization","text-preprocessing"],"created_at":"2024-11-09T18:26:37.689Z","updated_at":"2025-04-21T13:30:54.636Z","avatar_url":"https://github.com/ssciwr.png","language":"Python","readme":"# mailcom\nTool to parse email body from email text (eml file), and retains only the text, with names removed, for French of Spanish emails.\n\n# Installation\nInstall using  \n`python -m pip install mailcom`\n\nYou will also need to download the French and Spanish models for spaCy and Stanza using the provided script - run this in the terminal:\n\n`./get-models.sh`\n\nFor an overview over the available languages and models, check the [spaCy](https://spacy.io/usage/models) website.\n\n# Usage\nThe package uses spaCy for sentencizing, based on the default language models, and transformers for NER recognition.\nCurrently, you have to set the language and eml file directory manually at the top of `parse.py`; the default directory is `data/in`. Then run `python parse.py`. After the run, the output can be found in `data/out`.\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fssciwr%2Fmailcom","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fssciwr%2Fmailcom","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fssciwr%2Fmailcom/lists"}