{"id":15395509,"url":"https://github.com/stef/ec-experts","last_synced_at":"2025-07-03T19:38:26.360Z","repository":{"id":10086069,"uuid":"12144506","full_name":"stef/ec-experts","owner":"stef","description":"expert groups mining","archived":false,"fork":false,"pushed_at":"2013-11-05T15:49:03.000Z","size":184,"stargazers_count":1,"open_issues_count":0,"forks_count":1,"subscribers_count":2,"default_branch":"master","last_synced_at":"2025-03-27T20:46:07.790Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"other","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/stef.png","metadata":{"files":{"readme":"readme.txt","changelog":null,"contributing":null,"funding":null,"license":"COPYING","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2013-08-15T21:40:55.000Z","updated_at":"2023-09-08T16:41:28.000Z","dependencies_parsed_at":"2022-09-05T17:21:58.465Z","dependency_job_id":null,"html_url":"https://github.com/stef/ec-experts","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/stef/ec-experts","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stef%2Fec-experts","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stef%2Fec-experts/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stef%2Fec-experts/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stef%2Fec-experts/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/stef","download_url":"https://codeload.github.com/stef/ec-experts/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stef%2Fec-experts/sbom","host":{"name":"GitHub"
,"url":"https://github.com","kind":"github","repositories_count":263389488,"owners_count":23459406,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-10-01T15:28:39.022Z","updated_at":"2025-07-03T19:38:26.294Z","avatar_url":"https://github.com/stef.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# you can automatically install and run ec-experts if you are running\n# debian or ubuntu by issuing the following command\n# wget -O - https://raw.github.com/stef/ec-experts/master/readme.txt | sh -\n\n# install required dependencies\n# or if not on debian/ubuntu\n# sudo pip install -r requirements.txt\nsudo apt-get install python-lxml python-dateutil git python-pip\n\n# git clone this project\ngit clone https://github.com/stef/ec-experts.git\ncd ec-experts\n\n# create data directory\nmkdir data\n\n# run an update\n./update.sh\n\n# update.sh performs the following steps: (you can and should use the\n# commands below to achieve manual improvements when deduplicating)\n\n# 0. for all this to work, you have to be in the ec-experts directory\n# where you have cloned it while installing.\n\n# 1. you download the newest expert register dump from:\n# \"http://ec.europa.eu/transparency/regexpert/view/transparency/openXML.cfm?file=RegExp_xml_{today}.xml\"\n# where you have to replace {today} with the date in the following format:\n# YYYYMMDD\n\n# 2. 
extract register from xml dump to json\n# this is needed for all the following steps, but only once\n# after downloading the dump in step 1.\n# python extract.py data/regexp_{today}.xml \u003edata/regexp_{today}.json\n# again replace {today} with the date in the format from step 1.\n\n# 3. transform and dump expert register\n# this step deduplicates the names from the intermediary format in step 2.\n# using the contents of dedup.txt found in this directory.\n# you can add more deduplication blocks or edit the existing ones to\n# achieve better results.\n# this step generates a csv file called data/entities-{today}.csv\n# which you can use for further datamining.\n# python experts.py data/regexp_{today}.json dedup.txt \u003edata/entities-{today}.csv\n# don't forget to replace {today} with YYYYMMDD\n\n# 4. optionally (update.sh does this automatically) find new\n# candidates for dedup expert and rep names.\n# python dedup.py data/entities-{today}.csv org_name \u003edata/dedup-{today}.txt\n# notice the \"org_name\" in the above line, this command searches for\n# possible duplicate names in all the organisation names and outputs\n# these into data/dedup-${today}.txt\n\n# alternatively you can run a similar command for the names of the\n# experts: \n# python dedup.py data/entities-${today}.csv name \u003e\u003edata/dedup-${today}.txt\n# notice the \u003e\u003e which appends and not overwrites the results for the\n# organizations in the previous example. Also notable is the change\n# from \"org_name\" to \"name\", which is necessary for selecting the\n# names of the experts.\n\n# That's about it. You should perform steps 3. and 4. iteratively,\n# while editing dedup.txt and merging dedup candidate blocks from\n# data/dedup-${today}.txt into it, until you have a\n# data/entities-{today}.csv file that is clean enough for you.\n\n# you can redo also steps 1. and 2. 
daily, to regenerate the csv based\n# on the newest data from the commission.\n\n# When you're done, load the generated data/entities-{today}.csv \n# file in your favourite spreadsheet editor for further analysis.\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstef%2Fec-experts","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fstef%2Fec-experts","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstef%2Fec-experts/lists"}