{"id":18783419,"url":"https://github.com/futuresea-dev/mapping-task","last_synced_at":"2025-10-25T19:38:28.069Z","repository":{"id":157980027,"uuid":"399563860","full_name":"futuresea-dev/mapping-task","owner":"futuresea-dev","description":"mapping task","archived":false,"fork":false,"pushed_at":"2021-08-25T21:09:51.000Z","size":7,"stargazers_count":1,"open_issues_count":0,"forks_count":2,"subscribers_count":3,"default_branch":"main","last_synced_at":"2024-12-29T11:44:43.376Z","etag":null,"topics":["abc","dateutil","html","pydantic","python","requests","scheduler","typing"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/futuresea-dev.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2021-08-24T18:16:45.000Z","updated_at":"2022-02-18T04:32:38.000Z","dependencies_parsed_at":null,"dependency_job_id":"466dffb0-fd49-4420-9534-6ae8447a40d3","html_url":"https://github.com/futuresea-dev/mapping-task","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/futuresea-dev%2Fmapping-task","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/futuresea-dev%2Fmapping-task/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/futuresea-dev%2Fmapping-task/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/futuresea-dev%2Fmapping-task/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/futuresea-dev","download_url":"https://codeload.github.com/futuresea-dev/mapping-task/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":239699579,"owners_count":19682574,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["abc","dateutil","html","pydantic","python","requests","scheduler","typing"],"created_at":"2024-11-07T20:39:17.643Z","updated_at":"2025-10-25T19:38:27.996Z","avatar_url":"https://github.com/futuresea-dev.png","language":"Python","readme":"# mapping-task\n\n\n\n    ##\n    # strip html tag function\n    def strip_tags(html):\n        s = MLStripper()\n        s.feed(html)\n        return s.get_data()\n\n\n    def main():\n        # define session\n        s = requests.Session()\n    # get all article list\n    api_list_url = \"https://mapping-test.fra1.digitaloceanspaces.com/data/list.json\"\n    r = s.get(api_list_url)\n\n    # check status code\n    if r.status_code == 200:\n        # convert content to json\n        article_list = r.json()\n\n        # for loop each article and get article id\n        for article in article_list:\n            data = {}\n            article_id = article[\"id\"]\n\n            # make article url and get article data\n            arc_data_url = \"https://mapping-test.fra1.digitaloceanspaces.com/data/articles/{id}.json\".format(id=article_id)\n            get_data = s.get(arc_data_url)\n            if get_data.status_code == 200:\n                # convert content to json\n                data = get_data.json()\n                categories = [data[\"category\"]]\n                # modify data , add field depend on Article model\n                data[\"categories\"] = categories\n                data[\"publication_date\"] = datetime.strptime(data[\"pub_date\"], '%Y-%m-%d-%H;%M;%S')\n                if \"mod_date\" in data:\n                    data[\"modification_date\"] = datetime.strptime(data[\"mod_date\"], '%Y-%m-%d-%H:%M:%S')\n                else:\n                    data[\"modification_date\"] = datetime.now()\n\n                # if sections's length big than 4  then slice it\n                if len(data[\"sections\"]) \u003e 4:\n                    data[\"sections\"] = data[\"sections\"][:4]\n\n                # make temp sections and put it\n                tmp_section = []\n                for idx, section in enumerate(data[\"sections\"]):\n                    tmp = {}\n\n                    # make each temp section\n                    for k, v in section.items():\n                        if k == \"text\":\n                            tmp[k] = strip_tags(v)\n                        elif k == \"type\" and v == \"media\":\n                            tmp[\"text\"] = \"media\"\n                            tmp[\"type\"] = \"text\"\n                        else:\n                            tmp[k] = v\n                    tmp_section.append(tmp)\n\n                # put sections\n                data[\"sections\"] = tmp_section\n\n                # get media data\n                arc_media_url = \"https://mapping-test.fra1.digitaloceanspaces.com/data/media/{id}.json\".format(id=article_id)\n                get_data = s.get(arc_media_url)\n\n                # if status code == 200\n                if get_data.status_code == 200:\n                    # get image, media data and modify section\n                    img_data = get_data.json()\n\n                    # put image section\n                    data[\"sections\"].append(img_data[0])\n\n                    # modify media section and put section\n                    img_data[1][\"publication_date\"] = datetime.strptime(img_data[1][\"pub_date\"], '%Y-%m-%d-%H;%M;%S')\n                    if \"mod_date\" in img_data[1]:\n                        img_data[1][\"modification_date\"] = datetime.strptime(img_data[1][\"mod_date\"], '%Y-%m-%d-%H:%M:%S')\n                    else:\n                        img_data[1][\"modification_date\"] = datetime.now()\n                    data[\"sections\"].append(img_data[1])\n                # else create empty section.\n                else:\n                    image_section = {\"type\": \"image\", \"url\": \"https://google.com\", \"alt\": \"none\", \"caption\": \"none\",\n                                     \"source\": \"none\"}\n                    media_section = {\"type\": \"media\", \"id\": \"None\", \"url\": \"https://google.com\", \"thumbnail\": \"https\"\n                                                                                                              \"://google.com\", \"author\": \"none\", \"caption\": \"none\", \"publication_date\": datetime.now(), \"modification_date\": datetime.now(), \"duration\": 0}\n                    data[\"sections\"].append(image_section)\n                    data[\"sections\"].append(media_section)\n                user = Article(**data)\n                print (user)\n            else:\n                pass\n    else:\n        print(\"invalid status : \" + str(r.status_code))\n\n\nif __name__ == \"__main__\":\n\n    # create schedule running function every 5 minutes.\n    schedule.every(5).minutes.do(main)\n    while True:\n        schedule.run_pending()\n    # main()\n    # print (\"done\")\n\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ffuturesea-dev%2Fmapping-task","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Ffuturesea-dev%2Fmapping-task","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ffuturesea-dev%2Fmapping-task/lists"}