{"id":41396000,"url":"https://github.com/maryamteimouri/dataanalysis-and-knowledgediscovery","last_synced_at":"2026-01-23T12:53:54.699Z","repository":{"id":221863516,"uuid":"755576348","full_name":"maryamteimouri/DataAnalysis-and-KnowledgeDiscovery","owner":"maryamteimouri","description":"This project aims to practice the steps of Crisp Data Mining ( CRISP-DM ). The repository includes 3 phases, data understanding, supervised learning, and unsupervised learning.","archived":false,"fork":false,"pushed_at":"2024-02-11T10:39:07.000Z","size":493,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"main","last_synced_at":"2024-02-14T11:35:20.867Z","etag":null,"topics":["agglomerative-clustering","classification","clustering","crisp-dm","cross-validation","data-preprocessing","data-visualization","dendogram","k-means-clustering","one-hot-encode","pca","principal-component-analysis","z-score"],"latest_commit_sha":null,"homepage":"","language":"Jupyter 
Notebook","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"gpl-3.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/maryamteimouri.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null}},"created_at":"2024-02-10T16:26:44.000Z","updated_at":"2024-02-11T10:42:43.000Z","dependencies_parsed_at":"2024-02-10T17:43:29.673Z","dependency_job_id":null,"html_url":"https://github.com/maryamteimouri/DataAnalysis-and-KnowledgeDiscovery","commit_stats":null,"previous_names":["maryamteimouri/data-analysis-and-knowledge-discovery"],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/maryamteimouri/DataAnalysis-and-KnowledgeDiscovery","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/maryamteimouri%2FDataAnalysis-and-KnowledgeDiscovery","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/maryamteimouri%2FDataAnalysis-and-KnowledgeDiscovery/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/maryamteimouri%2FDataAnalysis-and-KnowledgeDiscovery/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/maryamteimouri%2FDataAnalysis-and-KnowledgeDiscovery/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/maryamteimouri","download_url":"https://codeload.github.com/maryamteimouri/DataAnalysis-and-KnowledgeDiscovery/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/maryamteimouri%2FDataAnalysis-and-KnowledgeDiscovery/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28692280,"icon_ur
l":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-01-23T11:01:27.039Z","status":"ssl_error","status_checked_at":"2026-01-23T11:00:26.909Z","response_time":59,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.6:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["agglomerative-clustering","classification","clustering","crisp-dm","cross-validation","data-preprocessing","data-visualization","dendogram","k-means-clustering","one-hot-encode","pca","principal-component-analysis","z-score"],"created_at":"2026-01-23T12:53:54.614Z","updated_at":"2026-01-23T12:53:54.679Z","avatar_url":"https://github.com/maryamteimouri.png","language":"Jupyter Notebook","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Data Analysis and Knowledge Discovery\nThis project aims to practice the steps of Crisp Data Mining ( CRISP-DM ).\nThe repository includes 3 phases, data understanding, supervised learning, and unsupervised learning.\n\n- In P1, data understanding, I practice looking at the data and **checking data quality** by plotting numeric and categorical features. Also, I apply some **preprocessing** methods like **min-max scaling to [0,1]**,  **standardizing the features to 0 mean and unit variance**, and **one-hot encoding**.\n\n- In P2, supervised learning, 3 **classification** methods are implemented; **K nearest neighbor (KNN), ridge regression, and KNN regression**. 
For **hyperparameter optimization**, I used **leave-one-out cross-validation**.\n\n- In P3, unsupervised learning, some preprocessing and data visualization methods are implemented; **z-score standardization**, **principal component analysis (PCA)**, and **dendrograms**. Moreover, two clustering methods are applied; **Agglomerative hierarchical** and **K-means clustering**.\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmaryamteimouri%2Fdataanalysis-and-knowledgediscovery","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmaryamteimouri%2Fdataanalysis-and-knowledgediscovery","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmaryamteimouri%2Fdataanalysis-and-knowledgediscovery/lists"}