{"id":22862487,"url":"https://github.com/tomi77/learninghaskelldataanalysis","last_synced_at":"2025-10-28T00:02:45.763Z","repository":{"id":139070427,"uuid":"42388426","full_name":"tomi77/LearningHaskellDataAnalysis","owner":"tomi77","description":"Examples from \"Learning Haskell Data Analysis\"","archived":false,"fork":false,"pushed_at":"2015-10-04T19:10:53.000Z","size":1168,"stargazers_count":5,"open_issues_count":0,"forks_count":2,"subscribers_count":1,"default_branch":"master","last_synced_at":"2025-04-30T21:52:38.712Z","etag":null,"topics":["haskell","haskell-book","haskell-learning"],"latest_commit_sha":null,"homepage":null,"language":"Haskell","has_issues":false,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/tomi77.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2015-09-13T08:09:40.000Z","updated_at":"2020-06-14T17:46:36.000Z","dependencies_parsed_at":"2023-03-14T03:01:36.227Z","dependency_job_id":null,"html_url":"https://github.com/tomi77/LearningHaskellDataAnalysis","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/tomi77%2FLearningHaskellDataAnalysis","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/tomi77%2FLearningHaskellDataAnalysis/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/tomi77%2FLearningHaskellDataAnalysis/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/tomi77%2FLearningHaskellDataAnalysis/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/tomi77","download_url":"https://codeload.github.com/tomi77/LearningHaskellDataAnalysis/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":251789310,"owners_count":21644081,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["haskell","haskell-book","haskell-learning"],"created_at":"2024-12-13T10:13:40.759Z","updated_at":"2025-10-28T00:02:40.720Z","avatar_url":"https://github.com/tomi77.png","language":"Haskell","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Intro\n\n    cabal repl\n\n# Chapter 2\n\n## getColumnInCSV\n\nfirst example\n\n    csv \u003c- parseCSVFromFile \"all_week.csv\"\n    either (\\error -\u003e Left \"Problem Reading File\") (\\csv -\u003e getColumnInCSV csv \"mag\") csv\n\nsecond example\n\n    csv \u003c- parseCSVFromFile \"all_week.csv\"\n    either (\\error -\u003e Left \"Problem Reading File\") (\\csv -\u003e getColumnInCSV csv \"not a column\") csv\n\n## applyToColumnInCSV\n\nfirst example\n\n    csv \u003c- parseCSVFromFile \"all_week.csv\"\n    either (\\error -\u003e Left \"Problem Reading File\") (\\csv -\u003e applyToColumnInCSV (average . readColumn) csv \"mag\") csv\n\nsecond example\n\n    csv \u003c- parseCSVFromFile \"all_week.csv\"\n    either (\\error -\u003e Left \"Problem Reading File\") (\\csv -\u003e applyToColumnInCSV (average . readColumn) csv \"not a column\") csv\n\n## applyToColumnInCSVFile\n\n    applyToColumnInCSVFile (average . readColumn) \"all_week.csv\" \"mag\"\n    applyToColumnInCSVFile (maximum . readColumn) \"all_week.csv\" \"mag\"\n    applyToColumnInCSVFile (minimum . readColumn) \"all_week.csv\" \"mag\"\n\n## convertCSVFileToSQL\n\n    convertCSVFileToSQL \"all_week.csv\" \"earthquakes.sql\" \"oneWeek\" [\"time TEXT\", \"latitude REAL\", \"longitude REAL\", \"depth REAL\", \"mag REAL\", \"magType TEXT\", \"nst INTEGER\", \"gap REAL\", \"dmin REAL\", \"rms REAL\", \"net REAL\", \"id TEXT\", \"updated TEXT\", \"place TEXT\", \"type TEXT\"]\n    conn \u003c- connectSqlite3 \"earthquakes.sql\"\n    magnitudes \u003c- quickQuery' conn \"SELECT mag FROM oneWeek\" []\n    fromSql $ head $ head magnitudes :: Double\n    let magnitudesDouble = map(\\ record -\u003e fromSql $ head record :: Double) magnitudes\n    average magnitudesDouble\n\n# Chapter 3\n\n## countFieldsInEachRecord\n\n    csv \u003c- parseCSVFromFile \"poorFieldsCount.csv\"\n    either Left (\\ csv -\u003e Right $ countFieldsInEachRecord csv) csv\n\n## lineNumbersWithIncorrectCount\n\n    csv \u003c- parseCSVFromFile \"poorFieldsCount.csv\"\n    either Left (\\ csv -\u003e Right $ lineNumbersWithIncorrectCount csv) csv\n\nnext example\n\n    csv \u003c- parseCSVFromFile \"all_week.csv\"\n    either Left (\\ csv -\u003e Right $ lineNumbersWithIncorrectCount csv) csv\n\n## identifyMatchingFields\n\n    identifyMatchingFields (\\x -\u003e x =~ \"Journ\") [\"1\", \"Clark Kent\", \"Journalist\", \"Metropolis\"] [\"Id\", \"Name\", \"Profession\", \"Location\"] 0\n    identifyMatchingFields (\\x -\u003e x =~ \"Hero\") [\"1\", \"Clark Kent\", \"Journalist\", \"Metropolis\"] [\"Id\", \"Name\", \"Profession\", \"Location\"] 0\n    identifyMatchingFields (== \"Metropolis\") [\"1\", \"Clark Kent\", \"Journalist\", \"Metropolis\"] [\"Id\", \"Name\", \"Profession\", \"Location\"] 0\n\n## identifyInCSV\n\n    csv \u003c- parseCSVFromFile \"poordata.csv\"\n    either (\\error -\u003e Left \"CSV Problem\") (\\ csv -\u003e identifyInCSV (\\ x -\u003e x =~ \"PA\") csv \"Number\") csv\n\nmultiple fields\n\n    csv \u003c- parseCSVFromFile \"poordata.csv\"\n    either (\\error -\u003e Left \"\") (\\ csv -\u003e identifyInCSV (\\ x -\u003e x =~ \"male\") csv \"Number\") csv\n\n## identifyInCSVFile\n\n    identifyInCSVFile (\\ x -\u003e x =~ \"^$\") \"poordata.csv\" \"Number\"\n\nbetter version\n\n    identifyInCSVFile (\\ x -\u003e x =~ \"^\\\\s*$\") \"poordata.csv\" \"Number\"\n\n## identifyInCSVFileFromColumn\n\n    identifyInCSVFileFromColumn (\\ x -\u003e not (x =~ \"^[1-9][0-9]?/[1-9][0-9]?/[12][0-9][0-9][0-9]$\")) \"poordata.csv\" \"Number\" \"Birthday\"\n\nfixed data\n\n    identifyInCSVFileFromColumn (\\ x -\u003e not (x =~ \"^[1-9][0-9]?/[1-9][0-9]?/[12][0-9][0-9][0-9]$\")) \"poordataFixed.csv\" \"Number\" \"Birthday\"\n\n# Chapter 4\n\n    convertCSVFileToSQL \"aapl.csv\" \"aapl.sql\" \"aapl\" [\"date STRING\", \"open REAL\", \"high REAL\", \"low REAL\", \"close REAL\", \"volume REAL\", \"adjclose REAL\"]\n\n## pullStockClosingPrices\n\n    aapl \u003c- pullStockClosingPrices \"aapl.sql\" \"aapl\"\n    plot (PNG \"aapl.png\") $ Data2D [Title \"AAPL\"] [] $ aapl\n    plot (PNG \"aapl_line.png\") $ Data2D [Title \"AAPL\", Style Lines] [] $ aapl\n    plot (PNG \"aapl_oneyear.png\") $ Data2D [Title \"AAPL\", Style Lines] [] $ take 252 aapl\n\n## applyPercentChangeToData\n\n    aapl \u003c- pullStockClosingPrices \"aapl.sql\" \"aapl\"\n    let aapl252 = take 252 aapl\n    let aapl252pc = applyPercentChangeToData aapl252\n\ngoogle\n\n    convertCSVFileToSQL \"googl.csv\" \"googl.sql\" \"googl\" [\"date STRING\", \"open REAL\", \"high REAL\", \"low REAL\", \"close REAL\", \"volume REAL\", \"adjclose REAL\"]\n    googl \u003c- pullStockClosingPrices \"googl.sql\" \"googl\"\n    let googl252 = take 252 googl\n    let googl252pc = applyPercentChangeToData googl252\n\nmicrosoft\n\n    convertCSVFileToSQL \"msft.csv\" \"msft.sql\" \"msft\" [\"date STRING\", \"open REAL\", \"high REAL\", \"low REAL\", \"close REAL\", \"volume REAL\", \"adjclose REAL\"]\n    msft \u003c- pullStockClosingPrices \"msft.sql\" \"msft\"\n    let msft252 = take 252 msft\n    let msft252pc = applyPercentChangeToData msft252\n\nall data\n\n    plot (PNG \"aapl_googl_msft_pc.png\") [Data2D [Title \"AAPL - One Year, % Change\", Style Lines, Color Red] [] aapl252pc, Data2D [Title \"GOOGL - One Year, % Change\", Style Lines, Color Blue] [] googl252pc, Data2D [Title \"MSFT - One Year, % Change\", Style Lines, Color Green] [] msft252pc]\n\n## applyMovingAverageToData\n\n    aapl \u003c- pullStockClosingPrices \"aapl.sql\" \"aapl\"\n    let aapl252 = take 252 aapl\n    let aapl252pc = applyPercentChangeToData aapl252\n    let aapl252ma20 = applyMovingAverageToData aapl252pc 20\n    plot (PNG \"aapl_20dayma.png\") [Data2D [Title \"AAPL - One Year, % Change\", Style Lines, Color Red] [] aapl252pc, Data2D [Title \"AAPL 20-Day MA\", Style Lines, Color Black] [] aapl252ma20]\n\nearthquakes\n\n    convertCSVFileToSQL \"all_month.csv\" \"earthquakes.sql\" \"oneMonth\" [\"time TEXT\", \"latitude REAL\", \"longitude REAL\", \"depth REAL\", \"mag REAL\", \"magType TEXT\", \"nst INTEGER\", \"gap REAL\", \"dmin REAL\", \"rms REAL\", \"net REAL\", \"id TEXT\", \"updated TEXT\", \"place TEXT\", \"type TEXT\"]\n    coords \u003c- pullLatitudeLongitude \"earthquakes.sql\" \"oneMonth\"\n    plot (PNG \"earthquakes.png\") [Data2D [Title \"Earthquakes\", Color Red, Style Dots] [] coords]\n\n# Chapter 5\n\n## probabilityMassFunction\n\n    :l src/LearningHaskellDataAnalysis02 src/LearningHaskellDataAnalysis04 src/LearningHaskellDataAnalysis05\n    :m LearningHaskellDataAnalysis02 LearningHaskellDataAnalysis04 LearningHaskellDataAnalysis05\n    import Graphics.EasyPlot\n    plot (PNG \"coinflips.png\") $ Function2D [Title \"Coin Flip Probabilities\"] [Range 0 1000] (\\ k -\u003e probabilityMassFunction (floor k) 1000 0.5)\n\nperfect\n\n    probabilityMassFunction 500 1000 0.5\n\nsum\n\n    sum $ map (\\ k -\u003e probabilityMassFunction k 1000 0.5) [0..1000]\n\n99%\n\n    sum $ map (\\ k -\u003e probabilityMassFunction k 1000 0.5) [460..540]\n\nrandom\n\n    import System.Random\n    g \u003c- newStdGen\n    random g :: (Double, StdGen)\n\n3 random double\n\n    take 3 $ randoms g :: [Double]\n\n5 random integers from 0 to 100\n\n    take 5 $ randomRs (0, 100) g\n\nrandom coin flips\n\n    let coinflips = take 1000 $ randomRs (0, 1) g\n    sum coinflips\n\nbaseball\n\n    import LearningHaskellDataAnalysis02\n    convertCSVFileToSQL \"winloss2014.csv\" \"winloss.sql\" \"winloss\" [\"date TEXT\", \"awayteam TEXT\", \"hometeam TEXT\", \"awayscore INTEGER\", \"homescore INTEGER\"]\n    import LearningHaskellDataAnalysis04\n    queryDatabase \"winloss.sql\" \"SELECT SUM(awayscore), SUM(homescore) FROM winloss\"\n    runsAtHome \u003c- queryDatabase \"winloss.sql\" \"SELECT hometeam, SUM(homescore) FROM winloss GROUP BY hometeam ORDER BY hometeam\"\n    runsAway \u003c- queryDatabase \"winloss.sql\" \"SELECT awayteam, SUM(awayscore) FROM winloss GROUP BY awayteam ORDER BY awayteam\"\n    let runsHomeAway = zip (readDoubleColumn runsAtHome 1) (readDoubleColumn runsAway 1)\n    import Graphics.EasyPlot\n    plot (PNG \"HomeScoreAwayScore.png\") $ Data2D [Title \"Runs at Home (x axis) and RunsAway (y axis)\"] [] runsHomeAway\n    let runsHomeAwayDiff = map (\\ (a, b) -\u003e a - b) runsHomeAway\n    plot (PNG \"HomeScoreAwayScoreDiff.png\") $ Data2D [Title \"Difference in Runs at Home and Runs Away\"] [] $ zip [1..] runsHomeAwayDiff\n    average runsHomeAwayDiff\n    standardDeviation runsHomeAwayDiff\n    import Data.List\n    standardDeviation runsHomeAwayDiff / (sqrt $ genericLength runsHomeAwayDiff)\n    plot (PNG \"standardNormal.png\") $ Function2D [Title \"Standard Normal\"] [Range (-4) 4] (\\ x -\u003e exp(-(x*x)/2)/sqrt(2*pi))\n\n# Chapter 6\n\n    import LearningHaskellDataAnalysis04\n    import LearningHaskellDataAnalysis04\n    import LearningHaskellDataAnalysis05\n    import LearningHaskellDataAnalysis06\n    queryDatabase \"winloss.sql\" \"SELECT COUNT(*) FROM winloss\"\n    queryDatabase \"winloss.sql\" \"SELECT COUNT(*) FROM winloss WHERE awayscore == homescore\"\n    homeRecord \u003c- queryDatabase \"winloss.sql\" \"SELECT homeTeam, SUM(homescore \u003e awayscore), SUM(homescore), COUNT(*) FROM winloss GROUP BY homeTeam\"\n    awayRecord \u003c- queryDatabase \"winloss.sql\" \"SELECT awayTeam, SUM(awayscore \u003e homescore), SUM(awayscore), COUNT(*) FROM winloss GROUP BY awayTeam\"\n    let totalWins = zipWith (+) (readDoubleColumn homeRecord 1) (readDoubleColumn awayRecord 1)\n    let totalRuns = zipWith (+) (readDoubleColumn homeRecord 2) (readDoubleColumn awayRecord 2)\n    let totalGames = zipWith (+) (readDoubleColumn homeRecord 3) (readDoubleColumn awayRecord 3)\n    let winPrecentage = zipWith (/) totalWins totalGames\n    let runsPerGame = zipWith (/) totalRuns totalGames\n    any (\\ xi -\u003e abs((xi - average runsPerGame) / standardDeviation runsPerGame) \u003e 3) runsPerGame\n    any (\\ xi -\u003e abs((xi - average winPrecentage ) / standardDeviation winPrecentage ) \u003e 3) winPrecentage\n    import Graphics.EasyPlot\n    plot (PNG \"runs_and_wins.png\") $ Data2D [Title \"Runs per Game VS Win % in 2014\", Color Red] [] $ zip runsPerGame winPrecentage\n    pearsonR runsPerGame winPrecentage\n    pearsonRsqrd runsPerGame winPrecentage\n    let (gradient, intercept) = linearRegression runsPerGame winPrecentage\n    let winEstimate = map (\\ x -\u003e x * gradient + intercept) [3.3, 3.4 .. 4.7]\n    let regressionLine = zip [3.3, 3.4 .. 4.7] winEstimate\n    plot (PNG \"runs_and_wins_with_regression.png\") [Data2D [Title \"Runs per Game VS Win % in 2014\", Color Red] [] (zip runsPerGame winPrecentage), Data2D [Title \"Regression Line\", Style Lines, Color Blue] [] regressionLine]\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ftomi77%2Flearninghaskelldataanalysis","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Ftomi77%2Flearninghaskelldataanalysis","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ftomi77%2Flearninghaskelldataanalysis/lists"}