{"id":14982330,"url":"https://github.com/obenner/data-engineering-interview-questions","last_synced_at":"2025-05-14T13:07:17.996Z","repository":{"id":46117228,"uuid":"394003533","full_name":"OBenner/data-engineering-interview-questions","owner":"OBenner","description":"More than 2000+ Data engineer interview questions.","archived":false,"fork":false,"pushed_at":"2025-01-26T15:28:29.000Z","size":960,"stargazers_count":1303,"open_issues_count":2,"forks_count":465,"subscribers_count":21,"default_branch":"master","last_synced_at":"2025-04-11T23:56:05.071Z","etag":null,"topics":["airflow","avro","aws","azure","cassandra","data-engineering","data-structures","flink","flume","hadoop","hadoop-hdfs","hbase","hive","impala","interview","interview-questions","kafka","nifi","spark","sql"],"latest_commit_sha":null,"homepage":"","language":null,"has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/OBenner.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2021-08-08T15:49:45.000Z","updated_at":"2025-04-11T20:14:06.000Z","dependencies_parsed_at":"2024-10-12T00:00:32.118Z","dependency_job_id":"3ff097c0-4c6f-498b-b200-e68a7aa78485","html_url":"https://github.com/OBenner/data-engineering-interview-questions","commit_stats":{"total_commits":17,"total_committers":6,"mean_commits":"2.8333333333333335","dds":"0.47058823529411764","last_synced_commit":"3f2f51c664e9da1b673e176a9ad9b945c6634b11"},"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OBenner%2Fdata-engineering-interview-questions","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OBenner%2Fdata-engineering-interview-questions/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OBenner%2Fdata-engineering-interview-questions/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OBenner%2Fdata-engineering-interview-questions/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/OBenner","download_url":"https://codeload.github.com/OBenner/data-engineering-interview-questions/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":254149958,"owners_count":22022851,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["airflow","avro","aws","azure","cassandra","data-engineering","data-structures","flink","flume","hadoop","hadoop-hdfs","hbase","hive","impala","interview","interview-questions","kafka","nifi","spark","sql"],"created_at":"2024-09-24T14:05:12.822Z","updated_at":"2025-05-14T13:07:12.986Z","avatar_url":"https://github.com/OBenner.png","language":null,"readme":"\u003ch1 align=\"center\"\u003eMore than 2000+ questions for preparing a Data Engineer interview.\u003c/h1\u003e\n\u003ch2 align=\"center\"\u003e\u003ca href=\"./content/full.md\"\u003eFull list of questions\u003c/a\u003e\u003c/h2\u003e\n\u003ch1 align=\"center\"\u003eInterview questions for Data Engineer\u003c/h1\u003e\n\u003cdiv\u003e\n\u003ctable\u003e\n  \u003ctr\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003eDatabases and Data Warehouses\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003eGitHub Repo\u003c/th\u003e\n    \u003cth\u003eOfficial page\u003c/th\u003e\n    \u003cth\u003eQuestions\u003c/th\u003e\n    \u003cth\u003eDescription\u003c/th\u003e\n    \u003cth\u003eUseful links\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/cassandra\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Cassandra\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://cassandra.apache.org/_/index.html\"\u003e\u003cimg  style=\"vertical-align:middle\" src=\"img/icon/cassandra.ico\" alt=\"Cassandra\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/cassandra.md\"\u003eApache Cassandra\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eCassandra is a distributed, wide-column store, NoSQL database management system.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/Anant/awesome-cassandra\"\u003eAwesome Cassandra\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/greenplum-db/gpdb\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Greenplum\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://greenplum.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/greenplum.ico\" alt=\"Greenplum\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/greenplum.md\"\u003eGreenplum\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eGreenplum is a big data technology based on MPP architecture and the Postgres open source database technology.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/kongyew/awesome-greenplum\"\u003eAwesome Greenplum\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/mongodb/mongo\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"MongoDB\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://www.mongodb.com/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/mongo.ico\" alt=\"MongoDB\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/mongo.md\"\u003eMongoDB\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eMongoDB is a document-oriented database.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/ramnes/awesome-mongodb\"\u003eAwesome MongoDB\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/hbase\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Hbase\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://hbase.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/hbase.ico\" alt=\"Hbase\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/hbase.md\"\u003eApache Hbase\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eHBase is an open-source non-relational distributed database.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/rayokota/awesome-hbase\"\u003eAwesome HBase\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/hive\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Hive\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://hive.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/hive.ico\" alt=\"Hive\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/hive.md\"\u003eApache Hive\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Hive is a data warehouse software project built on top of Apache Hadoop for providing data query and analysis.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/dharmeshkakadia/awesome-hive\"\u003eAwesome Hive\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"https://aws.amazon.com/dynamodb/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/dynamodb.ico\" alt=\"Amazon DynamoDB\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/dynamodb.md\"\u003eAmazon DynamoDB\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eAmazon DynamoDB is a fully managed proprietary NoSQL database service.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/alexdebrie/awesome-dynamodb\"\u003eAwesome DynamoDB\u003c/a\u003e\n        \u003ca href=\"https://github.com/donnemartin/awesome-aws\"\u003eAwesome AWS\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"https://aws.amazon.com/redshift\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/redshift.ico\" alt=\"Amazon Redshift\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/redshift.md\"\u003eAmazon Redshift\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eAmazon Redshift is a data warehouse product.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/awslabs/amazon-redshift-utils\"\u003eAmazon Redshift Utilities\u003c/a\u003e\n        \u003ca href=\"https://github.com/donnemartin/awesome-aws\"\u003eAwesome AWS\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"https://cloud.google.com/bigquery\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/bigquery.ico\" alt=\"BigQuery\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/bigquery.md\"\u003eBigQuery GCP\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eBigQuery is a fully-managed, serverless data warehouse.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/coty/awesome-bigquery\"\u003eAwesome BigQuery\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"https://cloud.google.com/bigtable\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/bigtable.ico\" alt=\"Bigtable\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/bigtable.md\"\u003eBigtable GCP\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eBigtable is a fully managed wide-column and key-value NoSQL database service.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/zrosenbauer/awesome-bigtable\"\u003eAwesome Bigtable\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003cth colspan=\"5\"\u003e\u003ca\u003e\u003c/a\u003e\u003c/th\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003eData Formats\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/avro\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Avro\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://avro.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/avro.ico\" alt=\"Avro\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/avro.md\"\u003eApache Avro\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eAvro is a row-oriented remote procedure call and data serialization framework.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/m0nhawk/awesome-avro\"\u003eAwesome Avro\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/parquet\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Parquet\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://parquet.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/parquet.ico\" alt=\"Parquet\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/parquet.md\"\u003eApache Parquet\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Parquet is a column-oriented data file format designed for efficient data storage and retrieval.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/delta-io\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Delta\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://delta.io/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/deltalake.ico\" alt=\"Delta\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/delta.md\"\u003eDelta\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eDelta Lake is a storage framework that enables building a Lakehouse architecture with compute engines\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/MrPowers/delta-examples\"\u003eDelta examples\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n \u003cth colspan=\"5\"\u003e\u003ca\u003e\u003c/a\u003e\u003c/th\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003eBig Data Frameworks\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/airflow\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Airflow\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://airflow.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/airflow.ico\" alt=\"Airflow\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/airflow.md\"\u003eApache Airflow\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Airflow is a workflow management platform for data engineering pipelines.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/jghoman/awesome-apache-airflow\"\u003eAwesome Airflow\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/flume\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Flume\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://flume.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/flume.ico\" alt=\"Flume\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/flume.md\"\u003eApache Flume\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Flume is a distributed, reliable, and available software for efficiently collecting, aggregating, and moving large amounts of log data.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/hadoop\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Hadoop\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://hadoop.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/hadoop.ico\" alt=\"Hadoop\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/hadoop.md\"\u003eApache Hadoop\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Hadoop is a collection of software utilities that facilitates using a network of many computers to solve problems involving massive amounts of data and computation.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/youngwookim/awesome-hadoop\"\u003eAwesome Hadoop\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/impala\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Impala\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://impala.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/impala.ico\" alt=\"Impala\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/impala.md\"\u003eApache Impala\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Impala is a parallel processing SQL query engine for data stored in a computer cluster running Apache Hadoop.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n  \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/kafka\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Kafka\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://kafka.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/kafka.ico\" alt=\"Kafka\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/kafka.md\"\u003eApache Kafka\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Kafka is a distributed event store and stream-processing platform.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/semantalytics/awesome-kafka\"\u003eAwesome Kafka\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/nifi\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"NiFi\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://nifi.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/nifi.ico\" alt=\"NiFi\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/nifi.md\"\u003eApache NiFi\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache NiFi is a software project designed to automate the flow of data between software systems.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/jfrazee/awesome-nifi\"\u003eAwesome NiFi\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/spark\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Spark\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://spark.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/spark.ico\" alt=\"Spark\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/spark.md\"\u003eApache Spark\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Spark is unified analytics engine for large-scale data processing.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/awesome-spark/awesome-spark\"\u003eAwesome Spark\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/flink\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Flink\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://flink.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/flink.ico\" alt=\"Flink\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/flink.md\"\u003eApache Flink\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eApache Flink is unified stream-processing and batch-processing framework.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/wuchong/awesome-flink\"\u003eAwesome Flink\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/kubernetes/kubernetes\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Kubernetes\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://kubernetes.io/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/kuber.ico\" alt=\"Kubernetes\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/kubernetes.md\"\u003eKubernetes\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e  Kubernetes is a system for managing containerized applications across multiple hosts.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/ramitsurana/awesome-kubernetes\"\u003eAwesome Kubernetes\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\u003cth colspan=\"5\"\u003e\u003ca\u003e\u003c/a\u003e\u003c/th\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003eCloud providers\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/aws\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"AWS\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://aws.amazon.com/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/aws.ico\" alt=\"AWS\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/aws.md\"\u003eAmazon Web Services\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eAmazon web service is an online platform that provides scalable and cost-effective cloud computing solutions.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/donnemartin/awesome-aws\"\u003eAwesome AWS\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/Azure\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Azure\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://azure.microsoft.com/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/azure.ico\" alt=\"Azure\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/azure.md\"\u003eMicrosoft Azure\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eMicrosoft Azure is Microsoft's public cloud computing platform.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/kristofferandreasen/awesome-azure\"\u003eAwesome Azure\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/GoogleCloudPlatform\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"GCP\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://cloud.google.com/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/gcp.ico\" alt=\"GCP\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/gcp.md\"\u003eGoogle Cloud Platform\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eGoogle Cloud Platform is a suite of cloud computing services.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/GoogleCloudPlatform/awesome-google-cloud\"\u003eAwesome GCP\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\u003cth colspan=\"5\"\u003e\u003ca\u003e\u003c/a\u003e\u003c/th\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003e\u003cb\u003eTheory\u003c/b\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"./content/dwha.md\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/dwha.ico\" alt=\"DWHA\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/dwha.md\"\u003eDWH Architectures\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eA data warehouse architecture is a method of defining the overall architecture of data communication processing and presentation that exist for end-clients computing within the enterprise.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/numetriclabz/awesome-db\"\u003eAwesome databases\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"./content/data-structure.md\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/datastruct.ico\" alt=\"Airflow\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/data-structure.md\"\u003eData Structures\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eA data structure is a specialized format for organizing, processing, retrieving and storing data. \u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"./content/sql.md\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/sql.ico\" alt=\"SQL\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/sql.md\"\u003eSQL\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eSQL is a domain-specific language used in programming and designed for managing data held in a relational database management system (RDBMS).\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/danhuss/awesome-sql\"\u003eAwesome SQL\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\u003cth colspan=\"5\"\u003e\u003ca\u003e\u003c/a\u003e\u003c/th\u003e\n  \u003ctr\u003e\n    \u003cth colspan=\"5\"\u003eData visualization tools/BI\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003ctr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"./content/tableau.md\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/tableau.ico\" alt=\"Tableau\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/tableau.md\"\u003eTableau\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eTableau is a powerful data visualization tool used in the Business Intelligence.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"./content/looker.md\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/looker.ico\" alt=\"Looker\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/looker.md\"\u003eLooker\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eLooker is an enterprise platform for BI, data applications, and embedded analytics that helps you explore and share insights in real time.\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n \u003ctr\u003e\n    \u003cth\u003e\u003ca href=\"https://github.com/apache/superset\"\u003e\u003cimg  style=\"vertical-align:middle\"  src=\"img/icon/github.ico\" alt=\"Kafka\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth colspan=\"2\"\u003e\u003ca href=\"https://superset.apache.org/\"\u003e\u003cimg style=\"vertical-align:middle\" src=\"img/icon/superset.ico\" alt=\"Apache Superset\"\u003e\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"./content/superset.md\"\u003eApache Superset\u003c/a\u003e\u003c/th\u003e\n    \u003cth\u003eSuperset is a modern data exploration and data visualization platform\u003c/th\u003e\n    \u003cth\u003e\u003ca href=\"TODO\"\u003eTODO\u003c/a\u003e\u003c/th\u003e\n  \u003c/tr\u003e\n\n\n\u003c/table\u003e\n\u003c/div\u003e\n\u003cdiv\u003e\n\u003ch2 align=\"center\"\u003e Contribution \u003c/h2\u003e\n\u003ch3\u003ePlease contribute to this repository to help it make better. Any change like new question, code improvement, doc improvement etc is very welcome.\u003c/h3\u003e\n\u003c/div\u003e","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fobenner%2Fdata-engineering-interview-questions","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fobenner%2Fdata-engineering-interview-questions","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fobenner%2Fdata-engineering-interview-questions/lists"}