https://github.com/hrassi/pandas_starter
https://github.com/hrassi/pandas_starter
Last synced: over 1 year ago
JSON representation
- Host: GitHub
- URL: https://github.com/hrassi/pandas_starter
- Owner: hrassi
- Created: 2024-02-04T10:08:52.000Z (over 2 years ago)
- Default Branch: main
- Last Pushed: 2024-02-04T10:21:42.000Z (over 2 years ago)
- Last Synced: 2025-01-24T06:09:44.442Z (over 1 year ago)
- Language: Python
- Size: 7.81 KB
- Stars: 0
- Watchers: 1
- Forks: 0
- Open Issues: 0
-
Metadata Files:
- Readme: readme.txt
Awesome Lists containing this project
README
Python Pandas and SQLite3 :
pandas tutorial :
https://www.w3schools.com/python/pandas/default.asp
add-total-row-to-pandas-dataframe
filter-pandas-dataframe
import pandas as pd
import sqlite3
connection = sqlite3.connect("dental.db")
# to select all data from the table called patient :
df = pd.read_sql("select * from patient",connection)
print(df) # print(df.head(2)) or df.tail(2) for the first 2 or last 2 rows
# to select the row containing "Houssam Rassi" from the column called "Name" :
filtered_row = df[df["Name"] == "Houssam Rassi"]
print(filtered_row)
# to select the rows containing a part of "Houssam Rassi"
# (case insensitive) from the column called "Name" :
filtered_row = df.query("Name.str.contains ('houss',case = False)")
# to select a row by its index number:
filtered_row = df.filter(items = [2], axis=0) # or items=[1,3] for the 2 rows 1 and 3
or:
print(df.loc[2]) # use location 2 to print row 2
or:
print(df.loc[[1,2,3]]) # pass a list of labels to print rows 1, 2 and 3
# to remove the row nbr 3 from the data frame :
df = df.drop(3)
or :
df.drop(x, inplace = True) # where x is the index
# inplace=True modifies
# the same DataFrame directly
# to set the value 100 in the 'paid' column of the row with index 3
use df.loc :
df.loc[3, 'paid'] = 100
# from pandas to sqlite db file :
filtered_row.to_sql("Patient",con=connection,if_exists='replace')
connection.commit() # save
connection.close()
# to open an old excel file from an old access db (specify engine 'xlrd'; note: xlrd 2.0+ reads only .xls files, use engine='openpyxl' for .xlsx) :
pip install xlrd
df = pd.read_excel("patient.xlsx",engine='xlrd')
print(df)
# then to save the file :
df.to_csv("patientxlrd.csv")
# to read the saved csv file :
df = pd.read_csv("patientxlrd.csv")
# SQLITE3 CREATING EMPTY db file and POPULATING IT:
### CONNECTING TO DATABASE.db FILE OR CREATING (IF THE FILE DOES NOT EXIST WILL CREATE AN EMPTY DATABASE.db )
connection = sqlite3.connect("dentabase1.db")
### SAVING TABLE NAMED PATIENT TO THE FILE dentabase1.db
df.to_sql("Patient",con=connection,if_exists='replace')
connection.commit() # save
### LOADING ALL PATIENT TABLE FROM dentabase1.db TO THE DATAFRAME df
df = pd.read_sql("select * from patient",connection)
print (df)
connection.close()
PANDAS Functions With Examples:
**********************************************
import pandas as pd
# to read excel file in dataframe :
data=pd.read_excel("in_out_eval.xlsx")
data.to_csv("inouteval.csv",sep="\t")
print(data)
# to read xlsx excel file and save it to csv using xlrd engine (pip install xlrd):
df=pd.read_excel("patient.xlsx",engine='xlrd')
# to save dataframe df to .csv file :
df.to_csv("patientxlrd.csv")
print(df)
# load .csv file to dataframe df :
# the optional index_col removes
# the duplicate index column
df = pd.read_csv("patientxlrd.csv", index_col=[0])
print (df)
print(df.head()) # or head(2) for first 2
print(df.tail()) # or tail(1) for last one
# to print the type of dataframe
print(df.dtypes)
print(df['name'])
# to print a specific row example for index 4
print(df.loc[4])
# set the index to be the name and overwrite the df :
df=df.set_index('name')
# then print index Moubarak Eliana :
print(df.loc['Moubarak Eliana'])
# to print all available columns :
print(df.columns)
# to select only these columns :
df=df[['Unnamed: 0', 'name', 'tel', 'referby', 'address']]
print(df)
# to print the rows with index between 100 and 110
print (df.query('index > 100 and index < 110'))
# to select all rows containing 'eliana' (case insensitive) :
print (df.query("name.str.contains ('eliana',case = False)"))
# to change the data type of the name column to integer:
df['name']=df['name'].astype('int')
# put last row in a variable called:df_to_append
df_to_append=df.tail(1)
# add a new row to df with pd.concat, containing the data of the variable df_to_append:
df=pd.concat([df,df_to_append])
print (df)
# insert a new column named "Age" in position 3 (index 2) with the value hello in every row:
df.insert(2, "Age", "hello", True)
print (df)
# to get the number of rows, i.e. the next available row index to put data inside
last=(len(df.index))
print (last)
# to remove columns from the dataframe and leave only the first 3 columns :
df=df[['Unnamed: 0', 'name', 'tel']]
print (df)
# to add a row to the last available row in the dataframe name chakib....
df.loc[len(df.index)]=['5008','chakib','234234']
print (df)
#display max columns or rows, instead of none u can put the nbr of column
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
# to read a single cell using index 1 and column"name"
x=df.at[1,'name']
print(x)
# to write a single entry using index 1 and column"name"
df.at[1,'name']='tralla laa'
# save to file( the optional index=False is to remove the double index column):
df.to_csv("patientxlrd.csv", index=False)
print (df)
# read csv file and specify the data types
# (csv doesn't store data types when saving a file):
df=pd.read_csv('patientxlrd.csv',
dtype={'name' : 'category',
'tel' : 'int64',
# ... other columns
} )