Ecosyste.ms: Awesome
An open API service indexing awesome lists of open source software.
https://github.com/michbur/amyloid_ngram
https://github.com/michbur/amyloid_ngram
Last synced: 9 days ago
JSON representation
- Host: GitHub
- URL: https://github.com/michbur/amyloid_ngram
- Owner: michbur
- Created: 2015-05-08T11:29:35.000Z (over 9 years ago)
- Default Branch: master
- Last Pushed: 2015-11-09T12:04:50.000Z (about 9 years ago)
- Last Synced: 2023-08-14T06:55:55.093Z (over 1 year ago)
- Language: HTML
- Size: 103 MB
- Stars: 0
- Watchers: 2
- Forks: 0
- Open Issues: 0
-
Metadata Files:
- Readme: readme.Rmd
Awesome Lists containing this project
README
Readme
========================================================

Date: `r format(Sys.Date(), format="%B %d %Y")`
### List of files
```{r,echo=FALSE,results='asis',warning=FALSE}
library(xtable)#list of descriptions
# Registry of every described file in the repository. Each entry is keyed by
# a short identifier and stores the file name (`file_name`) together with the
# description (`descr`) that is printed in the README table.
known_files <- list(
  readme = list(
    file_name = "readme.Rmd",
    descr = "A template for automatically generated readme. Date and list of file included."
  ),
  parsed_readme = list(
    file_name = "readme.md",
    descr = "Parsed readme.Rmd"
  ),
  read_data = list(
    file_name = "read_data.R",
    descr = "Read data from .fasta files"
  ),
  amyloid_pos = list(
    file_name = "amyloid_pos.fasta",
    descr = "Positive (amyloid) sequences."
  ),
  amyloid_neg = list(
    file_name = "amyloid_neg.fasta",
    descr = "Negative (non-amyloid) sequences."
  ),
  analyze_ngrams_function = list(
    file_name = "analyze_ngrams_function.R",
    descr = "A function that extracts important n-grams and calculate their frequency."
  ),
  ngram_analysis = list(
    file_name = "ngram_analysis.R",
    descr = "Analysis of n-grams extracted from amyloids. The analysis was performed separately for several groups of peptides."
  ),
  report1 = list(
    file_name = "report1.Rmd",
    descr = "First report containing results from ngram_analysis.R"
  ),
  parsed_report1 = list(
    file_name = "report1.html",
    descr = "Parsed report1.Rmd"
  ),
  aa_encodings2 = list(
    file_name = "aa_encodings2.R",
    descr = "Amino acid encodings used in the report3. Uses AAIndex database."
  ),
  aa_encodings = list(
    file_name = "aa_encodings.R",
    descr = "Amino acid encodings used in the report2. Uses some older variant AAIndex database (?)."
  ),
  AA_index_mk2 = list(
    file_name = "AA_index_mk2.csv",
    descr = "Properties from AAIndex annotated by MK."
  ),
  aa_nprop = list(
    file_name = "aa_nprop.RData",
    descr = "Normalized values of properties from AAIndex."
  ),
  aa_properties = list(
    file_name = "aa_properties.R",
    descr = "Extract from seqinr package AAIndex data, annotate it partially and save."
  ),
  all_sizes = list(
    file_name = "all_sizes.csv",
    descr = "Created by ngram_analysis.R, contains important n-grams and their properties in amyloids and non-amyloids."
  ),
  amyloid_data = list(
    file_name = "amyloid_data.csv",
    descr = "Data set from Gasior, Kotulska 2015."
  ),
  amyloid_data_extraction = list(
    file_name = "amyloid_data_extraction.R",
    descr = "Extraction of data from amyloid_data.csv, some machine learning. Done for presentation.Rnw"
  ),
  amyloid_fold_res2 = list(
    file_name = "amyloid_fold_res2.RData",
    descr = "Results of cross-validation of the amyloid data done for report3."
  ),
  amyloid_fold_res = list(
    file_name = "amyloid_fold_res.RData",
    descr = "Results of cross-validation of the amyloid data done for report2."
  ),
  amyloid_present_bib = list(
    file_name = "amyloid_present.bib",
    descr = "Bibliography for amyloid presentation."
  ),
  amyloid_seqs = list(
    file_name = "amyloid_seqs.RData",
    descr = "Created by amyloid_data_extraction.R and used in presentation."
  ),
  aoc = list(
    file_name = "aoc.R",
    descr = "Areas of the competence for classifiers."
  ),
  aoc_data = list(
    file_name = "aoc.RData",
    descr = "Areas of the competence for classifiers - data for presentation."
  ),
  bigger6 = list(
    file_name = "bigger6.csv",
    descr = "Important n-grams extracted from sequences longer than 6."
  ),
  bigram_analysis = list(
    file_name = "bigram_analysis.R",
    descr = "Analysis of bigrams for presentation."
  ),
  bigram_analysis_data = list(
    file_name = "bigram_analysis.RData",
    descr = "Data from analysis of bigrams for presentation."
  ),
  casualRF = list(
    file_name = "casualRF.R",
    descr = "Random forests on amyloid sequences without encoding."
  )
)

# File names of all described files, named by their identifier in known_files.
# vapply() guarantees a character vector regardless of the list's content.
known_files_names <- vapply(
  known_files,
  function(i) i[["file_name"]],
  character(1)
)
# All files in the repository that are not listed in .gitignore.
# Entries of .gitignore starting with a dot are not used for the exclusion.
# A logical mask is used instead of negative indices: x[-grep(...)] yields an
# empty vector when grep() matches nothing, which would silently switch off
# the whole filter.
ignored_entries <- readLines(".gitignore")
ignored_entries <- ignored_entries[!grepl("^\\.", ignored_entries)]
repo_files <- sort(setdiff(list.files(), ignored_entries))

# Complain about files that lack a description and leave them out of the table.
is_described <- repo_files %in% known_files_names
if (!all(is_described)) {
  message(paste0("Michal, you slacker! File(s): ",
                 paste(repo_files[!is_described], collapse = ", "),
                 " do not have a proper description."))
  repo_files <- repo_files[is_described]
}

# For every file, collect the names of the files it loads through source().
# Only lines that start with source( are inspected and a single source() call
# per line is assumed. The result is one comma-separated string per file
# ("" when the file sources nothing).
sourced_files <- vapply(repo_files, function(single_file) {
  all_lines <- readLines(single_file)
  source_lines <- grep("^source\\(", all_lines, value = TRUE)
  # The file name is the first quoted string right after source( ; both quote
  # styles are accepted. Calls whose argument is not a string literal
  # (e.g. source(some_variable)) are skipped instead of raising an error.
  matches <- regmatches(
    source_lines,
    regexec("^source\\(\\s*([\"'])(.*?)\\1", source_lines, perl = TRUE)
  )
  has_name <- vapply(matches, length, integer(1)) == 3L
  file_names <- vapply(matches[has_name], function(m) m[[3]], character(1))
  paste0(file_names, collapse = ", ")
}, character(1), USE.NAMES = FALSE)

# Description of each file, looked up by file name in the registry.
descriptions <- vapply(
  repo_files,
  function(i) known_files[[match(i, known_files_names)]][["descr"]],
  character(1)
)

# Assemble the table and print it as HTML (the chunk uses results='asis').
files_table <- data.frame(repo_files, descriptions, sourced_files)
colnames(files_table) <- c("File name", "Description", "Sourced files")
print(xtable(files_table), type = "html", include.rownames = FALSE)
```

### Repository rules
1. Each file must have a description in readme using the R code in *readme.Rmd*.
2. Use comments frequently.