{"id":16651520,"url":"https://github.com/twardoch/pypolona","last_synced_at":"2025-10-05T09:50:17.576Z","repository":{"id":62582665,"uuid":"293609944","full_name":"twardoch/pypolona","owner":"twardoch","description":"Search the Polona.pl website of the Polish National Library and download all images from publications","archived":false,"fork":false,"pushed_at":"2025-07-18T14:04:18.000Z","size":388223,"stargazers_count":4,"open_issues_count":0,"forks_count":3,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-08-19T05:26:13.549Z","etag":null,"topics":["download","gooey","library","polona","python","search"],"latest_commit_sha":null,"homepage":"https://twardoch.github.io/pypolona/","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/twardoch.png","metadata":{"files":{"readme":"README.md","changelog":"CHANGELOG.md","contributing":null,"funding":".github/FUNDING.yml","license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null},"funding":{"github":null}},"created_at":"2020-09-07T19:02:20.000Z","updated_at":"2025-07-18T14:04:21.000Z","dependencies_parsed_at":"2025-07-18T16:20:26.038Z","dependency_job_id":null,"html_url":"https://github.com/twardoch/pypolona","commit_stats":null,"previous_names":[],"tags_count":2,"template":false,"template_full_name":null,"purl":"pkg:github/twardoch/pypolona","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/twardoch%2Fpypolona","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/twardoch%2Fpypolona/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/twardoch%2Fpypolona/releases","manifests_url":"https://rep
os.ecosyste.ms/api/v1/hosts/GitHub/repositories/twardoch%2Fpypolona/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/twardoch","download_url":"https://codeload.github.com/twardoch/pypolona/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/twardoch%2Fpypolona/sbom","scorecard":{"id":903566,"data":{"date":"2025-08-11","repo":{"name":"github.com/twardoch/pypolona","commit":"620865b2f0c5c644f4b68c9e25942d5388f5a07c"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.4,"checks":[{"name":"Maintained","score":10,"reason":"18 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 10","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Dangerous-Workflow","score":10,"reason":"no dangerous workflow patterns detected","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Code-Review","score":0,"reason":"Found 0/21 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are 
merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Token-Permissions","score":0,"reason":"detected GitHub workflow tokens with excessive permissions","details":["Warn: jobLevel 'contents' permission set to 'write': .github/workflows/ci.yml:267","Warn: no topLevel permission defined: .github/workflows/ci.yml:1"],"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security 
policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: MIT License: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Pinned-Dependencies","score":0,"reason":"dependency not pinned by hash detected -- score normalized to 0","details":["Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:19: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not 
pinned by hash: .github/workflows/ci.yml:24: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/ci.yml:52: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:82: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:87: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:216: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:234: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:239: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/ci.yml:258: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:270: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/ci.yml:273: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: third-party GitHubAction not pinned by hash: 
.github/workflows/ci.yml:278: update your workflow using https://app.stepsecurity.io/secureworkflow/twardoch/pypolona/ci.yml/master?enable=pin","Warn: pipCommand not pinned by hash: scripts/build.sh:20","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:35","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:36","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:99","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:100","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:250","Warn: pipCommand not pinned by hash: .github/workflows/ci.yml:251","Info:   0 out of   9 GitHub-owned GitHubAction dependencies pinned","Info:   0 out of   3 third-party GitHubAction dependencies pinned","Info:   0 out of   7 pipCommand dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 13 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code 
analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-24T16:32:55.566Z","repository_id":62582665,"created_at":"2025-08-24T16:32:55.566Z","updated_at":"2025-08-24T16:32:55.566Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":278437946,"owners_count":25986760,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-10-05T02:00:06.059Z","response_time":54,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["download","gooey","library","polona","python","search"],"created_at":"2024-10-12T09:25:34.060Z","updated_at":"2025-10-05T09:50:17.569Z","avatar_url":"https://github.com/twardoch.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# PyPolona: Your Gateway to Poland's Digital Heritage\n\n**PyPolona** is a versatile, free, and open-source application designed to help you explore, search, and download digital treasures from [Polona.pl](https://polona.pl/), the vast digital library of the National Library of Poland. 
Whether you prefer a graphical interface or a command-line tool, PyPolona offers a seamless experience for accessing Poland's rich cultural heritage.\n\n[Polona.pl](https://polona.pl/) hosts an extensive collection of digitized items, including books, magazines, journals, graphics, maps, musical scores, ephemera, and manuscripts, contributed by the National Library of Poland and numerous partner institutions.\n\n## Key Features\n\n*   **Comprehensive Search:** Effortlessly search the Polona.pl database using simple keywords, advanced queries, specific Polona URLs, or lists of document IDs.\n*   **Flexible Search Results:** View and save your search results in various formats:\n    *   A simple list of Polona document IDs.\n    *   Direct, clickable URLs to the items on Polona.pl.\n    *   Structured data files in YAML or JSON format for further processing.\n*   **High-Resolution Downloads:** Download high-quality images of documents. You can choose to:\n    *   Save all images from a document as individual JPEG files, organized into a dedicated subfolder. 
This subfolder will also include a YAML file with metadata for the document.\n    *   Combine all images from a document into a single, convenient PDF file, with metadata embedded directly into the PDF.\n*   **Searchable Text PDFs:** Where available, PyPolona can also download an additional, lower-resolution PDF version of a document that includes searchable text (OCR layer).\n*   **User-Friendly GUI:** An intuitive graphical interface powered by `ezgooey`, making it easy for all users to navigate and utilize PyPolona's features.\n*   **Powerful CLI:** A robust command-line interface (`ppolona`) for users who prefer automation, scripting, or a terminal-based workflow.\n*   **Cross-Platform:** Available as a standalone application for macOS and Windows, and as a Python package installable via pip.\n\n## Who is PyPolona For?\n\nPyPolona is an invaluable tool for:\n\n*   **Researchers and Academics:** Accessing primary source materials for scholarly work.\n*   **Historians:** Exploring historical documents, periodicals, and ephemera.\n*   **Students:** Gathering resources for projects and studies related to Polish culture, literature, and history.\n*   **Genealogists:** Searching for family records, old newspapers, and regional histories.\n*   **Librarians and Archivists:** Exploring digital collections and potentially aiding in local archiving efforts.\n*   **Anyone with an interest in Polish cultural heritage** and the vast resources available in digital archives.\n\n## Why Use PyPolona?\n\n*   **Programmatic Access:** Go beyond manual browsing with powerful search and download capabilities.\n*   **Bulk Operations:** Efficiently download multiple items or entire collections for offline use or further analysis.\n*   **Data Portability:** Save search results and metadata in standard formats for easy integration with other tools and workflows.\n*   **Local Archiving:** Create your own local collection of important documents from Polona.pl.\n*   **Accessibility:** 
Choose between an easy-to-use GUI and a flexible CLI to suit your workflow.\n\n## Installation\n\nYou can install PyPolona either as a standalone application or as a Python package.\n\n### Standalone Application\n\nPre-built versions are available for macOS and Windows, offering the easiest way to get started.\n\n*   **macOS (.dmg):**\n    1.  Download the latest DMG file: [pypolona-mac.dmg](https://github.com/twardoch/pypolona/raw/master/download/pypolona-mac.dmg)\n    2.  Open the downloaded `.dmg` file.\n    3.  Drag the `PyPolona.app` icon to your `/Applications` folder.\n    4.  **Important for first run:** Ctrl-click (or right-click) the `PyPolona.app` in your Applications folder, select \"Open\" from the menu, and then click \"Open\" in the dialog box. You only need to do this once. Subsequent launches can be done by double-clicking the app icon.\n\n*   **Windows (.zip containing installer):**\n    1.  Download the latest ZIP file: [pypolona-win.zip](https://github.com/twardoch/pypolona/raw/master/download/pypolona-win.zip)\n    2.  Unzip the downloaded file.\n    3.  Run the `setup_pypolona.exe` (or similarly named installer) and follow the on-screen instructions.\n\n### Python Package (via PyPI)\n\nIf you have Python 3.9 or newer installed, you can install PyPolona using pip.\n\n1.  **Ensure you have Python 3.9+:** You can check your Python version by opening a terminal or command prompt and typing `python --version` or `python3 --version`.\n2.  
**Install PyPolona:**\n    ```bash\n    pip install pypolona\n    ```\n    (You might need to use `python3 -m pip install pypolona` on some systems, especially if you have multiple Python versions installed.)\n\n## How to Use PyPolona (Graphical Interface - GUI)\n\nAfter installing, launch PyPolona:\n\n*   **Standalone App (macOS):** Double-click `PyPolona.app` in your `/Applications` folder.\n*   **Standalone App (Windows):** Find and run `PyPolona` from your Start Menu or Desktop shortcut.\n*   **Python Package:** Open your terminal or command prompt and run `ppolona` or `python3 -m pypolona`. (Note: The GUI is launched by default when running `ppolona` without CLI-specific arguments that would make it run in CLI mode immediately).\n\nThe GUI is organized into tabs for easy navigation.\n\n![Input tab with URL](https://raw.githubusercontent.com/twardoch/pypolona/master/docs/img/pypolona_url.png)\n\n### Input and Search Settings\n\nThe \"Input\" tab is where you define what you're looking for.\n\n![Input tab with search query](https://raw.githubusercontent.com/twardoch/pypolona/master/docs/img/pypolona_search.png)\n\n*   **Query Field:** This is the main field where you enter your search terms or Polona identifiers.\n    *   **Default (Polona URLs):** Paste one or more full Polona.pl item URLs, separated by spaces.\n    *   **Query Type (Choose One):**\n        *   **Search:** Select this to perform a keyword search (e.g., `adam mickiewicz`). 
Additional search options are in the \"Options\" tab.\n        *   **Advanced:** For complex queries using Polona's advanced search syntax (see [Polona API documentation](https://polona.pl/api/entities/) for syntax details).\n        *   **IDs:** Paste a list of space-separated Polona document IDs.\n\nThe \"Options\" tab allows you to refine your search:\n\n![Options Tab](https://raw.githubusercontent.com/twardoch/pypolona/master/docs/img/pypolona_options.png)\n\n*   **Languages:** Filter search results by language (e.g., `polski niemiecki angielski`). Use language names as found on the Polona.pl website.\n*   **Sort Search Results:** Order results by relevance (score), date, title, or creator, in ascending or descending order.\n*   **Output Search Results Format:** Choose how your search results are presented if you're not downloading:\n    *   `ids`: A space-separated list of Polona document IDs.\n    *   `urls`: A list of clickable URLs to the items on Polona.pl.\n    *   `yaml`: A structured YAML file containing details of the found items.\n    *   `json`: A structured JSON file.\n*   **Save Search Results to File:** Optionally, specify a file path to save the search results directly to a file. If not specified, results are printed in the GUI's output area.\n\n### Download Settings\n\nTo download documents, first check the **\"Download found docs\"** option in the \"Input\" tab.\n\n*   **Download JPEGs into Subfolders vs. Single PDF:**\n    *   **Enable \"Download JPEGs into subfolders\":** Each document will be saved as a collection of individual JPEG images within its own subfolder (named with year, title snippet, and ID). A YAML metadata file and any available text PDF (with `_text` suffix) will also be placed in this subfolder.\n    *   **Disable \"Download JPEGs into subfolders\" (default for PDF):** Each document will be compiled into a single PDF file (named with year, title snippet, and ID). Metadata is embedded within this PDF. 
Any available text PDF will be saved separately with a `_text` suffix.\n\nFurther download customization is available in the \"Options\" tab:\n\n*   **Save Downloaded Docs in this Folder:** Choose the parent directory where your downloaded files or subfolders will be saved. Defaults to a `polona` folder on your Desktop.\n*   **Download Max Pages Per Doc:** Set a limit on the number of pages to download for each document (0 means all pages). Useful for quick tests or sampling large documents.\n*   **Skip Downloading Searchable PDFs (Option: `-T`/`--no-text-pdf`):** By default, if Polona offers a searchable text PDF for an item, PyPolona downloads it. Check this option to skip these additional text PDFs.\n*   **Skip Existing Subfolders/PDFs (Option: `-O`/`--no-overwrite`):** If a file or folder for a document already exists in the download directory, PyPolona will skip re-downloading it if this option is checked. Otherwise, it will overwrite existing files.\n\n### Main Control Buttons\n\n![Result view showing URLs](https://raw.githubusercontent.com/twardoch/pypolona/master/docs/img/pypolona_result_urls.png)\n\n*   **Start:** Begins the search and/or download process based on your current settings.\n*   **Cancel/Close:** Exits the application.\n*   **Stop (during processing):** Interrupts the current search or download task.\n*   **Edit (after processing):** Returns to the settings tabs to modify your query or options for a new task.\n*   **Restart (after processing):** Runs the same search/download task again with the current settings.\n\n## How to Use PyPolona (Command-Line Interface - CLI)\n\nThe CLI (`ppolona`) offers the same functionality as the GUI but is operated through your terminal or command prompt.\n\n*   **If installed via pip:** Simply type `ppolona [options] query`\n*   **If using standalone macOS app:** The CLI executable is typically at `/Applications/PyPolona.app/Contents/MacOS/ppolona`.\n*   **If using standalone Windows app:** The installer 
usually adds the location of `ppolona.exe` to your system's PATH, or you may need to navigate to its installation directory.\n\nFor a full list of commands and options, use the help flag:\n\n```bash\nppolona -h\n```\n\nThis will display the following (version 1.6.2 shown as an example):\n\n```\nusage: ppolona [-h] [-S | -A | -I] [-D] [-i] [-l [language [language ...]]]\n               [-s {score desc,date desc,date asc,title asc,creator asc}]\n               [-f {ids,urls,yaml,json}] [-o results_file]\n               [-d download_folder] [-M num_pages] [-T] [-O] [-V]\n               query [query ...]\n\nPyPolona 1.6.2: Search in and download from Polona.pl. GUI: Help \u003e PyPolona Help. CLI: ppolona -h\n\nInput:\n  query                 query is a Polona.pl URL unless you choose search,\n                        advanced or ids\n  -S, --search          Query is search query, see Options\n  -A, --advanced        Query is advanced search query, see Documentation\n  -I, --ids             Query is space-separated IDs\n  -D, --download        Download found docs, see Options\n  -i, --images          Download JPEGs into subfolders instead of PDF\n\nOptions:\n  -l [language [language ...]], --lang [language [language ...]]\n                        Space-separated languages: polski angielski\n                        niemiecki...\n  -s {score desc,date desc,date asc,title asc,creator asc}, --sort {score desc,date desc,date asc,title asc,creator asc}\n                        Sort search results by score, date, title or creator\n                        (descending or ascending)\n  -f {ids,urls,yaml,json}, --format {ids,urls,yaml,json}\n                        Output search results in format\n  -o results_file, --output results_file\n                        Save search results to this file\n  -d download_folder, --download-dir download_folder\n                        Save downloaded docs in this folder\n  -M num_pages, --max-pages num_pages\n                        Download 
max pages per doc (0: all)\n  -T, --no-text-pdf     Skip downloading searchable PDFs\n  -O, --no-overwrite    Skip existing subfolders/PDFs\n  -V, --version         show program's version number and exit\n```\n\n**CLI Examples:**\n\n1.  **Search for \"warszawa\" and output results as URLs to the console:**\n    ```bash\n    ppolona --search warszawa --format urls\n    ```\n\n2.  **Download documents specified by Polona URLs as PDFs to a custom folder:**\n    ```bash\n    ppolona https://polona.pl/item/some-item,ID123/ https://polona.pl/item/another-item,ID456/ --download --download-dir ~/Documents/PolonaDownloads\n    ```\n\n3.  **Search for items by \"Henryk Sienkiewicz\" in Polish, sort by date descending, and download as JPEGs, max 10 pages per item:**\n    ```bash\n    ppolona --search \"Henryk Sienkiewicz\" --lang polski --sort \"date desc\" --download --images --max-pages 10\n    ```\n\n---\n\n## For Developers: Technical Deep Dive\n\nThis section provides technical details about PyPolona's architecture, codebase, and contribution guidelines.\n\n### How the Code Works: Architecture and Workflow\n\nPyPolona is built in Python and leverages several libraries to interact with Polona.pl and process data.\n\n**Main Components:**\n\n*   **`pypolona/__main__.py`:**\n    *   Serves as the primary entry point for both the GUI and CLI.\n    *   Uses `argparse` to define and parse command-line arguments. 
These definitions are also used by `ezgooey`.\n    *   Initializes `ezgooey` to generate the graphical user interface dynamically from the `argparse` configuration.\n    *   Instantiates and invokes the `Polona` class from `polona.py` with the parsed arguments to perform the requested actions.\n\n*   **`pypolona/polona.py` (The `Polona` Class):**\n    *   This is the heart of the application, containing all the core logic for interacting with the Polona.pl service and managing data.\n    *   **Query Handling:** Parses input queries, distinguishing between direct Polona URLs, search terms, advanced queries, and lists of document IDs.\n    *   **API Interaction:** Constructs requests to the official Polona.pl JSON API (primarily `https://polona.pl/api/entities/`). It handles pagination, filtering (e.g., by language), and sorting for search queries.\n    *   **Search Result Processing:** Parses JSON responses from the API to extract item metadata (titles, IDs, dates, creator information, etc.) 
and prepares them for output in various formats (IDs, URLs, YAML, JSON).\n    *   **Download Orchestration:** Manages the entire download process for documents.\n        *   Fetches detailed metadata for each item to get scan URLs and other relevant information like Dublin Core (DC) metadata or links to searchable text PDFs.\n        *   Handles the creation of output directories and filenames based on user options (JPEGs in subfolders or a single PDF).\n        *   Implements logic for the `--no-overwrite` option to skip already downloaded files.\n    *   **Image Downloading \u0026 PDF Creation:**\n        *   Downloads individual high-resolution JPEG images for each page of a document.\n        *   If PDF output is selected, it uses the `img2pdf` library to compile the downloaded JPEGs into a single PDF file.\n        *   Optionally downloads available searchable text PDFs.\n    *   **Metadata Embedding:** Utilizes `pikepdf` to embed rich metadata (title, author, date, source URL, keywords, etc., extracted from Polona's API and DC records) into the generated PDF files.\n    *   **XML Processing:** Uses `lxml` and `lxml2json` to parse Dublin Core XML metadata associated with items, enriching the information available for each document.\n\n*   **`ezgooey` Library:**\n    *   A key external dependency that PyPolona uses to automatically create the graphical user interface. `ezgooey` takes the `argparse.ArgumentParser` object defined in `__main__.py` and translates it into a user-friendly GUI, significantly simplifying GUI development.\n\n**Core Workflows:**\n\n1.  
**Search Workflow:**\n    *   User provides input (query terms, URLs, IDs, and options) via the GUI or CLI.\n    *   `__main__.py` parses these inputs using `argparse`.\n    *   An instance of the `Polona` class is created, configured with the parsed options.\n    *   If a search is requested (not direct IDs or URLs), the `Polona.search()` method is called.\n        *   It constructs the appropriate API request URL, including search terms, filters (like language), sorting parameters, and pagination details.\n        *   The request is sent to `https://polona.pl/api/entities/`.\n        *   The JSON response is parsed to extract a list of matching items and their basic metadata.\n    *   The extracted item IDs and metadata are then formatted according to the user's chosen output format (IDs, URLs, YAML, or JSON) and displayed or saved to a file.\n\n2.  **Download Workflow:**\n    *   Triggered if the \"Download found docs\" option is enabled, operating on a list of Polona item IDs (either from a search or directly provided).\n    *   For each item ID:\n        *   The `Polona.download_id()` method fetches detailed metadata for the item by calling the Polona API (e.g., `https://polona.pl/api/entities/{item_id}`).\n        *   Helper methods like `_process_hit()`, `_process_resources()`, and `_process_dc()` parse this detailed metadata to extract:\n            *   URLs for individual page scans (JPEGs).\n            *   URL for any available searchable text PDF.\n            *   Dublin Core metadata.\n        *   The `Polona.save_downloaded()` method orchestrates the actual saving:\n            *   Determines the output path (a subfolder for JPEGs or a filename for a combined PDF) based on user settings.\n            *   Checks `--no-overwrite` status to decide whether to skip or proceed.\n            *   If downloading JPEGs into subfolders, it also saves a YAML file containing the item's metadata within that subfolder.\n            *   Downloads each page's JPEG 
scan using `Polona.download_scan()`.\n            *   If PDF output is selected:\n                *   The downloaded JPEGs are collected in memory.\n                *   `img2pdf.convert()` is used to create the main image-based PDF.\n                *   `Polona.pdf_add_meta()` is then called to embed metadata into this newly created PDF using `pikepdf`.\n            *   If a searchable text PDF is available and not skipped by the user, `Polona.download_save_textpdf()` downloads it, and `Polona.pdf_add_meta()` is called to add metadata to this text PDF as well.\n\n### Key Libraries and Technologies\n\nPyPolona relies on several powerful Python libraries:\n\n*   **`requests`**: For making HTTP requests to the Polona.pl API.\n*   **`ezgooey`** (which wraps **`Gooey`**): For automatically generating the graphical user interface from `argparse` definitions.\n*   **`argparse`**: Standard Python library for parsing command-line arguments.\n*   **`img2pdf`**: For converting collections of JPEG images into a single PDF document without re-encoding the images.\n*   **`pikepdf`**: For reading, manipulating, and writing PDF files, primarily used here for embedding metadata.\n*   **`lxml`** and **`lxml2json`**: For parsing and converting XML data, specifically the Dublin Core metadata provided by Polona.\n*   **`python-dateutil`**: For robust parsing of date strings from the API.\n*   **`html2text`**: Used to convert HTML error messages from the API (if any) into more readable plain text.\n*   **`yaplon`** (providing **`oyaml`**): For generating YAML formatted output of search results.\n*   **`orderedattrdict`**: Provides dictionary-like objects that allow attribute-style access, used for convenient handling of API response data.\n*   **`colored`**: For adding color to terminal output (used by `ezgooey`'s logging).\n*   **`pywin32`**: Windows-specific functionalities (conditional dependency).\n\n### Project Structure\n\nThe repository is organized as follows:\n\n*   
`pypolona/`: Contains the main source code for the PyPolona package.\n    *   `__init__.py`: Package initializer, defines `__version__`.\n    *   `__main__.py`: Entry point for both CLI and GUI, handles argument parsing and GUI setup.\n    *   `polona.py`: Contains the `Polona` class with all core logic for API interaction, searching, and downloading.\n    *   `icons/`: Application icons.\n*   `app/`: Scripts and configuration files related to building standalone applications.\n    *   `dmgbuild_settings.py`: Configuration for `dmgbuild` to create the macOS DMG installer.\n    *   *(A `.spec` file for PyInstaller for Windows builds, and an Inno Setup script `.iss` are typically used, as mentioned in the old README, though not explicitly listed in `llms.txt`'s file structure for the snapshot provided).*\n*   `docs/`: Contains images used in documentation. (Future documentation files might also reside here).\n*   `download/`: Stores the distributable application packages (DMG, ZIP).\n*   `.github/workflows/`: Defines GitHub Actions for Continuous Integration (CI).\n    *   `ci.yml`: Configures linting, type checking, testing, and building on pushes/pulls.\n*   `pyproject.toml`: Project definition file for Hatch (build system). Specifies metadata, dependencies, scripts, and tool configurations (Ruff, Mypy, Pytest).\n*   `.gitignore`: Specifies intentionally untracked files that Git should ignore.\n*   `.pre-commit-config.yaml`: Configuration for pre-commit hooks to enforce code quality before committing.\n*   `LICENSE`: Contains the MIT License text.\n*   `README.md`: This file – comprehensive user and developer documentation.\n*   `CHANGELOG.md`: Tracks notable changes for each version.\n\n### Coding Conventions and Contribution Guidelines\n\nWe welcome contributions to PyPolona! 
Please follow these guidelines:\n\n*   **Code Style:**\n    *   Adhere to [PEP 8](https://www.python.org/dev/peps/pep-0008/) standards.\n    *   Code formatting is enforced by **Ruff** using the configuration in `pyproject.toml`. Key aspects include a line length of 88 characters and the use of double quotes for strings.\n    *   Run `ruff format .` and `ruff check --fix .` before committing.\n*   **Linting and Type Checking:**\n    *   **Ruff** is used for comprehensive linting (see `pyproject.toml [tool.ruff.lint]` for enabled rules).\n    *   **MyPy** is used for static type checking. Aim for complete and accurate type hinting for all new code. MyPy configuration is also in `pyproject.toml`.\n    *   **Pre-commit Hooks:** The project uses pre-commit hooks (configured in `.pre-commit-config.yaml`) to automatically run Ruff and MyPy on staged files. Please install and use pre-commit:\n        ```bash\n        pip install pre-commit\n        pre-commit install\n        ```\n*   **Testing:**\n    *   **Pytest** is the framework for automated tests.\n    *   Contributions, especially new features or bug fixes, should ideally include corresponding tests.\n    *   Tests are typically located in a `tests/` directory (though not explicitly present in the provided snapshot, it's standard practice).\n    *   Run tests with `pytest`.\n*   **Dependencies:**\n    *   Project dependencies are managed in `pyproject.toml` and handled by the [Hatch](https://hatch.pypa.io/latest/) build backend.\n    *   For development, install dependencies including optional `[dev]` ones: `pip install .[dev]`.\n*   **Commits and Branches:**\n    *   Write clear and descriptive commit messages. 
While not strictly enforced, [Conventional Commits](https://www.conventionalcommits.org/) are encouraged.\n    *   Develop features or fixes in separate branches created from the `main` (or `master`) branch.\n    *   Submit changes via Pull Requests to the repository's default branch (`master`).\n*   **Continuous Integration (CI):**\n    *   All pull requests and pushes to main branches are automatically checked by GitHub Actions as defined in `.github/workflows/ci.yml`. This includes linting, type checking, and running tests. Ensure your changes pass CI.\n*   **Issue Tracking:**\n    *   Use [GitHub Issues](https://github.com/twardoch/pypolona/issues) to report bugs, suggest features, or discuss changes.\n\n### Building from Source\n\nPyPolona uses [Hatch](https://hatch.pypa.io/latest/) as its build system.\n\n1.  **Prerequisites:**\n    *   Python 3.9+\n    *   Hatch: `pip install hatch`\n\n2.  **General Build Commands (run from the project root):**\n    *   To build source distribution (sdist) and wheel:\n        ```bash\n        hatch build\n        ```\n    *   To clean previous build artifacts:\n        ```bash\n        hatch build --clean\n        ```\n    *   Refer to `pyproject.toml [tool.hatch.scripts]` for other Hatch scripts like `check` or `publish`.\n\n3.  **Building Standalone Applications:**\n    *   **macOS (.dmg):**\n        *   The DMG is built using `dmgbuild`. The configuration is in `app/dmgbuild_settings.py`.\n        *   The process usually involves first creating a standalone `.app` bundle (e.g., with PyInstaller or potentially `hatch build` if configured for it) and then packaging it with `dmgbuild`.\n        *   The project's original `README.md` mentioned a `./macdeploy` script, which likely automates these steps.\n    *   **Windows (Installer):**\n        *   The process typically involves:\n            1.  Creating a standalone executable using **PyInstaller**. 
A `.spec` file (e.g., `app/pyinstaller-win.spec`, though not in the provided `llms.txt` snapshot) usually configures this.\n            2.  Packaging the executable and other necessary files into an installer using a tool like **Inno Setup** (configured via an `.iss` script, e.g., `app/pypolona.iss`).\n        *   The project's original `README.md` provides command snippets for these steps which can be adapted.\n\n### More About Polona.pl\n\n*   [Polona.pl](https://polona.pl/) — The main Polona website.\n*   [Polona/API](https://polona.pl/api/entities/) — The JSON API that PyPolona primarily uses.\n*   [Polona/blog](http://www.blog.polona.pl/) — The official blog (Polish).\n*   [Polona/typo](http://typo.polona.pl/en/) — A creative mini-site allowing users to typeset words using letters from random digitized publications.\n\n### License\n\nPyPolona is licensed under the **MIT License**. See the [LICENSE](./LICENSE) file for the full text.\nCopyright (c) 2020 Adam Twardoch.\n\nThis project is not affiliated with and not endorsed by Polona.pl or the National Library of Poland.\n\n\u003c!-- GitHub buttons script, as in original README --\u003e\n\u003cscript async defer src=\"https://buttons.github.io/buttons.js\"\u003e\u003c/script\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ftwardoch%2Fpypolona","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Ftwardoch%2Fpypolona","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ftwardoch%2Fpypolona/lists"}