https://github.com/datadavev/smcat
Sitemap cat
https://github.com/datadavev/smcat
Last synced: 8 months ago
JSON representation
Sitemap cat
- Host: GitHub
- URL: https://github.com/datadavev/smcat
- Owner: datadavev
- License: mit
- Created: 2021-09-01T15:03:45.000Z (almost 5 years ago)
- Default Branch: main
- Last Pushed: 2023-01-05T20:39:40.000Z (over 3 years ago)
- Last Synced: 2025-02-02T09:27:25.909Z (over 1 year ago)
- Language: Python
- Size: 138 KB
- Stars: 0
- Watchers: 2
- Forks: 0
- Open Issues: 0
Metadata Files:
- Readme: README.md
- License: LICENSE
Awesome Lists containing this project
README
# smcat
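Sitemap cat: a command-line tool that reads a sitemap and stores its contents in a database.

For example, to load a sitemap into a local SQLite file: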
```
smcat "https://www.bco-dmo.org/sitemap.xml" -d "sqlite:///bco-dmo.db"
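```

The resulting database can then be inspected with the `sqlite3` shell. Sitemap index records are in `sitemapindex`, and the individual URL records are in `sitemapentry`:

```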
sqlite> .schema
CREATE TABLE sitemapindex (
t_created DATETIME,
t_updated DATETIME,
lastmod DATETIME,
properties JSON,
loc VARCHAR NOT NULL,
source VARCHAR,
PRIMARY KEY (loc),
FOREIGN KEY(source) REFERENCES sitemapindex (loc)
);
CREATE TABLE sitemapentry (
t_created DATETIME,
t_updated DATETIME,
lastmod DATETIME,
properties JSON,
loc VARCHAR NOT NULL,
priority FLOAT,
source VARCHAR,
changefreq VARCHAR,
PRIMARY KEY (loc),
FOREIGN KEY(source) REFERENCES sitemapindex (loc)
);
sqlite> select lastmod, source, loc from sitemapindex;
2022-01-03 17:00:00.000000|https://www.bco-dmo.org/sitemap.xml|http://www.bco-dmo.org/sitemap.xml?page=1
2022-01-03 17:00:00.000000|https://www.bco-dmo.org/sitemap.xml|http://www.bco-dmo.org/sitemap.xml?page=2
sqlite> select lastmod, priority, loc from sitemapentry where source='http://www.bco-dmo.org/sitemap.xml?page=2' limit 5;
2012-11-05 17:06:00.000000||http://www.bco-dmo.org/award/54613
2016-08-20 03:10:00.000000|0.9|http://www.bco-dmo.org/dataset/546131
2012-11-05 17:06:00.000000||http://www.bco-dmo.org/award/54614
2012-11-05 17:06:00.000000||http://www.bco-dmo.org/award/54615
2016-08-20 03:10:00.000000|0.9|http://www.bco-dmo.org/dataset/546152
```