# Start-of-run banner with the current date.
printf 'INICIO Current date %s\n' "$(date)"
## INICIO Current date Thu 22 Dec 2022 11:16:45 PM -03
# Paths of the three input graph files, exported for the commands below.
GRAPH_ALIAS=/app/kgtk/data/wikidata/alias.en.tsv.gz
GRAPH_QUALS=/app/kgtk/data/wikidata/qualifiers.tsv.gz
GRAPH_CLAIMS=/app/kgtk/data/wikidata/claims.tsv.gz
export GRAPH_ALIAS GRAPH_QUALS GRAPH_CLAIMS
### claims graph ###
# Count the edges per predicate (label column) of the full claims graph.
printf "Contar Predicados do conjunto completo - grafo claims %s\n" "$(date)"
## Contar Predicados do conjunto completo - grafo claims Thu 22 Dec 2022 11:16:45 PM -03
## Starting 'sort' on pid 276429.
# The `unique` call below is run with --presorted, so sort on label first.
kgtk --debug sort -i "$GRAPH_CLAIMS" -c label \
  --output-file /app/kgtk/data/my-tsv/claims-sorted.tsv.gz
export GRAPH_SORT=/app/kgtk/data/my-tsv/claims-sorted.tsv.gz
## Starting 'unique' on pid 276539.
kgtk --debug unique -i "$GRAPH_SORT" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/claims-pred-counted.tsv
export GRAPH_PRED=/app/kgtk/data/my-tsv/claims-pred-counted.tsv
## Start 2022-12-22 11:16:45 - End 2022-12-23 01:31:16
## The GRAPH_PRED result is a file with pred_label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 20.48 KB free: 0 Bytes modified: 2022-12-23 01:31:16
KGTK File Information:
Graph Table Information:
## Arquivo sqlite vazio
# For the full claims graph, count how often each qualifier label occurs on
# each claim predicate label. Qualifier edges are matched to claims through
# the claim edge itself (p1 is node1 of the qualifier edge).
printf "Contar Predicados_Qualificadores do conjunto completo - grafo claims %s\n" "$(date)"
## Contar Predicados_Qualificadores do conjunto completo - grafo claims Fri 23 Dec 2022 01:31:16 AM -03
## Starting 'query' on pid 276593.
# --limit 100 caps the result at 100 (predicate, qualifier) rows.
kgtk --debug query -i "$GRAPH_CLAIMS" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(), q: (p1)-[q1]->()' \
  --order-by 'p1.label, q1.label' \
  --return 'p1.label as node1, "quali" as label, q1.label as node2, count(q1.label) as `node2;count_q`' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv
## [2022-12-23 01:31:17 sqlstore]: IMPORT graph directly into table graph_1 from /app/kgtk/data/wikidata/claims.tsv.gz ...
## [2022-12-23 03:38:09 sqlstore]: IMPORT graph directly into table graph_2 from /app/kgtk/data/wikidata/qualifiers.tsv.gz ...
## [2022-12-23 05:17:35 query]: SQL Translation:
---------------------------------------------
SELECT graph_1_c1."label" "_aLias.node1", ? "_aLias.label", graph_2_c2."label" "_aLias.node2", count(graph_2_c2."label") "_aLias.node2;count_q"
FROM graph_1 AS graph_1_c1
INNER JOIN graph_2 AS graph_2_c2
ON graph_1_c1."id" = graph_2_c2."node1"
GROUP BY "_aLias.node1", "_aLias.label", "_aLias.node2"
ORDER BY graph_1_c1."label" ASC, graph_2_c2."label" ASC
LIMIT ?
PARAS: ['quali', 100]
---------------------------------------------
## Inicio 2022-12-23 01:31:17 - Fim 2022-12-23 05:43:27
# Output of the predicate/qualifier count query, kept for later joins.
GRAPH_PQUALS=/app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv
export GRAPH_PQUALS
## The GRAPH_PQUALS result is a file with pred_label | "quali" | quali_label as node2 | quantity as `node2;count_q`
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 69.98 GB free: 0 Bytes modified: 2022-12-23 05:43:18
KGTK File Information:
c:
size: 11.53 GB modified: 2022-10-29 23:58:12 graph: graph_1
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
Graph Table Information:
graph_1:
size: 52.15 GB created: 2022-12-23 03:38:09
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
## BD sqlite com 70G depois de carregar GRAPH_CLAIMS(1) e GRAPH_QUALS (2)
# Select potentially controversial claims: pairs of claim edges on the same
# item with the same predicate label and different values (value1 < value2),
# skipping the external-id and wikibase-property datatypes.
printf "Selecionar claims potencialmente controversos do conjunto completo e gerar grafo filtrado %s\n" "$(date)"
## Selecionar claims potencialmente controversos do conjunto completo e gerar grafo filtrado Fri 23 Dec 2022 05:43:26 AM -03
## Starting 'query' on pid 276799.
# NOTE(review): --multi 2 together with the 8 return columns presumably emits
# each matched pair as two 4-column edges; confirm against the kgtk query docs.
kgtk --debug query -i "$GRAPH_CLAIMS" --as c --index none --multi 2 \
  --match 'c: (item)-[p1]->(value1 {wikidatatype: dt}), (item)-[p2]->(value2)' \
  --where 'value1 < value2 and p1.label = p2.label and dt != "external-id" and dt != "wikibase-property"' \
  --return 'distinct p1, item, p1.label, value1, p2, item, p2.label, value2' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims.tsv.gz
## Não foi preciso carregar novamente GRAPH_CLAIMS(1)
## [2022-12-23 05:43:27 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_1_c1."id", graph_1_c1."node1", graph_1_c1."label", graph_1_c1."node2", graph_1_c2."id", graph_1_c1."node1", graph_1_c2."label", graph_1_c2."node2"
FROM graph_1 AS graph_1_c1
INNER JOIN graph_1 AS graph_1_c2
ON graph_1_c1."node1" = graph_1_c2."node1"
AND graph_1_c1."node2;wikidatatype" = graph_1_c1."node2;wikidatatype"
AND ((graph_1_c1."node2" < graph_1_c2."node2") AND ((graph_1_c1."label" = graph_1_c2."label") AND ((graph_1_c1."node2;wikidatatype" != ?) AND (graph_1_c1."node2;wikidatatype" != ?))))
LIMIT ?
PARAS: ['external-id', 'wikibase-property', 100]
---------------------------------------------
## Inicio 2022-12-23 05:43:27 - Fim 2022-12-23 06:51:33
export GRAPH_F1=/app/kgtk/data/my-tsv/filtered-claims.tsv.gz
### the base graph is GRAPH_F1=/app/kgtk/data/my-tsv/filtered-claims.tsv.gz ###
# Count the edges per predicate (label column) of the filtered base graph.
printf "Contar Predicados do conjunto controversos - grafo base %s\n" "$(date)"
## Contar Predicados do conjunto controversos - grafo base Fri 23 Dec 2022 06:51:33 AM -03
## Starting 'sort' on pid 277322.
# The `unique` call below is run with --presorted, so sort on label first.
kgtk --debug sort -i "$GRAPH_F1" -c label \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-sorted.tsv.gz
# GRAPH_SORT is re-pointed from the full sorted claims to the filtered ones.
export GRAPH_SORT=/app/kgtk/data/my-tsv/filtered-claims-sorted.tsv.gz
## Starting 'unique' on pid 277346.
kgtk --debug unique -i "$GRAPH_SORT" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-pred-counted.tsv
## The GRAPH_XXX result is a file with label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
## Nada mudou em relação ao último
# Keep only the base-graph claims that have NO qualifier edge: optionally
# match a qualifier on the claim edge and keep the rows where none was found.
printf "Separar claims SEM qualificadores do conjunto controversos - grafo base-withoutquals %s\n" "$(date)"
## Separar claims SEM qualificadores do conjunto controversos - grafo base-withoutquals Fri 23 Dec 2022 06:51:38 AM -03
## Starting 'query' on pid 277363.
# `--where:` (trailing colon) is intentional and kept as run: the recorded SQL
# translation applies `IS NULL` as a WHERE after the LEFT JOIN.
# Reusing the alias `c` for a different file replaces the graph previously
# cached under that alias (the recorded log shows graph_1 being dropped).
kgtk --debug query -i "$GRAPH_F1" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(value1)' \
  --opt 'q: (p1)-[]->(q1)' \
  --where: 'q1 is null' \
  --order-by 'p1.label, item' \
  --return 'distinct p1, item, p1.label, value1' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz
## [2022-12-23 06:51:39 sqlstore]: IMPORT graph directly into table graph_3 from /app/kgtk/data/my-tsv/filtered-claims.tsv.gz ...
## [2022-12-23 06:51:39 sqlstore]: DROP graph data table graph_1
## Why was this table (graph_1) dropped? Because it is not used in this query, or because the metadata is not filled in? Neither: it was dropped because I reused the alias "c" for a different file.
# [2022-12-23 09:18:12 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_3_c1."id", graph_3_c1."node1", graph_3_c1."label", graph_3_c1."node2"
FROM graph_3 AS graph_3_c1
LEFT JOIN graph_2 AS graph_2_c3
ON graph_3_c1."id" = graph_2_c3."node1"
WHERE (graph_2_c3."node2" IS NULL)
ORDER BY graph_3_c1."label" ASC, graph_3_c1."node1" ASC
LIMIT ?
PARAS: [100]
---------------------------------------------
## Início 2022-12-23 06:51:39 - Fim 2022-12-23 09:23:32
export GRAPH_F11=/app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz
# Count the edges per predicate among the claims without qualifiers. The input
# was written with --order-by 'p1.label, item', hence --presorted.
printf "Contar Predicados do conjunto controversos SEM qualificadores - grafo base-withoutquals %s\n" "$(date)"
## Contar Predicados do conjunto controversos SEM qualificadores - grafo base-withoutquals Fri 23 Dec 2022 09:23:32 AM -03
## Starting 'unique' on pid 277433.
kgtk --debug unique -i "$GRAPH_F11" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-without-quals-counted.tsv
## The GRAPH_YYYY result is a file with label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 17.83 GB free: 52.15 GB modified: 2022-12-23 09:18:04
KGTK File Information:
c:
size: 1.22 KB modified: 2022-12-23 06:51:31 graph: graph_3
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_3:
size: 16.38 KB created: 2022-12-23 06:51:39
header: ['id', 'node1', 'label', 'node2']
## BD sqlite com 18G GRAPH_QUALS e o grafo base (filtrado)
# Attach aliases and the full-graph predicate counts to the per-predicate
# counts of the controversial claims WITHOUT qualifiers.
printf "Agregar álias de Predicados ao conjunto controversos SEM qualificadores - grafo base-withoutquals %s\n" "$(date)"
## Agregar álias de Predicados ao conjunto controversos SEM qualificadores - grafo base-withoutquals Fri 23 Dec 2022 09:23:36 AM -03
## Starting 'query' on pid 277450.
# Graph f has to be the per-predicate count file written by the preceding
# `unique` step, so that `item` joins with the predicate ids of $GRAPH_PRED
# and count_1 really is a count. The recorded run bound f to $GRAPH_F11 (the
# claim edges themselves), which compares item ids with predicate ids and
# casts claim values to integer; the log that follows comes from that run.
export GRAPH_F11_COUNTED=/app/kgtk/data/my-tsv/filtered-claims-without-quals-counted.tsv
kgtk --debug query -i "$GRAPH_F11_COUNTED" --as f -i "$GRAPH_PRED" --as c -i "$GRAPH_ALIAS" \
  --match 'f: (item)-[]->(count_1), c: (item)-[]->(count_2), a: (item)-[]->(item_name)' \
  --return 'item, GROUP_CONCAT(item_name), cast(count_1, integer), cast(count_2, integer)' \
  --limit 100 \
  -o /app/kgtk/data/aux-tsv/filtered-claims-without-quals-counted-alias.tsv
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_4 from /app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz ...
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_5 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
## [2022-12-23 09:23:37 sqlstore]: DROP graph data table graph_3
## Drop pq reusei o ALIAS "C"
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_6 from /app/kgtk/data/wikidata/alias.en.tsv.gz ...
## [2022-12-23 09:24:29 query]: SQL Translation:
---------------------------------------------
SELECT graph_4_c1."node1", group_concat(graph_6_c3."node2"), cast(graph_4_c1."node2" AS integer), cast(graph_5_c2."node2" AS integer)
FROM graph_4 AS graph_4_c1
INNER JOIN graph_5 AS graph_5_c2, graph_6 AS graph_6_c3
ON graph_4_c1."node1" = graph_5_c2."node1"
AND graph_4_c1."node1" = graph_6_c3."node1"
GROUP BY graph_4_c1."node1"
LIMIT ?
PARAS: [100]
---------------------------------------------
## [2022-12-23 09:24:29 sqlstore]: CREATE INDEX "graph_6_node1_idx" ON "graph_6" ("node1")
## [2022-12-23 09:24:38 sqlstore]: ANALYZE "graph_6_node1_idx"
## [2022-12-23 09:24:44 sqlstore]: CREATE INDEX "graph_4_node1_idx" ON "graph_4" ("node1")
## [2022-12-23 09:24:44 sqlstore]: ANALYZE "graph_4_node1_idx"
## [2022-12-23 09:24:44 sqlstore]: CREATE INDEX "graph_5_node1_idx" ON "graph_5" ("node1")
## [2022-12-23 09:24:44 sqlstore]: ANALYZE "graph_5_node1_idx"
# Show what the graph cache holds after the alias join above.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:24:44
KGTK File Information:
c:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_5
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 383 Bytes modified: 2022-12-23 09:23:30 graph: graph_4
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_4:
size: 8.19 KB created: 2022-12-23 09:23:37
header: ['id', 'node1', 'label', 'node2']
graph_5:
size: 352.26 KB created: 2022-12-23 09:23:37
header: ['node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
## BD sqlite com 19G
# Keep only the base-graph claims that HAVE at least one qualifier edge
# (a qualifier edge whose node1 is the claim edge p1).
printf "Separar claims COM qualificadores do conjunto controversos - grafo base-withquals %s\n" "$(date)"
## Separar claims COM qualificadores do conjunto controversos - grafo base-withquals Fri 23 Dec 2022 09:24:45 AM -03
## Starting 'query' on pid 277517.
kgtk --debug query -i "$GRAPH_F1" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(value1), q: (p1)-[]->()' \
  --order-by 'p1.label, item' \
  --return 'distinct p1, item, p1.label, value1' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz
## [2022-12-23 09:24:46 sqlstore]: IMPORT graph directly into table graph_7 from /app/kgtk/data/my-tsv/filtered-claims.tsv.gz ...
## [2022-12-23 09:24:46 sqlstore]: DROP graph data table graph_5
## Drop novamente por reuso do alias "C"
[2022-12-23 09:24:47 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_7_c1."id", graph_7_c1."node1", graph_7_c1."label", graph_7_c1."node2"
FROM graph_2 AS graph_2_c2
INNER JOIN graph_7 AS graph_7_c1
ON graph_7_c1."id" = graph_2_c2."node1"
ORDER BY graph_7_c1."label" ASC, graph_7_c1."node1" ASC
LIMIT ?
PARAS: [100]
---------------------------------------------
export GRAPH_F12=/app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz
# Count the edges per predicate among the claims with qualifiers. The input
# was written with --order-by 'p1.label, item', hence --presorted.
printf "Contar Predicados do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Contar Predicados do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:39 AM -03
## Starting 'unique' on pid 277573.
kgtk --debug unique -i "$GRAPH_F12" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-with-quals-counted.tsv
## The GRAPH_ZZZZ result is a file with label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
## Nada mudou em relação ao último
# Attach aliases and the full-graph predicate counts to the per-predicate
# counts of the controversial claims WITH qualifiers.
printf "Agregar álias de Predicados ao conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Agregar álias de Predicados ao conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:41 AM -03
## Starting 'query' on pid 277590.
# Graph f has to be the per-predicate count file written by the preceding
# `unique` step, so that `item` joins with the predicate ids of $GRAPH_PRED
# and count_1 really is a count. The recorded run bound f to $GRAPH_F12 (the
# claim edges themselves), which compares item ids with predicate ids and
# casts claim values to integer; the log that follows comes from that run.
export GRAPH_F12_COUNTED=/app/kgtk/data/my-tsv/filtered-claims-with-quals-counted.tsv
kgtk --debug query -i "$GRAPH_F12_COUNTED" --as f -i "$GRAPH_PRED" --as c -i "$GRAPH_ALIAS" \
  --match 'f: (item)-[]->(count_1), c: (item)-[]->(count_2), a: (item)-[]->(item_name)' \
  --return 'item, GROUP_CONCAT(item_name), cast(count_1, integer), cast(count_2, integer)' \
  --limit 100 \
  -o /app/kgtk/data/aux-tsv/filtered-claims-with-quals-counted-alias.tsv
## [2022-12-23 09:27:42 sqlstore]: IMPORT graph directly into table graph_5 from /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz ...
## [2022-12-23 09:27:42 sqlstore]: DROP graph data table graph_4
## [2022-12-23 09:27:42 sqlstore]: IMPORT graph directly into table graph_8 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
## [2022-12-23 09:27:42 sqlstore]: DROP graph data table graph_7
## [2022-12-23 09:27:42 query]: SQL Translation:
---------------------------------------------
SELECT graph_5_c1."node1", group_concat(graph_6_c3."node2"), cast(graph_5_c1."node2" AS integer), cast(graph_8_c2."node2" AS integer)
FROM graph_5 AS graph_5_c1
INNER JOIN graph_6 AS graph_6_c3, graph_8 AS graph_8_c2
ON graph_5_c1."node1" = graph_6_c3."node1"
AND graph_5_c1."node1" = graph_8_c2."node1"
GROUP BY graph_5_c1."node1"
LIMIT ?
PARAS: [100]
---------------------------------------------
## [2022-12-23 09:27:42 sqlstore]: CREATE INDEX "graph_8_node1_idx" ON "graph_8" ("node1")
## [2022-12-23 09:27:42 sqlstore]: ANALYZE "graph_8_node1_idx"
## [2022-12-23 09:27:42 sqlstore]: CREATE INDEX "graph_5_node1_idx" ON "graph_5" ("node1")
## [2022-12-23 09:27:42 sqlstore]: ANALYZE "graph_5_node1_idx"
# Show what the graph cache holds after the alias join above.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:27:42
KGTK File Information:
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
c:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_8
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_8:
size: 352.26 KB created: 2022-12-23 09:27:42
header: ['node1', 'label', 'node2']
## BD sqlite com 19G
# For the controversial claims with qualifiers, count how often each qualifier
# label occurs on each claim predicate label (joined through the claim edge p1).
printf "Contar Predicados_Qualificadores do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Contar Predicados_Qualificadores do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:44 AM -03
## Starting 'query' on pid 277639.
kgtk --debug query -i "$GRAPH_F12" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(), q: (p1)-[q1]->()' \
  --order-by 'p1.label, q1.label' \
  --return 'p1.label as node1, "quali" as label, q1.label as node2, count(q1.label) as `node2;count_q`' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv
## [2022-12-23 09:27:45 sqlstore]: IMPORT graph directly into table graph_7 from /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz ...
## [2022-12-23 09:27:45 sqlstore]: DROP graph data table graph_8
## [2022-12-23 09:27:45 query]: SQL Translation:
---------------------------------------------
SELECT graph_7_c1."label" "_aLias.node1", ? "_aLias.label", graph_2_c2."label" "_aLias.node2", count(graph_2_c2."label") "_aLias.node2;count_q"
FROM graph_2 AS graph_2_c2
INNER JOIN graph_7 AS graph_7_c1
ON graph_7_c1."id" = graph_2_c2."node1"
GROUP BY "_aLias.node1", "_aLias.label", "_aLias.node2"
ORDER BY graph_7_c1."label" ASC, graph_2_c2."label" ASC
LIMIT ?
PARAS: ['quali', 100]
---------------------------------------------
# Output of the predicate/qualifier count query on the filtered graph.
GRAPH_PRED1=/app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv
export GRAPH_PRED1
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:28:58
KGTK File Information:
c:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_7
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_7:
size: 4.10 KB created: 2022-12-23 09:27:45
header: ['id', 'node1', 'label', 'node2']
## BD sqlite com 19G
# Attach aliases of both predicate and qualifier, plus the full-graph predicate
# count (cp), to the predicate/qualifier counts (cq) of the filtered graph.
printf "Agregar álias de Predicados e Qualificadores aos Contadores do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Agregar álias de Predicados e Qualificadores aos Contadores do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:29:00 AM -03
## Starting 'query' on pid 277674.
kgtk --debug query -i "$GRAPH_PRED" --as p -i "$GRAPH_PRED1" --as s -i "$GRAPH_ALIAS" --as a \
  --match 's: (pred)-[]->(quali {count_q: cq}), a: (pred)-[]->(pred_name), (quali)-[]->(quali_name), p: (pred)-[]->(cp)' \
  --return 'pred, GROUP_CONCAT(DISTINCT pred_name), cast(cp, integer), quali, GROUP_CONCAT(DISTINCT quali_name), cast(cq, integer)' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-pred-quals-count-alias.tsv
[2022-12-23 09:29:01 sqlstore]: IMPORT graph directly into table graph_8 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
[2022-12-23 09:29:02 sqlstore]: IMPORT graph directly into table graph_9 from /app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv ...
[2022-12-23 09:29:02 query]: SQL Translation:
---------------------------------------------
SELECT graph_9_c1."node1", group_concat(DISTINCT graph_6_c2."node2"), cast(graph_8_c4."node2" AS integer), graph_9_c1."node2", group_concat(DISTINCT graph_6_c3."node2"), cast(graph_9_c1."node2;count_q" AS integer)
FROM graph_6 AS graph_6_c2
INNER JOIN graph_6 AS graph_6_c3, graph_8 AS graph_8_c4, graph_9 AS graph_9_c1
ON graph_9_c1."node1" = graph_6_c2."node1"
AND graph_9_c1."node1" = graph_8_c4."node1"
AND graph_9_c1."node2" = graph_6_c3."node1"
AND graph_9_c1."node2;count_q" = graph_9_c1."node2;count_q"
GROUP BY graph_9_c1."node1", cast(graph_8_c4."node2" AS integer), graph_9_c1."node2"
LIMIT ?
PARAS: [100]
---------------------------------------------
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_8_node1_idx" ON "graph_8" ("node1")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_8_node1_idx"
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_9_node2_idx" ON "graph_9" ("node2")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_9_node2_idx"
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_9_node1_idx" ON "graph_9" ("node1")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_9_node1_idx"
# Show what the graph cache holds after the alias join above.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:29:02
KGTK File Information:
p:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_8
s:
size: 374 Bytes modified: 2022-12-23 09:28:58 graph: graph_9
c:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_7
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
a:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_7:
size: 4.10 KB created: 2022-12-23 09:27:45
header: ['id', 'node1', 'label', 'node2']
graph_8:
size: 352.26 KB created: 2022-12-23 09:29:01
header: ['node1', 'label', 'node2']
graph_9:
size: 12.29 KB created: 2022-12-23 09:29:02
header: ['node1', 'label', 'node2', 'node2;count_q']
# Same alias/count join as for the filtered graph, but over the
# predicate/qualifier counts of the full claims graph ($GRAPH_PQUALS).
printf "Agregar álias de Predicados e Qualificadores e Contagem de Predicados do conjunto completo - grafo claims %s\n" "$(date)"
## Agregar álias de Predicados e Qualificadores e Contagem de Predicados do conjunto completo - grafo claims Fri 23 Dec 2022 09:29:03 AM -03
## Starting 'query' on pid 277719.
kgtk --debug query -i "$GRAPH_PRED" --as p -i "$GRAPH_PQUALS" --as s -i "$GRAPH_ALIAS" --as a \
  --match 's: (pred)-[]->(quali {count_q: cq}), a: (pred)-[]->(pred_name), (quali)-[]->(quali_name), p: (pred)-[]->(cp)' \
  --return 'pred, GROUP_CONCAT(DISTINCT pred_name), cast(cp, integer), quali, GROUP_CONCAT(DISTINCT quali_name), cast(cq, integer)' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/all-pred-quals-count-alias.tsv
[2022-12-23 09:29:04 sqlstore]: IMPORT graph directly into table graph_10 from /app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv ...
[2022-12-23 09:29:04 sqlstore]: DROP graph data table graph_9
[2022-12-23 09:29:04 query]: SQL Translation:
---------------------------------------------
SELECT graph_10_c1."node1", group_concat(DISTINCT graph_6_c2."node2"), cast(graph_8_c4."node2" AS integer), graph_10_c1."node2", group_concat(DISTINCT graph_6_c3."node2"), cast(graph_10_c1."node2;count_q" AS integer)
FROM graph_10 AS graph_10_c1
INNER JOIN graph_6 AS graph_6_c2, graph_6 AS graph_6_c3, graph_8 AS graph_8_c4
ON graph_10_c1."node1" = graph_6_c2."node1"
AND graph_10_c1."node1" = graph_8_c4."node1"
AND graph_10_c1."node2" = graph_6_c3."node1"
AND graph_10_c1."node2;count_q" = graph_10_c1."node2;count_q"
GROUP BY graph_10_c1."node1", cast(graph_8_c4."node2" AS integer), graph_10_c1."node2"
LIMIT ?
PARAS: [100]
---------------------------------------------
[2022-12-23 09:29:04 sqlstore]: CREATE INDEX "graph_10_node1_idx" ON "graph_10" ("node1")
[2022-12-23 09:29:04 sqlstore]: ANALYZE "graph_10_node1_idx"
[2022-12-23 09:29:04 sqlstore]: CREATE INDEX "graph_10_node2_idx" ON "graph_10" ("node2")
[2022-12-23 09:29:04 sqlstore]: ANALYZE "graph_10_node2_idx"
# End-of-run banner with the current date.
printf 'FIM Current date %s\n' "$(date)"
FIM Current date Fri 23 Dec 2022 09:29:04 AM -03
## INICIO Current date Thu 22 Dec 2022 11:16:45 PM -03
# Paths of the three input graph files, exported for the commands below.
GRAPH_ALIAS=/app/kgtk/data/wikidata/alias.en.tsv.gz
GRAPH_QUALS=/app/kgtk/data/wikidata/qualifiers.tsv.gz
GRAPH_CLAIMS=/app/kgtk/data/wikidata/claims.tsv.gz
export GRAPH_ALIAS GRAPH_QUALS GRAPH_CLAIMS
### claims graph ###
# Count the edges per predicate (label column) of the full claims graph.
printf "Contar Predicados do conjunto completo - grafo claims %s\n" "$(date)"
## Contar Predicados do conjunto completo - grafo claims Thu 22 Dec 2022 11:16:45 PM -03
## Starting 'sort' on pid 276429.
# The `unique` call below is run with --presorted, so sort on label first.
kgtk --debug sort -i "$GRAPH_CLAIMS" -c label \
  --output-file /app/kgtk/data/my-tsv/claims-sorted.tsv.gz
export GRAPH_SORT=/app/kgtk/data/my-tsv/claims-sorted.tsv.gz
## Starting 'unique' on pid 276539.
kgtk --debug unique -i "$GRAPH_SORT" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/claims-pred-counted.tsv
export GRAPH_PRED=/app/kgtk/data/my-tsv/claims-pred-counted.tsv
## Start 2022-12-22 11:16:45 - End 2022-12-23 01:31:16
## The GRAPH_PRED result is a file with pred_label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 20.48 KB free: 0 Bytes modified: 2022-12-23 01:31:16
KGTK File Information:
Graph Table Information:
## Arquivo sqlite vazio
# For the full claims graph, count how often each qualifier label occurs on
# each claim predicate label. Qualifier edges are matched to claims through
# the claim edge itself (p1 is node1 of the qualifier edge).
printf "Contar Predicados_Qualificadores do conjunto completo - grafo claims %s\n" "$(date)"
## Contar Predicados_Qualificadores do conjunto completo - grafo claims Fri 23 Dec 2022 01:31:16 AM -03
## Starting 'query' on pid 276593.
# --limit 100 caps the result at 100 (predicate, qualifier) rows.
kgtk --debug query -i "$GRAPH_CLAIMS" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(), q: (p1)-[q1]->()' \
  --order-by 'p1.label, q1.label' \
  --return 'p1.label as node1, "quali" as label, q1.label as node2, count(q1.label) as `node2;count_q`' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv
## [2022-12-23 01:31:17 sqlstore]: IMPORT graph directly into table graph_1 from /app/kgtk/data/wikidata/claims.tsv.gz ...
## [2022-12-23 03:38:09 sqlstore]: IMPORT graph directly into table graph_2 from /app/kgtk/data/wikidata/qualifiers.tsv.gz ...
## [2022-12-23 05:17:35 query]: SQL Translation:
---------------------------------------------
SELECT graph_1_c1."label" "_aLias.node1", ? "_aLias.label", graph_2_c2."label" "_aLias.node2", count(graph_2_c2."label") "_aLias.node2;count_q"
FROM graph_1 AS graph_1_c1
INNER JOIN graph_2 AS graph_2_c2
ON graph_1_c1."id" = graph_2_c2."node1"
GROUP BY "_aLias.node1", "_aLias.label", "_aLias.node2"
ORDER BY graph_1_c1."label" ASC, graph_2_c2."label" ASC
LIMIT ?
PARAS: ['quali', 100]
---------------------------------------------
## Inicio 2022-12-23 01:31:17 - Fim 2022-12-23 05:43:27
# Output of the predicate/qualifier count query, kept for later joins.
GRAPH_PQUALS=/app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv
export GRAPH_PQUALS
## The GRAPH_PQUALS result is a file with pred_label | "quali" | quali_label as node2 | quantity as `node2;count_q`
# Show what the graph cache holds at this point.
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 69.98 GB free: 0 Bytes modified: 2022-12-23 05:43:18
KGTK File Information:
c:
size: 11.53 GB modified: 2022-10-29 23:58:12 graph: graph_1
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
Graph Table Information:
graph_1:
size: 52.15 GB created: 2022-12-23 03:38:09
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
## BD sqlite com 70G depois de carregar GRAPH_CLAIMS(1) e GRAPH_QUALS (2)
# Select potentially controversial claims: pairs of claim edges on the same
# item with the same predicate label and different values (value1 < value2),
# skipping the external-id and wikibase-property datatypes.
printf "Selecionar claims potencialmente controversos do conjunto completo e gerar grafo filtrado %s\n" "$(date)"
## Selecionar claims potencialmente controversos do conjunto completo e gerar grafo filtrado Fri 23 Dec 2022 05:43:26 AM -03
## Starting 'query' on pid 276799.
# NOTE(review): --multi 2 together with the 8 return columns presumably emits
# each matched pair as two 4-column edges; confirm against the kgtk query docs.
kgtk --debug query -i "$GRAPH_CLAIMS" --as c --index none --multi 2 \
  --match 'c: (item)-[p1]->(value1 {wikidatatype: dt}), (item)-[p2]->(value2)' \
  --where 'value1 < value2 and p1.label = p2.label and dt != "external-id" and dt != "wikibase-property"' \
  --return 'distinct p1, item, p1.label, value1, p2, item, p2.label, value2' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims.tsv.gz
## Não foi preciso carregar novamente GRAPH_CLAIMS(1)
## [2022-12-23 05:43:27 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_1_c1."id", graph_1_c1."node1", graph_1_c1."label", graph_1_c1."node2", graph_1_c2."id", graph_1_c1."node1", graph_1_c2."label", graph_1_c2."node2"
FROM graph_1 AS graph_1_c1
INNER JOIN graph_1 AS graph_1_c2
ON graph_1_c1."node1" = graph_1_c2."node1"
AND graph_1_c1."node2;wikidatatype" = graph_1_c1."node2;wikidatatype"
AND ((graph_1_c1."node2" < graph_1_c2."node2") AND ((graph_1_c1."label" = graph_1_c2."label") AND ((graph_1_c1."node2;wikidatatype" != ?) AND (graph_1_c1."node2;wikidatatype" != ?))))
LIMIT ?
PARAS: ['external-id', 'wikibase-property', 100]
---------------------------------------------
## Inicio 2022-12-23 05:43:27 - Fim 2022-12-23 06:51:33
export GRAPH_F1=/app/kgtk/data/my-tsv/filtered-claims.tsv.gz
### the base graph is GRAPH_F1=/app/kgtk/data/my-tsv/filtered-claims.tsv.gz ###
# Count the edges per predicate (label column) of the filtered base graph.
printf "Contar Predicados do conjunto controversos - grafo base %s\n" "$(date)"
## Contar Predicados do conjunto controversos - grafo base Fri 23 Dec 2022 06:51:33 AM -03
## Starting 'sort' on pid 277322.
# The `unique` call below is run with --presorted, so sort on label first.
kgtk --debug sort -i "$GRAPH_F1" -c label \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-sorted.tsv.gz
# GRAPH_SORT is re-pointed from the full sorted claims to the filtered ones.
export GRAPH_SORT=/app/kgtk/data/my-tsv/filtered-claims-sorted.tsv.gz
## Starting 'unique' on pid 277346.
kgtk --debug unique -i "$GRAPH_SORT" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-pred-counted.tsv
## The GRAPH_XXX result is a file with label | count | quantity
# Show what the graph cache holds at this point.
kgtk query --show-cache
## Nada mudou em relação ao último
# Keep only the base-graph claims that have NO qualifier edge: optionally
# match a qualifier on the claim edge and keep the rows where none was found.
printf "Separar claims SEM qualificadores do conjunto controversos - grafo base-withoutquals %s\n" "$(date)"
## Separar claims SEM qualificadores do conjunto controversos - grafo base-withoutquals Fri 23 Dec 2022 06:51:38 AM -03
## Starting 'query' on pid 277363.
# `--where:` (trailing colon) is intentional and kept as run: the recorded SQL
# translation applies `IS NULL` as a WHERE after the LEFT JOIN.
# Reusing the alias `c` for a different file replaces the graph previously
# cached under that alias (the recorded log shows graph_1 being dropped).
kgtk --debug query -i "$GRAPH_F1" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(value1)' \
  --opt 'q: (p1)-[]->(q1)' \
  --where: 'q1 is null' \
  --order-by 'p1.label, item' \
  --return 'distinct p1, item, p1.label, value1' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz
## [2022-12-23 06:51:39 sqlstore]: IMPORT graph directly into table graph_3 from /app/kgtk/data/my-tsv/filtered-claims.tsv.gz ...
## [2022-12-23 06:51:39 sqlstore]: DROP graph data table graph_1
## Why was this table (graph_1) dropped? Because it is not used in this query, or because the metadata is not filled in? Neither: it was dropped because I reused the alias "c" for a different file.
# [2022-12-23 09:18:12 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_3_c1."id", graph_3_c1."node1", graph_3_c1."label", graph_3_c1."node2"
FROM graph_3 AS graph_3_c1
LEFT JOIN graph_2 AS graph_2_c3
ON graph_3_c1."id" = graph_2_c3."node1"
WHERE (graph_2_c3."node2" IS NULL)
ORDER BY graph_3_c1."label" ASC, graph_3_c1."node1" ASC
LIMIT ?
PARAS: [100]
---------------------------------------------
## Início 2022-12-23 06:51:39 - Fim 2022-12-23 09:23:32
export GRAPH_F11=/app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz
printf "Contar Predicados do conjunto controversos SEM qualificadores - grafo base-withoutquals %s\n" "$(date)"
## Contar Predicados do conjunto controversos SEM qualificadores - grafo base-withoutquals Fri 23 Dec 2022 09:23:32 AM -03
## Starting 'unique' on pid 277433.
# Count edges per distinct label (predicate) among the claims without
# qualifiers. The input was written by a query ordered by label, hence
# --presorted. The expansion is quoted to avoid word-splitting/globbing.
kgtk --debug unique -i "$GRAPH_F11" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-without-quals-counted.tsv
## O resultado (filtered-claims-without-quals-counted.tsv) é um arquivo com label | count | quantity
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 17.83 GB free: 52.15 GB modified: 2022-12-23 09:18:04
KGTK File Information:
c:
size: 1.22 KB modified: 2022-12-23 06:51:31 graph: graph_3
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_3:
size: 16.38 KB created: 2022-12-23 06:51:39
header: ['id', 'node1', 'label', 'node2']
## BD sqlite com 18G GRAPH_QUALS e o grafo base (filtrado)
printf "Agregar álias de Predicados ao conjunto controversos SEM qualificadores - grafo base-withoutquals %s\n" "$(date)"
## Agregar álias de Predicados ao conjunto controversos SEM qualificadores - grafo base-withoutquals Fri 23 Dec 2022 09:23:36 AM -03
## Starting 'query' on pid 277450.
# Join per-predicate counts of the "without qualifiers" subset (f) with the
# per-predicate counts of the full claims graph (c) and the English aliases
# (a), returning: predicate, concatenated aliases, subset count, full count.
#
# FIX: `f` must be the *counted* file produced by `kgtk unique`
# (node1 = predicate, node2 = count). The previous input, $GRAPH_F11, is the
# sorted claims file (node1 = item, node2 = claim value), so count_1 was not
# a count and the join key was not a predicate. Logs recorded from earlier
# runs of this step still reflect that previous input.
# The alias graph is given an explicit `--as a` handle, matching the other
# alias-aggregation queries in this file, instead of relying on file-name
# matching for the `a:` handle.
kgtk --debug query \
  -i /app/kgtk/data/my-tsv/filtered-claims-without-quals-counted.tsv --as f \
  -i "$GRAPH_PRED" --as c \
  -i "$GRAPH_ALIAS" --as a \
  --match 'f: (item)-[]->(count_1), c: (item)-[]->(count_2), a: (item)-[]->(item_name)' \
  --return 'item, GROUP_CONCAT(item_name), cast(count_1, integer), cast(count_2, integer)' \
  --limit 100 \
  -o /app/kgtk/data/aux-tsv/filtered-claims-without-quals-counted-alias.tsv
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_4 from /app/kgtk/data/my-tsv/filtered-claims-without-quals-sorted.tsv.gz ...
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_5 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
## [2022-12-23 09:23:37 sqlstore]: DROP graph data table graph_3
## Drop pq reusei o ALIAS "C"
## [2022-12-23 09:23:37 sqlstore]: IMPORT graph directly into table graph_6 from /app/kgtk/data/wikidata/alias.en.tsv.gz ...
## [2022-12-23 09:24:29 query]: SQL Translation:
---------------------------------------------
SELECT graph_4_c1."node1", group_concat(graph_6_c3."node2"), cast(graph_4_c1."node2" AS integer), cast(graph_5_c2."node2" AS integer)
FROM graph_4 AS graph_4_c1
INNER JOIN graph_5 AS graph_5_c2, graph_6 AS graph_6_c3
ON graph_4_c1."node1" = graph_5_c2."node1"
AND graph_4_c1."node1" = graph_6_c3."node1"
GROUP BY graph_4_c1."node1"
LIMIT ?
PARAS: [100]
---------------------------------------------
## [2022-12-23 09:24:29 sqlstore]: CREATE INDEX "graph_6_node1_idx" ON "graph_6" ("node1")
## [2022-12-23 09:24:38 sqlstore]: ANALYZE "graph_6_node1_idx"
## [2022-12-23 09:24:44 sqlstore]: CREATE INDEX "graph_4_node1_idx" ON "graph_4" ("node1")
## [2022-12-23 09:24:44 sqlstore]: ANALYZE "graph_4_node1_idx"
## [2022-12-23 09:24:44 sqlstore]: CREATE INDEX "graph_5_node1_idx" ON "graph_5" ("node1")
## [2022-12-23 09:24:44 sqlstore]: ANALYZE "graph_5_node1_idx"
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:24:44
KGTK File Information:
c:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_5
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 383 Bytes modified: 2022-12-23 09:23:30 graph: graph_4
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_4:
size: 8.19 KB created: 2022-12-23 09:23:37
header: ['id', 'node1', 'label', 'node2']
graph_5:
size: 352.26 KB created: 2022-12-23 09:23:37
header: ['node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
## BD sqlite com 19G
printf "Separar claims COM qualificadores do conjunto controversos - grafo base-withquals %s\n" "$(date)"
## Separar claims COM qualificadores do conjunto controversos - grafo base-withquals Fri 23 Dec 2022 09:24:45 AM -03
## Starting 'query' on pid 277517.
# Extract claims of the filtered graph that HAVE at least one qualifier:
# the claim id (p1) must appear as node1 of some edge in the qualifiers
# graph. `distinct` collapses claims that carry several qualifiers.
# Output columns follow the return clause: claim id, item, predicate, value.
# Expansions are quoted to avoid word-splitting/globbing on the paths.
kgtk --debug query -i "$GRAPH_F1" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(value1), q: (p1)-[]->()' \
  --order-by 'p1.label, item' \
  --return 'distinct p1, item, p1.label, value1' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz
## [2022-12-23 09:24:46 sqlstore]: IMPORT graph directly into table graph_7 from /app/kgtk/data/my-tsv/filtered-claims.tsv.gz ...
## [2022-12-23 09:24:46 sqlstore]: DROP graph data table graph_5
## Drop novamente por reuso do alias "C"
[2022-12-23 09:24:47 query]: SQL Translation:
---------------------------------------------
SELECT DISTINCT graph_7_c1."id", graph_7_c1."node1", graph_7_c1."label", graph_7_c1."node2"
FROM graph_2 AS graph_2_c2
INNER JOIN graph_7 AS graph_7_c1
ON graph_7_c1."id" = graph_2_c2."node1"
ORDER BY graph_7_c1."label" ASC, graph_7_c1."node1" ASC
LIMIT ?
PARAS: [100]
---------------------------------------------
export GRAPH_F12=/app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz
printf "Contar Predicados do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Contar Predicados do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:39 AM -03
## Starting 'unique' on pid 277573.
# Count edges per distinct label (predicate) among the claims that have
# qualifiers. The input was written by a query ordered by label, hence
# --presorted. The expansion is quoted to avoid word-splitting/globbing.
kgtk --debug unique -i "$GRAPH_F12" --columns label --presorted \
  --output-file /app/kgtk/data/my-tsv/filtered-claims-with-quals-counted.tsv
## O resultado (filtered-claims-with-quals-counted.tsv) é um arquivo com label | count | quantity
kgtk query --show-cache
## Nada mudou em relação ao último
printf "Agregar álias de Predicados ao conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Agregar álias de Predicados ao conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:41 AM -03
## Starting 'query' on pid 277590.
# Join per-predicate counts of the "with qualifiers" subset (f) with the
# per-predicate counts of the full claims graph (c) and the English aliases
# (a), returning: predicate, concatenated aliases, subset count, full count.
#
# FIX: `f` must be the *counted* file produced by `kgtk unique`
# (node1 = predicate, node2 = count). The previous input, $GRAPH_F12, is the
# sorted claims file (node1 = item, node2 = claim value), so count_1 was not
# a count and the join key was not a predicate. Logs recorded from earlier
# runs of this step still reflect that previous input.
# The alias graph is given an explicit `--as a` handle, matching the other
# alias-aggregation queries in this file, instead of relying on file-name
# matching for the `a:` handle.
kgtk --debug query \
  -i /app/kgtk/data/my-tsv/filtered-claims-with-quals-counted.tsv --as f \
  -i "$GRAPH_PRED" --as c \
  -i "$GRAPH_ALIAS" --as a \
  --match 'f: (item)-[]->(count_1), c: (item)-[]->(count_2), a: (item)-[]->(item_name)' \
  --return 'item, GROUP_CONCAT(item_name), cast(count_1, integer), cast(count_2, integer)' \
  --limit 100 \
  -o /app/kgtk/data/aux-tsv/filtered-claims-with-quals-counted-alias.tsv
## [2022-12-23 09:27:42 sqlstore]: IMPORT graph directly into table graph_5 from /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz ...
## [2022-12-23 09:27:42 sqlstore]: DROP graph data table graph_4
## [2022-12-23 09:27:42 sqlstore]: IMPORT graph directly into table graph_8 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
## [2022-12-23 09:27:42 sqlstore]: DROP graph data table graph_7
## [2022-12-23 09:27:42 query]: SQL Translation:
---------------------------------------------
SELECT graph_5_c1."node1", group_concat(graph_6_c3."node2"), cast(graph_5_c1."node2" AS integer), cast(graph_8_c2."node2" AS integer)
FROM graph_5 AS graph_5_c1
INNER JOIN graph_6 AS graph_6_c3, graph_8 AS graph_8_c2
ON graph_5_c1."node1" = graph_6_c3."node1"
AND graph_5_c1."node1" = graph_8_c2."node1"
GROUP BY graph_5_c1."node1"
LIMIT ?
PARAS: [100]
---------------------------------------------
## [2022-12-23 09:27:42 sqlstore]: CREATE INDEX "graph_8_node1_idx" ON "graph_8" ("node1")
## [2022-12-23 09:27:42 sqlstore]: ANALYZE "graph_8_node1_idx"
## [2022-12-23 09:27:42 sqlstore]: CREATE INDEX "graph_5_node1_idx" ON "graph_5" ("node1")
## [2022-12-23 09:27:42 sqlstore]: ANALYZE "graph_5_node1_idx"
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:27:42
KGTK File Information:
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
c:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_8
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_8:
size: 352.26 KB created: 2022-12-23 09:27:42
header: ['node1', 'label', 'node2']
## BD sqlite com 19G
printf "Contar Predicados_Qualificadores do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Contar Predicados_Qualificadores do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:27:44 AM -03
## Starting 'query' on pid 277639.
# Count (predicate, qualifier-property) pairs for the claims with
# qualifiers: each claim id (p1) is joined to its qualifier edges (q1) and
# rows are grouped by claim predicate and qualifier label.
# Output is shaped as KGTK edges: node1 = predicate, label = "quali",
# node2 = qualifier property, node2;count_q = number of occurrences.
# Expansions are quoted to avoid word-splitting/globbing on the paths.
kgtk --debug query -i "$GRAPH_F12" --as c --index none -i "$GRAPH_QUALS" \
  --match 'c: (item)-[p1]->(), q: (p1)-[q1]->()' \
  --order-by 'p1.label, q1.label' \
  --return 'p1.label as node1, "quali" as label, q1.label as node2, count(q1.label) as `node2;count_q`' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv
## [2022-12-23 09:27:45 sqlstore]: IMPORT graph directly into table graph_7 from /app/kgtk/data/my-tsv/filtered-claims-with-quals-sorted.tsv.gz ...
## [2022-12-23 09:27:45 sqlstore]: DROP graph data table graph_8
## [2022-12-23 09:27:45 query]: SQL Translation:
---------------------------------------------
SELECT graph_7_c1."label" "_aLias.node1", ? "_aLias.label", graph_2_c2."label" "_aLias.node2", count(graph_2_c2."label") "_aLias.node2;count_q"
FROM graph_2 AS graph_2_c2
INNER JOIN graph_7 AS graph_7_c1
ON graph_7_c1."id" = graph_2_c2."node1"
GROUP BY "_aLias.node1", "_aLias.label", "_aLias.node2"
ORDER BY graph_7_c1."label" ASC, graph_2_c2."label" ASC
LIMIT ?
PARAS: ['quali', 100]
---------------------------------------------
export GRAPH_PRED1=/app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:28:58
KGTK File Information:
c:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_7
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
/app/kgtk/data/wikidata/alias.en.tsv.gz:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_7:
size: 4.10 KB created: 2022-12-23 09:27:45
header: ['id', 'node1', 'label', 'node2']
## BD sqlite com 19G
printf "Agregar álias de Predicados e Qualificadores aos Contadores do conjunto controversos COM qualificadores - grafo base-withquals %s\n" "$(date)"
## Agregar álias de Predicados e Qualificadores aos Contadores do conjunto controversos COM qualificadores - grafo base-withquals Fri 23 Dec 2022 09:29:00 AM -03
## Starting 'query' on pid 277674.
# Enrich the filtered (predicate, qualifier) pair counts (s) with English
# aliases (a) for both the predicate and the qualifier, plus the predicate's
# count in the full claims graph (p).
# Returned columns: predicate, its aliases, full-graph predicate count,
# qualifier, its aliases, pair count.
# Expansions are quoted to avoid word-splitting/globbing on the paths.
kgtk --debug query -i "$GRAPH_PRED" --as p -i "$GRAPH_PRED1" --as s -i "$GRAPH_ALIAS" --as a \
  --match 's: (pred)-[]->(quali {count_q: cq}), a: (pred)-[]->(pred_name), (quali)-[]->(quali_name), p: (pred)-[]->(cp)' \
  --return 'pred, GROUP_CONCAT(DISTINCT pred_name), cast(cp, integer), quali, GROUP_CONCAT(DISTINCT quali_name), cast(cq, integer)' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/filtered-pred-quals-count-alias.tsv
[2022-12-23 09:29:01 sqlstore]: IMPORT graph directly into table graph_8 from /app/kgtk/data/my-tsv/claims-pred-counted.tsv ...
[2022-12-23 09:29:02 sqlstore]: IMPORT graph directly into table graph_9 from /app/kgtk/data/my-tsv/filtered-pred-quals-count-sorted.tsv ...
[2022-12-23 09:29:02 query]: SQL Translation:
---------------------------------------------
SELECT graph_9_c1."node1", group_concat(DISTINCT graph_6_c2."node2"), cast(graph_8_c4."node2" AS integer), graph_9_c1."node2", group_concat(DISTINCT graph_6_c3."node2"), cast(graph_9_c1."node2;count_q" AS integer)
FROM graph_6 AS graph_6_c2
INNER JOIN graph_6 AS graph_6_c3, graph_8 AS graph_8_c4, graph_9 AS graph_9_c1
ON graph_9_c1."node1" = graph_6_c2."node1"
AND graph_9_c1."node1" = graph_8_c4."node1"
AND graph_9_c1."node2" = graph_6_c3."node1"
AND graph_9_c1."node2;count_q" = graph_9_c1."node2;count_q"
GROUP BY graph_9_c1."node1", cast(graph_8_c4."node2" AS integer), graph_9_c1."node2"
LIMIT ?
PARAS: [100]
---------------------------------------------
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_8_node1_idx" ON "graph_8" ("node1")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_8_node1_idx"
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_9_node2_idx" ON "graph_9" ("node2")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_9_node2_idx"
[2022-12-23 09:29:02 sqlstore]: CREATE INDEX "graph_9_node1_idx" ON "graph_9" ("node1")
[2022-12-23 09:29:02 sqlstore]: ANALYZE "graph_9_node1_idx"
kgtk query --show-cache
Graph Cache:
DB file: /tmp/kgtk-graph-cache-root.sqlite3.db
size: 18.96 GB free: 51.02 GB modified: 2022-12-23 09:29:02
KGTK File Information:
p:
size: 156.45 KB modified: 2022-12-23 01:31:15 graph: graph_8
s:
size: 374 Bytes modified: 2022-12-23 09:28:58 graph: graph_9
c:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_7
/app/kgtk/data/wikidata/qualifiers.tsv.gz:
size: 2.66 GB modified: 2022-12-02 18:31:14 graph: graph_2
f:
size: 653 Bytes modified: 2022-12-23 09:27:39 graph: graph_5
a:
size: 187.64 MB modified: 2022-11-01 16:45:54 graph: graph_6
Graph Table Information:
graph_2:
size: 17.83 GB created: 2022-12-23 05:17:35
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_5:
size: 8.19 KB created: 2022-12-23 09:27:42
header: ['id', 'node1', 'label', 'node2']
graph_6:
size: 1.13 GB created: 2022-12-23 09:24:29
header: ['id', 'node1', 'label', 'node2', 'lang', 'rank', 'node2;wikidatatype']
graph_7:
size: 4.10 KB created: 2022-12-23 09:27:45
header: ['id', 'node1', 'label', 'node2']
graph_8:
size: 352.26 KB created: 2022-12-23 09:29:01
header: ['node1', 'label', 'node2']
graph_9:
size: 12.29 KB created: 2022-12-23 09:29:02
header: ['node1', 'label', 'node2', 'node2;count_q']
printf "Agregar álias de Predicados e Qualificadores e Contagem de Predicados do conjunto completo - grafo claims %s\n" "$(date)"
## Agregar álias de Predicados e Qualificadores e Contagem de Predicados do conjunto completo - grafo claims Fri 23 Dec 2022 09:29:03 AM -03
## Starting 'query' on pid 277719.
# Same enrichment as for the filtered subset, but over the (predicate,
# qualifier) pair counts of the full claims graph ($GRAPH_PQUALS, bound to s):
# adds English aliases (a) for predicate and qualifier and the predicate's
# overall count (p).
# Returned columns: predicate, its aliases, full-graph predicate count,
# qualifier, its aliases, pair count.
# Expansions are quoted to avoid word-splitting/globbing on the paths.
kgtk --debug query -i "$GRAPH_PRED" --as p -i "$GRAPH_PQUALS" --as s -i "$GRAPH_ALIAS" --as a \
  --match 's: (pred)-[]->(quali {count_q: cq}), a: (pred)-[]->(pred_name), (quali)-[]->(quali_name), p: (pred)-[]->(cp)' \
  --return 'pred, GROUP_CONCAT(DISTINCT pred_name), cast(cp, integer), quali, GROUP_CONCAT(DISTINCT quali_name), cast(cq, integer)' \
  --limit 100 \
  -o /app/kgtk/data/my-tsv/all-pred-quals-count-alias.tsv
[2022-12-23 09:29:04 sqlstore]: IMPORT graph directly into table graph_10 from /app/kgtk/data/my-tsv/claims-pred-quals-count-sorted.tsv ...
[2022-12-23 09:29:04 sqlstore]: DROP graph data table graph_9
[2022-12-23 09:29:04 query]: SQL Translation:
---------------------------------------------
SELECT graph_10_c1."node1", group_concat(DISTINCT graph_6_c2."node2"), cast(graph_8_c4."node2" AS integer), graph_10_c1."node2", group_concat(DISTINCT graph_6_c3."node2"), cast(graph_10_c1."node2;count_q" AS integer)
FROM graph_10 AS graph_10_c1
INNER JOIN graph_6 AS graph_6_c2, graph_6 AS graph_6_c3, graph_8 AS graph_8_c4
ON graph_10_c1."node1" = graph_6_c2."node1"
AND graph_10_c1."node1" = graph_8_c4."node1"
AND graph_10_c1."node2" = graph_6_c3."node1"
AND graph_10_c1."node2;count_q" = graph_10_c1."node2;count_q"
GROUP BY graph_10_c1."node1", cast(graph_8_c4."node2" AS integer), graph_10_c1."node2"
LIMIT ?
PARAS: [100]
---------------------------------------------
[2022-12-23 09:29:04 sqlstore]: CREATE INDEX "graph_10_node1_idx" ON "graph_10" ("node1")
[2022-12-23 09:29:04 sqlstore]: ANALYZE "graph_10_node1_idx"
[2022-12-23 09:29:04 sqlstore]: CREATE INDEX "graph_10_node2_idx" ON "graph_10" ("node2")
[2022-12-23 09:29:04 sqlstore]: ANALYZE "graph_10_node2_idx"
printf "FIM Current date %s\n" "$(date)"
FIM Current date Fri 23 Dec 2022 09:29:04 AM -03
Comentários
Postar um comentário
Sinta-se à vontade para comentar. Críticas construtivas são sempre bem-vindas.