From 5b8f43120c7085f3b3b34073e3f86a5f10fc68a8 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith M." Date: Thu, 20 Jun 2024 10:50:46 -0500 Subject: [PATCH 01/11] Fix read_any function to correctly read any given file --- src/bibx/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 917af4a..667ca59 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -57,6 +57,12 @@ def read_any(file: TextIO) -> Collection: Tries to read a file with the supported formats. """ for handler in (read_wos, read_scopus_ris, read_scopus_bib): - with suppress(BibXError): + try: return handler(file) - raise ValueError("Unsuported file type") + except BibXError as e: + print(f"Error: {e}") + except ValueError as e: + if "invalid literal" in str(e): + print( + f"Error: the {handler} function does not support this file type") + raise ValueError("Unsupported file type") From fbb799aed643fcf945319018c9c4cd9e2900ff6a Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith M." 
Date: Wed, 26 Jun 2024 10:11:31 -0500 Subject: [PATCH 02/11] Configuration and implementation of the logger to record eventual errors and warnings --- src/bibx/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 667ca59..9d902a5 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -9,6 +9,10 @@ from bibx.algorithms.sap import Sap from bibx.exceptions import BibXError +import logging + +logger = logging.getLogger(__name__) + __all__ = [ "Article", "Collection", @@ -60,9 +64,8 @@ def read_any(file: TextIO) -> Collection: try: return handler(file) except BibXError as e: - print(f"Error: {e}") + logger.debug(f"Error: {e}") except ValueError as e: if "invalid literal" in str(e): - print( - f"Error: the {handler} function does not support this file type") + logger.debug(f'Error: the {handler.__name__} function does not support this file') raise ValueError("Unsupported file type") From 746be5771d5f966f612a8d5267d9e0b51cb67c18 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith M." 
Date: Sat, 29 Jun 2024 13:30:05 -0500 Subject: [PATCH 03/11] Removed the if statement and the module suppression --- src/bibx/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 9d902a5..2e721fc 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -1,4 +1,3 @@ -from contextlib import suppress from typing import TextIO from bibx._entities.article import Article @@ -65,7 +64,6 @@ def read_any(file: TextIO) -> Collection: return handler(file) except BibXError as e: logger.debug(f"Error: {e}") - except ValueError as e: - if "invalid literal" in str(e): - logger.debug(f'Error: the {handler.__name__} function does not support this file') + except ValueError: + logger.debug(f'Error: the {handler.__name__} function does not support this file') raise ValueError("Unsupported file type") From 4e6c47a30519113393d433db5aff21e8a5a1c651 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith M." Date: Sat, 29 Jun 2024 15:09:03 -0500 Subject: [PATCH 04/11] Formatted imports --- src/bibx/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 2e721fc..907dae7 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -1,3 +1,4 @@ +import logging from typing import TextIO from bibx._entities.article import Article @@ -8,8 +9,6 @@ from bibx.algorithms.sap import Sap from bibx.exceptions import BibXError -import logging - logger = logging.getLogger(__name__) __all__ = [ From 52c726d138757918d93b838804523eaa5476e07b Mon Sep 17 00:00:00 2001 From: "Juan G. 
Saurith" Date: Wed, 3 Jul 2024 15:40:13 -0500 Subject: [PATCH 05/11] Updated with pre-commit and formatted with Ruff --- src/bibx/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 907dae7..2f83b79 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -64,5 +64,7 @@ def read_any(file: TextIO) -> Collection: except BibXError as e: logger.debug(f"Error: {e}") except ValueError: - logger.debug(f'Error: the {handler.__name__} function does not support this file') + logger.debug( + f"Error: the {handler.__name__} function does not support this file" + ) raise ValueError("Unsupported file type") From e37f10b0dc576eb3b7c63196abd527cd673d3fbd Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith" Date: Wed, 10 Jul 2024 15:42:41 -0500 Subject: [PATCH 06/11] Added functionality to search for articles using CrossRef --- .../collection_builders/cross_ref.py | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/bibx/_entities/collection_builders/cross_ref.py b/src/bibx/_entities/collection_builders/cross_ref.py index 8cb9e40..a7a5c4c 100644 --- a/src/bibx/_entities/collection_builders/cross_ref.py +++ b/src/bibx/_entities/collection_builders/cross_ref.py @@ -1,4 +1,6 @@ -from bibx._entities.collection import Collection +import requests + +from bibx._entities.collection import Article, Collection from bibx._entities.collection_builders.base import CollectionBuilder @@ -12,4 +14,37 @@ def with_count(self, count: int): return self def build(self) -> Collection: - return Collection([]) + url = f"https://api.crossref.org/works?query={self._query.lower().replace(' ', '+')}&filter=has-orcid:true,type:journal-article,has-references:true,from-pub-date:2003-01-01&rows={self._count}" + response = requests.get(url) + data = response.json() + items = data.get("message", {}).get("items", []) + + articles = [] + for item in items: + author_list = item.get("author", []) + 
authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in author_list] + publication_year = item.get("published").get("date-parts", [[2000]])[0][0] + title = item.get("title", None)[0] + journal = item.get("container-title", None)[0] + volume = item.get("volume", None) + issue = item.get("issue", None) + page = item.get("page", None) + doi = item.get("DOI", None) + times_cited = item.get("is-referenced-by-count", 0) + references = item.get("reference", []) + + article = Article( + authors=authors, + year=publication_year, + title=title, + journal=journal, + volume=volume, + issue=issue, + page=page, + doi=doi, + times_cited=times_cited, + references=references, + ) + articles.append(article) + + return Collection(articles) From 6b32522c18fa279ceb55f0b9598bcb844600a427 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith" Date: Wed, 10 Jul 2024 15:43:18 -0500 Subject: [PATCH 07/11] Added functionality to search for articles using CrossRef --- src/bibx/_entities/collection_builders/cross_ref.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bibx/_entities/collection_builders/cross_ref.py b/src/bibx/_entities/collection_builders/cross_ref.py index a7a5c4c..2430b95 100644 --- a/src/bibx/_entities/collection_builders/cross_ref.py +++ b/src/bibx/_entities/collection_builders/cross_ref.py @@ -22,7 +22,10 @@ def build(self) -> Collection: articles = [] for item in items: author_list = item.get("author", []) - authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in author_list] + authors = [ + f"{author.get('given', '')} {author.get('family', '')}" + for author in author_list + ] publication_year = item.get("published").get("date-parts", [[2000]])[0][0] title = item.get("title", None)[0] journal = item.get("container-title", None)[0] From 950543f5199611e613457ae377983d88111d93da Mon Sep 17 00:00:00 2001 From: "Juan G. 
Saurith" Date: Thu, 11 Jul 2024 12:11:17 -0500 Subject: [PATCH 08/11] Added types-requests dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1f5c3d0..71abf04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "networkx~=3.0", "typer[all]~=0.9.0", "xlsxwriter~=3.2.0", + "types-requests~=2.32.0.20240622", ] [project.optional-dependencies] From 22840e70c4477106d1c93b22e1dffedb46438978 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith" Date: Thu, 11 Jul 2024 16:22:32 -0500 Subject: [PATCH 09/11] Changed pre-commit config --- .pre-commit-config.yaml | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 93303a3..2c18ef3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,20 +1,21 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.3.3 - hooks: - # Run the linter. - - id: ruff - args: [ --fix ] - # Run the formatter. - - id: ruff-format - - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.9.0' - hooks: - - id: mypy + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.5.1 + hooks: + # Run the linter. + - id: ruff + args: [--fix] + # Run the formatter. + - id: ruff-format + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.10.1' + hooks: + - id: mypy + additional_dependencies: ['types-requests'] From 3c3c22d15119573d9912e15523439dfc0ff528d8 Mon Sep 17 00:00:00 2001 From: "Juan G. 
Saurith" Date: Thu, 11 Jul 2024 16:32:22 -0500 Subject: [PATCH 10/11] Adjusted the return by a method in the file scopus_ris --- src/bibx/_entities/collection_builders/scopus_ris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bibx/_entities/collection_builders/scopus_ris.py b/src/bibx/_entities/collection_builders/scopus_ris.py index 65f00ba..4597ae0 100644 --- a/src/bibx/_entities/collection_builders/scopus_ris.py +++ b/src/bibx/_entities/collection_builders/scopus_ris.py @@ -177,7 +177,7 @@ def _article_from_record(cls, record: str) -> Article: @classmethod def _parse_file(cls, file: TextIO) -> Iterable[Article]: if not _size(file): - return [] + yield from [] for item in file.read().split("\n\n"): if item.isspace(): continue From 4c71b7902fdaa347f2ddfa63c4a96a07d1ddc640 Mon Sep 17 00:00:00 2001 From: "Juan G. Saurith" Date: Fri, 12 Jul 2024 11:01:14 -0500 Subject: [PATCH 11/11] Set the format of Article() to each article reference extracted from CrossRef --- .../_entities/collection_builders/cross_ref.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/bibx/_entities/collection_builders/cross_ref.py b/src/bibx/_entities/collection_builders/cross_ref.py index 2430b95..181abf3 100644 --- a/src/bibx/_entities/collection_builders/cross_ref.py +++ b/src/bibx/_entities/collection_builders/cross_ref.py @@ -34,7 +34,23 @@ def build(self) -> Collection: page = item.get("page", None) doi = item.get("DOI", None) times_cited = item.get("is-referenced-by-count", 0) - references = item.get("reference", []) + reference = item.get("reference", []) + references = [] + for ref in reference: + if ref.get("unstructured", None) is not None: + unique_reference = Article(title=ref.get("unstructured", None)) + else: + unique_reference = Article( + authors=[ref.get("author", [])], + year=ref.get("year", None), + title=ref.get("article-title", None), + journal=ref.get("journal-title", None), + volume=ref.get("volume", 
None), + issue=ref.get("issue", None), + page=ref.get("page", None), + doi=ref.get("DOI", None), + ) + references.append(unique_reference) article = Article( authors=authors,