Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 20 additions & 19 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.3
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.9.0'
hooks:
- id: mypy
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.1
hooks:
# Run the linter.
- id: ruff
args: [--fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.10.1'
hooks:
- id: mypy
additional_dependencies: ['types-requests']
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"networkx~=3.0",
"typer[all]~=0.9.0",
"xlsxwriter~=3.2.0",
"types-requests~=2.32.0.20240622",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things here: you might want to move `types-requests` to the dev dependencies, and you'll need to include `requests` in the actual package dependencies:

diff --git a/pyproject.toml b/pyproject.toml
index 71abf04..845842a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,8 @@ dependencies = [
     "networkx~=3.0",
     "typer[all]~=0.9.0",
     "xlsxwriter~=3.2.0",
-    "types-requests~=2.32.0.20240622",
+    "requests~=2.32.3",
+    "pydantic~=2.8.2",
 ]
 
 [project.optional-dependencies]
@@ -27,6 +28,7 @@ dev = [
     "pre-commit~=2.20.0",
     "ruff~=0.3.3",
     "mypy~=1.9.0",
+    "types-requests~=2.32.0.20240622",
 ]
 
 [project.scripts]

]

[project.optional-dependencies]
Expand Down
58 changes: 56 additions & 2 deletions src/bibx/_entities/collection_builders/cross_ref.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from bibx._entities.collection import Collection
import requests

from bibx._entities.collection import Article, Collection
from bibx._entities.collection_builders.base import CollectionBuilder


Expand All @@ -12,4 +14,56 @@ def with_count(self, count: int):
return self

def build(self) -> Collection:
return Collection([])
url = f"https://api.crossref.org/works?query={self._query.lower().replace(' ', '+')}&filter=has-orcid:true,type:journal-article,has-references:true,from-pub-date:2003-01-01&rows={self._count}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest you create a client and type-check the API response using pydantic; the client would look like this:

https://gist.github.com/odarbelaeze/8c9fa5b8735463c94137116af48a73e2

response = requests.get(url)
data = response.json()
items = data.get("message", {}).get("items", [])

articles = []
for item in items:
author_list = item.get("author", [])
authors = [
f"{author.get('given', '')} {author.get('family', '')}"
for author in author_list
]
publication_year = item.get("published").get("date-parts", [[2000]])[0][0]
title = item.get("title", None)[0]
journal = item.get("container-title", None)[0]
volume = item.get("volume", None)
issue = item.get("issue", None)
page = item.get("page", None)
doi = item.get("DOI", None)
times_cited = item.get("is-referenced-by-count", 0)
reference = item.get("reference", [])
references = []
for ref in reference:
if ref.get("unstructured", None) is not None:
unique_reference = Article(title=ref.get("unstructured", None))
else:
unique_reference = Article(
authors=[ref.get("author", [])],
year=ref.get("year", None),
title=ref.get("article-title", None),
journal=ref.get("journal-title", None),
volume=ref.get("volume", None),
issue=ref.get("issue", None),
page=ref.get("page", None),
doi=ref.get("DOI", None),
)
references.append(unique_reference)
Comment on lines +24 to +53
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you type-check the author data, this will end up looking more like:

diff --git a/src/bibx/_entities/collection_builders/cross_ref.py b/src/bibx/_entities/collection_builders/cross_ref.py
index 181abf3..1b330c7 100644
--- a/src/bibx/_entities/collection_builders/cross_ref.py
+++ b/src/bibx/_entities/collection_builders/cross_ref.py
@@ -1,5 +1,8 @@
-import requests
-
+from bibx._entities.clients.cross_ref import (
+    CrossRefClient,
+    StructuredReference,
+    UnstructuredReference,
+)
 from bibx._entities.collection import Article, Collection
 from bibx._entities.collection_builders.base import CollectionBuilder
 
@@ -14,43 +17,43 @@ class CrossRefCollectionBuilder(CollectionBuilder):
         return self
 
     def build(self) -> Collection:
-        url = f"https://api.crossref.org/works?query={self._query.lower().replace(' ', '+')}&filter=has-orcid:true,type:journal-article,has-references:true,from-pub-date:2003-01-01&rows={self._count}"
-        response = requests.get(url)
-        data = response.json()
-        items = data.get("message", {}).get("items", [])
+        client = CrossRefClient()
+        works = client.get_works(self._query, self._count)
+        items = works.message.items
 
         articles = []
         for item in items:
-            author_list = item.get("author", [])
+            author_list = item.author
             authors = [
-                f"{author.get('given', '')} {author.get('family', '')}"
-                for author in author_list
+                f"{author.given or ''} {author.family or ''}" for author in author_list
             ]
-            publication_year = item.get("published").get("date-parts", [[2000]])[0][0]
-            title = item.get("title", None)[0]
-            journal = item.get("container-title", None)[0]
-            volume = item.get("volume", None)
-            issue = item.get("issue", None)
-            page = item.get("page", None)
-            doi = item.get("DOI", None)
-            times_cited = item.get("is-referenced-by-count", 0)
-            reference = item.get("reference", [])
+            publication_year = item.published.date_parts[0][0]
+            title = item.title[0]
+            journal = item.container_title[0]
+            volume = item.volume
+            issue = item.issue
+            page = item.page
+            doi = item.doi
+            times_cited = item.is_referenced_by_count
+            reference = item.reference
             references = []
             for ref in reference:
-                if ref.get("unstructured", None) is not None:
-                    unique_reference = Article(title=ref.get("unstructured", None))
-                else:
+                if isinstance(ref, UnstructuredReference):
+                    unique_reference = Article(title=ref.unstructured)
+                    references.append(unique_reference)
+                    continue
+                if isinstance(ref, StructuredReference):
                     unique_reference = Article(
-                        authors=[ref.get("author", [])],
-                        year=ref.get("year", None),
-                        title=ref.get("article-title", None),
-                        journal=ref.get("journal-title", None),
-                        volume=ref.get("volume", None),
-                        issue=ref.get("issue", None),
-                        page=ref.get("page", None),
-                        doi=ref.get("DOI", None),
+                        authors=[ref.author or ""],
+                        year=ref.year,
+                        title=ref.article_title,
+                        journal=ref.journal_title,
+                        volume=ref.volume,
+                        issue=ref.issue,
+                        page=ref.page,
+                        doi=ref.doi,
                     )
-                references.append(unique_reference)
+                    references.append(unique_reference)
 
             article = Article(
                 authors=authors,


article = Article(
authors=authors,
year=publication_year,
title=title,
journal=journal,
volume=volume,
issue=issue,
page=page,
doi=doi,
times_cited=times_cited,
references=references,
)
articles.append(article)

return Collection(articles)
2 changes: 1 addition & 1 deletion src/bibx/_entities/collection_builders/scopus_ris.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def _article_from_record(cls, record: str) -> Article:
@classmethod
def _parse_file(cls, file: TextIO) -> Iterable[Article]:
if not _size(file):
return []
yield from []
for item in file.read().split("\n\n"):
if item.isspace():
continue
Expand Down