Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion db/news.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
---
news:
- id: 13
url: https://news.coderdojo.jp/2025/10/04/dojoletter-vol-89-2025%e5%b9%b408%e6%9c%88%e5%8f%b7/
title: DojoLetter Vol.89 2025年08月号
Expand Down
189 changes: 72 additions & 117 deletions lib/tasks/news.rake
Original file line number Diff line number Diff line change
@@ -1,118 +1,75 @@
require 'rss'
require 'net/http'
require 'uri'
require 'yaml'
require 'time'
require 'active_support/broadcast_logger'

# Fetches the body of a remote document over HTTP or HTTPS.
#
# Only absolute http:// and https:// URLs are accepted; local paths and any
# other scheme are rejected before a connection is attempted.
#
# @param url [String] absolute HTTP(S) URL to fetch
# @return [String] body of a successful (2xx) response
# @raise [RuntimeError] if the URL is not HTTP(S), has no host, or the server
#   answers with a non-2xx status
# @raise [URI::InvalidURIError] if the string cannot be parsed as a URI
def safe_open(url)
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so a single check covers both.
  # A missing host would hand Net::HTTP.start a nil address, so reject it too.
  raise "不正なURLです: #{url}" unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    response = http.request(Net::HTTP::Get.new(uri))
    # The body of an error page or redirect is not the requested document;
    # fail loudly instead of handing it to the caller as if it were.
    raise "HTTP #{response.code} #{response.message}: #{url}" unless response.is_a?(Net::HTTPSuccess)

    response.body
  end
end

# Loads one RSS source and converts every entry with item_to_hash.
#
# The source may be an HTTP(S) URL (fetched through safe_open) or the path of
# an existing local file such as a test fixture. Any failure is logged as a
# warning and yields an empty list, so one broken source does not abort the
# caller.
#
# NOTE(review): local paths are read as-is, so callers are expected to pass
# trusted, hard-coded sources only — confirm before feeding user input here.
#
# @param url [String] feed URL or path of a local feed file
# @param logger [#info, #warn] receives progress and failure messages
# @return [Array<Hash>] converted items; empty when the source cannot be
#   read or parsed
def fetch_rss_items(url, logger)
  logger.info("Fetching RSS → #{url}")
  begin
    # safe_open only accepts HTTP(S), so an existing local file has to be
    # read directly; otherwise a fixture path would always end up as [].
    source = File.file?(url) ? File.read(url) : safe_open(url)
    feed = RSS::Parser.parse(source, false)
    # The parser can return nil for content that is not a feed; raise a clear
    # message instead of a NoMethodError on nil.
    raise "not a parseable RSS feed: #{url}" if feed.nil?

    feed.items.map { |item| item_to_hash(item) }
  rescue StandardError => e
    logger.warn("⚠️ Failed to fetch #{url}: #{e.message}")
    []
  end
end

# Converts a parsed feed entry into a plain, string-keyed hash.
#
# @param item [#link, #title, #pubDate] feed entry to convert
# @return [Hash{String => Object}] 'url' and 'title' copied from the entry,
#   and 'published_at' holding the publication date converted with #to_s
def item_to_hash(item)
  values = [item.link, item.title, item.pubDate.to_s]
  %w[url title published_at].zip(values).to_h
end
NEWS_YAML_PATH = 'db/news.yml'.freeze
NEWS_LOG_PATH = 'log/news.log'.freeze

namespace :news do
desc 'RSS フィードを取得し、db/news.yml に保存'
desc "RSS フィードを取得し、#{NEWS_YAML_PATH} に保存"
task fetch: :environment do
# ロガー設定(ファイル+コンソール出力)
file_logger = ActiveSupport::Logger.new('log/news.log')
console = ActiveSupport::Logger.new(STDOUT)
logger = ActiveSupport::BroadcastLogger.new(file_logger, console)
logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH)
logger = ActiveSupport::BroadcastLogger.new(logger_file, console)

logger.info('==== START news:fetch ====')

# 既存の news.yml を読み込み
yaml_path = Rails.root.join('db', 'news.yml')
existing_news = if File.exist?(yaml_path)
YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)['news'] || []
else
[]
end

# テスト/ステージング環境ではサンプルファイル、本番は実サイトのフィード
feed_urls = if Rails.env.test? || Rails.env.staging?
[Rails.root.join('spec', 'fixtures', 'sample_news.rss').to_s]
else
[
'https://news.coderdojo.jp/feed/'
# 必要に応じて他 Dojo の RSS もここに追加可能
# 'https://coderdojotokyo.org/feed',
]
end

new_items = feed_urls.flat_map { |url| fetch_rss_items(url, logger) }

# 既存データをハッシュに変換(URL をキーに)
existing_items_hash = existing_news.index_by { |item| item['url'] }

# 新しいアイテムと既存アイテムを分離
truly_new_items = []
# 本番/開発環境では実フィード、それ以外(テスト環境など)ではテスト用フィード
DOJO_NEWS_FEED = 'https://news.coderdojo.jp/feed/'
TEST_NEWS_FEED = Rails.root.join('spec', 'fixtures', 'sample_news.rss')
RSS_FEED_LIST = (Rails.env.test? || Rails.env.staging?) ?
[TEST_NEWS_FEED] :
[DOJO_NEWS_FEED]

# RSS のデータ構造を、News のデータ構造に変換
fetched_items = RSS_FEED_LIST.flat_map do |feed|
feed = RSS::Parser.parse(feed, false)
feed.items.map { |item|
{
'url' => item.link,
'title' => item.title,
'published_at' => item.pubDate.to_s
}
}
end

# 取得済みニュース (YAML) を読み込み、URL をキーとしたハッシュに変換
existing_items = YAML.safe_load(File.read NEWS_YAML_PATH).index_by { it['url'] }
existing_max_id = existing_items.flat_map { |url, item| item['id'].to_i }.max || 0

# 新規記事と既存記事を分離
created_items = []
updated_items = []

new_items.each do |new_item|
if existing_items_hash.key?(new_item['url'])
existing_item = existing_items_hash[new_item['url']]
# タイトルまたは公開日が変わった場合のみ更新
if existing_item['title'] != new_item['title'] || existing_item['published_at'] != new_item['published_at']
updated_items << existing_item.merge(new_item)
end
else
truly_new_items << new_item
fetched_items.each do |fetched_item|
existing_item = existing_items[fetched_item['url']]

if existing_item.nil?
# 新規アイテムならそのまま追加
created_items << fetched_item
elsif existing_item['title'] != fetched_item['title'] || existing_item['published_at'] != fetched_item['published_at']
# タイトルまたは公開日が変わっていたら更新
updated_items << existing_item.merge(fetched_item)
end
end

# 既存の最大IDを取得
max_existing_id = existing_news.map { |item| item['id'].to_i }.max || 0

# 新しいアイテムのみに ID を割り当て(古い順)
truly_new_items_sorted = truly_new_items.sort_by { |item|
Time.parse(item['published_at'])
}

truly_new_items_sorted.each_with_index do |item, index|
item['id'] = max_existing_id + index + 1
created_items.sort_by! { Time.parse it['published_at'] }
created_items.each.with_index(1) do |item, index|
item['id'] = existing_max_id + index
end

# 更新されなかった既存アイテムを取得
updated_urls = updated_items.map { |item| item['url'] }
unchanged_items = existing_news.reject { |item| updated_urls.include?(item['url']) }

# 全アイテムをマージ
all_items = unchanged_items + updated_items + truly_new_items_sorted
# URL をキーに、更新されなかった既存の YAML データを取得・保持
updated_urls = updated_items.map { it['url'] }
unchanged_items = existing_items.values.reject { updated_urls.include?(it['url']) }

# 日付降順ソート
sorted_items = all_items.sort_by { |item|
Time.parse(item['published_at'])
# 新規・更新・既存の各アイテムをマージし、日付降順でソート
merged_items = (unchanged_items + updated_items + created_items).sort_by {
Time.parse(it['published_at'])
}.reverse

# YAML ファイルに書き出し
File.open('db/news.yml', 'w') do |f|
formatted_items = sorted_items.map do |item|
File.open(NEWS_YAML_PATH, 'w') do |f|
formatted_items = merged_items.map do |item|
{
'id' => item['id'],
'url' => item['url'],
Expand All @@ -121,51 +78,49 @@ namespace :news do
}
end

f.write({ 'news' => formatted_items }.to_yaml)
f.write(formatted_items.to_yaml)
end

logger.info("✅ Wrote #{sorted_items.size} items to db/news.yml (#{truly_new_items_sorted.size} new, #{updated_items.size} updated)")
logger.info('==== END news:fetch ====')
logger.info "✅ Wrote #{merged_items.size} items to #{NEWS_YAML_PATH} (#{created_items.size} new, #{updated_items.size} updated)"
logger.info "==== END news:fetch ===="
logger.info ""
end

desc 'db/news.yml からデータベースに upsert'
desc "#{NEWS_YAML_PATH} からデータベースに upsert"
task upsert: :environment do
file_logger = ActiveSupport::Logger.new('log/news.log')
console = ActiveSupport::Logger.new(STDOUT)
logger = ActiveSupport::BroadcastLogger.new(file_logger, console)
logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH)
logger = ActiveSupport::BroadcastLogger.new(logger_file, console)

logger.info "==== START news:upsert ===="

yaml_path = Rails.root.join('db', 'news.yml')
raw = YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)

entries = raw['news'] || []
new_count = 0
news_items = YAML.safe_load File.read(NEWS_YAML_PATH)
created_count = 0
updated_count = 0

News.transaction do
entries.each do |attrs|
news = News.find_or_initialize_by(url: attrs['url'])
is_new = news.new_record?

news_items.each do |item|
news = News.find_or_initialize_by(url: item['url'])
news.assign_attributes(
title: attrs['title'],
published_at: attrs['published_at']
title: item['title'],
published_at: item['published_at']
)

if is_new || news.changed?

is_new_record = news.new_record?
if is_new_record || news.changed?
news.save!
status = is_new ? 'new' : 'updated'
new_count += 1 if is_new
updated_count += 1 unless is_new

status = is_new_record ? 'new' : 'updated'
created_count += 1 if is_new_record
updated_count += 1 unless is_new_record

logger.info "[News] #{news.published_at.to_date} #{news.title} (#{status})"
end
end
end

logger.info "Upserted #{new_count + updated_count} items (#{new_count} new, #{updated_count} updated)."
logger.info "Upserted #{created_count + updated_count} items (#{created_count} new, #{updated_count} updated)."
logger.info "==== END news:upsert ===="
logger.info ""
end

end