Fetch Posts From HIVE Blockchain: Difference between revisions
Created page with "* Fetching a syntax_wiki post from HIVE ==Container Setup== * create container <code>lxc launch ubuntu:24.04 hive</code> * Login to container <code>lxc exec hive bash</code> * switch to user ubuntu <code>su - ubuntu</code> ===Update System and Install Dependencies=== * Update package lists <code>sudo apt update && sudo apt upgrade -y</code> * Install essential packages <code>sudo apt install -y python3 python3-pip python3-venv git build-essential</code> * Install de..." |
No edit summary |
||
Line 1: | Line 1: | ||
* Fetching a syntax_wiki post from HIVE | * Fetching a syntax_wiki post from HIVE | ||
* https://peakd.com/hive-138301/@completenoobs/how-to-download-pages-from-hive-blockchain | |||
==Container Setup== | ==Container Setup== | ||
Latest revision as of 05:20, 16 June 2025
- Fetching a syntax_wiki post from HIVE
- https://peakd.com/hive-138301/@completenoobs/how-to-download-pages-from-hive-blockchain
Container Setup
- create container
lxc launch ubuntu:24.04 hive
- Login to container
lxc exec hive bash
- switch to user ubuntu
su - ubuntu
Update System and Install Dependencies
- Update package lists
sudo apt update && sudo apt upgrade -y
- Install essential packages
sudo apt install -y python3 python3-pip python3-venv git build-essential
- Install development libraries needed for compilation
sudo apt install -y libssl-dev libffi-dev python3-dev
- Verify Python installation
python3 --version
pip3 --version
Create Virtual Environment (Recommended)
- Create a virtual environment
python3 -m venv hive_beem_env
- Activate the virtual environment
source hive_beem_env/bin/activate
- Verify you're in the virtual environment (the shell prompt should now start with the environment name)
(hive_beem_env) ubuntu@hive:~$
Install Beem
- Install beem from PyPI
- NOTE: Beem is installed inside the virtual environment to avoid pip's
error: externally-managed-environment
pip install beem
Script to Fetch Post from HIVE
$EDITOR hive-fetch.py
- WARNING: This script is an early concept draft and is known to be buggy!
#!/usr/bin/env python3
"""
Hive Post Fetcher using Beem
Fetches raw markdown content from Hive blockchain posts
"""

import argparse
import json
import os
import re
import sys
import time
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET
from datetime import datetime
from urllib.parse import urlparse

# beem is only needed to talk to the blockchain.  Guarding the import keeps
# --help and the offline helpers (URL parsing, format conversion, savers)
# usable, and lets us show a readable hint instead of a traceback when the
# virtual environment has not been activated.
try:
    from beem import Hive
    from beem.account import Account
    from beem.comment import Comment
    _BEEM_IMPORT_ERROR = None
except ImportError as exc:
    Hive = Account = Comment = None
    _BEEM_IMPORT_ERROR = exc

SUPPORTED_FORMATS = ('json', 'xml', 'wiki', 'txt')

DEFAULT_NODES = (
    "https://api.hive.blog",
    "https://rpc.ecency.com",
    "https://api.deathwing.me",
    "https://fin.hive.3speak.co",
)

# Characters that are not allowed in an XML 1.0 document.
_XML_INVALID_CHARS = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]')

# Markdown patterns used by HivePostFetcher.markdown_to_wiki.
_MD_FENCE = re.compile(r'```(\w*)[ \t]*\n(.*?)```', re.DOTALL)
_MD_INLINE_CODE = re.compile(r'`([^`]+)`')
_MD_HEADING = re.compile(r'^(#{1,6})[ \t]+(.+?)[ \t]*$', re.MULTILINE)
_MD_IMAGE = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)[^)]*\)')
_MD_LINK = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
_MD_BOLD = re.compile(r'\*\*(.+?)\*\*')
# The opening/closing asterisk must hug the text, so "* list item" bullets
# and shell globs such as "*.txt *.md" are not mistaken for emphasis.
_MD_ITALIC = re.compile(r'(?<!\*)\*(?=\S)(.+?)(?<=\S)\*(?!\*)')
_MD_BULLET = re.compile(r'^- (.*)', re.MULTILINE)
_MD_NUMBERED = re.compile(r'^\d+\. (.*)', re.MULTILINE)
_MD_PLACEHOLDER = re.compile(r'\x00(\d+)\x00')


def _xml_safe(value):
    """Return str(value) with characters that XML 1.0 forbids removed."""
    return _XML_INVALID_CHARS.sub('', str(value))


class HivePostFetcher:
    """Fetch posts from the Hive blockchain and export them to files."""

    def __init__(self, nodes=None):
        """Connect to Hive.

        Args:
            nodes: optional list of RPC node URLs; DEFAULT_NODES when None.

        Raises:
            RuntimeError: if the beem package could not be imported.
        """
        if Hive is None:
            raise RuntimeError(
                "the 'beem' package could not be imported "
                f"({_BEEM_IMPORT_ERROR}); activate the virtual environment "
                "and run 'pip install beem'"
            )
        if nodes is None:
            nodes = list(DEFAULT_NODES)
        self.hive = Hive(node=nodes)

    def parse_hive_url(self, url):
        """Parse Hive blog URL to extract author and permlink.

        Handles the common front-end layouts:
            https://hive.blog/completenoobs/@completenoobs/completenoobs-genesis
            https://hive.blog/@completenoobs/completenoobs-genesis
            https://peakd.com/@completenoobs/completenoobs-genesis

        Returns:
            "@author/permlink", or None when the URL has no "@author"
            segment followed by a permlink segment.
        """
        try:
            path = urlparse(url.strip().rstrip('/')).path
        except (AttributeError, ValueError) as e:
            print(f"Error parsing URL: {e}")
            return None

        parts = [p for p in path.split('/') if p]
        # The permlink is the segment right after the "@author" segment, so
        # the last segment can never be the author.
        for index, part in enumerate(parts[:-1]):
            if part.startswith('@') and len(part) > 1:
                return f"@{part[1:]}/{parts[index + 1]}"
        return None

    @staticmethod
    def _extract_tags(post):
        """Return the post's tags as a list of strings.

        Tolerates json_metadata being a dict, a JSON string, or missing,
        and tags being a list or a single delimited string, so callers can
        always do ', '.join(tags).
        """
        metadata = post.get('json_metadata')
        if isinstance(metadata, str):
            try:
                metadata = json.loads(metadata)
            except ValueError:
                metadata = None
        if not isinstance(metadata, dict):
            return []

        tags = metadata.get('tags') or []
        if isinstance(tags, str):
            tags = [t for t in re.split(r'[\s,]+', tags) if t]
        if not isinstance(tags, (list, tuple)):
            return []
        return [str(tag) for tag in tags]

    @classmethod
    def _post_to_dict(cls, post):
        """Flatten a post mapping into the plain dict used by the savers."""
        author = post['author']
        permlink = post['permlink']
        return {
            'title': post.get('title', 'No Title'),
            'author': author,
            'permlink': permlink,
            'authorperm': post.get('authorperm') or f"@{author}/{permlink}",
            'created': str(post.get('created', '')),
            'body': post['body'],
            'url': f"https://hive.blog/@{author}/{permlink}",
            'tags': cls._extract_tags(post),
            'category': post.get('category', ''),
            'votes': len(post.get('active_votes') or []),
            'payout': str(post.get('pending_payout_value', '0.000 HBD')),
        }

    def fetch_single_post(self, authorperm_or_url):
        """Fetch a single post by author/permlink or URL.

        Returns:
            {'success': True, 'data': <post dict>} or
            {'success': False, 'error': <message>}.
        """
        # Any failure (bad input, network, unknown post) is reported through
        # the result dict; callers never see an exception from here.
        try:
            target = authorperm_or_url.strip()
            if target.startswith('http'):
                authorperm = self.parse_hive_url(target)
                if not authorperm:
                    return {'success': False, 'error': 'Could not parse URL format'}
                print(f"Parsed URL to: {authorperm}")
            else:
                authorperm = target

            post = Comment(authorperm, blockchain_instance=self.hive)
            return {'success': True, 'data': self._post_to_dict(post)}
        except Exception as e:
            return {'success': False, 'error': str(e)}

    def fetch_user_posts(self, username, limit=50):
        """Fetch posts by a user using get_account_posts.

        Returns:
            {'success': True, 'data': [<post dict>, ...]} or
            {'success': False, 'error': <message>}.  Posts that cannot be
            converted are reported on stdout and skipped.
        """
        try:
            account = Account(username.lstrip('@'), blockchain_instance=self.hive)
            posts = []

            for post in account.get_account_posts(sort="posts", limit=limit):
                try:
                    posts.append(self._post_to_dict(post))
                    time.sleep(0.3)  # Be nice to the API
                except Exception as e:
                    print(f"Error processing post: {e}")
                    continue

            return {'success': True, 'data': posts}
        except Exception as e:
            return {'success': False, 'error': str(e)}

    def save_as_json(self, posts, filename):
        """Save posts as JSON"""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(posts, f, indent=2, ensure_ascii=False)
        print(f"✅ Saved {len(posts)} posts to {filename}")

    def save_as_xml(self, posts, filename):
        """Save posts as XML.

        Every post key becomes a child element of <post>; a list of tags
        becomes <tags><tag>...</tag></tags>.  Text is escaped by
        ElementTree (no CDATA), and characters that XML 1.0 forbids are
        dropped so the pretty-printing re-parse cannot fail on them.
        """
        root = ET.Element("hive_posts")
        root.set("generated", datetime.now().isoformat())
        root.set("count", str(len(posts)))

        for post in posts:
            post_elem = ET.SubElement(root, "post")
            for key, value in post.items():
                if key == 'tags' and isinstance(value, list):
                    tags_elem = ET.SubElement(post_elem, "tags")
                    for tag in value:
                        ET.SubElement(tags_elem, "tag").text = _xml_safe(tag)
                else:
                    ET.SubElement(post_elem, key).text = _xml_safe(value)

        # Pretty print XML
        rough_string = ET.tostring(root, encoding='unicode')
        pretty_xml = minidom.parseString(rough_string).toprettyxml(indent=" ")

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(pretty_xml)
        print(f"✅ Saved {len(posts)} posts to {filename}")

    def save_as_wiki(self, posts, filename):
        """Save posts as MediaWiki syntax"""
        with open(filename, 'w', encoding='utf-8') as f:
            f.write("= Hive Posts Export =\n\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"Total Posts: {len(posts)}\n\n")

            for i, post in enumerate(posts, 1):
                # Post header
                f.write(f"== Post {i}: {post['title']} ==\n\n")

                # Post metadata table
                rows = [
                    f"| Author || @{post['author']}\n",
                    f"| Created || {post['created']}\n",
                    f"| URL || [{post['url']} View on Hive]\n",
                    f"| AuthorPerm || {post['authorperm']}\n",
                    f"| Category || {post.get('category', 'N/A')}\n",
                    f"| Votes || {post.get('votes', 0)}\n",
                    f"| Payout || {post.get('payout', 'N/A')}\n",
                ]
                if post.get('tags'):
                    tags_str = ', '.join([f"[[{tag}]]" for tag in post['tags']])
                    rows.append(f"| Tags || {tags_str}\n")

                f.write("{| class=\"wikitable\"\n")
                f.write("|-\n")
                f.write("! Property !! Value\n")
                for row in rows:
                    f.write("|-\n")
                    f.write(row)
                f.write("|}\n\n")

                # Post content
                f.write("=== Content ===\n\n")
                f.write(self.markdown_to_wiki(post['body']))
                f.write("\n\n")
                f.write("----\n\n")  # Horizontal rule between posts

        print(f"✅ Saved {len(posts)} posts to {filename}")

    def save_as_txt(self, posts, filename, single=False):
        """Save posts as plain text.

        Args:
            posts: list of post dicts.
            filename: output path.
            single: when True, write only the first post using the compact
                single-post header; otherwise write every post between
                separator lines.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            if single:
                post = posts[0]
                f.write(f"Title: {post['title']}\n")
                f.write(f"Author: @{post['author']}\n")
                f.write(f"Created: {post['created']}\n")
                f.write(f"URL: {post['url']}\n")
                f.write(f"AuthorPerm: {post['authorperm']}\n")
                f.write("="*60 + "\n\n")
                f.write(post['body'])
            else:
                for i, post in enumerate(posts, 1):
                    f.write(f"{'='*60}\n")
                    f.write(f"POST {i}: {post['title']}\n")
                    f.write(f"Author: @{post['author']}\n")
                    f.write(f"Created: {post['created']}\n")
                    f.write(f"URL: {post['url']}\n")
                    f.write(f"AuthorPerm: {post['authorperm']}\n")
                    f.write(f"Tags: {', '.join(post['tags'])}\n")
                    f.write(f"{'='*60}\n\n")
                    f.write(post['body'])
                    f.write(f"\n\n{'='*60}\n\n")
        print(f"✅ Saved to {filename}")

    def markdown_to_wiki(self, markdown_text):
        """Basic conversion from Markdown to MediaWiki syntax.

        Fenced code blocks and inline code are converted first and parked
        behind placeholders, so the heading / emphasis / list rules below
        never rewrite code (e.g. "# comment" lines or "*.txt" globs).

        Returns:
            The converted text; an empty string for empty or None input.
        """
        if not markdown_text:
            return ''

        parked = []

        def park(rendered):
            # NUL never occurs in the cleaned text, so the token is unique.
            parked.append(rendered)
            return f'\x00{len(parked) - 1}\x00'

        def fence(match):
            # An empty lang attribute is not a valid lexer name; use "text".
            lang = match.group(1) or 'text'
            return park(
                f'<syntaxhighlight lang="{lang}">\n{match.group(2)}</syntaxhighlight>'
            )

        def inline_code(match):
            return park(f'<code>{match.group(1)}</code>')

        def heading(match):
            bars = '=' * len(match.group(1))
            return f'{bars} {match.group(2)} {bars}'

        def image(match):
            # Rendered as an external link; without alt text, the bare URL.
            alt, url = match.group(1).strip(), match.group(2)
            return f'[{url} {alt}]' if alt else url

        wiki_text = str(markdown_text).replace('\r\n', '\n').replace('\x00', '')

        # Code first, so nothing below can touch it
        wiki_text = _MD_FENCE.sub(fence, wiki_text)
        wiki_text = _MD_INLINE_CODE.sub(inline_code, wiki_text)

        # Headers (must run before numbered lists, which emit "# item")
        wiki_text = _MD_HEADING.sub(heading, wiki_text)

        # Images and links
        wiki_text = _MD_IMAGE.sub(image, wiki_text)
        wiki_text = _MD_LINK.sub(r'[\2 \1]', wiki_text)

        # Bold and italic
        wiki_text = _MD_BOLD.sub(r"'''\1'''", wiki_text)
        wiki_text = _MD_ITALIC.sub(r"''\1''", wiki_text)

        # Lists (basic conversion)
        wiki_text = _MD_BULLET.sub(r'* \1', wiki_text)
        wiki_text = _MD_NUMBERED.sub(r'# \1', wiki_text)

        return _MD_PLACEHOLDER.sub(lambda m: parked[int(m.group(1))], wiki_text)


def resolve_output_format(output_path, requested=None):
    """Pick the output format for *output_path*.

    An explicitly requested format wins; otherwise a recognised file
    extension decides; otherwise JSON is used.
    """
    if requested:
        return requested
    ext = os.path.splitext(output_path)[1].lstrip('.').lower()
    return ext if ext in SUPPORTED_FORMATS else 'json'


def _save_posts(fetcher, posts, output_path, requested_format, single=False):
    """Write *posts* to *output_path* with the saver for the chosen format."""
    output_format = resolve_output_format(output_path, requested_format)
    if output_format == 'json':
        fetcher.save_as_json(posts, output_path)
    elif output_format == 'xml':
        fetcher.save_as_xml(posts, output_path)
    elif output_format == 'wiki':
        fetcher.save_as_wiki(posts, output_path)
    else:
        fetcher.save_as_txt(posts, output_path, single=single)


def _print_examples(prog):
    """Print usage examples, using the name the script was started with."""
    print("\nExamples:")
    print(" # Fetch user posts as JSON")
    print(f" python {prog} --user completenoobs --limit 10 --output posts.json")
    print("")
    print(" # Fetch user posts as XML")
    print(f" python {prog} --user completenoobs --output posts.xml")
    print("")
    print(" # Fetch user posts as Wiki syntax")
    print(f" python {prog} --user completenoobs --output posts.wiki")
    print("")
    print(" # Fetch specific post by URL")
    print(f" python {prog} --url 'https://hive.blog/completenoobs/@completenoobs/completenoobs-genesis'")
    print("")
    print(" # Fetch specific post by authorperm")
    print(f" python {prog} --post '@completenoobs/completenoobs-genesis' --output genesis.xml")


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list without the program name; sys.argv[1:] when None.

    Returns:
        0 on success (or when only help was shown), 1 on error.
    """
    parser = argparse.ArgumentParser(description='Fetch posts from Hive blockchain')
    parser.add_argument('--user', type=str, help='Username to fetch posts from')
    parser.add_argument('--post', type=str, help='Single post to fetch (format: @author/permlink or full URL)')
    parser.add_argument('--url', type=str, help='Hive blog URL to fetch (e.g., https://hive.blog/@user/post)')
    parser.add_argument('--limit', type=int, default=20, help='Limit number of posts to fetch')
    parser.add_argument('--output', type=str, help='Output file name (extension determines format)')
    # No default here: None means "not given", which lets the file extension
    # decide and still falls back to JSON (see resolve_output_format).
    parser.add_argument('--format', type=str, choices=list(SUPPORTED_FORMATS), default=None,
                        help='Output format (json, xml, wiki, txt)')

    args = parser.parse_args(argv)

    # Handle single post (either --post or --url)
    single_post_input = args.post or args.url

    if not single_post_input and not args.user:
        parser.print_help()
        _print_examples(parser.prog)
        return 0

    # Created only once there is something to fetch, so showing help needs
    # neither beem nor a node connection.
    try:
        fetcher = HivePostFetcher()
    except RuntimeError as e:
        print(f"Error: {e}")
        return 1

    if single_post_input:
        result = fetcher.fetch_single_post(single_post_input)
        if not result['success']:
            print(f"Error: {result['error']}")
            return 1

        post_data = result['data']
        print(f"Title: {post_data['title']}")
        print(f"Author: @{post_data['author']}")
        print(f"Created: {post_data['created']}")
        print(f"URL: {post_data['url']}")
        print(f"Category: {post_data['category']}")
        print(f"Tags: {', '.join(post_data['tags'])}")
        print(f"Votes: {post_data['votes']}")
        print(f"Payout: {post_data['payout']}")
        print("\n" + "="*60 + "\n")
        print(post_data['body'])

        # Save single post if output specified
        if args.output:
            _save_posts(fetcher, [post_data], args.output, args.format, single=True)
        return 0

    result = fetcher.fetch_user_posts(args.user, args.limit)
    if not result['success']:
        print(f"Error: {result['error']}")
        return 1

    posts = result['data']
    print(f"Fetched {len(posts)} posts for @{args.user}")

    if args.output:
        _save_posts(fetcher, posts, args.output, args.format)
    else:
        for i, post in enumerate(posts, 1):
            print(f"\n{i}. {post['title']}")
            print(f" Created: {post['created']}")
            print(f" URL: {post['url']}")
            print(f" Tags: {', '.join(post['tags'])}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
How to use script
usage: hive-fetch.py [-h] [--user USER] [--post POST] [--url URL] [--limit LIMIT] [--output OUTPUT] [--format {json,xml,wiki,txt}] Fetch posts from Hive blockchain options: -h, --help show this help message and exit --user USER Username to fetch posts from --post POST Single post to fetch (format: @author/permlink or full URL) --url URL Hive blog URL to fetch (e.g., https://hive.blog/@user/post) --limit LIMIT Limit number of posts to fetch --output OUTPUT Output file name (extension determines format) --format {json,xml,wiki,txt} Output format (json, xml, wiki, txt) Examples: # Fetch user posts as JSON python hive_fetcher.py --user completenoobs --limit 10 --output posts.json # Fetch user posts as XML python hive_fetcher.py --user completenoobs --output posts.xml # Fetch user posts as Wiki syntax python hive_fetcher.py --user completenoobs --output posts.wiki # Fetch specific post by URL python hive_fetcher.py --url 'https://hive.blog/completenoobs/@completenoobs/completenoobs-genesis' # Fetch specific post by authorperm python hive_fetcher.py --post '@completenoobs/completenoobs-genesis' --output genesis.xml
Examples
- syntax_wiki example post: https://hive.blog/hive-138301/@completenoobs/find-hive-users-memo-key-and-send-encrypted-message-l7a
- syntax_mark example post: https://hive.blog/completenoobs/@completenoobs/n33bcoin-how-to-fork-bitcoin-for-learning-by-tinkering
To print text of post to terminal:
python3 hive-fetch.py --url https://hive.blog/hive-138301/@completenoobs/find-hive-users-memo-key-and-send-encrypted-message-l7a
To save a syntax_wiki post to a file:
python3 hive-fetch.py --url "https://hive.blog/hive-138301/@completenoobs/find-hive-users-memo-key-and-send-encrypted-message-l7a" --output test.wiki
To convert a syntax_markdown post to syntax_wiki and save it to a file (BUGGY):
python3 hive-fetch.py --url "https://hive.blog/completenoobs/@completenoobs/n33bcoin-how-to-fork-bitcoin-for-learning-by-tinkering" --output test2.wiki