From aedd97bb7a2192b3680a18a766e54a5daed20a6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Honza=20Kr=C3=A1l?=
Date: Fri, 29 May 2015 02:24:47 +0200
Subject: [PATCH] Initial commit

---
 .gitignore                           |   8 ++
 manage.py                            |  10 ++
 qa/__init__.py                       |   1 +
 qa/admin.py                          |   3 +
 qa/apps.py                           |   5 +
 qa/management/__init__.py            |   0
 qa/management/commands/__init__.py   |   0
 qa/management/commands/load_dump.py  | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++
 qa/models.py                         |  76 ++++++++++
 qa/templates/qa/question_detail.html |  35 +++++
 qa/templates/qa/question_list.html   |   5 +
 qa/urls.py                           |   8 ++
 qa/views.py                          |  14 ++
 stack/__init__.py                    |   0
 stack/settings.py                    |  84 +++++++++++
 stack/urls.py                        |   5 +
 stack/wsgi.py                        |  14 ++
 17 files changed, 448 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 manage.py
 create mode 100644 qa/__init__.py
 create mode 100644 qa/admin.py
 create mode 100644 qa/apps.py
 create mode 100644 qa/management/__init__.py
 create mode 100644 qa/management/commands/__init__.py
 create mode 100644 qa/management/commands/load_dump.py
 create mode 100644 qa/models.py
 create mode 100644 qa/templates/qa/question_detail.html
 create mode 100644 qa/templates/qa/question_list.html
 create mode 100644 qa/urls.py
 create mode 100644 qa/views.py
 create mode 100644 stack/__init__.py
 create mode 100644 stack/settings.py
 create mode 100644 stack/urls.py
 create mode 100644 stack/wsgi.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a89c19f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+.*.swp
+*~
+*.py[co]
+.coverage
+*.egg-info
+dist
+*.egg
+db.sqlite3
diff --git a/manage.py b/manage.py
new file mode 100755
index 0000000..462ebaf
--- /dev/null
+++ b/manage.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+import os
+import sys
+
+if __name__ == "__main__":
+    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stack.settings")
+
+    from django.core.management import execute_from_command_line
+
+    execute_from_command_line(sys.argv)
diff --git a/qa/__init__.py b/qa/__init__.py
new file mode 100644
index 0000000..e9cc0ce
--- /dev/null
+++ b/qa/__init__.py
@@ -0,0 +1 @@
+default_app_config = 'qa.apps.QAConfig'
diff --git a/qa/admin.py b/qa/admin.py
new file mode 100644
index 0000000..8c38f3f
--- /dev/null
+++ b/qa/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
diff --git a/qa/apps.py b/qa/apps.py
new file mode 100644
index 0000000..ae869c5
--- /dev/null
+++ b/qa/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+class QAConfig(AppConfig):
+    name = 'qa'
+    verbose_name = "Q & A"
diff --git a/qa/management/__init__.py b/qa/management/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/qa/management/commands/__init__.py b/qa/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/qa/management/commands/load_dump.py b/qa/management/commands/load_dump.py
new file mode 100644
index 0000000..cd91c74
--- /dev/null
+++ b/qa/management/commands/load_dump.py
@@ -0,0 +1,180 @@
+from __future__ import print_function
+
+import time
+from os.path import join
+from xml.etree import ElementTree
+from dateutil import parser as date_parser
+
+from django.core.management.base import BaseCommand
+
+from qa.models import User, Question, Answer, QuestionComment, AnswerComment
+
+# Maps the PostTypeId of a Posts.xml row to the model that stores it.
+POST_TYPES = {
+    1: Question,
+    2: Answer,
+}
+
+class Command(BaseCommand):
+    """Load the Users, Posts and Comments XML files of a dump directory."""
+
+    def add_arguments(self, parser):
+        parser.add_argument('dump_directory',
+            help='Directory containing the XML files.')
+
+    def handle(self, **options):
+        """Load users, then posts, then comments, reporting progress."""
+        self.dir = options['dump_directory']
+        # ids of the posts loaded so far; parse_comments uses them to pick the
+        # comment model and to skip comments whose post was not loaded
+        self._answers = set()
+        self._questions = set()
+
+        for m, f in (
+                ('Users', self.parse_users),
+                ('Posts', self.parse_posts),
+                ('Comments', self.parse_comments)
+            ):
+            self.verbose_run(f, m)
+
+    def verbose_run(self, cmd_func, name, report_every=100):
+        """
+        Run the ``cmd_func`` generator to completion, printing progress.
+
+        A dot is printed after every ``report_every`` loaded objects and a
+        summary with the total count and elapsed seconds at the end.
+        """
+        print('Loading %s: ' % name, end='')
+        start = time.time()
+        cnt = 0
+        for _ in cmd_func():
+            cnt += 1
+            # report once per ``report_every`` objects, not on all the others
+            if cnt % report_every == 0:
+                print('.', end='', flush=True)
+        print('DONE\nLoaded %d %s in %d seconds' % (
+            cnt, name, time.time() - start
+        ))
+
+    def _parse_file(self, xml_file):
+        """
+        Yield the attributes of each ``row`` element in ``xml_file`` as a dict.
+
+        Values made up only of decimal digits are converted to ``int``; all
+        other values (negative numbers included) are left as strings.
+        """
+        # binary mode leaves decoding to the XML parser
+        with open(join(self.dir, xml_file), 'rb') as source:
+            for event, e in ElementTree.iterparse(source):
+                if event != 'end' or e.tag != 'row':
+                    continue
+                row = dict(
+                    # isdecimal() is only true for characters int() accepts,
+                    # isdigit() is also true for e.g. superscript digits
+                    (k, int(v) if v.isdecimal() else v)
+                    for (k, v) in e.items()
+                )
+                # release the row's data once copied so that processed rows
+                # do not pile up in memory
+                e.clear()
+                yield row
+
+    def parse_users(self, users_file='Users.xml'):
+        """
+        Parse data into User objects
+
+        Creates and yields one User per row of ``users_file``; the row with
+        Id -1 is skipped.
+        """
+        for user in self._parse_file(users_file):
+            # '-1' is not all digits, so _parse_file leaves it a string
+            if user['Id'] == '-1':
+                continue
+            yield User.objects.create(
+                id=user['Id'],
+                email=user.get('EmailHash', ''),
+                date_joined=date_parser.parse(user['CreationDate']),
+
+                display_name=user['DisplayName'],
+                url=user.get('WebsiteUrl', ''),
+                location=user.get('Location', ''),
+                description=user.get('AboutMe', ''),
+
+                views=user['Views'],
+                votes_up=user['UpVotes'],
+                votes_down=user['DownVotes'],
+                age=user.get('Age', 0)
+            )
+
+
+    def parse_comments(self, comments_file='Comments.xml'):
+        """
+        Create and yield a comment for each row of ``comments_file``.
+
+        Rows whose PostId belongs to a loaded answer become AnswerComment
+        objects, those of a loaded question QuestionComment objects; rows
+        for any other post are skipped. Posts must be loaded first.
+        """
+        for comment in self._parse_file(comments_file):
+            if comment['PostId'] in self._answers:
+                cls = AnswerComment
+            elif comment['PostId'] in self._questions:
+                cls = QuestionComment
+            else:
+                continue
+
+            yield cls.objects.create(
+                post_id=comment['PostId'],
+                owner_id=comment['UserId'],
+                creation_date=date_parser.parse(comment['CreationDate']),
+                score=comment['Score'],
+                text=comment['Text']
+            )
+
+
+    def parse_posts(self, posts_file='Posts.xml'):
+        """
+        Create and yield a Question or Answer for each row of ``posts_file``.
+
+        The model is chosen by PostTypeId (see POST_TYPES); rows with any
+        other type are skipped. The id of every loaded post is recorded in
+        ``self._questions`` / ``self._answers`` for parse_comments.
+        """
+
+        for data in self._parse_file(posts_file):
+            try:
+                cls = POST_TYPES[data['PostTypeId']]
+            except KeyError:
+                # unknown post type, ignore
+                continue
+
+            post = cls(
+                id=data['Id'],
+                owner_id=data['OwnerUserId'],
+                creation_date=date_parser.parse(data['CreationDate']),
+                last_activity_date=date_parser.parse(data['LastActivityDate']),
+                score=data['Score'],
+                body=data['Body'],
+                comment_count=data['CommentCount']
+            )
+
+            if isinstance(post, Question):
+                post.answer_count = data['AnswerCount']
+                post.tags = data['Tags']
+                post.title = data['Title']
+                post.favorite_count = data.get('FavoriteCount', 0)
+                post.view_count = data['ViewCount']
+                if 'AcceptedAnswerId' in data:
+                    post.accepted_answer_id = data['AcceptedAnswerId']
+                if 'LastEditorUserId' in data:
+                    post.last_editor_id = data['LastEditorUserId']
+                    post.last_edit_date = date_parser.parse(data['LastEditDate'])
+
+                self._questions.add(post.pk)
+            else:
+                self._answers.add(post.pk)
+                post.question_id = data['ParentId']
+
+            post.save(force_insert=True)
+
+            yield post
diff --git a/qa/models.py b/qa/models.py
new file mode 100644
index 0000000..8bdf3af
--- /dev/null
+++ b/qa/models.py
@@ -0,0 +1,76 @@
+from django.db import models
+
+class User(models.Model):
+    """A user account loaded from the dump."""
+    email = models.CharField(max_length=200)
+    date_joined = models.DateTimeField()
+    display_name = models.CharField(max_length=200)
+    url = models.CharField(max_length=400)
+    location = models.CharField(max_length=400)
+    description = models.TextField()
+    views = models.PositiveIntegerField()
+    votes_up = models.PositiveIntegerField()
+    votes_down = models.PositiveIntegerField()
+    age = models.PositiveIntegerField()
+
+class Post(models.Model):
+    """Abstract base with the fields shared by Question and Answer."""
+    owner = models.ForeignKey(User)
+    creation_date = models.DateTimeField()
+    last_activity_date = models.DateTimeField()
+    score = models.IntegerField()
+    body = models.TextField()
+    comment_count = models.PositiveIntegerField()
+
+    def get_comments(self):
+        """Return the post's comments, newest first."""
+        return self.comments.order_by('-creation_date')
+
+    class Meta:
+        abstract = True
+
+
+class Question(Post):
+    """A question, optionally pointing at its accepted answer."""
+    answer_count = models.PositiveIntegerField()
+    tags = models.TextField()
+    title = models.CharField(max_length=1024)
+    favorite_count = models.PositiveIntegerField()
+    view_count = models.PositiveIntegerField()
+    accepted_answer = models.ForeignKey('Answer', related_name='accepted_for',
+        null=True, blank=True)
+
+    last_editor = models.ForeignKey(User, null=True, blank=True,
+        related_name='last_edited_questions')
+    last_edit_date = models.DateTimeField(null=True, blank=True)
+
+    def get_answers(self):
+        """Return the question's answers, newest first."""
+        return self.answer_set.order_by('-creation_date')
+
+
+class Answer(Post):
+    """An answer to a Question."""
+    question = models.ForeignKey(Question)
+
+
+class Comment(models.Model):
+    """Abstract base with the fields shared by both comment models."""
+    owner = models.ForeignKey(User)
+    creation_date = models.DateTimeField()
+    score = models.IntegerField()
+    text = models.TextField()
+
+    class Meta:
+        abstract = True
+
+
+class QuestionComment(Comment):
+    """A comment on a Question."""
+    post = models.ForeignKey(Question, related_name='comments')
+
+
+class AnswerComment(Comment):
+    """A comment on an Answer."""
+    post = models.ForeignKey(Answer, related_name='comments')
+
diff --git a/qa/templates/qa/question_detail.html b/qa/templates/qa/question_detail.html
new file mode 100644
index 0000000..902a6ef
--- /dev/null
+++ b/qa/templates/qa/question_detail.html
@@ -0,0 +1,35 @@
+

{{ object.title }}

+question by {{ object.owner.display_name }} on {{ object.creation_date|date }}, {{ object.creation_date|time }} +{% if object.score %} + with score {{ object.score }} +{% endif %} + +{{ object.body|safe }} + +{% for comment in object.get_comments %} + comment by {{ comment.owner.display_name }} on {{ comment.creation_date|date }}, {{ comment.creation_date|time }} + {% if comment.score %} + with score {{ comment.score }} + {% endif %} + +

{{ comment.text|safe }}

+{% endfor %} + +{% for answer in object.get_answers %} + answer by {{ answer.owner.display_name }} on {{ answer.creation_date|date }}, {{ answer.creation_date|time }} + {% if answer.score %} + with score {{ answer.score }} + {% endif %} + + {{answer.body|safe}} + + {% for comment in answer.get_comments %} + comment by {{ comment.owner.display_name }} on {{ comment.creation_date|date }}, {{ comment.creation_date|time }} + {% if comment.score %} + with score {{ comment.score }} + {% endif %} + +

{{ comment.text|safe }}

+ {% endfor %} + +{% endfor %} diff --git a/qa/templates/qa/question_list.html b/qa/templates/qa/question_list.html new file mode 100644 index 0000000..73e8138 --- /dev/null +++ b/qa/templates/qa/question_list.html @@ -0,0 +1,5 @@ +{% for question in object_list %} +

{{ question.title }}

+    {{ question.creation_date|date }}
+    {{ question.creation_date|time }}
+{% endfor %}
diff --git a/qa/urls.py b/qa/urls.py
new file mode 100644
index 0000000..88e361a
--- /dev/null
+++ b/qa/urls.py
@@ -0,0 +1,8 @@
+from django.conf.urls import patterns, url
+
+from .views import QuestionDetail, QuestionList
+# QuestionDetail looks the question up by the ``pk`` group captured below.
+urlpatterns = patterns('',
+    url(r'^$', QuestionList.as_view(), name='qa-list'),
+    url(r'^(?P<pk>\d+)/$', QuestionDetail.as_view(), name='qa-question'),
+)
diff --git a/qa/views.py b/qa/views.py
new file mode 100644
index 0000000..6e98277
--- /dev/null
+++ b/qa/views.py
@@ -0,0 +1,14 @@
+from django.views.generic.list import ListView
+from django.views.generic.detail import DetailView
+
+from .models import Question
+
+class QuestionList(ListView):
+    model = Question
+    ordering = '-creation_date'  # newest questions first
+    paginate_by = 10  # questions per page
+
+
+class QuestionDetail(DetailView):
+    model = Question
+
diff --git a/stack/__init__.py b/stack/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/stack/settings.py b/stack/settings.py
new file mode 100644
index 0000000..8cee15e
--- /dev/null
+++ b/stack/settings.py
@@ -0,0 +1,84 @@
+"""
+Django settings for stack project.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.7/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/1.7/ref/settings/
+"""
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+import os
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/1.7/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY = '6q3dr8ffl^d79npzgr6wg!r$oe^a(au_o=&31v^orh&t)e=dp!'
+
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG = True
+
+TEMPLATE_DEBUG = True
+
+ALLOWED_HOSTS = []
+
+
+# Application definition
+
+INSTALLED_APPS = (
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+    'qa'  # the project's own app (models, views, load_dump command)
+)
+
+MIDDLEWARE_CLASSES = (
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+)
+
+ROOT_URLCONF = 'stack.urls'
+
+WSGI_APPLICATION = 'stack.wsgi.application'
+
+
+# Database
+# https://docs.djangoproject.com/en/1.7/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+    }
+}
+
+# Internationalization
+# https://docs.djangoproject.com/en/1.7/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = False
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/1.7/howto/static-files/
+
+STATIC_URL = '/static/'
diff --git a/stack/urls.py b/stack/urls.py
new file mode 100644
index 0000000..9b0f202
--- /dev/null
+++ b/stack/urls.py
@@ -0,0 +1,5 @@
+from django.conf.urls import patterns, include, url
+
+urlpatterns = patterns('',
+    url(r'^', include('qa.urls')),  # every URL is routed to the qa app
+)
diff --git a/stack/wsgi.py b/stack/wsgi.py
new file mode 100644
index 0000000..e9675df
--- /dev/null
+++ b/stack/wsgi.py
@@ -0,0 +1,14 @@
+"""
+WSGI config for stack project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.7/howto/deployment/wsgi/
+"""
+
+import os
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stack.settings")
+
+from django.core.wsgi import get_wsgi_application
+application = get_wsgi_application()