Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
honzakral committed May 29, 2015
0 parents commit aedd97b
Show file tree
Hide file tree
Showing 17 changed files with 458 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.*.swp
*~
*.py[co]
.coverage
*.egg-info
dist
*.egg
db.sqlite3
10 changes: 10 additions & 0 deletions manage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python
import os
import sys

def main():
    """Run Django's command-line utility with this project's settings."""
    # setdefault keeps any DJANGO_SETTINGS_MODULE already present in the
    # environment and only falls back to the project's own settings module.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stack.settings")

    # Imported here, after the settings variable is in place, exactly as the
    # script did before it was wrapped in a function.
    from django.core.management import execute_from_command_line

    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions qa/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Dotted path to the application configuration class for this app
# (QAConfig in qa/apps.py).
default_app_config = "qa.apps.QAConfig"
3 changes: 3 additions & 0 deletions qa/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
5 changes: 5 additions & 0 deletions qa/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig

class QAConfig(AppConfig):
    """Application configuration for the ``qa`` app."""

    # Python package of the app, and the label shown for it to humans.
    name = 'qa'
    verbose_name = "Q & A"
Empty file added qa/management/__init__.py
Empty file.
Empty file.
199 changes: 199 additions & 0 deletions qa/management/commands/load_dump.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
from __future__ import print_function

import time
from os.path import join
from xml.etree import cElementTree
from dateutil import parser as date_parser

from django.core.management.base import BaseCommand

from qa.models import User, Question, Answer, QuestionComment, AnswerComment

# Maps the ``PostTypeId`` attribute of a Posts.xml row to the model that
# stores it (1 = question, 2 = answer).  Rows with any other type id are
# skipped by ``Command.parse_posts``.
POST_TYPES = {1: Question, 2: Answer}

class Command(BaseCommand):
    """Management command that loads an XML data dump into the ``qa`` models.

    The dump directory is expected to contain ``Users.xml``, ``Posts.xml``
    and ``Comments.xml``; each ``<row .../>`` element becomes one model
    instance.
    """

    def add_arguments(self, parser):
        """Register the positional ``dump_directory`` argument."""
        parser.add_argument(
            'dump_directory',
            help='Directory containing the XML files.')

    def handle(self, **options):
        """Load users, then posts, then comments from the dump directory.

        The order matters: posts and comments point at users through
        ``owner_id``, and ``parse_comments`` needs the post ids collected by
        ``parse_posts`` to choose the comment model.
        """
        self.dir = options['dump_directory']
        # Primary keys of every answer / question inserted by parse_posts.
        self._answers = set()
        self._questions = set()

        for label, loader in (
            ('Users', self.parse_users),
            ('Posts', self.parse_posts),
            ('Comments', self.parse_comments),
        ):
            self.verbose_run(loader, label)

    def verbose_run(self, cmd_func, name, report_every=100):
        """Drain the generator returned by ``cmd_func`` and report progress.

        :param cmd_func: zero-argument callable returning an iterable; every
            item it yields counts as one loaded object.
        :param name: label used in the progress and summary messages.
        :param report_every: print one progress dot per this many items.
        """
        # Flush so the label is visible while the (possibly long) load runs.
        print('Loading %s: ' % name, end='', flush=True)
        start = time.time()
        cnt = 0
        for _ in cmd_func():
            cnt += 1
            # One dot per ``report_every`` items.  The previous truthiness
            # test was inverted and printed a dot for every *other* item.
            if cnt % report_every == 0:
                print('.', end='', flush=True)
        print('DONE\nLoaded %d %s in %d seconds' % (
            cnt, name, time.time() - start
        ))

    def _parse_file(self, xml_file):
        """Yield one dict of attributes per ``<row>`` element of ``xml_file``.

        Attribute values consisting only of decimal digits are converted to
        ``int``; everything else (including negative numbers such as
        ``"-1"``) is left as a string.

        :param xml_file: file name relative to the dump directory.
        """
        # Binary mode lets the XML parser apply the encoding declared by the
        # document instead of decoding with the locale's default encoding.
        with open(join(self.dir, xml_file), 'rb') as source:
            for event, elem in cElementTree.iterparse(source):
                if event != 'end' or elem.tag != 'row':
                    continue
                # isdecimal() rather than isdigit(): the latter also accepts
                # characters such as superscript digits that int() rejects.
                yield {
                    key: int(value) if value.isdecimal() else value
                    for key, value in elem.items()
                }
                # The row has been consumed; drop its attributes so memory
                # use does not grow with the size of the dump.
                elem.clear()

    def parse_users(self, users_file='Users.xml'):
        """
        Parse data into User objects, yielding each one after it is created.

        Example row:

            <row
                Id="2"
                Reputation="101"
                CreationDate="2011-01-03T20:14:55.783"
                DisplayName="Geoff Dalgas"
                LastAccessDate="2012-12-19T00:28:45.110"
                WebsiteUrl="http://stackoverflow.com"
                Location="Corvallis, OR"
                AboutMe="&lt;p&gt;Developer on ...."
                Views="6"
                UpVotes="6"
                DownVotes="0"
                EmailHash="b437f461b3fd27387c5d8ab47a293d35"
                Age="36"
            />
        """
        for user in self._parse_file(users_file):
            # _parse_file only converts all-digit values, so a negative id is
            # still the string '-1' here.  That row is not imported
            # (presumably a built-in system account -- confirm).
            if user['Id'] == '-1':
                continue
            yield User.objects.create(
                id=user['Id'],
                email=user.get('EmailHash', ''),
                date_joined=date_parser.parse(user['CreationDate']),

                display_name=user['DisplayName'],
                url=user.get('WebsiteUrl', ''),
                location=user.get('Location', ''),
                description=user.get('AboutMe', ''),

                views=user['Views'],
                votes_up=user['UpVotes'],
                votes_down=user['DownVotes'],
                age=user.get('Age', 0)
            )

    def parse_comments(self, comments_file='Comments.xml'):
        """
        Parse data into QuestionComment / AnswerComment objects.

        The model is chosen by looking the comment's ``PostId`` up in the ids
        collected by ``parse_posts``; comments on posts that were not loaded
        are skipped.

        Comments.xml:

            <row
                Id="9"
                PostId="9"
                Score="3"
                Text="Point.... "
                CreationDate="2011-01-03T21:16:09.603"
                UserId="60"
            />
        """
        for comment in self._parse_file(comments_file):
            if comment['PostId'] in self._answers:
                cls = AnswerComment
            elif comment['PostId'] in self._questions:
                cls = QuestionComment
            else:
                continue

            yield cls.objects.create(
                post_id=comment['PostId'],
                owner_id=comment['UserId'],
                creation_date=date_parser.parse(comment['CreationDate']),
                score=comment['Score'],
                text=comment['Text']
            )

    def parse_posts(self, posts_file='Posts.xml'):
        """
        Parse data into Question / Answer objects, yielding each saved post.

        The primary key of every saved post is recorded in
        ``self._questions`` or ``self._answers`` for ``parse_comments``.

        Posts.xml:

            Q: <row
                Id="5"
                PostTypeId="1"
                AcceptedAnswerId="73"
                CreationDate="2011-01-03T20:52:52.880"
                Score="39"
                ViewCount="5638"
                Body="&lt;p&gt;....."
                OwnerUserId="24"
                LastEditorUserId="97"
                LastEditDate="2011-01-06T11:34:27.610"
                LastActivityDate="2012-01-27T19:12:50.900"
                Title="What are the differences between NoSQL and a traditional RDBMS?"
                Tags="&lt;nosql&gt;&lt;rdbms&gt;&lt;database-recommendation&gt;"
                AnswerCount="5"
                CommentCount="0"
                FavoriteCount="22"
            />
            A: <row
                Id="12"
                PostTypeId="2"
                ParentId="3"
                CreationDate="2011-01-03T21:01:19.160"
                Score="15"
                Body="&lt;p&gt;In ..."
                OwnerUserId="14"
                LastActivityDate="2011-01-03T21:01:19.160"
                CommentCount="3"
            />
        """
        for data in self._parse_file(posts_file):
            try:
                cls = POST_TYPES[data['PostTypeId']]
            except KeyError:
                # unknown post type, ignore
                continue

            # Fields shared by questions and answers.
            post = cls(
                id=data['Id'],
                owner_id=data['OwnerUserId'],
                creation_date=date_parser.parse(data['CreationDate']),
                last_activity_date=date_parser.parse(data['LastActivityDate']),
                score=data['Score'],
                body=data['Body'],
                comment_count=data['CommentCount']
            )

            if isinstance(post, Question):
                post.answer_count = data['AnswerCount']
                post.tags = data['Tags']
                post.title = data['Title']
                post.favorite_count = data.get('FavoriteCount', 0)
                post.view_count = data['ViewCount']
                if 'AcceptedAnswerId' in data:
                    post.accepted_answer_id = data['AcceptedAnswerId']
                if 'LastEditorUserId' in data:
                    post.last_editor_id = data['LastEditorUserId']
                    post.last_edit_date = date_parser.parse(data['LastEditDate'])

                self._questions.add(post.pk)
            else:
                self._answers.add(post.pk)
                post.question_id = data['ParentId']

            # The primary key comes from the dump, so always INSERT.
            post.save(force_insert=True)

            yield post
67 changes: 67 additions & 0 deletions qa/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from django.db import models

class User(models.Model):
    """A site user, as imported from ``Users.xml`` by ``load_dump``."""

    # Identity.  The loader stores the dump's ``EmailHash`` in ``email``.
    email = models.CharField(max_length=200)
    date_joined = models.DateTimeField()

    # Public profile.
    display_name = models.CharField(max_length=200)
    url = models.CharField(max_length=400)
    location = models.CharField(max_length=400)
    description = models.TextField()

    # Counters copied from the dump.
    views = models.PositiveIntegerField()
    votes_up = models.PositiveIntegerField()
    votes_down = models.PositiveIntegerField()
    age = models.PositiveIntegerField()

class Post(models.Model):
    """Abstract base holding the fields shared by questions and answers."""

    # CASCADE was the implicit default; it is spelled out because newer
    # Django versions require an explicit ``on_delete``.
    owner = models.ForeignKey(User, on_delete=models.CASCADE)
    creation_date = models.DateTimeField()
    last_activity_date = models.DateTimeField()
    score = models.IntegerField()
    body = models.TextField()
    comment_count = models.PositiveIntegerField()

    def get_comments(self):
        """Return this post's comments, newest first.

        Relies on the concrete subclass being the target of a comment
        foreign key declared with ``related_name='comments'``.
        """
        return self.comments.order_by('-creation_date')

    class Meta:
        abstract = True


class Question(Post):
    """A question post, with its title, tags and per-question counters."""

    answer_count = models.PositiveIntegerField()
    tags = models.TextField()
    title = models.CharField(max_length=1024)
    favorite_count = models.PositiveIntegerField()
    view_count = models.PositiveIntegerField()
    # Optional pointer to the accepted answer.  ``on_delete`` is stated
    # explicitly (newer Django versions require it); CASCADE keeps the
    # behaviour the implicit default gave before.
    accepted_answer = models.ForeignKey('Answer', related_name='accepted_for',
                                        null=True, blank=True,
                                        on_delete=models.CASCADE)

    # Set only for questions that have been edited.
    last_editor = models.ForeignKey(User, null=True, blank=True,
                                    related_name='last_edited_questions',
                                    on_delete=models.CASCADE)
    last_edit_date = models.DateTimeField(null=True, blank=True)

    def get_answers(self):
        """Return the answers to this question, newest first."""
        return self.answer_set.order_by('-creation_date')


class Answer(Post):
    """An answer post, attached to the question it answers."""

    # Explicit ``on_delete`` (required by newer Django versions); CASCADE
    # matches the previous implicit default.
    question = models.ForeignKey(Question, on_delete=models.CASCADE)


class Comment(models.Model):
    """Abstract base for comments; subclasses add the ``post`` foreign key."""

    # Explicit ``on_delete`` (required by newer Django versions); CASCADE
    # matches the previous implicit default.
    owner = models.ForeignKey(User, on_delete=models.CASCADE)
    creation_date = models.DateTimeField()
    score = models.IntegerField()
    text = models.TextField()

    class Meta:
        abstract = True


class QuestionComment(Comment):
    """A comment attached to a question."""

    # Reachable from the question as ``question.comments``.  ``on_delete``
    # is explicit for newer Django versions; CASCADE was the old default.
    post = models.ForeignKey(Question, related_name='comments',
                             on_delete=models.CASCADE)


class AnswerComment(Comment):
    """A comment attached to an answer."""

    # Reachable from the answer as ``answer.comments``.  ``on_delete`` is
    # explicit for newer Django versions; CASCADE was the old default.
    post = models.ForeignKey(Answer, related_name='comments',
                             on_delete=models.CASCADE)

35 changes: 35 additions & 0 deletions qa/templates/qa/question_detail.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{# Question page: the question, its comments, then every answer followed by that answer's comments. #}
{# NOTE(review): |safe switches off autoescaping for body/text; this assumes the stored content is trusted HTML - confirm. #}
<h1>{{ object.title }}</h1>
question by {{ object.owner.display_name }} on {{ object.creation_date|date }}, {{ object.creation_date|time }}
{% if object.score %}
with score {{ object.score }}
{% endif %}

{{ object.body|safe }}

{# Comments on the question itself. #}
{% for comment in object.get_comments %}
comment by {{ comment.owner.display_name }} on {{ comment.creation_date|date }}, {{ comment.creation_date|time }}
{% if comment.score %}
with score {{ comment.score }}
{% endif %}

<p>{{ comment.text|safe }}</p>
{% endfor %}

{# Answers, each followed by its own comments. #}
{% for answer in object.get_answers %}
answer by {{ answer.owner.display_name }} on {{ answer.creation_date|date }}, {{ answer.creation_date|time }}
{% if answer.score %}
with score {{ answer.score }}
{% endif %}

{{answer.body|safe}}

{% for comment in answer.get_comments %}
comment by {{ comment.owner.display_name }} on {{ comment.creation_date|date }}, {{ comment.creation_date|time }}
{% if comment.score %}
with score {{ comment.score }}
{% endif %}

<p>{{ comment.text|safe }}</p>
{% endfor %}

{% endfor %}
5 changes: 5 additions & 0 deletions qa/templates/qa/question_list.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{# Question index: one linked title per question, followed by its creation date and time. #}
{% for question in object_list %}
<h2><a href="{% url "qa-question" pk=question.pk %}">{{ question.title }}</a></h2>
{{ question.creation_date|date }}
{{ question.creation_date|time }}
{% endfor %}
8 changes: 8 additions & 0 deletions qa/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from django.conf.urls import patterns, url

from .views import QuestionDetail, QuestionList

# URL routes of the ``qa`` app: the question index and the per-question page.
# A plain list is used instead of the ``patterns('')`` helper, which is
# deprecated since Django 1.8; with an empty prefix both forms are equivalent.
urlpatterns = [
    url(r'^$', QuestionList.as_view(), name='qa-list'),
    url(r'^(?P<pk>\d+)/$', QuestionDetail.as_view(), name='qa-question'),
]
14 changes: 14 additions & 0 deletions qa/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from django.views.generic.list import ListView
from django.views.generic.detail import DetailView

from .models import Question

class QuestionList(ListView):
    """Paginated list of questions, newest first, ten per page."""

    model = Question
    paginate_by = 10
    ordering = '-creation_date'


class QuestionDetail(DetailView):
    """Detail page for a single ``Question``."""

    model = Question

Empty file added stack/__init__.py
Empty file.
Loading

0 comments on commit aedd97b

Please sign in to comment.