-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclustering.py~
55 lines (42 loc) · 1.32 KB
/
clustering.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 5 19:10:39 2012
@author: mike
"""
# here we do some importing
from pyspatialite import dbapi2 as spatialdb
import numpy as np
from scipy.cluster.vq import kmeans as kmeans
from pylab import show, plot
from collections import defaultdict
from pysal.core.IOHandlers import wkt
import pysal
# Rebind the imported `wkt` module name to a parser instance; from here on
# `wkt(text)` is used to parse a WKT string.
wkt = wkt.WKTParser()

# Open the SpatiaLite database and grab a cursor for the queries below.
inDb = 'newdb.db'
conn = spatialdb.Connection(inDb)
cur = conn.cursor()

# Debug output: show the cursor object and the attributes it exposes.
# Single-argument print(...) prints the same text under Python 2 and also
# parses under Python 3.
print(cur)
print(dir(cur))

# simple queries to get some information out of the database

# Every distinct word recorded in the `words` table.
words = """select distinct(word) from words"""

# Hashtags that occur more than 10 times.  execute() hands back the cursor
# itself, so the rows are fetched right away; otherwise `hashes` would only
# alias `cur` and lose its rows as soon as the cursor runs another statement.
hashes = cur.execute("""select distinct(hash) from hash_tweet
group by hash
having count(*) > 10;""").fetchall()

# WKT text of the coordinates (transformed to SRID 5070) of every status
# joined to a given word; the word is supplied as the single `?` parameter.
selectCoords = """select astext(transform(status.coords, 5070))
from status join words
on status.tweet_id = words.tweet_id
where words.word = ?"""
# Map each word to a numpy array built from the parsed coordinates of the
# statuses that contain it.
hashDict = {}

# Fetch the complete word list before issuing any further query: the inner
# query below reuses `cur`, and re-executing a cursor discards the rows still
# pending from its previous statement, which would end this loop after the
# first word.
for hashtag in cur.execute(words).fetchall():
    print(hashtag)
    # `hashtag` is a one-element row, so it doubles as the parameter tuple
    # for the single `?` placeholder in selectCoords.
    rows = cur.execute(selectCoords, hashtag).fetchall()
    # A NULL geometry comes back as None, which the WKT parser cannot
    # handle; skip those rows instead of crashing mid-load.
    points = [wkt(row[0]) for row in rows if row[0] is not None]
    hashDict[hashtag[0]] = np.array(points)
# a little idea: run k-means (asking for 5 centroids) over the coordinates
# stored under 'Obama' and plot the resulting centroids.
a = kmeans(hashDict['Obama'], 5)

# a[0] is the codebook of centroids returned by kmeans; split it into
# separate x and y lists for plotting.
a_x = []
a_y = []
for centroid in a[0]:
    a_x.append(centroid[0])
    a_y.append(centroid[1])

# 'rd' is a matplotlib format string: red, diamond-shaped markers.
pl = plot(a_x, a_y, 'rd')
# Debug output: list the attributes of the object returned by plot().
print(dir(pl))
show()