-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathoutput.c
113 lines (97 loc) · 3.36 KB
/
output.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
** Probabilistic latent semantic analysis (PLSA, baseline version)
** Copyright (C) 2009-2010 by Raymond Wan ([email protected])
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h> /* UINT_MAX */
#include <stdbool.h>
#include <math.h>
#include <float.h>
#include <time.h>
#include "wmalloc.h"
#include "plsa-defn.h"
#include "output.h"
/*
**  Write the joint (co-occurrence) values for every (row, column) pair to
**  the file "<base_fn>.plsa" and update the run statistics in info.
**
**  Output layout (text mode uses tab-separated fields, binary mode uses raw
**  fwrite() of the in-memory representation):
**    m, n, the m row IDs, the n column IDs, then m * n values in
**    row-major order.
**
**  Each value is accumulated in log space over all clusters:  the terms
**  for cluster 0 are added together and the remaining clusters are folded
**  in with logSumsInline ().  If info -> rounding is set, the value is
**  rounded using ROUND_DIGITS before it is written.
**
**  Side effects:
**    - Creates / overwrites "<base_fn>.plsa".
**    - Adds the elapsed wall-clock time to info -> printCoProbs_time.
**    - When info -> verbose is set and info -> iter == UINT_MAX, prints a
**      summary to stderr (count of values > 0, the sum of the
**      exponentiated values, and the number of calls made so far).
**    - Prints a warning to stderr if the output stream reports a write
**      error; the function does not abort in that case.
**
**  NOTE(review):  info, num_clusters, temp, i, j and k keep their original
**  names because the project macros used below may refer to them by
**  name -- confirm against plsa-defn.h before renaming.
*/
void printCoProb (INFO *info) {
  static const char suffix[] = ".plsa";
  static unsigned int snapshot_count = 0;  /* Calls made so far, across the whole run */
  unsigned int num_clusters = info -> num_clusters;
  unsigned int i = 0;  /* Index into w1 */
  unsigned int j = 0;  /* Index into w2 */
  unsigned int k = 0;  /* Index into clusters */
  PROBNODE temp;
  PROBNODE tempsum = 0.0;
  unsigned int nonprob = 0;
  FILE *fp = NULL;
  /*  sizeof (suffix) already counts the terminating NUL  */
  size_t fn_size = strlen (info -> base_fn) + sizeof (suffix);
  char *fn = wmalloc (sizeof (char) * fn_size);
  time_t start;
  time_t end;

  time (&start);
  snapshot_count++;

  /*  Bounded write; fn_size is exactly large enough for base_fn + suffix  */
  snprintf (fn, fn_size, "%s%s", info -> base_fn, suffix);

  /*  Header:  dimensions followed by the row and column identifiers  */
  if (info -> textio) {
    FOPEN (fn, fp, "w");
    fprintf (fp, "%u\t", info -> m);
    fprintf (fp, "%u\t", info -> n);
    for (i = 0; i < info -> m; i++) {
      fprintf (fp, "%u\t", info -> row_ids[i]);
    }
    for (j = 0; j < info -> n; j++) {
      fprintf (fp, "%u\t", info -> column_ids[j]);
    }
  }
  else {
    FOPEN (fn, fp, "wb");
    fwrite (&info -> m, sizeof (unsigned int), 1, fp);
    fwrite (&info -> n, sizeof (unsigned int), 1, fp);
    fwrite (info -> row_ids, sizeof (unsigned int), info -> m, fp);
    fwrite (info -> column_ids, sizeof (unsigned int), info -> n, fp);
  }

  /*  Body:  one value per (i, j) pair, in row-major order  */
  for (i = 0; i < info -> m; i++) {
    for (j = 0; j < info -> n; j++) {
      /*
      **  Seed with cluster 0, then fold in the remaining clusters.
      **  NOTE(review):  cluster 0 reads info -> probz[0] directly while
      **  the loop uses GET_PROBZ (k); presumably equivalent -- confirm.
      */
      temp = info -> probz[0] + GET_PROBW1_Z (0, i) + GET_PROBW2_Z (0, j);
      for (k = 1; k < num_clusters; k++) {
        /* temp stores logarithms */
        logSumsInline (temp, (GET_PROBZ (k) + GET_PROBW1_Z (k, i) + GET_PROBW2_Z (k, j)));
      }

      /* temp stores logarithms; round it to ROUND_DIGITS */
      if (info -> rounding) {
        temp = (round (temp * ROUND_DIGITS)) / ROUND_DIGITS;
      }

      /* temp stores logarithms */
      if (info -> textio) {
        fprintf (fp, "%lf\t", temp);
      }
      else {
        fwrite (&temp, sizeof (PROBNODE), 1, fp);
      }

      /*  A logarithm above 0 cannot correspond to a probability <= 1  */
      if (temp > 0) {
        nonprob++;
      }
      tempsum += DOEXP (temp);
    }
  }

  /*
  **  None of the individual writes above are checked, so look at the
  **  stream's sticky error flag once before closing.  Only warn:  callers
  **  have never had this function terminate on a short write.
  */
  if (ferror (fp)) {
    fprintf (stderr, "==\tWarning:  write error on %s\n", fn);
  }

  FCLOSE (fp);
  wfree (fn);

  if ((info -> verbose) && (info -> iter == UINT_MAX)) {
    fprintf (stderr, "==\tNon-probabilities: %u\n", nonprob);
    fprintf (stderr, "==\tSum of p(x,y): %f\n", tempsum);
    fprintf (stderr, "==\tTotal output files printed %u\n", snapshot_count);
  }

  time (&end);
  info -> printCoProbs_time += difftime (end, start);
}