新月実装開発部
Bottom of this page.
|
<<last
0
1
2
afd30e3f
anonymous
2015-12-12 15:29
@markdown
クローラがようやくちゃんと動くようになりました。
/getをうまく使わないとすぐタイムアウトになっちゃいますね、これ。
/headでレスの欠損を調べて、一括でダウンロードできなかったスレは
レコードを個別にダウンロードしています。
```clojure
(defn get-files-with-recent-command
  "Asks every node in @active-nodes for its recent list (`recent` with range
  \"0-\", queried in parallel via pmap) and returns the set of thread file
  names found in the responses, minus `known-corrupt-files`.

  Only lines shaped like `<stamp><><32-hex id><>thread_<HEX>[<>...]` are
  considered; everything else in a response is ignored.

  NOTE(review): `known-corrupt-files` is passed to clojure.set/difference, so
  it is presumably a set -- confirm at its definition."
  []
  (let [responses (pmap #(recent % "0-") @active-nodes)
        ;; Split each response on its own. Concatenating the raw responses
        ;; first would fuse the last line of one node's answer with the first
        ;; line of the next whenever a response lacks a trailing newline, and
        ;; the fused text can still match the pattern with a corrupted name.
        ;; (str ...) turns a nil response into "" so it contributes no names.
        lines (mapcat #(clojure.string/split (str %) #"\n") responses)
        ;; `keep` drops the lines for which the pattern does not match.
        file-names (keep #(second (re-find #"^[0-9]+<>[0-9a-f]{32}<>(thread_[0-9A-F]+)(<>.*)?$" %))
                         lines)]
    (clojure.set/difference (set file-names) known-corrupt-files)))
(defn download-thread-from-node
  "Downloads records of thread `file-name` from `node-name` and stores them
  with db/add-record.

  Arguments:
    node-name - node to contact; must satisfy valid-node-name?
    file-name - thread file name; must satisfy valid-file-name?
    range     - range string appended to the request URL; must satisfy
                valid-range?. Defaults to \"0-\".

  Behaviour:
    * range \"0-\" and the thread already has local records: the node's
      /head listing is compared with the local {:stamp :record-id} pairs.
      The span from the oldest to the newest missing stamp is fetched in one
      request; whatever is still missing afterwards is fetched stamp by stamp.
    * otherwise: /get is issued for `range` and every well-formed line is
      handed to db/add-record. A record whose insertion throws is logged at
      debug level and skipped.

  Returns 0 when /head shows nothing missing, nil after the missing-record
  path has run, the number of well-formed lines received on the /get path,
  and nil when an Exception was caught (it is logged with timbre/error).

  Throws IllegalArgumentException for an invalid node name, file name or
  range; these checks run before the try and are therefore not swallowed."
  ([node-name file-name]
   (download-thread-from-node node-name file-name "0-"))
  ([node-name file-name range]
   (timbre/debug "download-thread-from-node:" node-name file-name range)
   (when-not (valid-node-name? node-name)
     (throw (IllegalArgumentException. "Invalid node name.")))
   (when-not (valid-file-name? file-name)
     (throw (IllegalArgumentException. "Invalid file name.")))
   (when-not (valid-range? range)
     (throw (IllegalArgumentException. "Invalid range.")))
   (try
     (let [file-id (db/get-file-id file-name)
           ;; Identity of a record for comparison purposes.
           record-key (fn [record]
                        {:stamp (:stamp record) :record-id (:record-id record)})
           ;; Reads the locally stored records as a set of record keys.
           ;; A function, because it must be re-evaluated after a download.
           stored-keys (fn []
                         (set (map record-key
                                   (db/get-all-records-in-file-without-bodies file-id))))
           ;; Blanks out lines not matching `line-pattern`, strips CRs,
           ;; collapses blank lines and returns the remaining lines.
           response-lines (fn [body line-pattern]
                            (remove empty?
                                    (-> body
                                        (clojure.string/replace line-pattern "")
                                        (clojure.string/replace #"\r" "")
                                        (clojure.string/replace #"\n+" "\n")
                                        (clojure.string/split-lines))))
           existing-keys (when file-id (stored-keys))]
       (if (and (= range "0-") (seq existing-keys))
         ; Use /head to find missing records.
         (let [body (:body (client/get (str "http://" node-name "/head/" file-name "/" range) http-params))
               ;; Long/parseLong: a stamp is matched by [0-9]+ and may exceed
               ;; the int range, which would abort the whole download.
               remote-keys (set (map #(let [match (re-find #"^([0-9]+)<>([0-9a-f]{32})" %)]
                                        {:stamp (Long/parseLong (nth match 1))
                                         :record-id (nth match 2)})
                                     (response-lines body #"(?m)^(?![0-9]+<>[0-9a-f]{32}).*$")))
               missing (clojure.set/difference remote-keys existing-keys)]
           (if (empty? missing)
             0
             (let [stamps (map :stamp missing)]
               ;; One bulk request covering every missing record.
               (download-thread-from-node node-name file-name
                                          (str (apply min stamps) "-" (apply max stamps)))
               ;; Re-read the database: comparing against the snapshot taken
               ;; before the bulk request would report every record as still
               ;; missing and re-download all of them one by one.
               (let [still-missing (clojure.set/difference missing (stored-keys))]
                 ;; distinct: several records may share a stamp, and one
                 ;; request per stamp is enough.
                 (doseq [stamp (distinct (map :stamp still-missing))]
                   (download-thread-from-node node-name file-name (str stamp)))))))
         ; Use the supplied range.
         (let [body (:body (client/get (str "http://" node-name "/get/" file-name "/" range) http-params))
               records (response-lines body #"(?m)^(?![0-9]+<>[0-9a-f]{32}<>).*$")]
           (dorun
             (pmap
               #(try
                  (let [match (re-find #"^([0-9]+)<>([0-9a-f]{32})<>(.*)$" %)
                        stamp (nth match 1)
                        record-id (nth match 2)
                        body (nth match 3)]
                    (db/add-record file-id stamp record-id body))
                  ;; Best effort: one bad record must not stop the others.
                  (catch Throwable _ (timbre/debug (str "download-thread-from-node: Record skipped: " %))))
               records))
           ;(if-not (valid-file? file)
           ;  (throw (Exception. "Invalid file.")))
           (count records))))
     (catch Exception e
       (timbre/error e)
       nil))))
(defn download-thread-from-all-active-nodes
  "Calls download-thread-from-node for `file-name` on every node in
  @active-nodes, one after another, in shuffled order.

  `range` defaults to \"0-\". Throws IllegalArgumentException when
  `file-name` fails valid-file-name? or `range` fails valid-range?.
  Always returns true once all nodes have been visited."
  ([file-name]
   (download-thread-from-all-active-nodes file-name "0-"))
  ([file-name range]
   (when-not (valid-file-name? file-name)
     (throw (IllegalArgumentException. "Invalid file name.")))
   (when-not (valid-range? range)
     (throw (IllegalArgumentException. "Invalid range.")))
   ;; Sequential on purpose: the original used a non-parallel map here.
   (doseq [node-name (shuffle @active-nodes)]
     (download-thread-from-node node-name file-name range))
   true))
(defn crawl-node
  "Downloads every file listed by db/get-all-files from `node-name`, visiting
  the files in shuffled order.

  Before each file the node is looked up in @active-nodes again; files are
  skipped once the node is no longer listed there. Any Throwable is logged
  with timbre/error. Returns nil."
  [node-name]
  (timbre/debug "crawl-node:" node-name)
  (try
    (let [file-names (shuffle (map :file-name (db/get-all-files)))]
      (doseq [file-name file-names]
        ;; Re-checked per file, so the deref is deliberately inside the loop.
        (when (some #{node-name} @active-nodes)
          (download-thread-from-node node-name file-name))))
    (catch Throwable t
      (timbre/error t)
      nil)))
(defn crawl-nodes
  "Runs crawl-node over every node in @active-nodes, in shuffled order and in
  parallel via pmap. Any Throwable is logged with timbre/error.
  Returns nil."
  []
  (timbre/debug "crawl-nodes")
  (try
    ;; Disabled: registering the files reported by the recent command.
    #_(let [file-names (get-files-with-recent-command)]
        (dorun (pmap #(db/add-file %) file-names)))
    (->> @active-nodes
         shuffle
         (pmap crawl-node)
         dorun)
    (catch Throwable t
      (timbre/error t)
      nil)))
```
Top of this page.
|
<<last
0
1
2
Name
E-mail
Post text
Agreement
Attach
limit: 15360KB
Suffix
AUTO
.1
.123
.1905.1
.1km
.2
.3
.3dm
.3dml
.3g2
.3gp
.3gpp
.3gpp2
.3mf
.4
.5
.6
.669
.7
.726
.8
.SAR
.VES
.a
.a2l
.aa3
.aac
.aal
.abc
.ac
.ac2
.ac3
.acc
.acn
.acu
.acutc
.adts
.aep
.afp
.ahead
.ai
.aif
.aifc
.aiff
.ait
.ami
.aml
.amr
.anx
.apk
.apkg
.apng
.appcache
.apr
.apxml
.art
.artisan
.asc
.ascii
.asf
.asice
.asics
.aso
.ass
.asx
.at3
.atc
.atf
.atfx
.atom
.atomcat
.atomdeleted
.atomsvc
.atx
.atxml
.au
.auc
.avci
.avcs
.avi
.awb
.axa
.axv
.azf
.azs
.azv
.azw3
.bar
.bat
.bcpio
.bdm
.bed
.bh2
.bik
.bin
.bk2
.bkm
.bmed
.bmi
.bmml
.bmp
.bmpr
.box
.bpd
.bsp
.btf
.btif
.bz2
.c
.c11amc
.c11amz
.c3ex
.c4d
.c4f
.c4g
.c4p
.c4u
.cab
.cap
.cbor
.cbr
.cbz
.cc
.ccc
.ccmp
.ccxml
.cdbcmsg
.cdf
.cdfx
.cdkey
.cdmia
.cdmic
.cdmid
.cdmio
.cdmiq
.cdxml
.cdy
.cea
.cellml
.cer
.cgm
.chm
.chrt
.cif
.cii
.cil
.cl
.cla
.class
.clkk
.clkp
.clkt
.clkw
.clkx
.clue
.cmc
.cml
.cmp
.cmsc
.cnd
.coffee
.conf
.copyright
.cpio
.cpkg
.cpl
.cpt
.crl
.crtr
.crx
.cryptonote
.csh
.csl
.csp
.csrattrs
.css
.cst
.csv
.csvs
.cuc
.curl
.cw
.cww
.cxx
.dae
.daf
.dart
.dataless
.davmount
.dbf
.dcd
.dcf
.dcm
.dcr
.dd
.dd2
.ddd
.ddf
.deb
.dfac
.dib
.dii
.dim
.dir
.dis
.dist
.distz
.dit
.dive
.djv
.djvu
.dll
.dls
.dm
.dmp
.dms
.dna
.doc
.docjson
.docm
.docx
.dor
.dot
.dotm
.dotx
.dp
.dpg
.dpgraph
.dpkg
.dr
.drc
.drle
.dsc
.dsm
.dssc
.dtd
.dts
.dtshd
.dvb
.dvc
.dvi
.dwd
.dwf
.dwg
.dxf
.dxp
.dxr
.dzr
.ecelp4800
.ecelp7470
.ecelp9600
.ecig
.ecigprofile
.ecigtheme
.edm
.edx
.efi
.efif
.ei6
.el
.emf
.eml
.emm
.emma
.emotionml
.ent
.entity
.enw
.eol
.eot
.ep
.eps
.epub
.es
.es3
.esa
.esf
.espass
.et3
.etx
.evb
.evc
.evw
.exe
.exi
.exr
.ext
.ez
.ez2
.ez3
.f90
.fbs
.fcdt
.fcs
.fdf
.fdt
.fe_launch
.fg5
.finf
.fit
.fits
.fla
.flac
.flb
.flo
.flt
.flv
.flw
.flx
.fly
.fm
.fnc
.fo
.fpx
.frm
.fsc
.fst
.ftc
.fti
.fts
.fvt
.fxm
.fxp
.fxpl
.fzs
.g2w
.g3
.g3w
.gac
.gbr
.gdl
.geo
.geojson
.gex
.ggb
.ggt
.ghf
.gif
.gim
.glb
.glbin
.glbuf
.gltf
.gml
.gmx
.gph
.gpkg
.gqf
.gqs
.gram
.gre
.grv
.grxml
.gsheet
.gsm
.gtar
.gtm
.gtw
.gv
.gxt
.gz
.g³
.h
.hal
.hbc
.hbci
.hdf
.hdr
.hdt
.heic
.heics
.heif
.heifs
.hej2
.held
.hgl
.hh
.hpgl
.hpi
.hpid
.hps
.hpub
.hqx
.hsj2
.htke
.htm
.html
.hvd
.hvp
.hvs
.hxx
.i2g
.ic0
.ic1
.ic2
.ic3
.ic4
.ic5
.ic6
.ic7
.ic8
.ica
.icc
.icd
.ice
.icf
.icm
.ico
.ics
.ief
.ifb
.ifm
.iges
.igl
.igm
.ign
.ignition
.igs
.igx
.iif
.imf
.img
.imgcal
.imi
.imp
.ims
.imscc
.ink
.inkml
.iota
.ipfix
.ipk
.irm
.irp
.ism
.iso
.istc
.isws
.itp
.its
.ivp
.ivu
.jad
.jam
.jar
.jfif
.jhc
.jisp
.jls
.jlt
.jnlp
.joda
.jp2
.jpe
.jpeg
.jpf
.jpg
.jpg2
.jpgm
.jph
.jpm
.jpx
.jrd
.js
.json
.json-patch
.jsonld
.jsontd
.jtd
.jxr
.jxra
.jxrs
.jxs
.jxsc
.jxsi
.jxss
.kar
.karbon
.kcm
.keynote
.kfo
.kia
.kil
.kml
.kmz
.kne
.knp
.kom
.kon
.koz
.kpr
.kpt
.ksh
.ksp
.ktr
.ktx
.ktz
.kwd
.kwt
.l16
.lasjson
.lasxml
.latex
.lbc
.lbd
.lbe
.lca
.lcs
.le
.les
.lgr
.lha
.link66
.list3820
.listafp
.lmp
.loas
.log
.loom
.lostsyncxml
.lostxml
.lpf
.lrm
.ltf
.lvp
.lwp
.lxf
.lzh
.m
.m15
.m1v
.m21
.m2v
.m3u
.m3u8
.m4a
.m4s
.m4u
.m4v
.ma
.mads
.maei
.mag
.mail
.man
.manifest
.markdown
.mb
.mbk
.mbox
.mc1
.mc2
.mcd
.md
.mdc
.mdi
.me
.med
.mesh
.meta4
.metalink
.mets
.mf4
.mfm
.mft
.mgp
.mgz
.mhas
.mht
.mhtml
.mid
.midi
.mif
.miz
.mj2
.mjp2
.mjs
.mk3d
.mka
.mkv
.mlp
.mmd
.mmdb
.mmf
.mml
.mmr
.mms
.mod
.model-inter
.mods
.moml
.mov
.movie
.mp1
.mp2
.mp21
.mp3
.mp4
.mpa
.mpc
.mpd
.mpdd
.mpe
.mpeg
.mpf
.mpg
.mpg4
.mpga
.mpkg
.mpm
.mpn
.mpp
.mpt
.mpw
.mpy
.mqy
.mrc
.mrcx
.ms
.msa
.msd
.mseed
.mseq
.msf
.msh
.msl
.msm
.msty
.mtl
.mtm
.mts
.multitrack
.mus
.musd
.mvt
.mwc
.mwf
.mxf
.mxi
.mxl
.mxmf
.mxml
.mxs
.mxu
.n-gage
.n3
.nb
.nbp
.nc
.ndc
.ndl
.nds
.ngdat
.nim
.nimn
.nitf
.nlu
.nml
.nnd
.nns
.nnw
.notebook
.nq
.ns2
.ns3
.ns4
.nsf
.nsg
.nsh
.nt
.ntf
.numbers
.nws
.o
.o4a
.o4v
.oa2
.oa3
.oas
.obg
.obgx
.obj
.oda
.odb
.odc
.odd
.odf
.odg
.odi
.odm
.odp
.ods
.odt
.odx
.oeb
.oga
.ogex
.ogg
.ogv
.ogx
.omg
.opf
.oprc
.opus
.or2
.or3
.org
.orq
.ors
.osf
.osm
.ota
.otc
.otf
.otg
.oth
.oti
.otp
.ots
.ott
.ovl
.owx
.oxlicg
.oxps
.oxt
.p10
.p12
.p2p
.p7c
.p7m
.p7s
.p8
.p8e
.pack
.package
.pages
.paw
.pbd
.pbm
.pcap
.pcl
.pcx
.pdb
.pdf
.pdx
.pem
.pfr
.pfx
.pgb
.pgm
.pgn
.pgp
.pil
.pkd
.pkg
.pki
.pkipath
.pl
.plb
.plc
.plf
.plj
.plp
.pls
.pm
.pml
.png
.pnm
.pod
.portpkg
.pot
.potm
.potx
.ppa
.ppam
.ppd
.ppkg
.ppm
.pps
.ppsm
.ppsx
.ppt
.pptm
.ppttc
.pptx
.pqa
.prc
.pre
.preminet
.provn
.provx
.prz
.ps
.psb
.psd
.pseg3820
.psfs
.psg
.psid
.pskcxml
.pt
.pti
.ptid
.ptrom
.pub
.pvb
.pwn
.pwz
.py
.pya
.pyc
.pyo
.pyv
.qam
.qbo
.qca
.qcall
.qcp
.qfx
.qps
.qt
.quiz
.quox
.qvd
.qwd
.qwt
.qxb
.qxd
.qxl
.qxt
.ra
.ram
.rapd
.rar
.ras
.rcprofile
.rct
.rdf
.rdf-crypt
.rdz
.relo
.rep
.request
.rfcxml
.rgb
.rgbe
.rif
.rip
.rl
.rlc
.rld
.rm
.rms
.rnc
.rnd
.rng
.roa
.roff
.rp9
.rpm
.rpss
.rpst
.rq
.rs
.rsat
.rsheet
.rsm
.rss
.rst
.rtf
.rtx
.rusd
.s11
.s14
.s1a
.s1e
.s1g
.s1h
.s1j
.s1m
.s1n
.s1p
.s1q
.s1w
.s3df
.s3m
.sac
.saf
.sam
.sandboxed
.sc
.scd
.sce
.scim
.scld
.scm
.scq
.scs
.scsf
.sdf
.sdkd
.sdkm
.sdo
.sdoc
.sdp
.see
.seed
.sem
.sema
.semd
.semf
.seml
.senml
.senml-etchc
.senml-etchj
.senmlc
.senmle
.senmlx
.sensml
.sensmlc
.sensmle
.sensmlx
.sfc
.sfd
.sfd-hdstx
.sfs
.sgi
.sgif
.sgm
.sgml
.sh
.shar
.shf
.shp
.shx
.si
.sic
.sid
.sieve
.sig
.silo
.sis
.sisx
.sit
.siv
.sjp
.sjpg
.skd
.skm
.skp
.skt
.sl
.sla
.slaz
.slc
.sldm
.sldx
.sls
.slt
.sm
.smc
.smh
.smht
.smi
.smil
.smk
.sml
.smo
.smov
.smp
.smp3
.smpg
.sms
.smv
.smzip
.snd
.so
.soa
.soc
.sos
.spd
.spdf
.spf
.spl
.spn
.spng
.spo
.spot
.spp
.sppt
.spq
.spx
.sql
.sqlite
.sqlite3
.sr
.src
.sru
.srx
.sse
.ssf
.ssml
.ssv
.ssvc
.ssw
.sswf
.st
.stc
.std
.stf
.sti
.stif
.stix
.stk
.stl
.stm
.stml
.str
.study-inter
.stw
.sub
.sus
.susp
.sv4cpio
.sv4crc
.svc
.svg
.svgz
.swf
.swi
.swidtag
.sxc
.sxd
.sxg
.sxi
.sxl
.sxls
.sxm
.sxw
.t
.t38
.tag
.taglet
.tam
.tamp
.tamx
.tao
.tap
.tar
.tat
.tatp
.tatx
.tau
.tcap
.tcl
.tcu
.td
.teacher
.tei
.teiCorpus
.ter
.tex
.texi
.texinfo
.text
.tfi
.tfx
.tga
.tgz
.thmx
.tif
.tiff
.tlclient
.tmo
.tnef
.tnf
.torrent
.tpl
.tpt
.tr
.tra
.tree
.trig
.ts
.tsa
.tsd
.tsq
.tsr
.tst
.tsv
.ttc
.ttf
.ttl
.ttml
.tuc
.tur
.twd
.twds
.txd
.txf
.txt
.u8dsn
.u8hdr
.u8mdn
.u8msg
.udeb
.ufd
.ufdl
.uis
.ult
.umj
.uni
.unityweb
.uo
.uoml
.upa
.uri
.uric
.urim
.urimap
.uris
.usdz
.ustar
.utz
.uva
.uvd
.uvf
.uvg
.uvh
.uvi
.uvm
.uvp
.uvs
.uvt
.uvu
.uvv
.uvva
.uvvd
.uvvf
.uvvg
.uvvh
.uvvi
.uvvm
.uvvp
.uvvs
.uvvt
.uvvu
.uvvv
.uvvx
.uvvz
.uvx
.uvz
.vbk
.vbox
.vcard
.vcd
.vcf
.vcg
.vcj
.vcx
.vew
.vfr
.viaframe
.vis
.viv
.vmt
.vpm
.vrml
.vsc
.vsd
.vsf
.vss
.vst
.vsw
.vtf
.vtt
.vtu
.vwx
.vxml
.wadl
.wasm
.wav
.wax
.wbmp
.wbs
.wbxml
.wcm
.wdb
.webm
.webmanifest
.webp
.wg
.wgt
.wif
.win
.wiz
.wk1
.wk3
.wk4
.wks
.wlnk
.wm
.wma
.wmc
.wmf
.wml
.wmlc
.wmls
.wmlsc
.wmv
.wmx
.woff
.woff2
.wpd
.wpl
.wps
.wqd
.wrl
.wsc
.wsdl
.wspolicy
.wtb
.wv
.wvx
.x3d
.x3db
.x3dv
.x3dvz
.x_b
.x_t
.xar
.xav
.xbd
.xbm
.xca
.xcs
.xct
.xdd
.xdf
.xdm
.xdp
.xdssc
.xdw
.xel
.xer
.xfd
.xfdf
.xfdl
.xhe
.xht
.xhtm
.xhtml
.xhvml
.xif
.xla
.xlam
.xlb
.xlc
.xlf
.xlim
.xlm
.xls
.xlsb
.xlsm
.xlsx
.xlt
.xltm
.xltx
.xlw
.xml
.xmls
.xmt_bin
.xmt_txt
.xns
.xo
.xodp
.xods
.xodt
.xop
.xotp
.xots
.xott
.xpdl
.xpi
.xpm
.xpr
.xps
.xpw
.xpx
.xsd
.xsf
.xsl
.xslt
.xsm
.xspf
.xul
.xvm
.xvml
.xwd
.xyz
.xyze
.xz
.yang
.yin
.yme
.yt
.zaz
.zfc
.zfo
.zip
.zir
.zirz
.zmm
.zone
.zst
Send to other nodes
Turn off first post for new BBS when you want to save your anonymity.
Error in timestamp
To save anonymity. Turn off for consecutive post.
POST
(新月実装開発部/255/0.8MB)
Powered by
shinGETsu
.